From 92bfefccd465052e492f669ce561aa25b0110283 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 31 May 2004 05:50:10 +0000 Subject: [PATCH] Fully implement arbitrary delimiters on DATA LIST, extending the half implementation that was already there. Update our string ADTs, struct string and struct len_string. Get rid of pool support, which was largely unused. Rename lots of functions to have more obvious or consistent names. Fix a few miscellaneous bugs. --- ChangeLog | 4 + TODO | 4 +- configure.ac | 3 +- doc/ChangeLog | 5 + doc/pspp.texi | 71 ++-- src/ChangeLog | 129 +++++++ src/aggregate.c | 4 +- src/ascii.c | 58 ++-- src/casefile.c | 43 ++- src/command.c | 8 +- src/count.c | 2 +- src/data-in.c | 6 +- src/data-list.c | 332 ++++++++++-------- src/devind.c | 10 +- src/dfm.c | 666 +++++++++++++++++-------------------- src/dfm.h | 35 +- src/error.c | 12 +- src/expr-prs.c | 4 +- src/file-handle.h | 1 + src/file-handle.q | 27 +- src/file-type.c | 18 +- src/filename.c | 50 +-- src/format.c | 8 +- src/getline.c | 26 +- src/glob.c | 2 +- src/html.c | 12 +- src/include.c | 2 +- src/inpt-pgm.c | 6 +- src/lexer.c | 70 ++-- src/matrix-data.c | 127 +++---- src/mis-val.c | 2 +- src/output.c | 18 +- src/postscript.c | 22 +- src/print.c | 2 +- src/q2c.c | 2 +- src/recode.c | 12 +- src/repeat.c | 16 +- src/set.q | 6 +- src/str.c | 147 ++++---- src/str.h | 83 +++-- src/t-test.q | 2 +- src/tab.c | 7 +- src/title.c | 2 +- src/val-labs.c | 4 +- src/var-labs.c | 2 +- tests/ChangeLog | 8 + tests/Makefile.am | 1 + tests/command/data-list.sh | 128 +++++++ tests/command/tabs.sh | 4 +- 49 files changed, 1293 insertions(+), 920 deletions(-) create mode 100755 tests/command/data-list.sh diff --git a/ChangeLog b/ChangeLog index 429564f0..66714b68 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +Sun May 30 18:19:03 2004 Ben Pfaff + + * config.ac: Check for valgrind/valgrind.h. + Mon Mar 29 15:22:48 2004 Ben Pfaff * TODO: Updated. 
diff --git a/TODO b/TODO index 5a77a930..fb3d8c7e 100644 --- a/TODO +++ b/TODO @@ -1,8 +1,10 @@ -Time-stamp: <2004-04-24 22:23:04 blp> +Time-stamp: <2004-05-30 18:09:06 blp> What Ben's working on now. -------------------------- +Does SET work correctly? + Update q2c input format description. Rewrite output subsystem, break into multiple processes. diff --git a/configure.ac b/configure.ac index 69ac564b..4859d654 100644 --- a/configure.ac +++ b/configure.ac @@ -58,7 +58,8 @@ fi dnl Checks for header files. AC_CHECK_HEADERS([limits.h memory.h sys/stat.h sys/time.h sys/types.h \ - fpu_control.h sys/mman.h sys/wait.h ieeefp.h fenv.h]) + fpu_control.h sys/mman.h sys/wait.h ieeefp.h fenv.h \ + valgrind/valgrind.h]) AC_HEADER_STAT AC_HEADER_STDC AC_HEADER_TIME diff --git a/doc/ChangeLog b/doc/ChangeLog index de43ca95..ceaaacfa 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,8 @@ +Sun May 30 22:44:25 2004 Ben Pfaff + + * pspp.texi: Update FILE HANDLE, DATA LIST FREE, DATA LIST LIST + documentation to reflect latest changes. + Mon Apr 19 22:46:37 2004 Ben Pfaff * pspp.texi: Minor updates to data file and portable file diff --git a/doc/pspp.texi b/doc/pspp.texi index eb1d0148..d15792b3 100644 --- a/doc/pspp.texi +++ b/doc/pspp.texi @@ -5122,6 +5122,7 @@ This example shows keywords abbreviated to their first 3 letters. @display DATA LIST FREE + [(@{TAB,'c'@}, @dots{})] [@{NOTABLE,TABLE@}] FILE='filename' END=end_var @@ -5132,16 +5133,23 @@ where each var_spec takes one of the forms var_list * @end display -In free format, the input data is structured as a series of comma- or -whitespace-delimited fields (end of line is one form of whitespace; it -is not treated specially). Field contents may be surrounded by matched -pairs of apostrophes (@samp{'}) or quotes (@samp{"}), or they may be -unenclosed. For any type of field leading white space (up to the -apostrophe or quote, if any) is not included in the field. 
- -Multiple consecutive delimiters are equivalent to a single delimiter. -To specify an empty field, write an empty set of single or double -quotes; for instance, @samp{""}. +In free format, the input data is, by default, structured as a series +of fields separated by spaces, tabs, commas, or line breaks. Each +field's content may be unquoted, or it may be quoted with a pair of +apostrophes (@samp{'}) or double quotes (@samp{"}). Unquoted white +space separates fields but is not part of any field. Any mix of +spaces, tabs, and line breaks is equivalent to a single space for the +purpose of separating fields, but consecutive commas will skip a +field. + +Alternatively, delimiters can be specified explicitly, as a +parenthesized, comma-separated list of single-character strings +immediately following FREE. The word TAB may also be used to specify +a tab character as a delimiter. When delimiters are specified +explicitly, only the given characters, plus line breaks, separate +fields. Furthermore, leading spaces at the beginnings of fields are +not trimmed, consecutive delimiters define empty fields, and no form +of quoting is allowed. The NOTABLE and TABLE subcommands are as in @cmd{DATA LIST FIXED} above. NOTABLE is the default. @@ -5166,6 +5174,7 @@ on field width apply, but they are honored on output. @display DATA LIST LIST + [(@{TAB,'c'@}, @dots{})] [@{NOTABLE,TABLE@}] FILE='filename' END=end_var @@ -5211,14 +5220,19 @@ the current input program. @xref{INPUT PROGRAM}. @display FILE HANDLE handle_name /NAME='filename' - /RECFORM=@{VARIABLE,FIXED,SPANNED@} + /MODE=@{CHARACTER,IMAGE@} /LRECL=rec_len - /MODE=@{CHARACTER,IMAGE,BINARY,MULTIPUNCH,360@} + /TABWIDTH=tab_width @end display -Use @cmd{FILE HANDLE} to define the attributes of a file that does -not use conventional variable-length records terminated by new-line -characters. 
+Use @cmd{FILE HANDLE} to associate a file handle name with a file and +its attributes, so that later commands can refer to the file by its +handle name. Because names of text files can be specified directly on +commands that access files, @cmd{FILE HANDLE} is only needed when a +file is not an ordinary file containing lines of text. However, +@cmd{FILE HANDLE} may be used even for text files, and it may be +easier to specify a file's name once and later refer to it by an +abstract handle. Specify the file handle name as an identifier. Any given identifier may only appear once in a PSPP run. File handles may not be reassigned to a @@ -5228,18 +5242,19 @@ HANDLE} command name. The NAME subcommand specifies the name of the file associated with the handle. It is the only required subcommand. -The RECFORM subcommand specifies how the file is laid out. VARIABLE -specifies variable-length lines terminated with new-lines, and it is the -default. FIXED specifies fixed-length records. SPANNED is not -supported. - -LRECL specifies the length of fixed-length records. It is required if -@code{/RECFORM FIXED} is specified. +MODE specifies a file mode. In CHARACTER mode, the default, the data +file is opened in ANSI C text mode, so that local end of line +conventions are followed, and each text line is read as one record. +In CHARACTER mode, most input programs will expand tabs to spaces +(@cmd{DATA LIST FREE} with explicitly specified delimiters is an +exception). By default, each tab is 4 characters wide, but an +alternate width may be specified on TABWIDTH. A tab width of 0 +suppresses tab expansion entirely. -MODE specifies a file mode. CHARACTER, the default, causes the data -file to be opened in ANSI C text mode. BINARY causes the data file to -be opened in ANSI C binary mode. The other possibilities are not -supported. +By contrast, in BINARY mode, the data file is opened in ANSI C binary +mode and records are a fixed length. 
In BINARY mode, LRECL specifies +the record length in bytes, with a default of 1024. Tab characters +are never expanded to spaces in binary mode. @node INPUT PROGRAM, LIST, FILE HANDLE, Data Input and Output @section INPUT PROGRAM @@ -6624,7 +6639,7 @@ character codes. On most modern computers, this is a form of ASCII. The aggregation functions listed above exclude all user-missing values from calculations. To include user-missing values, insert a period (@samp{.}) between the function name and left parenthesis -(e.g.~@samp{SUM.}). +(e.g.@: @samp{SUM.}). Normally, only a single case (for SD and SD., two cases) need be non-missing in each group for the aggregate variable to be @@ -9418,7 +9433,7 @@ character set translation table, followed by an 8-byte tag string. The 200-byte segment is divided into five 40-byte sections, each of which represents the string @code{@var{charset} SPSS PORT FILE} in a different character set encoding, where @var{charset} is the name of -the character set used in the file, e.g. @code{ASCII} or +the character set used in the file, e.g.@: @code{ASCII} or @code{EBCDIC}. Each string is padded on the right with spaces in its respective character set. diff --git a/src/ChangeLog b/src/ChangeLog index 9691ad7c..13b90dcc 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,132 @@ +Sun May 30 18:35:19 2004 Ben Pfaff + + Fully implement arbitrary delimiters on DATA LIST, extending the + half implementation that was already there. + + * data-list.c: (struct data_list_pgm) Remove `delim', add + `delims', `delim_cnt'. + (cmd_data_list) Initialize new members. Parse delimiters and + clean up code a bit. + (cut_field) Extract fields with arbitrary delimiters. Also, fix + handling of leading commas. + (read_from_data_list_fixed) Expand tabs. Adapt to new DFM + interfaces. + (read_from_data_list_free) Adapt to new DFM interfaces. + (read_from_data_list_list) Ditto. + (repeating_data_trns_proc) Ditto. 
+ + * dfm.c: Split up reader and writer into separate code, because + they do different things. Use struct string instead of explicit + allocation code, for clarity. + (enum dfm_reader_flags) New enum. + (struct dfm_fhuser_ext) Removed. + (struct dfm_reader_ext) New. + (get_reader) New function, used by just about all the reader + functions. + (dfm_close) Removed. + (close_reader) New function. + (dfm_open_for_reading) Rewrite initialization of dfm_fhuser_ext. + (dfm_open_for_writing) Ditto. + (macro force_line_buffer_expansion) Removed. + (count_tabs) Removed. + (tabs_to_spaces) Removed. + (read_record) Deal with new dfm_reader_ext. Use struct string + functions. Don't convert tabs to spaces. + (dfm_eof) New function. + (dfm_get_record) Changed interface, rewrote. + (dfm_expand_tabs) New function. + (dfm_fwd_record) Renamed dfm_forward_record(), updated to new + dfm_reader_ext, rewritten. + (dfm_bkwd_record) Renamed dfm_reread_record(), updated to new + dfm_reader_ext, rewritten. + (dfm_set_record) Removed in favor of dfm_forward_columns(). + (dfm_forward_columns) New function. + (dfm_get_cur_col) Renamed dfm_column_start, updated to new + dfm_reader_ext, rewritten. + (static var dfm_r_class) Use close_reader for the destructor. + (struct dfm_writer_ext) New. + (dfm_put_record) Updated to new dfm_writer_ext, rewritten. Uses + bounce buffer now instead of local allocation. + (close_writer) New function. + (static var dfm_writer_ext) Use close_writer for destructor. + (cmd_begin_data) Adapt to new dfm_reader_ext. + + * file-handle.q: Add support for per-file tab width. + (struct private_file_handle) Add tab_width member. + (q2c specifications) Add tabwidth subcommand. + (cmd_file_handle) Put parsed tab width into private_file_handle. + (create_file_handle) Set default tab width. + (handle_get_tab_width) New function. + + * file-type.c: (file_type_source_read) Adapt to new DFM interface. + + * inpt-pgm.c: (reread_trns_proc) Ditto. 
+ + * matrix-data.c: (context) Ditto. + (another_token) Ditto. + (mget_token) Ditto. + (force_eol) Ditto. + +Sun May 30 18:33:59 2004 Ben Pfaff + + * casefile.c: (casefile_destroy) Fix memory leak by freeing + cf->filename. + (casereader_destroy) Don't close file descriptor -1. + + * recode.c: (cmd_recode) Fix memory leak. + + * set.q: (q2c specifications) Fix typo in user message. + + * str.c: (st_bare_pad_len_copy) Change memcpy to memmove to avoid + undefined behavior for overlapping arguments. + +Sun May 30 18:31:48 2004 Ben Pfaff + + * casefile.c: valgrind doesn't implement posix_fadvise() yet, so + don't call it when we're running under valgrind. + (call_posix_fadvise) New function. + (casefile_to_disk) Use call_posix_fadvise(). + (reader_open_file) Ditto. + +Sun May 30 18:20:12 2004 Ben Pfaff + + Update our string ADTs, struct string and struct len_string. Get + rid of pool support, which was largely unused. Rename lots of + functions to have more obvious or consistent names. + + * ascii.c: Get rid of ascii_pool. It was only used for string + allocations. + (ascii_open_global) Don't create ascii_pool. + (ascii_close_driver) Don't destroy ascii_pool. + (ascii_postopen_driver) Don't use pool. + (ascii_close_driver) Destroy strings manually. + + * str.c: (ds_create) Remove pool argument, all references updated. + (ds_init) Ditto. + (ds_replace) Remove pool support, make more efficient when we + don't need to reallocate. + (ds_destroy) Remove pool support. + (ds_rpad) New function. + (ds_size) Renamed ds_capacity(), all references updated. + (ds_value) Renamed ds_c_str(), all references updated. + (ds_concat) Renamed ds_puts(), all references updated. + (ds_concat_buffer) Renamed ds_concat(), all references updated. + (ds_putchar) Renamed ds_putc(), all references updated. + (ds_getline) Renamed ds_gets(), all references updated. + (ls_create) Remove pool argument, all references updated. + (ls_create_buffer) Ditto. + (ls_destroy) Removed pool support. 
+ (ls_value) Renamed ls_c_str(), all references updated. + + * str.h: (ls_length) [__GNUC__] Add inline version. + (ls_c_str) [__GNUC__] Add inline version. + (ls_end) [__GNUC__] Add inline version. + (struct string) Remove pool member. Rename `size' to `capacity', + all references updated. + + * tab.c: (text_format) Instead of using pool argument to + ls_create_buffer(), call pool_register() on allocated data. + Mon Apr 26 22:40:07 2004 Ben Pfaff We're abusing the current ASCII driver by telling it to allocate a diff --git a/src/aggregate.c b/src/aggregate.c index 32de72ef..4f2f1f45 100644 --- a/src/aggregate.c +++ b/src/aggregate.c @@ -388,7 +388,7 @@ parse_aggregate_functions (struct agr_proc *agr) if (token == T_STRING) { ds_truncate (&tokstr, 255); - dest_label[n_dest - 1] = xstrdup (ds_value (&tokstr)); + dest_label[n_dest - 1] = xstrdup (ds_c_str (&tokstr)); lex_get (); } } @@ -454,7 +454,7 @@ parse_aggregate_functions (struct agr_proc *agr) lex_match (','); if (token == T_STRING) { - arg[i].c = xstrdup (ds_value (&tokstr)); + arg[i].c = xstrdup (ds_c_str (&tokstr)); type = ALPHA; } else if (token == T_NUM) diff --git a/src/ascii.c b/src/ascii.c index bdaac410..6fa1573b 100644 --- a/src/ascii.c +++ b/src/ascii.c @@ -184,22 +184,19 @@ struct ascii_driver_ext #endif }; -static struct pool *ascii_pool; - static int postopen (struct file_ext *); static int preclose (struct file_ext *); static int ascii_open_global (struct outp_class *this UNUSED) { - ascii_pool = pool_create (); return 1; } + static int ascii_close_global (struct outp_class *this UNUSED) { - pool_destroy (ascii_pool); return 1; } @@ -288,11 +285,11 @@ ascii_postopen_driver (struct outp_driver *this) this->length = x->l * this->vert; if (ls_null_p (&x->ops[OPS_FORMFEED])) - ls_create (ascii_pool, &x->ops[OPS_FORMFEED], "\f"); + ls_create (&x->ops[OPS_FORMFEED], "\f"); if (ls_null_p (&x->ops[OPS_NEWLINE]) - || !strcmp (ls_value (&x->ops[OPS_NEWLINE]), "default")) + || !strcmp (ls_c_str 
(&x->ops[OPS_NEWLINE]), "default")) { - ls_create (ascii_pool, &x->ops[OPS_NEWLINE], "\n"); + ls_create (&x->ops[OPS_NEWLINE], "\n"); x->file.mode = "wt"; } @@ -351,7 +348,7 @@ ascii_postopen_driver (struct outp_driver *this) c[0] = '+'; break; } - ls_create (ascii_pool, &x->box[i], c); + ls_create (&x->box[i], c); } } @@ -389,11 +386,18 @@ static int ascii_close_driver (struct outp_driver *this) { struct ascii_driver_ext *x = this->ext; + int i; assert (this->driver_open == 1); msg (VM (2), _("%s: Beginning closing..."), this->name); x = this->ext; + for (i = 0; i < OPS_COUNT; i++) + ls_destroy (&x->ops[i]); + for (i = 0; i < LNS_COUNT; i++) + ls_destroy (&x->box[i]); + for (i = 0; i < FSTY_COUNT; i++) + ls_destroy (&x->fonts[i]); if (x->lines != NULL) { int line; @@ -462,7 +466,7 @@ ascii_option (struct outp_driver *this, const char *key, int cat, subcat; const char *value; - value = ds_value (val); + value = ds_c_str (val); if (!strncmp (key, "box[", 4)) { char *tail; @@ -476,7 +480,7 @@ ascii_option (struct outp_driver *this, const char *key, } if (!ls_null_p (&x->box[indx])) msg (SW, _("Duplicate value for key `%s'."), key); - ls_create (ascii_pool, &x->box[indx], value); + ls_create (&x->box[indx], value); return; } @@ -605,17 +609,17 @@ ascii_option (struct outp_driver *this, const char *key, assert (0); abort (); } - ls_create (ascii_pool, s, value); + ls_create (s, value); } break; case font_string_arg: { if (!strcmp (value, "overstrike")) { - ls_destroy (ascii_pool, &x->fonts[subcat]); + ls_destroy (&x->fonts[subcat]); return; } - ls_create (ascii_pool, &x->fonts[subcat], value); + ls_create (&x->fonts[subcat], value); } break; case boolean_arg: @@ -659,7 +663,7 @@ postopen (struct file_ext *f) struct ascii_driver_ext *x = f->param; struct len_string *s = &x->ops[OPS_INIT]; - if (!ls_empty_p (s) && fwrite (ls_value (s), ls_length (s), 1, f->file) < 1) + if (!ls_empty_p (s) && fwrite (ls_c_str (s), ls_length (s), 1, f->file) < 1) { msg (ME, _("ASCII output 
driver: %s: %s"), f->filename, strerror (errno)); @@ -674,7 +678,7 @@ preclose (struct file_ext *f) struct ascii_driver_ext *x = f->param; struct len_string *d = &x->ops[OPS_DONE]; - if (!ls_empty_p (d) && fwrite (ls_value (d), ls_length (d), 1, f->file) < 1) + if (!ls_empty_p (d) && fwrite (ls_c_str (d), ls_length (d), 1, f->file) < 1) { msg (ME, _("ASCII output driver: %s: %s"), f->filename, strerror (errno)); @@ -971,7 +975,7 @@ delineate (struct outp_driver *this, struct outp_text *t, int draw) int max_y; /* Current position in string, character following end of string. */ - const char *s = ls_value (&t->s); + const char *s = ls_c_str (&t->s); const char *end = ls_end (&t->s); /* Temporary struct outp_text to pass to low-level function. */ @@ -1103,7 +1107,7 @@ text_draw (struct outp_driver *this, struct outp_text *t) int x = t->x; int y = t->y; - char *s = ls_value (&t->s); + char *s = ls_c_str (&t->s); /* Expand the line with the assumption that S takes up LEN character spaces (sometimes it takes up less). */ @@ -1201,7 +1205,7 @@ output_shorts (struct outp_driver *this, if (remaining >= len) { - memcpy (line_p, ls_value (box), len); + memcpy (line_p, ls_c_str (box), len); line_p += len; remaining -= len; } @@ -1209,7 +1213,7 @@ output_shorts (struct outp_driver *this, { if (!commit_line_buf (this)) return 0; - output_string (this, ls_value (box), ls_end (box)); + output_string (this, ls_c_str (box), ls_end (box)); remaining = LINE_BUF_SIZE - (line_p - line_buf); } } @@ -1426,7 +1430,7 @@ output_lines (struct outp_driver *this, int first, int count) abort (); } if (off) - output_string (this, ls_value (off), ls_end (off)); + output_string (this, ls_c_str (off), ls_end (off)); } /* Turn on new font. 
*/ @@ -1451,7 +1455,7 @@ output_lines (struct outp_driver *this, int first, int count) abort (); } if (on) - output_string (this, ls_value (on), ls_end (on)); + output_string (this, ls_c_str (on), ls_end (on)); } ep = bp + 1; @@ -1510,7 +1514,7 @@ output_lines (struct outp_driver *this, int first, int count) } } - output_string (this, ls_value (newline), ls_end (newline)); + output_string (this, ls_c_str (newline), ls_end (newline)); } } @@ -1542,7 +1546,7 @@ ascii_close_page (struct outp_driver *this) } for (cp = s, i = 0; i < x->top_margin; i++) { - memcpy (cp, ls_value (&x->ops[OPS_NEWLINE]), nl_len); + memcpy (cp, ls_c_str (&x->ops[OPS_NEWLINE]), nl_len); cp += nl_len; } output_string (this, s, &s[total_len]); @@ -1572,7 +1576,7 @@ ascii_close_page (struct outp_driver *this) len = min ((int) strlen (outp_title), x->w); memcpy (s, outp_title, len); } - memcpy (&s[x->w], ls_value (&x->ops[OPS_NEWLINE]), nl_len); + memcpy (&s[x->w], ls_c_str (&x->ops[OPS_NEWLINE]), nl_len); output_string (this, s, &s[total_len]); memset (s, ' ', x->w); @@ -1585,7 +1589,7 @@ ascii_close_page (struct outp_driver *this) len = min ((int) strlen (string), x->w); memcpy (s, string, len); } - memcpy (&s[x->w], ls_value (&x->ops[OPS_NEWLINE]), nl_len); + memcpy (&s[x->w], ls_c_str (&x->ops[OPS_NEWLINE]), nl_len); output_string (this, s, &s[total_len]); output_string (this, &s[x->w], &s[total_len]); } @@ -1600,10 +1604,10 @@ ascii_close_page (struct outp_driver *this) s = xrealloc (s, total_len); for (cp = s, i = 0; i < x->bottom_margin; i++) { - memcpy (cp, ls_value (&x->ops[OPS_NEWLINE]), nl_len); + memcpy (cp, ls_c_str (&x->ops[OPS_NEWLINE]), nl_len); cp += nl_len; } - memcpy (cp, ls_value (&x->ops[OPS_FORMFEED]), ff_len); + memcpy (cp, ls_c_str (&x->ops[OPS_FORMFEED]), ff_len); if ( x->paginate ) output_string (this, s, &s[total_len]); if (line_p != line_buf && !commit_line_buf (this)) diff --git a/src/casefile.c b/src/casefile.c index 9c889b64..d2c10383 100644 --- a/src/casefile.c +++ 
b/src/casefile.c @@ -32,6 +32,10 @@ #include "var.h" #include "workspace.h" +#ifdef HAVE_VALGRIND_VALGRIND_H +#include <valgrind/valgrind.h> +#endif + #define IO_BUF_SIZE 8192 /* A casefile is a sequentially accessible array of immutable @@ -153,6 +157,7 @@ casefile_destroy (struct casefile *cf) if (cf->filename != NULL && remove (cf->filename) == -1) msg (ME, _("%s: Removing temporary file: %s."), cf->filename, strerror (errno)); + free (cf->filename); free (cf->buffer); @@ -280,6 +285,23 @@ make_temp_file (int *fd, char **filename) return 1; } +static void +call_posix_fadvise (int fd UNUSED, + off_t offset UNUSED, off_t len UNUSED, + int advice UNUSED) +{ +#ifdef HAVE_VALGRIND_VALGRIND_H + /* Valgrind doesn't know about posix_fadvise() as of this + writing. */ + if (RUNNING_ON_VALGRIND) + return; +#endif + +#ifdef HAVE_POSIX_FADVISE + posix_fadvise (fd, offset, len, advice); +#endif +} + /* If CF is currently stored in memory, writes it to disk. Readers, if any, retain their current positions. */ void @@ -299,9 +321,7 @@ casefile_to_disk (struct casefile *cf) cf->storage = DISK; if (!make_temp_file (&cf->fd, &cf->filename)) err_failure (); -#if HAVE_POSIX_FADVISE - posix_fadvise (cf->fd, 0, 0, POSIX_FADV_SEQUENTIAL); -#endif + call_posix_fadvise (cf->fd, 0, 0, POSIX_FADV_SEQUENTIAL); cf->buffer = xmalloc (cf->buffer_size); memset (cf->buffer, 0, cf->buffer_size); @@ -493,9 +513,7 @@ reader_open_file (struct casereader *reader) } else file_ofs = 0; -#if HAVE_POSIX_FADVISE - posix_fadvise (reader->fd, file_ofs, 0, POSIX_FADV_SEQUENTIAL); -#endif + call_posix_fadvise (reader->fd, file_ofs, 0, POSIX_FADV_SEQUENTIAL); if (lseek (reader->fd, file_ofs, SEEK_SET) != file_ofs) msg (FE, _("%s: Seeking temporary file: %s."), reader->cf->filename, strerror (errno)); @@ -567,11 +585,14 @@ casereader_destroy (struct casereader *reader) else free (reader->buffer); - if (reader->cf->fd == -1) - reader->cf->fd = reader->fd; - else - safe_close (reader->fd); - + if (reader->fd != -1) + { + if 
(reader->cf->fd == -1) + reader->cf->fd = reader->fd; + else + safe_close (reader->fd); + } + free (reader); } diff --git a/src/command.c b/src/command.c index cd2b2038..5ff4f0de 100644 --- a/src/command.c +++ b/src/command.c @@ -486,7 +486,7 @@ parse_command_name (void) assert (word_cnt < sizeof words / sizeof *words); if (token == T_ID) - words[word_cnt++] = xstrdup (ds_value (&tokstr)); + words[word_cnt++] = xstrdup (ds_c_str (&tokstr)); else words[word_cnt++] = xstrdup ("-"); @@ -632,10 +632,10 @@ cmd_erase (void) if (!lex_force_string ()) return CMD_FAILURE; - if (remove (ds_value (&tokstr)) == -1) + if (remove (ds_c_str (&tokstr)) == -1) { msg (SW, _("Error removing `%s': %s."), - ds_value (&tokstr), strerror (errno)); + ds_c_str (&tokstr), strerror (errno)); return CMD_FAILURE; } @@ -713,7 +713,7 @@ run_command (void) lex_get (); if (!lex_force_string ()) return CMD_FAILURE; - cmd = ds_value (&tokstr); + cmd = ds_c_str (&tokstr); string = 1; } else diff --git a/src/count.c b/src/count.c index 058d60d5..dd88e01a 100644 --- a/src/count.c +++ b/src/count.c @@ -347,7 +347,7 @@ parse_string_criteria (struct counting * c) cur = &c->crit.s[n++]; cur->type = CNT_SINGLE; cur->s = malloc (len + 1); - st_pad_copy (cur->s, ds_value (&tokstr), len + 1); + st_pad_copy (cur->s, ds_c_str (&tokstr), len + 1); lex_get (); lex_match (','); diff --git a/src/data-in.c b/src/data-in.c index 3931067d..c082516d 100644 --- a/src/data-in.c +++ b/src/data-in.c @@ -64,9 +64,9 @@ dls_error (const struct data_in *i, const char *format, ...) struct error e; struct string title; - ds_init (NULL, &title, 64); + ds_init (&title, 64); if (!getl_reading_script) - ds_concat (&title, _("data-file error: ")); + ds_puts (&title, _("data-file error: ")); if (i->f1 == i->f2) ds_printf (&title, _("(column %d"), i->f1); else @@ -75,7 +75,7 @@ dls_error (const struct data_in *i, const char *format, ...) 
e.class = DE; err_location (&e.where); - e.title = ds_value (&title); + e.title = ds_c_str (&title); e.text = buf; err_vmsg (&e); diff --git a/src/data-list.c b/src/data-list.c index 62734de1..c9203122 100644 --- a/src/data-list.c +++ b/src/data-list.c @@ -88,7 +88,8 @@ struct data_list_pgm int eof; /* End of file encountered. */ int nrec; /* Number of records. */ size_t case_size; /* Case size in bytes. */ - int delim; /* Specified delimeter */ + char *delims; /* Delimiters if any; not null-terminated. */ + size_t delim_cnt; /* Number of delimiters, or 0 for spaces. */ }; static int parse_fixed (struct data_list_pgm *); @@ -121,7 +122,8 @@ cmd_data_list (void) dls->end = NULL; dls->eof = 0; dls->nrec = 0; - dls->delim=0; + dls->delims = NULL; + dls->delim_cnt = 0; dls->first = dls->last = NULL; while (token != '/') @@ -168,45 +170,58 @@ cmd_data_list (void) } else if (token == T_ID) { - /* Must match DLS_* constants. */ - static const char *id[] = {"FIXED", "FREE", "LIST", "NOTABLE", - "TABLE", NULL}; - const char **p; - int index; - - for (p = id; *p; p++) - if (lex_id_match (*p, tokid)) - break; - if (*p == NULL) - { - lex_error (NULL); - goto error; - } - - lex_get (); + if (lex_match_id ("NOTABLE")) + table = 0; + else if (lex_match_id ("TABLE")) + table = 1; + else + { + int type; + if (lex_match_id ("FIXED")) + type = DLS_FIXED; + else if (lex_match_id ("FREE")) + type = DLS_FREE; + else if (lex_match_id ("LIST")) + type = DLS_LIST; + else + { + lex_error (NULL); + goto error; + } - index = p - id; - if (index < 3) - { if (dls->type != -1) { msg (SE, _("Only one of FIXED, FREE, or LIST may " - "be specified.")); + "be specified.")); goto error; } - - dls->type = index; - } - else - table = index - 3; - } - else if (token=='(') { - lex_get(); - if (lex_match_id ("TAB")) { - dls->delim='\t'; + dls->type = type; + + if ((dls->type == DLS_FREE || dls->type == DLS_LIST) + && lex_match ('(')) + { + while (!lex_match (')')) + { + int delim; + + if (lex_match_id 
("TAB")) + delim = '\t'; + else if (token == T_STRING && tokstr.length == 1) + delim = tokstr.string[0]; + else + { + lex_error (NULL); + goto error; + } + + dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1); + dls->delims[dls->delim_cnt++] = delim; + + lex_match (','); + } + } + } } - lex_get(); - } else { lex_error (NULL); @@ -938,90 +953,111 @@ dump_free_table (const struct data_list_pgm *dls) /* Input procedure. */ -/* Extracts a field from the current position in the current record. - Fields can be unquoted or quoted with single- or double-quote - characters. *RET_LEN is set to the field length, *RET_CP is set to - the field itself. After parsing the field, sets the current - position in the record to just past the field. Returns 0 on - failure or a 1-based column number indicating the beginning of the - field on success. */ +/* Extracts a field from the current position in the current + record. Fields can be unquoted or quoted with single- or + double-quote characters. *FIELD is set to the field content. + After parsing the field, sets the current position in the + record to just past the field and any trailing delimiter. + END_BLANK is used internally; it should be initialized by the + caller to 0 and left alone afterward. Returns 0 on failure or + a 1-based column number indicating the beginning of the field + on success. */ static int -cut_field (const struct data_list_pgm *dls, char **ret_cp, int *ret_len) +cut_field (const struct data_list_pgm *dls, struct len_string *field, + int *end_blank) { - char *cp, *ep; - int len; - - cp = dfm_get_record (dls->handle, &len); - if (!cp) - return 0; - - ep = cp + len; - if (dls->delim != 0) { - if (*cp==dls->delim) { - cp++; - } - } else { + struct len_string line; + char *cp; + size_t column_start; - /* Skip leading whitespace and commas. 
*/ - while ((isspace ((unsigned char) *cp) || *cp == ',') && cp < ep) - cp++; - } - if (cp >= ep) + if (dfm_eof (dls->handle)) return 0; + if (dls->delim_cnt == 0) + dfm_expand_tabs (dls->handle); + dfm_get_record (dls->handle, &line); - /* Three types of fields: quoted with ', quoted with ", unquoted. */ - /* Quoting does not escape the effects of delimiters for explicitly */ - /* specified delims */ - /* (consistency with SPSS doco: */ - /* For data with explicitly specified value delimiters (for example, */ - /* DATA LIST FREE (","): */ - /* - Multiple delimiters without any intervening space can be used */ - /* to specify missing data. */ - /* - The specified delimiters cannot occur within a data value, even */ - /* if you enclose the value in quotation marks or apostrophes. */ - if (dls->delim==0 && (*cp == '\'' || *cp == '"')) + cp = ls_c_str (&line); + if (dls->delim_cnt == 0) { - int quote = *cp; - - *ret_cp = ++cp; - while (cp < ep && *cp != quote) - cp++; - if (dls->delim!=0) { - while(cpdelim) { - cp++; + /* Skip leading whitespace. */ + while (cp < ls_end (&line) && isspace ((unsigned char) *cp)) + cp++; + if (cp >= ls_end (&line)) + return 0; + + /* Handle actual data, whether quoted or unquoted. */ + if (*cp == '\'' || *cp == '"') + { + int quote = *cp; + + field->string = ++cp; + while (cp < ls_end (&line) && *cp != quote) + cp++; + field->length = cp - field->string; + if (cp < ls_end (&line)) + cp++; + else + msg (SW, _("Quoted string missing terminating `%c'."), quote); } - } - *ret_len = cp - *ret_cp; - if (cp < ep) - cp++; else - msg (SW, _("Scope of string exceeds line.")); + { + field->string = cp; + while (cp < ls_end (&line) + && !isspace ((unsigned char) *cp) && *cp != ',') + cp++; + field->length = cp - field->string; + } + + /* Skip trailing whitespace and a single comma if present. 
*/ + while (cp < ls_end (&line) && isspace ((unsigned char) *cp)) + cp++; + if (cp < ls_end (&line) && *cp == ',') + cp++; } - else + else { - *ret_cp = cp; - if (dls->delim!=0) { - while(cpdelim) { - cp++; + if (cp >= ls_end (&line)) + { + int column = dfm_column_start (dls->handle); + /* A blank line or a line that ends in \t has a + trailing blank field. */ + if (column == 1 || (column > 1 && cp[-1] == '\t')) + { + if (*end_blank == 0) + { + *end_blank = 1; + field->string = ls_end (&line); + field->length = 0; + dfm_forward_record (dls->handle); + return column; + } + else + { + *end_blank = 0; + return 0; + } + } + else + return 0; + } + else + { + field->string = cp; + while (cp < ls_end (&line) + && memchr (dls->delims, *cp, dls->delim_cnt) == NULL) + cp++; + field->length = cp - field->string; + if (cp < ls_end (&line)) + cp++; } - } else { - - while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ',') - cp++; - } - *ret_len = cp - *ret_cp; } - - { - int beginning_column; - - dfm_set_record (dls->handle, *ret_cp); - beginning_column = dfm_get_cur_col (dls->handle) + 1; + + dfm_forward_columns (dls->handle, field->string - line.string); + column_start = dfm_column_start (dls->handle); - dfm_set_record (dls->handle, cp); + dfm_forward_columns (dls->handle, cp - field->string); - return beginning_column; - } + return column_start; } typedef int data_list_read_func (const struct data_list_pgm *, struct ccase *); @@ -1061,26 +1097,28 @@ read_from_data_list_fixed (const struct data_list_pgm *dls, struct dls_var_spec *var_spec = dls->first; int i; - if (!dfm_get_record (dls->handle, NULL)) + if (dfm_eof (dls->handle)) return -2; for (i = 1; i <= dls->nrec; i++) { - int len; - char *line = dfm_get_record (dls->handle, &len); + struct len_string line; - if (!line) + if (dfm_eof (dls->handle)) { /* Note that this can't occur on the first record. 
*/ msg (SW, _("Partial case of %d of %d records discarded."), i - 1, dls->nrec); return -2; } + dfm_expand_tabs (dls->handle); + dfm_get_record (dls->handle, &line); for (; var_spec && i == var_spec->rec; var_spec = var_spec->next) { struct data_in di; - data_in_finite_line (&di, line, len, var_spec->fc, var_spec->lc); + data_in_finite_line (&di, ls_c_str (&line), ls_length (&line), + var_spec->fc, var_spec->lc); di.v = &c->data[var_spec->fv]; di.flags = 0; di.f1 = var_spec->fc; @@ -1089,7 +1127,7 @@ read_from_data_list_fixed (const struct data_list_pgm *dls, data_in (&di); } - dfm_fwd_record (dls->handle); + dfm_forward_record (dls->handle); } return -1; @@ -1103,27 +1141,27 @@ read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c) { struct dls_var_spec *var_spec; - char *field; - int len; + int end_blank = 0; for (var_spec = dls->first; var_spec; var_spec = var_spec->next) { + struct len_string field; int column; /* Cut out a field and read in a new record if necessary. */ for (;;) { - column = cut_field (dls, &field, &len); + column = cut_field (dls, &field, &end_blank); if (column != 0) break; - if (dfm_get_record (dls->handle, NULL)) - dfm_fwd_record (dls->handle); - if (!dfm_get_record (dls->handle, NULL)) + if (!dfm_eof (dls->handle)) + dfm_forward_record (dls->handle); + if (dfm_eof (dls->handle)) { if (var_spec != dls->first) msg (SW, _("Partial case discarded. 
The first variable " - "missing was %s."), var_spec->name); + "missing was %s."), var_spec->name); return -2; } } @@ -1131,8 +1169,8 @@ read_from_data_list_free (const struct data_list_pgm *dls, { struct data_in di; - di.s = field; - di.e = field + len; + di.s = ls_c_str (&field); + di.e = ls_end (&field); di.v = &c->data[var_spec->fv]; di.flags = 0; di.f1 = column; @@ -1151,25 +1189,26 @@ read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c) { struct dls_var_spec *var_spec; - char *field; - int len; + int end_blank = 0; - if (!dfm_get_record (dls->handle, NULL)) + if (dfm_eof (dls->handle)) return -2; for (var_spec = dls->first; var_spec; var_spec = var_spec->next) { + struct len_string field; + int column; + /* Cut out a field and check for end-of-line. */ - int column = cut_field (dls, &field, &len); - + column = cut_field (dls, &field, &end_blank); if (column == 0) { - if (get_undefined() ) + if (get_undefined ()) msg (SW, _("Missing value(s) for all variables from %s onward. " - "These will be filled with the system-missing value " - "or blanks, as appropriate."), + "These will be filled with the system-missing value " + "or blanks, as appropriate."), var_spec->name); - for (; var_spec; var_spec = var_spec->next) + for (; var_spec; var_spec = var_spec->next) { int width = get_format_var_width (&var_spec->input); if (width == 0) @@ -1183,8 +1222,8 @@ read_from_data_list_list (const struct data_list_pgm *dls, { struct data_in di; - di.s = field; - di.e = field + len; + di.s = ls_c_str (&field); + di.e = ls_end (&field); di.v = &c->data[var_spec->fv]; di.flags = 0; di.f1 = column; @@ -1193,7 +1232,7 @@ read_from_data_list_list (const struct data_list_pgm *dls, } } - dfm_fwd_record (dls->handle); + dfm_forward_record (dls->handle); return -1; } @@ -1882,15 +1921,14 @@ repeating_data_trns_proc (struct trns_header *trns, struct ccase *c, { struct repeating_data_trns *t = (struct repeating_data_trns *) trns; - char *line; /* Current record. 
*/ - int len; /* Length of current record. */ + struct len_string line; /* Current record. */ int starts_beg; /* Starting column. */ int starts_end; /* Ending column. */ int occurs; /* Number of repetitions. */ int length; /* Length of each occurrence. */ - int cont_beg; /* Starting column for continuation lines. */ - int cont_end; /* Ending column for continuation lines. */ + int cont_beg; /* Starting column for continuation lines. */ + int cont_end; /* Ending column for continuation lines. */ int occurs_left; /* Number of occurrences remaining. */ @@ -1901,11 +1939,12 @@ repeating_data_trns_proc (struct trns_header *trns, struct ccase *c, dfm_push (t->handle); /* Read the current record. */ - dfm_bkwd_record (t->handle, 1); - line = dfm_get_record (t->handle, &len); - if (line == NULL) + dfm_reread_record (t->handle, 1); + dfm_expand_tabs (t->handle); + if (dfm_eof (t->handle)) return -2; - dfm_fwd_record (t->handle); + dfm_get_record (t->handle, &line); + dfm_forward_record (t->handle); /* Calculate occurs, length. */ occurs_left = occurs = realize_value (&t->occurs, c); @@ -1959,8 +1998,8 @@ repeating_data_trns_proc (struct trns_header *trns, struct ccase *c, { struct rpd_parse_info info; info.trns = t; - info.line = line; - info.len = len; + info.line = ls_c_str (&line); + info.len = ls_length (&line); info.beg = starts_beg; info.end = starts_end; info.ofs = length; @@ -1995,8 +2034,7 @@ repeating_data_trns_proc (struct trns_header *trns, struct ccase *c, assert (occurs_left >= 0); /* Read in another record. */ - line = dfm_get_record (t->handle, &len); - if (line == NULL) + if (dfm_eof (t->handle)) { tmsg (SE, RPD_ERR, _("Unexpected end of file with %d repetitions " @@ -2004,12 +2042,14 @@ repeating_data_trns_proc (struct trns_header *trns, struct ccase *c, occurs_left, occurs); return -2; } - dfm_fwd_record (t->handle); + dfm_expand_tabs (t->handle); + dfm_get_record (t->handle, &line); + dfm_forward_record (t->handle); /* Parse this record. 
*/ info.trns = t; - info.line = line; - info.len = len; + info.line = ls_c_str (&line); + info.len = ls_length (&line); info.beg = cont_beg; info.end = cont_end; info.ofs = length; diff --git a/src/devind.c b/src/devind.c index 834e96e0..ad0cbeae 100644 --- a/src/devind.c +++ b/src/devind.c @@ -220,7 +220,7 @@ devind_option (struct outp_driver *this, const char *key, const struct string *v break; case 1: free (x->file.filename); - x->file.filename = xstrdup (ds_value (val)); + x->file.filename = xstrdup (ds_c_str (val)); break; default: assert (0); @@ -320,7 +320,7 @@ output_tab_table (struct outp_driver *this, struct tab_table *t) if (t->nr == 1 && t->nc == 1) { fputs ("p:", x->file.file); - escape_string (x->file.file, ls_value (t->cc), ls_length (t->cc)); + escape_string (x->file.file, ls_c_str (t->cc), ls_length (t->cc)); putc ('\n', x->file.file); return; @@ -347,7 +347,7 @@ output_tab_table (struct outp_driver *this, struct tab_table *t) if (!ls_empty_p (&t->title)) { putc ('T', x->file.file); - escape_string (x->file.file, ls_value (&t->title), + escape_string (x->file.file, ls_c_str (&t->title), ls_length (&t->title)); putc ('\n', x->file.file); } @@ -376,7 +376,7 @@ output_tab_table (struct outp_driver *this, struct tab_table *t) cc = t->cc + c + r * t->nc; if (*ct & TAB_JOIN) { - j = (struct tab_joined_cell *) ls_value (cc); + j = (struct tab_joined_cell *) ls_c_str (cc); cc = &j->contents; if (c != j->x1 || r != j->y1) continue; @@ -399,7 +399,7 @@ output_tab_table (struct outp_driver *this, struct tab_table *t) else putc ('c', x->file.file); putc ('t', x->file.file); - escape_string (x->file.file, ls_value (cc), ls_length (cc)); + escape_string (x->file.file, ls_c_str (cc), ls_length (cc)); putc ('\n', x->file.file); } } diff --git a/src/dfm.c b/src/dfm.c index 910bb32c..786c33e4 100644 --- a/src/dfm.c +++ b/src/dfm.c @@ -36,54 +36,66 @@ #include "debug-print.h" +/* Flags for DFM readers. */ +enum dfm_reader_flags + { + DFM_EOF = 001, /* At end-of-file? 
*/ + DFM_ADVANCE = 002, /* Read next line on dfm_eof() call? */ + DFM_SAW_BEGIN_DATA = 004, /* For inline_file only, whether we've + already read a BEGIN DATA line. */ + DFM_TABS_EXPANDED = 010, /* Tabs have been expanded. */ + }; + /* file_handle extension structure. */ -struct dfm_fhuser_ext +struct dfm_reader_ext { struct file_ext file; /* Associated file. */ struct file_locator where; /* Current location in data file. */ - char *line; /* Current line, not null-terminated. */ - size_t size; /* Number of bytes allocated for line. */ - size_t len; /* Length of line. */ - - char *ptr; /* Pointer into line that is returned by - dfm_get_record(). */ - int advance; /* Nonzero=dfm_get_record() reads a new - record; otherwise returns current record. */ - int saw_begin_data; /* For inline_file only, whether we've - already read a BEGIN DATA line. */ + struct string line; /* Current line. */ + size_t pos; /* Offset in line of current character. */ + struct string scratch; /* Extra line buffer. */ + enum dfm_reader_flags flags; /* Zero or more of DFM_*. */ }; -/* These are defined at the end of this file. */ static struct fh_ext_class dfm_r_class; -static struct fh_ext_class dfm_w_class; static void read_record (struct file_handle *h); - -/* Internal (low level). */ -/* Closes the file handle H which was opened by open_file_r() or - open_file_w(). */ +/* Asserts that H represents a DFM reader and returns H->ext + converted to a struct dfm_reader_ext *. */ +static inline struct dfm_reader_ext * +get_reader (struct file_handle *h) +{ + assert (h != NULL); + assert (h->class == &dfm_r_class); + assert (h->ext != NULL); + + return h->ext; +} + +/* Closes file handle H opened by dfm_open_for_reading(). */ static void -dfm_close (struct file_handle *h) +close_reader (struct file_handle *h) { - struct dfm_fhuser_ext *ext = h->ext; + struct dfm_reader_ext *ext = get_reader (h); /* Skip any remaining data on the inline file.
*/ if (h == inline_file) - while (ext->line != NULL) + while ((ext->flags & DFM_EOF) == 0) read_record (h); msg (VM (2), _("%s: Closing data-file handle %s."), handle_get_filename (h), handle_get_name (h)); - assert (h->class == &dfm_r_class || h->class == &dfm_w_class); + assert (h->class == &dfm_r_class); if (ext->file.file) { fn_close_ext (&ext->file); free (ext->file.filename); ext->file.filename = NULL; } - free (ext->line); + ds_destroy (&ext->line); + ds_destroy (&ext->scratch); free (ext); } @@ -92,7 +104,7 @@ dfm_close (struct file_handle *h) int dfm_open_for_reading (struct file_handle *h) { - struct dfm_fhuser_ext *ext; + struct dfm_reader_ext *ext; if (h->class != NULL) { @@ -110,12 +122,9 @@ dfm_open_for_reading (struct file_handle *h) ext->where.filename = handle_get_filename (h); ext->where.line_number = 0; ext->file.file = NULL; - ext->line = xmalloc (128); - ext->len = 0; - ext->ptr = NULL; - ext->size = 128; - ext->advance = 1; - ext->saw_begin_data = 0; + ds_init (&ext->line, 64); + ds_init (&ext->scratch, 0); + ext->flags = DFM_ADVANCE; msg (VM (1), _("%s: Opening data-file handle %s for reading."), handle_get_filename (h), handle_get_name (h)); @@ -149,190 +158,6 @@ dfm_open_for_reading (struct file_handle *h) return 0; } -/* Opens a file handle for writing as a data file. 
*/ -int -dfm_open_for_writing (struct file_handle *h) -{ - struct dfm_fhuser_ext *ext; - - if (h->class != NULL) - { - if (h->class == &dfm_w_class) - return 1; - else - { - msg (ME, _("Cannot write to file %s already opened for %s."), - handle_get_name (h), gettext (h->class->name)); - err_cond_fail (); - return 0; - } - } - - ext = xmalloc (sizeof *ext); - ext->where.filename = handle_get_filename (h); - ext->where.line_number = 0; - ext->file.file = NULL; - ext->line = NULL; - ext->len = 0; - ext->ptr = NULL; - ext->size = 0; - ext->advance = 0; - - msg (VM (1), _("%s: Opening data-file handle %s for writing."), - handle_get_filename (h), handle_get_name (h)); - - assert (h != NULL); - if (h == inline_file) - { - msg (ME, _("Cannot open the inline file for writing.")); - goto error; - } - - ext->file.filename = xstrdup (handle_get_filename (h)); - ext->file.mode = "wb"; - ext->file.file = NULL; - ext->file.sequence_no = NULL; - ext->file.param = NULL; - ext->file.postopen = NULL; - ext->file.preclose = NULL; - - if (!fn_open_ext (&ext->file)) - { - msg (ME, _("An error occurred while opening \"%s\" for writing " - "as a data file: %s."), - handle_get_filename (h), strerror (errno)); - goto error; - } - - h->class = &dfm_w_class; - h->ext = ext; - return 1; - - error: - free (ext); - err_cond_fail (); - return 0; -} - -/* Ensures that the line buffer in file handle with extension EXT is - big enough to hold a line of length EXT->LEN characters not - including null terminator. */ -#define force_line_buffer_expansion() \ - do \ - { \ - if (ext->len + 1 > ext->size) \ - { \ - ext->size = ext->len * 2; \ - ext->line = xrealloc (ext->line, ext->size); \ - } \ - } \ - while (0) - -/* Counts the number of tabs in string STRING of length LEN. 
*/ -static inline int -count_tabs (char *s, size_t len) -{ - int n_tabs = 0; - - for (;;) - { - char *cp = memchr (s, '\t', len); - if (cp == NULL) - return n_tabs; - n_tabs++; - len -= cp - s + 1; - s = cp + 1; - } -} - -/* Converts all the tabs in H->EXT->LINE to an equivalent number of - spaces, if necessary. */ -static void -tabs_to_spaces (struct file_handle *h) -{ - struct dfm_fhuser_ext *ext = h->ext; - - char *first_tab; /* Location of first tab (if any). */ - char *second_tab; /* Location of second tab (if any). */ - size_t orig_len; /* Line length at function entry. */ - - /* If there aren't any tabs then there's nothing to do. */ - first_tab = memchr (ext->line, '\t', ext->len); - if (first_tab == NULL) - return; - orig_len = ext->len; - - /* If there's just one tab then expand it inline. Otherwise do a - full string copy to another buffer. */ - second_tab = memchr (first_tab + 1, '\t', - ext->len - (first_tab - ext->line + 1)); - if (second_tab == NULL) - { - int n_spaces = 8 - (first_tab - ext->line) % 8; - - ext->len += n_spaces - 1; - - /* Expand the line if necessary, keeping the first_tab pointer - valid. */ - { - size_t ofs = first_tab - ext->line; - force_line_buffer_expansion (); - first_tab = ext->line + ofs; - } - - memmove (first_tab + n_spaces, first_tab + 1, - orig_len - (first_tab - ext->line + 1)); - memset (first_tab, ' ', n_spaces); - } else { - /* Make a local copy of original text. */ - char *orig_line = local_alloc (ext->len + 1); - memcpy (orig_line, ext->line, ext->len); - - /* Allocate memory assuming we need to add 8 spaces for every tab. */ - ext->len += 2 + count_tabs (second_tab + 1, - ext->len - (second_tab - ext->line + 1)); - - /* Expand the line if necessary, keeping the first_tab pointer - valid. */ - { - size_t ofs = first_tab - ext->line; - force_line_buffer_expansion (); - first_tab = ext->line + ofs; - } - - /* Walk through orig_line, expanding tabs into ext->line. 
*/ - { - char *src_p = orig_line + (first_tab - ext->line); - char *dest_p = first_tab; - - for (; src_p < orig_line + orig_len; src_p++) - { - /* Most characters simply pass through untouched. */ - if (*src_p != '\t') - { - *dest_p++ = *src_p; - continue; - } - - /* Tabs are expanded into an equivalent number of - spaces. */ - { - int n_spaces = 8 - (dest_p - ext->line) % 8; - - memset (dest_p, ' ', n_spaces); - dest_p += n_spaces; - } - } - - /* Supply null terminator and actual string length. */ - *dest_p = 0; - ext->len = dest_p - ext->line; - } - - local_free (orig_line); - } -} - /* Reads a record from H->EXT->FILE into H->EXT->LINE, setting H->EXT->PTR to H->EXT->LINE, and setting H->EXT-LEN to the length of the line. The line is not null-terminated. If an error occurs @@ -340,15 +165,15 @@ tabs_to_spaces (struct file_handle *h) static void read_record (struct file_handle *h) { - struct dfm_fhuser_ext *ext = h->ext; + struct dfm_reader_ext *ext = get_reader (h); if (h == inline_file) { - if (!ext->saw_begin_data) + if ((ext->flags & DFM_SAW_BEGIN_DATA) == 0) { char *s; - ext->saw_begin_data = 1; + ext->flags |= DFM_SAW_BEGIN_DATA; /* FIXME: WTF can't this just be done with tokens? Is this really a special case? */ @@ -362,9 +187,10 @@ read_record (struct file_handle *h) err_failure (); } - /* Skip leading whitespace, separate out first word, so that - S points to a single word reduced to lowercase. */ - s = ds_value (&getl_buf); + /* Skip leading whitespace, separate out first + word, so that S points to a single word reduced + to lowercase. */ + s = ds_c_str (&getl_buf); while (isspace ((unsigned char) *s)) s++; for (cp = s; isalpha ((unsigned char) *cp); cp++) @@ -385,36 +211,31 @@ read_record (struct file_handle *h) if (!getl_read_line ()) { msg (SE, _("Unexpected end-of-file while reading data in BEGIN " - "DATA. This probably indicates " - "a missing or misformatted END DATA command. 
" - "END DATA must appear by itself on a single line " - "with exactly one space between words.")); + "DATA. This probably indicates " + "a missing or misformatted END DATA command. " + "END DATA must appear by itself on a single line " + "with exactly one space between words.")); err_failure (); } ext->where.line_number++; if (ds_length (&getl_buf) >= 8 - && !strncasecmp (ds_value (&getl_buf), "end data", 8)) + && !strncasecmp (ds_c_str (&getl_buf), "end data", 8)) { - lex_set_prog (ds_value (&getl_buf) + ds_length (&getl_buf)); + lex_set_prog (ds_c_str (&getl_buf) + ds_length (&getl_buf)); goto eof; } - ext->len = ds_length (&getl_buf); - force_line_buffer_expansion (); - strcpy (ext->line, ds_value (&getl_buf)); + ds_replace (&ext->line, ds_c_str (&getl_buf)); } else { if (handle_get_mode (h) == MODE_TEXT) { - /* PORTME: here you should adapt the routine to your - system's concept of a "line" of text. */ - int read_len = getline (&ext->line, &ext->size, ext->file.file); - - if (read_len == -1) - { + ds_clear (&ext->line); + if (!ds_gets (&ext->line, ext->file.file)) + { if (ferror (ext->file.file)) { msg (ME, _("Error reading file %s: %s."), @@ -423,19 +244,17 @@ read_record (struct file_handle *h) } goto eof; } - ext->len = (size_t) read_len; } else if (handle_get_mode (h) == MODE_BINARY) { size_t record_width = handle_get_record_width (h); size_t amt; - if (ext->size < record_width) - { - ext->size = record_width; - ext->line = xmalloc (ext->size); - } - amt = fread (ext->line, 1, record_width, ext->file.file); + if (ds_length (&ext->line) < record_width) + ds_rpad (&ext->line, record_width, 0); + + amt = fread (ds_c_str (&ext->line), 1, record_width, + ext->file.file); if (record_width != amt) { if (ferror (ext->file.file)) @@ -457,117 +276,246 @@ read_record (struct file_handle *h) ext->where.line_number++; } - /* Strip trailing whitespace, I forget why. But there's a good - reason, I'm sure. I'm too scared to eliminate this code. 
*/ - if (handle_get_mode (h) == MODE_TEXT) - { - /* while (ext->len && isspace ((unsigned char) ext->line[ext->len - 1])) - ext->len--;*/ - - /* Convert tabs to spaces. */ - - ext->ptr = ext->line; - } + ext->pos = 0; return; eof: /* Hit eof or an error, clean up everything. */ - if (ext->line) - free (ext->line); - ext->size = 0; - ext->line = ext->ptr = NULL; - return; + ext->flags |= DFM_EOF; } - -/* Public (high level). */ - -/* Returns the current record in the file corresponding to HANDLE. - Opens files and reads records, etc., as necessary. Sets *LEN to - the length of the line. The line returned is not null-terminated. - Returns NULL at end of file. Calls fail() on attempt to read past - end of file. */ -char * -dfm_get_record (struct file_handle *h, int *len) -{ - struct dfm_fhuser_ext *ext; - - assert (h != NULL); - assert (h->class == &dfm_r_class); - assert (h->ext != NULL); - ext = h->ext; - if (ext->advance) +/* Returns nonzero if end of file has been reached on HANDLE. + Reads forward in HANDLE's file, if necessary to tell. */ +int +dfm_eof (struct file_handle *h) +{ + struct dfm_reader_ext *ext = get_reader (h); + if (ext->flags & DFM_ADVANCE) { - if (ext->line) + ext->flags &= ~DFM_ADVANCE; + if ((ext->flags & DFM_EOF) == 0) read_record (h); else { msg (SE, _("Attempt to read beyond end-of-file on file %s."), handle_get_name (h)); - goto lossage; + err_cond_fail (); } } - ext->advance = 0; - if (len) - *len = ext->len - (ext->ptr - ext->line); - return ext->ptr; + return (ext->flags & DFM_EOF) != 0; +} -lossage: - /* Come here on reading beyond eof or reading from a file already - open for something else. */ - err_cond_fail (); +/* Returns the current record in the file corresponding to + HANDLE. Aborts if reading from the file is necessary or at + end of file, so call dfm_eof() first. Sets *LINE to the line, + which is not null-terminated. The caller must not free or + modify the returned string. 
*/ +void +dfm_get_record (struct file_handle *h, struct len_string *line) +{ + struct dfm_reader_ext *ext = get_reader (h); + assert ((ext->flags & DFM_ADVANCE) == 0); + assert ((ext->flags & DFM_EOF) == 0); + assert (ext->pos <= ds_length (&ext->line)); + + line->string = ds_data (&ext->line) + ext->pos; + line->length = ds_length (&ext->line) - ext->pos; +} + +/* Expands tabs in the current line into the equivalent number of + spaces, if appropriate for this kind of file. Aborts if + reading from the file is necessary or at end of file, so call + dfm_eof() first.*/ +void +dfm_expand_tabs (struct file_handle *h) +{ + struct dfm_reader_ext *ext = get_reader (h); + struct string temp; + size_t ofs, new_pos, tab_width; + + assert ((ext->flags & DFM_ADVANCE) == 0); + assert ((ext->flags & DFM_EOF) == 0); + assert (ext->pos <= ds_length (&ext->line)); + + if (ext->flags & DFM_TABS_EXPANDED) + return; + ext->flags |= DFM_TABS_EXPANDED; + + if (handle_get_mode (h) == MODE_BINARY + || handle_get_tab_width (h) == 0 + || memchr (ds_c_str (&ext->line), '\t', ds_length (&ext->line)) == NULL) + return; + + /* Expand tabs from ext->line into ext->scratch, and figure out + new value for ext->pos. */ + tab_width = handle_get_tab_width (h); + ds_clear (&ext->scratch); + new_pos = 0; + for (ofs = 0; ofs < ds_length (&ext->line); ofs++) + { + unsigned char c; + + if (ofs == ext->pos) + new_pos = ds_length (&ext->scratch); + + c = ds_c_str (&ext->line)[ofs]; + if (c != '\t') + ds_putc (&ext->scratch, c); + else + { + do + ds_putc (&ext->scratch, ' '); + while (ds_length (&ext->scratch) % tab_width != 0); + } + } - return NULL; + /* Swap ext->line and ext->scratch and set new ext->pos. */ + temp = ext->line; + ext->line = ext->scratch; + ext->scratch = temp; + ext->pos = new_pos; } /* Causes dfm_get_record() to read in the next record the next time it is executed on file HANDLE. 
*/ void -dfm_fwd_record (struct file_handle *h) +dfm_forward_record (struct file_handle *h) { - struct dfm_fhuser_ext *ext = h->ext; + struct dfm_reader_ext *ext = get_reader (h); + ext->flags |= DFM_ADVANCE; +} - assert (h->class == &dfm_r_class); - ext->advance = 1; +/* Cancels the effect of any previous dfm_forward_record() executed + on file HANDLE. Sets the current line to begin in the 1-based + column COLUMN. */ +void +dfm_reread_record (struct file_handle *h, size_t column) +{ + struct dfm_reader_ext *ext = get_reader (h); + ext->flags &= ~DFM_ADVANCE; + if (column < 1) + ext->pos = 0; + else if (column > ds_length (&ext->line)) + ext->pos = ds_length (&ext->line); + else + ext->pos = column - 1; } -/* Cancels the effect of any previous dfm_fwd_record() executed on - file HANDLE. Sets the current line to begin in the 1-based column - COLUMN, as with dfm_set_record but based on a column number instead - of a character pointer. */ +/* Sets the current line to begin COLUMNS characters following + the current start. */ void -dfm_bkwd_record (struct file_handle *h, int column) +dfm_forward_columns (struct file_handle *h, size_t columns) { - struct dfm_fhuser_ext *ext = h->ext; + struct dfm_reader_ext *ext = get_reader (h); + dfm_reread_record (h, (ext->pos + 1) + columns); +} - assert (h->class == &dfm_r_class); - ext->advance = 0; - ext->ptr = ext->line + min ((int) ext->len + 1, column) - 1; +/* Returns the 1-based column to which the line pointer in HANDLE + is set. Unless dfm_reread_record() or dfm_forward_columns() + have been called, this is 1. */ +size_t +dfm_column_start (struct file_handle *h) +{ + struct dfm_reader_ext *ext = get_reader (h); + return ext->pos + 1; } -/* Sets the current line in HANDLE to NEW_LINE, which must point - somewhere in the line last returned by dfm_get_record(). Used by - DATA LIST FREE to strip the leading portion off the current line. */ +/* Pushes the filename and line number on the fn/ln stack.
*/ void -dfm_set_record (struct file_handle *h, char *new_line) +dfm_push (struct file_handle *h) { - struct dfm_fhuser_ext *ext = h->ext; + struct dfm_reader_ext *ext = get_reader (h); + if (h != inline_file) + err_push_file_locator (&ext->where); +} - assert (h->class == &dfm_r_class); - ext->ptr = new_line; +/* Pops the filename and line number from the fn/ln stack. */ +void +dfm_pop (struct file_handle *h) +{ + struct dfm_reader_ext *ext = get_reader (h); + if (h != inline_file) + err_pop_file_locator (&ext->where); } -/* Returns the 0-based current column to which the line pointer in - HANDLE is set. Unless dfm_set_record() or dfm_bkwd_record() have - been called, this is 0. */ +/* DFM reader class. */ +static struct fh_ext_class dfm_r_class = +{ + 1, + N_("reading as a data file"), + close_reader, +}; + +/* file_handle extension structure. */ +struct dfm_writer_ext + { + struct file_ext file; /* Associated file. */ + struct file_locator where; /* Current location in data file. */ + char *bounce; /* Bounce buffer for fixed-size fields. */ + }; + +static struct fh_ext_class dfm_w_class; + +/* Opens a file handle for writing as a data file. 
*/ int -dfm_get_cur_col (struct file_handle *h) +dfm_open_for_writing (struct file_handle *h) { - struct dfm_fhuser_ext *ext = h->ext; + struct dfm_writer_ext *ext; + + if (h->class != NULL) + { + if (h->class == &dfm_w_class) + return 1; + else + { + msg (ME, _("Cannot write to file %s already opened for %s."), + handle_get_name (h), gettext (h->class->name)); + err_cond_fail (); + return 0; + } + } - assert (h->class == &dfm_r_class); - return ext->ptr - ext->line; + ext = xmalloc (sizeof *ext); + ext->where.filename = handle_get_filename (h); + ext->where.line_number = 0; + ext->file.file = NULL; + ext->bounce = NULL; + + msg (VM (1), _("%s: Opening data-file handle %s for writing."), + handle_get_filename (h), handle_get_name (h)); + + assert (h != NULL); + if (h == inline_file) + { + msg (ME, _("Cannot open the inline file for writing.")); + goto error; + } + + ext->file.filename = xstrdup (handle_get_filename (h)); + ext->file.mode = "wb"; + ext->file.file = NULL; + ext->file.sequence_no = NULL; + ext->file.param = NULL; + ext->file.postopen = NULL; + ext->file.preclose = NULL; + + if (!fn_open_ext (&ext->file)) + { + msg (ME, _("An error occurred while opening \"%s\" for writing " + "as a data file: %s."), + handle_get_filename (h), strerror (errno)); + goto error; + } + + h->class = &dfm_w_class; + h->ext = ext; + return 1; + + error: + free (ext); + err_cond_fail (); + return 0; } /* Writes record REC having length LEN to the file corresponding to @@ -576,9 +524,7 @@ dfm_get_cur_col (struct file_handle *h) int dfm_put_record (struct file_handle *h, const char *rec, size_t len) { - struct dfm_fhuser_ext *ext; - char *ptr; - size_t amt; + struct dfm_writer_ext *ext; assert (h != NULL); assert (h->class == &dfm_w_class); @@ -587,18 +533,16 @@ dfm_put_record (struct file_handle *h, const char *rec, size_t len) ext = h->ext; if (handle_get_mode (h) == MODE_BINARY && len < handle_get_record_width (h)) { - amt = handle_get_record_width (h); - ptr = local_alloc 
(amt); - memcpy (ptr, rec, len); - memset (&ptr[len], 0, amt - len); - } - else - { - ptr = (char *) rec; - amt = len; + size_t rec_width = handle_get_record_width (h); + if (ext->bounce == NULL) + ext->bounce = xmalloc (rec_width); + memcpy (ext->bounce, rec, len); + memset (&ext->bounce[len], 0, rec_width - len); + rec = ext->bounce; + len = rec_width; } - if (1 != fwrite (ptr, amt, 1, ext->file.file)) + if (fwrite (rec, len, 1, ext->file.file) != 1) { msg (ME, _("Error writing file %s: %s."), handle_get_name (h), strerror (errno)); @@ -606,35 +550,37 @@ dfm_put_record (struct file_handle *h, const char *rec, size_t len) return 0; } - if (ptr != rec) - local_free (ptr); - return 1; } -/* Pushes the filename and line number on the fn/ln stack. */ -void -dfm_push (struct file_handle *h) +/* Closes file handle H opened by dfm_open_for_writing(). */ +static void +close_writer (struct file_handle *h) { - struct dfm_fhuser_ext *ext = h->ext; + struct dfm_writer_ext *ext; - assert (h->class == &dfm_r_class || h->class == &dfm_w_class); - assert (ext != NULL); - if (h != inline_file) - err_push_file_locator (&ext->where); + assert (h->class == &dfm_w_class); + ext = h->ext; + + msg (VM (2), _("%s: Closing data-file handle %s."), + handle_get_filename (h), handle_get_name (h)); + if (ext->file.file) + { + fn_close_ext (&ext->file); + free (ext->file.filename); + ext->file.filename = NULL; + } + free (ext->bounce); + free (ext); } -/* Pops the filename and line number from the fn/ln stack. */ -void -dfm_pop (struct file_handle *h) +/* DFM writer class. */ +static struct fh_ext_class dfm_w_class = { - struct dfm_fhuser_ext *ext = h->ext; - - assert (h->class == &dfm_r_class || h->class == &dfm_w_class); - assert (ext != NULL); - if (h != inline_file) - err_pop_file_locator (&ext->where); -} + 2, + N_("writing as a data file"), + close_writer, +}; /* BEGIN DATA...END DATA procedure. 
*/ @@ -642,7 +588,7 @@ dfm_pop (struct file_handle *h) int cmd_begin_data (void) { - struct dfm_fhuser_ext *ext; + struct dfm_reader_ext *ext; /* FIXME: figure out the *exact* conditions, not these really lenient conditions. */ @@ -651,7 +597,7 @@ cmd_begin_data (void) || case_source_is_class (vfm_source, &sort_source_class)) { msg (SE, _("This command is not valid here since the current " - "input program does not access the inline file.")); + "input program does not access the inline file.")); err_cond_fail (); return CMD_FAILURE; } @@ -660,7 +606,7 @@ cmd_begin_data (void) msg (VM (1), _("inline file: Opening for reading.")); dfm_open_for_reading (inline_file); ext = inline_file->ext; - ext->saw_begin_data = 1; + ext->flags |= DFM_SAW_BEGIN_DATA; /* We don't actually read from the inline file. The input procedure is what reads from it. */ @@ -668,27 +614,13 @@ cmd_begin_data (void) procedure (NULL, NULL); ext = inline_file->ext; - if (ext && ext->line) + if (ext && (ext->flags & DFM_EOF) == 0) { msg (MW, _("Skipping remaining inline data.")); - for (read_record (inline_file); ext->line; read_record (inline_file)) - ; + while ((ext->flags & DFM_EOF) == 0) + read_record (inline_file); } assert (inline_file->ext == NULL); return CMD_SUCCESS; } - -static struct fh_ext_class dfm_r_class = -{ - 1, - N_("reading as a data file"), - dfm_close, -}; - -static struct fh_ext_class dfm_w_class = -{ - 2, - N_("writing as a data file"), - dfm_close, -}; diff --git a/src/dfm.h b/src/dfm.h index 30f12846..df2307c8 100644 --- a/src/dfm.h +++ b/src/dfm.h @@ -28,21 +28,26 @@ #include -/* I/O utilities. */ struct file_handle; -int dfm_open_for_reading (struct file_handle *handle); -int dfm_open_for_writing (struct file_handle *handle); -char *dfm_get_record (struct file_handle *handle, int *len); -int dfm_put_record (struct file_handle *handle, const char *rec, size_t len); - -/* Motion control. 
*/ -void dfm_fwd_record (struct file_handle *handle); -void dfm_bkwd_record (struct file_handle *handle, int column); - -/* Weirdness. */ -void dfm_set_record (struct file_handle *handle, char *new_line); -int dfm_get_cur_col (struct file_handle *handle); -void dfm_push (struct file_handle *handle); -void dfm_pop (struct file_handle *handle); +struct len_string; + +/* Input. */ +int dfm_open_for_reading (struct file_handle *); +int dfm_eof (struct file_handle *); +void dfm_get_record (struct file_handle *, struct len_string *); +void dfm_expand_tabs (struct file_handle *); + +void dfm_forward_record (struct file_handle *); +void dfm_reread_record (struct file_handle *, size_t column); +void dfm_forward_columns (struct file_handle *, size_t columns); +size_t dfm_column_start (struct file_handle *); + +/* Output. */ +int dfm_open_for_writing (struct file_handle *); +int dfm_put_record (struct file_handle *, const char *rec, size_t len); + +/* File stack. */ +void dfm_push (struct file_handle *); +void dfm_pop (struct file_handle *); #endif /* dfm_h */ diff --git a/src/error.c b/src/error.c index 7a7e997d..2e012b4b 100644 --- a/src/error.c +++ b/src/error.c @@ -81,7 +81,7 @@ msg (int class, const char *format, ...) { struct string buf; - ds_init (NULL, &buf, 1024); + ds_init (&buf, 1024); /* Format the message into BUF. 
*/ { @@ -283,13 +283,13 @@ err_vmsg (const struct error *e) assert (class >= 0 && class < ERR_CLASS_COUNT); assert (e->text != NULL); - ds_init (NULL, &msg, 64); + ds_init (&msg, 64); if (e->where.filename && (error_classes[class].flags & ERR_WITH_FILE)) { ds_printf (&msg, "%s:", e->where.filename); if (e->where.line_number != -1) ds_printf (&msg, "%d:", e->where.line_number); - ds_putchar (&msg, ' '); + ds_putc (&msg, ' '); } ds_printf (&msg, "%s: ", gettext (error_classes[class].banner)); @@ -304,9 +304,9 @@ err_vmsg (const struct error *e) ds_printf (&msg, "%s: ", cur_proc); if (e->title) - ds_concat (&msg, e->title); + ds_puts (&msg, e->title); - ds_concat (&msg, e->text); + ds_puts (&msg, e->text); /* FIXME: Check set_messages and set_errors to determine where to send errors and messages. @@ -314,7 +314,7 @@ err_vmsg (const struct error *e) Please note that this is not trivial. We have to avoid an infinite loop in reporting errors that originate in the output section. */ - dump_message (ds_value (&msg), 8, puts_stdout, get_viewwidth()); + dump_message (ds_c_str (&msg), 8, puts_stdout, get_viewwidth()); ds_destroy (&msg); diff --git a/src/expr-prs.c b/src/expr-prs.c index a6a4a7d6..05395bc4 100644 --- a/src/expr-prs.c +++ b/src/expr-prs.c @@ -615,7 +615,7 @@ parse_primary (union any_node **n) case T_STRING: { - *n = allocate_str_con (ds_value (&tokstr), ds_length (&tokstr)); + *n = allocate_str_con (ds_c_str (&tokstr), ds_length (&tokstr)); lex_get (); return EXPR_STRING; } @@ -1223,7 +1223,7 @@ parse_function (union any_node ** n) } ds_truncate (&tokstr, 31); - strcpy (fname, ds_value (&tokstr)); + strcpy (fname, ds_c_str (&tokstr)); cp = strrchr (fname, '.'); if (cp && isdigit ((unsigned char) cp[1])) { diff --git a/src/file-handle.h b/src/file-handle.h index 145d5202..5f18202a 100644 --- a/src/file-handle.h +++ b/src/file-handle.h @@ -72,5 +72,6 @@ const char *handle_get_name (const struct file_handle *handle); const char *handle_get_filename (const struct 
file_handle *handle); enum file_handle_mode handle_get_mode (const struct file_handle *); size_t handle_get_record_width (const struct file_handle *); +size_t handle_get_tab_width (const struct file_handle *); #endif /* !file_handle.h */ diff --git a/src/file-handle.q b/src/file-handle.q index 1133fd31..96aa9486 100644 --- a/src/file-handle.q +++ b/src/file-handle.q @@ -41,6 +41,7 @@ struct private_file_handle struct file_locator where; /* Used for reporting error messages. */ enum file_handle_mode mode; /* File mode. */ size_t length; /* Length of fixed-format records. */ + size_t tab_width; /* Tab width, 0=do not expand tabs. */ }; /* Linked list of file handles. */ @@ -59,9 +60,9 @@ static struct file_handle *create_file_handle (const char *handle_name, /* (specification) "FILE HANDLE" (fh_): name=string; - recform=recform:fixed/!variable/spanned; lrecl=integer; - mode=mode:!character/image/binary/multipunch/_360. + tabwidth=integer "x>=0" "%s must be nonnegative"; + mode=mode:!character/image. 
*/ /* (declarations) */ /* (functions) */ @@ -153,6 +154,10 @@ cmd_file_handle (void) { case FH_CHARACTER: handle->private->mode = MODE_TEXT; + if (cmd.sbc_tabwidth) + handle->private->tab_width = cmd.n_tabwidth; + else + handle->private->tab_width = 4; break; case FH_IMAGE: handle->private->mode = MODE_BINARY; @@ -204,6 +209,7 @@ create_file_handle (const char *handle_name, const char *filename) handle->private->where.line_number = 0; handle->private->mode = MODE_TEXT; handle->private->length = 1024; + handle->private->tab_width = 4; handle->ext = NULL; handle->class = NULL; @@ -262,10 +268,10 @@ fh_parse_file_handle (void) if (token == T_ID) handle = get_handle_with_name (tokid); if (handle == NULL) - handle = get_handle_for_filename (ds_value (&tokstr)); + handle = get_handle_for_filename (ds_c_str (&tokstr)); if (handle == NULL) { - char *filename = ds_value (&tokstr); + char *filename = ds_c_str (&tokstr); char *handle_name = xmalloc (strlen (filename) + 3); sprintf (handle_name, "\"%s\"", filename); handle = create_file_handle (handle_name, filename); @@ -304,7 +310,8 @@ handle_get_mode (const struct file_handle *handle) return handle->private->mode; } -/* Returns the width of a logical record on HANDLE. */ +/* Returns the width of a logical record on HANDLE. Applicable + only to MODE_BINARY files. */ size_t handle_get_record_width (const struct file_handle *handle) { @@ -312,6 +319,16 @@ handle_get_record_width (const struct file_handle *handle) return handle->private->length; } +/* Returns the number of characters per tab stop for HANDLE, or + zero if tabs are not to be expanded. Applicable only to + MODE_TEXT files. 
*/ +size_t +handle_get_tab_width (const struct file_handle *handle) +{ + assert (handle != NULL); + return handle->private->tab_width; +} + /* Local variables: mode: c diff --git a/src/file-type.c b/src/file-type.c index bc33599d..2b11a4ab 100644 --- a/src/file-type.c +++ b/src/file-type.c @@ -444,7 +444,7 @@ cmd_record_type (void) if (!lex_force_string ()) goto error; rct->v[rct->nv].c = xmalloc (fty->record.nc + 1); - st_bare_pad_copy (rct->v[rct->nv].c, ds_value (&tokstr), + st_bare_pad_copy (rct->v[rct->nv].c, ds_c_str (&tokstr), fty->record.nc + 1); } else @@ -623,9 +623,6 @@ file_type_source_read (struct case_source *source, write_case_data wc_data UNUSED) { struct file_type_pgm *fty = source->aux; - char *line; - int len; - struct fmt_spec format; dfm_push (fty->handle); @@ -633,19 +630,22 @@ file_type_source_read (struct case_source *source, format.type = fty->record.fmt; format.w = fty->record.nc; format.d = 0; - while (NULL != (line = dfm_get_record (fty->handle, &len))) + while (!dfm_eof (fty->handle)) { + struct len_string line; struct record_type *iter; union value v; int i; + dfm_expand_tabs (fty->handle); + dfm_get_record (fty->handle, &line); if (formats[fty->record.fmt].cat & FCAT_STRING) { struct data_in di; v.c = c->data[fty->record.v->fv].s; - data_in_finite_line (&di, line, len, + data_in_finite_line (&di, ls_c_str (&line), ls_length (&line), fty->record.fc, fty->record.fc + fty->record.nc); di.v = (union value *) v.c; di.flags = 0; @@ -668,7 +668,7 @@ file_type_source_read (struct case_source *source, { struct data_in di; - data_in_finite_line (&di, line, len, + data_in_finite_line (&di, ls_c_str (&line), ls_length (&line), fty->record.fc, fty->record.fc + fty->record.nc); di.v = &v; di.flags = 0; @@ -688,13 +688,13 @@ file_type_source_read (struct case_source *source, if (fty->wild) msg (SW, _("Unknown record type %g."), v.f); } - dfm_fwd_record (fty->handle); + dfm_forward_record (fty->handle); continue; found: /* Arrive here if there is a 
matching record_type, which is in iter. */ - dfm_fwd_record (fty->handle); + dfm_forward_record (fty->handle); } /* switch(fty->type) diff --git a/src/filename.c b/src/filename.c index aa92a323..a07b7d18 100644 --- a/src/filename.c +++ b/src/filename.c @@ -75,20 +75,20 @@ fn_interp_vars (const char *input, const char *(*getenv) (const char *)) if (NULL == strchr (input, '$')) return xstrdup (input); - ds_init (NULL, &output, strlen (input)); + ds_init (&output, strlen (input)); for (;;) switch (*input) { case '\0': - return ds_value (&output); + return ds_c_str (&output); case '$': input++; if (*input == '$') { - ds_putchar (&output, '$'); + ds_putc (&output, '$'); input++; } else @@ -114,18 +114,18 @@ fn_interp_vars (const char *input, const char *(*getenv) (const char *)) while (*input && *input != stop && (stop || isalpha ((unsigned char) *input))) - ds_putchar (&output, *input++); + ds_putc (&output, *input++); - value = getenv (ds_value (&output) + start); + value = getenv (ds_c_str (&output) + start); ds_truncate (&output, start); - ds_concat (&output, value); + ds_puts (&output, value); if (stop && *input == stop) input++; } default: - ds_putchar (&output, *input++); + ds_putc (&output, *input++); } } @@ -140,13 +140,13 @@ fn_tilde_expand (const char *input) if (NULL == strchr (input, '~')) return xstrdup (input); - ds_init (NULL, &output, strlen (input)); + ds_init (&output, strlen (input)); ip = input; for (ip = input; *ip; ) if (*ip != '~' || (ip != input && ip[-1] != PATH_DELIMITER)) - ds_putchar (&output, *ip++); + ds_putc (&output, *ip++); else { static const char stop_set[3] = {DIR_SEPARATOR, PATH_DELIMITER, 0}; @@ -166,23 +166,23 @@ fn_tilde_expand (const char *input) pwd = getpwnam (username); if (!pwd || !pwd->pw_dir) - ds_putchar (&output, *ip++); + ds_putc (&output, *ip++); else - ds_concat (&output, pwd->pw_dir); + ds_puts (&output, pwd->pw_dir); } else { const char *home = fn_getenv ("HOME"); if (!home) - ds_putchar (&output, *ip++); + ds_putc 
(&output, *ip++); else - ds_concat (&output, home); + ds_puts (&output, home); } ip = cp; } - return ds_value (&output); + return ds_c_str (&output); } #else /* !unix */ char * @@ -219,7 +219,7 @@ fn_search_path (const char *basename, const char *path, const char *prepend) } msg (VM (4), _("Searching for `%s'..."), basename); - ds_init (NULL, &filename, 64); + ds_init (&filename, 64); for (;;) { @@ -239,21 +239,21 @@ fn_search_path (const char *basename, const char *path, const char *prepend) ds_clear (&filename); if (prepend && !fn_absolute_p (bp)) { - ds_concat (&filename, prepend); - ds_putchar (&filename, DIR_SEPARATOR); + ds_puts (&filename, prepend); + ds_putc (&filename, DIR_SEPARATOR); } - ds_concat_buffer (&filename, bp, ep - bp); + ds_concat (&filename, bp, ep - bp); if (ep - bp - && ds_value (&filename)[ds_length (&filename) - 1] != DIR_SEPARATOR) - ds_putchar (&filename, DIR_SEPARATOR); - ds_concat (&filename, basename); + && ds_c_str (&filename)[ds_length (&filename) - 1] != DIR_SEPARATOR) + ds_putc (&filename, DIR_SEPARATOR); + ds_puts (&filename, basename); - msg (VM (5), " - %s", ds_value (&filename)); - if (fn_exists_p (ds_value (&filename))) + msg (VM (5), " - %s", ds_c_str (&filename)); + if (fn_exists_p (ds_c_str (&filename))) { - msg (VM (4), _("Found `%s'."), ds_value (&filename)); + msg (VM (4), _("Found `%s'."), ds_c_str (&filename)); free (subst_path); - return ds_value (&filename); + return ds_c_str (&filename); } if (0 == *ep) diff --git a/src/format.c b/src/format.c index 44fbff46..744cd6fe 100644 --- a/src/format.c +++ b/src/format.c @@ -49,7 +49,7 @@ parse_format_specifier_name (const char **cp, int allow_xt) char *sp, *ep; int idx; - sp = ep = ds_value (&tokstr); + sp = ep = ds_c_str (&tokstr); while (isalpha ((unsigned char) *ep)) ep++; @@ -74,7 +74,7 @@ parse_format_specifier_name (const char **cp, int allow_xt) { /* No match. 
*/ msg (SE, _("%.*s is not a valid data format."), - (int) (ep - sp), ds_value (&tokstr)); + (int) (ep - sp), ds_c_str (&tokstr)); idx = -1; } } @@ -338,7 +338,7 @@ parse_format_specifier (struct fmt_spec *input, int allow_xt) if (cp2 == cp && type != FMT_X) { msg (SE, _("Data format %s does not specify a width."), - ds_value (&tokstr)); + ds_c_str (&tokstr)); return 0; } @@ -354,7 +354,7 @@ parse_format_specifier (struct fmt_spec *input, int allow_xt) if (*cp) { - msg (SE, _("Data format %s is not valid."), ds_value (&tokstr)); + msg (SE, _("Data format %s is not valid."), ds_c_str (&tokstr)); return 0; } lex_get (); diff --git a/src/getline.c b/src/getline.c index f55cd9f9..a5b0148e 100644 --- a/src/getline.c +++ b/src/getline.c @@ -72,9 +72,9 @@ static int read_console (void); void getl_initialize (void) { - ds_create (NULL, &getl_include_path, + ds_create (&getl_include_path, fn_getenv_default ("STAT_INCLUDE_PATH", include_path)); - ds_init (NULL, &getl_buf, 256); + ds_init (&getl_buf, 256); } /* Close getline. */ @@ -109,9 +109,9 @@ void getl_add_include_dir (const char *path) { if (ds_length (&getl_include_path)) - ds_putchar (&getl_include_path, PATH_DELIMITER); + ds_putc (&getl_include_path, PATH_DELIMITER); - ds_concat (&getl_include_path, path); + ds_puts (&getl_include_path, path); } /* Adds FN to the tail end of the list of script files to execute. @@ -154,7 +154,7 @@ getl_include (const char *fn) { char *cur_dir = getl_get_current_directory (); - real_fn = fn_search_path (fn, ds_value (&getl_include_path), cur_dir); + real_fn = fn_search_path (fn, ds_c_str (&getl_include_path), cur_dir); free (cur_dir); } @@ -273,7 +273,7 @@ handle_line_buffer (void) } while (s->cur_line == NULL); - ds_concat_buffer (&getl_buf, s->cur_line->line, s->cur_line->len); + ds_concat (&getl_buf, s->cur_line->line, s->cur_line->len); /* Advance pointers. 
*/ s->cur_line = s->cur_line->next; @@ -309,7 +309,7 @@ getl_read_line (void) perform_DO_REPEAT_substitutions (); if (getl_head->print) tab_output_text (TAB_LEFT | TAT_FIX | TAT_PRINTF, "+%s", - ds_value (&getl_buf)); + ds_c_str (&getl_buf)); return 1; } @@ -326,7 +326,7 @@ getl_read_line (void) } } - if (!ds_getline (&getl_buf, s->f)) + if (!ds_gets (&getl_buf, s->f)) { if (ferror (s->f)) msg (ME, _("Reading `%s': %s."), s->fn, strerror (errno)); @@ -337,13 +337,13 @@ getl_read_line (void) ds_truncate (&getl_buf, ds_length (&getl_buf) - 1); if (get_echo()) - tab_output_text (TAB_LEFT | TAT_FIX, ds_value (&getl_buf)); + tab_output_text (TAB_LEFT | TAT_FIX, ds_c_str (&getl_buf)); getl_head->ln++; /* Allows shebang invocation: `#! /usr/local/bin/pspp'. */ - if (ds_value (&getl_buf)[0] == '#' - && ds_value (&getl_buf)[1] == '!') + if (ds_c_str (&getl_buf)[0] == '#' + && ds_c_str (&getl_buf)[1] == '!') continue; return 1; @@ -464,7 +464,7 @@ read_console (void) #endif ds_clear (&getl_buf); - ds_concat (&getl_buf, line); + ds_puts (&getl_buf, line); return 1; } @@ -477,7 +477,7 @@ read_console (void) fputs (getl_prompt ? 
get_cprompt() : get_prompt(), stdout); ds_clear (&getl_buf); - if (ds_getline (&getl_buf, stdin)) + if (ds_gets (&getl_buf, stdin)) return 1; if (ferror (stdin)) diff --git a/src/glob.c b/src/glob.c index e1d5bba0..83dd2b66 100644 --- a/src/glob.c +++ b/src/glob.c @@ -151,7 +151,7 @@ init_glob (int argc UNUSED, char **argv) last_vfm_invocation = time (NULL); /* lexer.h */ - ds_init (NULL, &tokstr, 64); + ds_init (&tokstr, 64); /* common.h */ { diff --git a/src/html.c b/src/html.c index f0b4f5fd..ea11459d 100644 --- a/src/html.c +++ b/src/html.c @@ -156,7 +156,7 @@ html_option (struct outp_driver *this, const char *key, const struct string *val break; case 1: free (x->file.filename); - x->file.filename = xstrdup (ds_value (val)); + x->file.filename = xstrdup (ds_c_str (val)); break; case string_arg: { @@ -172,7 +172,7 @@ html_option (struct outp_driver *this, const char *key, const struct string *val } if (*dest) free (*dest); - *dest = xstrdup (ds_value (val)); + *dest = xstrdup (ds_c_str (val)); } break; default: @@ -454,7 +454,7 @@ output_tab_table (struct outp_driver *this, struct tab_table *t) { fputs ("

", x->file.file); if (!ls_empty_p (t->cc)) - escape_string (x->file.file, ls_value (t->cc), ls_length (t->cc)); + escape_string (x->file.file, ls_c_str (t->cc), ls_length (t->cc)); fputs ("

\n", x->file.file); return; @@ -465,7 +465,7 @@ output_tab_table (struct outp_driver *this, struct tab_table *t) if (!ls_empty_p (&t->title)) { fprintf (x->file.file, " \n ", t->nc); - escape_string (x->file.file, ls_value (&t->title), + escape_string (x->file.file, ls_c_str (&t->title), ls_length (&t->title)); fputs ("\n \n", x->file.file); } @@ -490,7 +490,7 @@ output_tab_table (struct outp_driver *this, struct tab_table *t) cc = t->cc + c + r * t->nc; if (*ct & TAB_JOIN) { - j = (struct tab_joined_cell *) ls_value (cc); + j = (struct tab_joined_cell *) ls_c_str (cc); cc = &j->contents; if (j->x1 != c || j->y1 != r) continue; @@ -533,7 +533,7 @@ output_tab_table (struct outp_driver *this, struct tab_table *t) if ( ! (*ct & TAB_EMPTY) ) { - char *s = ls_value (cc); + char *s = ls_c_str (cc); size_t l = ls_length (cc); while (l && isspace ((unsigned char) *s)) diff --git a/src/include.c b/src/include.c index 19f5ca1b..3ea80ef0 100644 --- a/src/include.c +++ b/src/include.c @@ -40,7 +40,7 @@ cmd_include (void) lex_error (_("expecting filename")); return CMD_FAILURE; } - getl_include (ds_value (&tokstr)); + getl_include (ds_c_str (&tokstr)); lex_get (); return lex_end_of_command (); diff --git a/src/inpt-pgm.c b/src/inpt-pgm.c index 69f5b048..3d8ef88a 100644 --- a/src/inpt-pgm.c +++ b/src/inpt-pgm.c @@ -383,7 +383,7 @@ reread_trns_proc (struct trns_header * pt, struct ccase * c, struct reread_trns *t = (struct reread_trns *) pt; if (t->column == NULL) - dfm_bkwd_record (t->handle, 1); + dfm_reread_record (t->handle, 1); else { union value column; @@ -393,10 +393,10 @@ reread_trns_proc (struct trns_header * pt, struct ccase * c, { msg (SE, _("REREAD: Column numbers must be positive finite " "numbers. 
Column set to 1.")); - dfm_bkwd_record (t->handle, 1); + dfm_reread_record (t->handle, 1); } else - dfm_bkwd_record (t->handle, column.f); + dfm_reread_record (t->handle, column.f); } return -1; } diff --git a/src/lexer.c b/src/lexer.c index df48d4ec..97e65101 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -95,7 +95,7 @@ static void dump_token (void); void lex_init (void) { - ds_init (NULL, &put_tokstr, 64); + ds_init (&put_tokstr, 64); if (!lex_get_line ()) unexpected_eof (); } @@ -109,8 +109,8 @@ restore_token (void) { assert (put_token != 0); token = put_token; - ds_replace (&tokstr, ds_value (&put_tokstr)); - strncpy (tokid, ds_value (&put_tokstr), 8); + ds_replace (&tokstr, ds_c_str (&put_tokstr)); + strncpy (tokid, ds_c_str (&put_tokstr), 8); tokid[8] = 0; tokval = put_tokval; put_token = 0; @@ -122,7 +122,7 @@ static void save_token (void) { put_token = token; - ds_replace (&put_tokstr, ds_value (&tokstr)); + ds_replace (&put_tokstr, ds_c_str (&tokstr)); put_tokval = tokval; } @@ -208,7 +208,7 @@ lex_get (void) negative numbers into two tokens. */ if (*cp == '-') { - ds_putchar (&tokstr, *prog++); + ds_putc (&tokstr, *prog++); while (isspace ((unsigned char) *prog)) prog++; @@ -221,32 +221,32 @@ lex_get (void) /* Parse the number, copying it into tokstr. */ while (isdigit ((unsigned char) *prog)) - ds_putchar (&tokstr, *prog++); + ds_putc (&tokstr, *prog++); if (*prog == '.') { - ds_putchar (&tokstr, *prog++); + ds_putc (&tokstr, *prog++); while (isdigit ((unsigned char) *prog)) - ds_putchar (&tokstr, *prog++); + ds_putc (&tokstr, *prog++); } if (*prog == 'e' || *prog == 'E') { - ds_putchar (&tokstr, *prog++); + ds_putc (&tokstr, *prog++); if (*prog == '+' || *prog == '-') - ds_putchar (&tokstr, *prog++); + ds_putc (&tokstr, *prog++); while (isdigit ((unsigned char) *prog)) - ds_putchar (&tokstr, *prog++); + ds_putc (&tokstr, *prog++); } /* Parse as floating point. 
*/ - tokval = strtod (ds_value (&tokstr), &tail); + tokval = strtod (ds_c_str (&tokstr), &tail); if (*tail) { msg (SE, _("%s does not form a valid number."), - ds_value (&tokstr)); + ds_c_str (&tokstr)); tokval = 0.0; ds_clear (&tokstr); - ds_putchar (&tokstr, '0'); + ds_putc (&tokstr, '0'); } token = T_NUM; @@ -346,15 +346,15 @@ lex_get (void) } /* Copy id to tokstr. */ - ds_putchar (&tokstr, toupper ((unsigned char) *prog++)); + ds_putc (&tokstr, toupper ((unsigned char) *prog++)); while (CHAR_IS_IDN (*prog)) - ds_putchar (&tokstr, toupper ((unsigned char) *prog++)); + ds_putc (&tokstr, toupper ((unsigned char) *prog++)); /* Copy tokstr to tokid, truncating it to 8 characters. */ - strncpy (tokid, ds_value (&tokstr), 8); + strncpy (tokid, ds_c_str (&tokstr), 8); tokid[8] = 0; - token = check_id (ds_value (&tokstr), ds_length (&tokstr)); + token = check_id (ds_c_str (&tokstr), ds_length (&tokstr)); break; default: @@ -690,7 +690,7 @@ lex_put_back_id (const char *id) save_token (); token = T_ID; ds_replace (&tokstr, id); - strncpy (tokid, ds_value (&tokstr), 8); + strncpy (tokid, ds_c_str (&tokstr), 8); tokid[8] = 0; } @@ -700,7 +700,7 @@ lex_put_back_id (const char *id) const char * lex_entire_line (void) { - return ds_value (&getl_buf); + return ds_c_str (&getl_buf); } /* As lex_entire_line(), but only returns the part of the current line @@ -764,7 +764,7 @@ lex_preprocess_line (void) /* Remove C-style comments begun by slash-star and terminated by star-slash or newline. */ quote = comment = 0; - for (cp = ds_value (&getl_buf); *cp; ) + for (cp = ds_c_str (&getl_buf); *cp; ) { /* If we're not commented out, toggle quoting. */ if (!comment) @@ -805,7 +805,7 @@ lex_preprocess_line (void) /* Strip trailing whitespace and terminal dot. */ { size_t len = ds_length (&getl_buf); - char *s = ds_value (&getl_buf); + char *s = ds_c_str (&getl_buf); /* Strip trailing whitespace. 
*/ while (len > 0 && isspace ((unsigned char) s[len - 1])) @@ -830,7 +830,7 @@ lex_preprocess_line (void) as necessary. */ if (getl_interactive != 2 && getl_mode == GETL_MODE_BATCH) { - char *s = ds_value (&getl_buf); + char *s = ds_c_str (&getl_buf); if (s[0] == '+' || s[0] == '-' || s[0] == '.') s[0] = ' '; @@ -838,7 +838,7 @@ lex_preprocess_line (void) put_token = '.'; } - prog = ds_value (&getl_buf); + prog = ds_c_str (&getl_buf); } /* Token names. */ @@ -871,7 +871,7 @@ lex_token_representation (void) { case T_ID: case T_NUM: - return xstrdup (ds_value (&tokstr)); + return xstrdup (ds_c_str (&tokstr)); break; case T_STRING: @@ -879,7 +879,7 @@ lex_token_representation (void) int hexstring = 0; char *sp, *dp; - for (sp = ds_value (&tokstr); sp < ds_end (&tokstr); sp++) + for (sp = ds_c_str (&tokstr); sp < ds_end (&tokstr); sp++) if (!isprint ((unsigned char) *sp)) { hexstring = 1; @@ -894,14 +894,14 @@ lex_token_representation (void) *dp++ = '\''; if (!hexstring) - for (sp = ds_value (&tokstr); *sp; ) + for (sp = ds_c_str (&tokstr); *sp; ) { if (*sp == '\'') *dp++ = '\''; *dp++ = (unsigned char) *sp++; } else - for (sp = ds_value (&tokstr); sp < ds_end (&tokstr); sp++) + for (sp = ds_c_str (&tokstr); sp < ds_end (&tokstr); sp++) { *dp++ = (((unsigned char) *sp) >> 4)["0123456789ABCDEF"]; *dp++ = (((unsigned char) *sp) & 15)["0123456789ABCDEF"]; @@ -949,7 +949,7 @@ lex_negative_to_dash (void) { token = T_NUM; tokval = -tokval; - ds_replace (&tokstr, ds_value (&tokstr) + 1); + ds_replace (&tokstr, ds_c_str (&tokstr) + 1); save_token (); token = '-'; } @@ -1034,7 +1034,7 @@ convert_numeric_string_to_char_string (int type) "multiple of %d."), gettext (base_name), ds_length (&tokstr), cpb); - p = ds_value (&tokstr); + p = ds_c_str (&tokstr); for (i = 0; i < nb; i++) { int value; @@ -1064,7 +1064,7 @@ convert_numeric_string_to_char_string (int type) value = value * base + v; } - ds_value (&tokstr)[i] = (unsigned char) value; + ds_c_str (&tokstr)[i] = (unsigned char) 
value; } ds_truncate (&tokstr, nb); @@ -1103,7 +1103,7 @@ parse_string (int type) break; } - ds_putchar (&tokstr, *prog++); + ds_putc (&tokstr, *prog++); } prog++; @@ -1173,7 +1173,7 @@ finish: int warned = 0; for (i = 0; i < ds_length (&tokstr); i++) - if (ds_value (&tokstr)[i] == 0) + if (ds_c_str (&tokstr)[i] == 0) { if (!warned) { @@ -1181,7 +1181,7 @@ finish: "characters. Replacing with spaces.")); warned = 1; } - ds_value (&tokstr)[i] = ' '; + ds_c_str (&tokstr)[i] = ' '; } } @@ -1214,7 +1214,7 @@ dump_token (void) break; case T_STRING: - fprintf (stderr, "STRING\t\"%s\"\n", ds_value (&tokstr)); + fprintf (stderr, "STRING\t\"%s\"\n", ds_c_str (&tokstr)); break; case T_STOP: diff --git a/src/matrix-data.c b/src/matrix-data.c index 6f3d58ad..4d7c5d73 100644 --- a/src/matrix-data.c +++ b/src/matrix-data.c @@ -831,22 +831,34 @@ static const char * context (struct file_handle *data_file) { static char buf[32]; - int len; - char *p = dfm_get_record (data_file, &len); - - if (!p || !len) - strcpy (buf, "at end of line"); - else + + if (dfm_eof (data_file)) + strcpy (buf, "at end of file"); + else { - char *cp = buf; - int n_copy = min (10, len); - cp = stpcpy (buf, "before `"); - while (n_copy && isspace ((unsigned char) *p)) - p++, n_copy++; - while (n_copy && !isspace ((unsigned char) *p)) - *cp++ = *p++, n_copy--; - *cp++ = '\''; - *cp = 0; + struct len_string line; + const char *sp; + + dfm_get_record (data_file, &line); + sp = ls_c_str (&line); + while (sp < ls_end (&line) && isspace ((unsigned char) *sp)) + sp++; + if (sp >= ls_end (&line)) + strcpy (buf, "at end of line"); + else + { + char *dp; + size_t copy_cnt = 0; + + dp = stpcpy (buf, "before `"); + while (sp < ls_end (&line) && !isspace ((unsigned char) *sp) + && copy_cnt < 10) + { + *dp++ = *sp++; + copy_cnt++; + } + strcpy (dp, "'"); + } } return buf; @@ -856,68 +868,55 @@ context (struct file_handle *data_file) static int another_token (struct file_handle *data_file) { - char *cp, *ep; - int len; - 
for (;;) { - cp = dfm_get_record (data_file, &len); - if (!cp) - return 0; + struct len_string line; + const char *cp; + + if (dfm_eof (data_file)) + return 0; + dfm_get_record (data_file, &line); - ep = cp + len; - while (isspace ((unsigned char) *cp) && cp < ep) + cp = ls_c_str (&line); + while (isspace ((unsigned char) *cp) && cp < ls_end (&line)) cp++; - if (cp < ep) - break; + if (cp < ls_end (&line)) + { + dfm_forward_columns (data_file, cp - ls_c_str (&line)); + return 1; + } - dfm_fwd_record (data_file); + dfm_forward_record (data_file); } - - dfm_set_record (data_file, cp); - - return 1; } /* Parse a MATRIX DATA token from mx->data_file into TOKEN. */ static int (mget_token) (struct matrix_token *token, struct file_handle *data_file) { - char *cp, *ep; - int len; + struct len_string line; int first_column; - - for (;;) - { - cp = dfm_get_record (data_file, &len); - if (!cp) - return 0; + char *cp; - ep = cp + len; - while (isspace ((unsigned char) *cp) && cp < ep) - cp++; + if (!another_token (data_file)) + return 0; - if (cp < ep) - break; - - dfm_fwd_record (data_file); - } - - dfm_set_record (data_file, cp); - first_column = dfm_get_cur_col (data_file) + 1; + dfm_get_record (data_file, &line); + first_column = dfm_column_start (data_file); /* Three types of fields: quoted with ', quoted with ", unquoted. 
*/ + cp = ls_c_str (&line); if (*cp == '\'' || *cp == '"') { int quote = *cp; token->type = MSTR; token->string = ++cp; - while (cp < ep && *cp != quote) + while (cp < ls_end (&line) && *cp != quote) cp++; token->length = cp - token->string; - if (cp < ep) + if (cp < ls_end (&line)) cp++; else msg (SW, _("Scope of string exceeds line.")); @@ -927,7 +926,8 @@ static int int is_num = isdigit ((unsigned char) *cp) || *cp == '.'; token->string = cp++; - while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ',' + while (cp < ls_end (&line) + && !isspace ((unsigned char) *cp) && *cp != ',' && *cp != '-' && *cp != '+') { if (isdigit ((unsigned char) *cp)) @@ -963,7 +963,7 @@ static int token->type = MSTR; } - dfm_set_record (data_file, cp); + dfm_forward_columns (data_file, cp - ls_c_str (&line)); return 1; } @@ -973,24 +973,25 @@ static int static int force_eol (struct file_handle *data_file, const char *content) { - char *cp; - int len; - - cp = dfm_get_record (data_file, &len); - if (!cp) + struct len_string line; + const char *cp; + + if (dfm_eof (data_file)) return 0; - while (len && isspace (*cp)) - cp++, len--; + dfm_get_record (data_file, &line); + + cp = ls_c_str (&line); + while (isspace ((unsigned char) *cp) && cp < ls_end (&line)) + cp++; - if (len) + if (cp < ls_end (&line)) { msg (SE, _("End of line expected %s while reading %s."), context (data_file), content); return 0; } - dfm_fwd_record (data_file); - + dfm_forward_record (data_file); return 1; } diff --git a/src/mis-val.c b/src/mis-val.c index 66ceb9ce..f388583a 100644 --- a/src/mis-val.c +++ b/src/mis-val.c @@ -315,7 +315,7 @@ parse_alpha (void) msg (SE, _("String is not of proper length.")); return 0; } - strncpy (missing[miss_type].s, ds_value (&tokstr), MAX_SHORT_STRING); + strncpy (missing[miss_type].s, ds_c_str (&tokstr), MAX_SHORT_STRING); lex_get (); lex_match (','); } diff --git a/src/output.c b/src/output.c index c0c4e0b0..e33172e2 100644 --- a/src/output.c +++ b/src/output.c @@ -288,7 
+288,7 @@ outp_read_devices (void) where.line_number = 0; err_push_file_locator (&where); - ds_init (NULL, &line, 128); + ds_init (&line, 128); if (init_fn == NULL) { @@ -315,7 +315,7 @@ outp_read_devices (void) msg (ME, _("Reading %s: %s."), init_fn, strerror (errno)); break; } - for (cp = ds_value (&line); isspace ((unsigned char) *cp); cp++); + for (cp = ds_c_str (&line); isspace ((unsigned char) *cp); cp++); if (!strncmp ("define", cp, 6) && isspace ((unsigned char) cp[6])) outp_configure_macro (&cp[7]); else if (*cp) @@ -511,7 +511,7 @@ tokener (void) while (*prog && *prog != quote) { if (*prog != '\\') - ds_putchar (&op_tokstr, *prog++); + ds_putc (&op_tokstr, *prog++); else { int c; @@ -590,14 +590,14 @@ tokener (void) msg (IS, _("Syntax error in string constant.")); continue; } - ds_putchar (&op_tokstr, (unsigned char) c); + ds_putc (&op_tokstr, (unsigned char) c); } } prog++; } else while (*prog && !isspace ((unsigned char) *prog) && *prog != '=') - ds_putchar (&op_tokstr, *prog++); + ds_putc (&op_tokstr, *prog++); op_token = 'a'; } @@ -612,7 +612,7 @@ parse_options (char *s, struct outp_driver * d) prog = s; op_token = -1; - ds_init (NULL, &op_tokstr, 64); + ds_init (&op_tokstr, 64); while (tokener ()) { char key[65]; @@ -624,7 +624,7 @@ parse_options (char *s, struct outp_driver * d) } ds_truncate (&op_tokstr, 64); - strcpy (key, ds_value (&op_tokstr)); + strcpy (key, ds_c_str (&op_tokstr)); tokener (); if (op_token != '=') @@ -1150,7 +1150,7 @@ outp_get_paper_size (char *size, int *h, int *v) where.filename = pprsz_fn; where.line_number = 0; err_push_file_locator (&where); - ds_init (NULL, &line, 128); + ds_init (&line, 128); if (pprsz_fn == NULL) { @@ -1176,7 +1176,7 @@ outp_get_paper_size (char *size, int *h, int *v) msg (ME, _("Reading %s: %s."), pprsz_fn, strerror (errno)); break; } - for (cp = ds_value (&line); isspace ((unsigned char) *cp); cp++); + for (cp = ds_c_str (&line); isspace ((unsigned char) *cp); cp++); if (*cp == 0) continue; if (*cp 
!= '"') diff --git a/src/postscript.c b/src/postscript.c index 8a4c9acb..4a2fab0b 100644 --- a/src/postscript.c +++ b/src/postscript.c @@ -567,7 +567,7 @@ ps_option (struct outp_driver *this, const char *key, const struct string *val) { struct ps_driver_ext *x = this->ext; int cat, subcat; - char *value = ds_value (val); + char *value = ds_c_str (val); cat = outp_match_keyword (key, option_tab, &option_info, &subcat); @@ -898,8 +898,8 @@ output_encodings (struct outp_driver *this) struct string line, buf; - ds_init (NULL, &line, 128); - ds_init (NULL, &buf, 128); + ds_init (&line, 128); + ds_init (&buf, 128); for (pe = hsh_first (x->encodings, &iter); pe != NULL; pe = hsh_next (x->encodings, &iter)) { @@ -946,7 +946,7 @@ output_encodings (struct outp_driver *this) if (buf.length == 0) continue; - pschar = strtok_r (ds_value (&buf), " \t\r\n", &sp); + pschar = strtok_r (ds_c_str (&buf), " \t\r\n", &sp); code = strtok_r (NULL, " \t\r\n", &sp); if (*pschar == 0 || *code == 0) continue; @@ -984,14 +984,14 @@ output_encodings (struct outp_driver *this) if (ds_length (&line) + strlen (temp) > 70) { - ds_concat (&line, x->eol); - fputs (ds_value (&line), x->file.file); + ds_puts (&line, x->eol); + fputs (ds_c_str (&line), x->file.file); ds_clear (&line); } - ds_concat (&line, temp); + ds_puts (&line, temp); } - ds_concat (&line, x->eol); - fputs (ds_value (&line), x->file.file); + ds_puts (&line, x->eol); + fputs (ds_c_str (&line), x->file.file); if (fclose (f) == EOF) msg (MW, _("PostScript driver: Error closing encoding file `%s'."), @@ -1104,7 +1104,7 @@ read_ps_encodings (struct outp_driver *this) where.line_number = 0; err_push_file_locator (&where); - ds_init (NULL, &line, 128); + ds_init (&line, 128); for (;;) { @@ -2543,7 +2543,7 @@ text (struct outp_driver *this, struct outp_text *t, int draw) buf_loc = buf; assert (!ls_null_p (&t->s)); - cp = ls_value (&t->s); + cp = ls_c_str (&t->s); end = ls_end (&t->s); if (draw) { diff --git a/src/print.c b/src/print.c index 
273ab291..b3098d67 100644 --- a/src/print.c +++ b/src/print.c @@ -338,7 +338,7 @@ parse_string_argument (void) { fx.spec.type = PRT_CONST; fx.spec.fc = fx.sc - 1; - fx.spec.u.c = xstrdup (ds_value (&tokstr)); + fx.spec.u.c = xstrdup (ds_c_str (&tokstr)); lex_get (); /* Parse the included column range. */ diff --git a/src/q2c.c b/src/q2c.c index 707cf272..1c0ddb79 100644 --- a/src/q2c.c +++ b/src/q2c.c @@ -1570,7 +1570,7 @@ dump_subcommand (const subcommand *sbc) outdent (); } dump (0, "free(p->s_%s);", st_lower(sbc->name) ); - dump (0, "p->s_%s = xstrdup (ds_value (&tokstr));", + dump (0, "p->s_%s = xstrdup (ds_c_str (&tokstr));", st_lower (sbc->name)); dump (0, "lex_get ();"); if (sbc->restriction) diff --git a/src/recode.c b/src/recode.c index f2158f4f..a4abc534 100644 --- a/src/recode.c +++ b/src/recode.c @@ -388,14 +388,14 @@ cmd_recode (void) } + free (v); + v = NULL; + if (!lex_match ('/')) break; while (rcd->next) rcd = rcd->next; rcd = rcd->next = xmalloc (sizeof *rcd); - - free (v); - v = NULL; } if (token != '.') @@ -461,7 +461,7 @@ parse_dest_spec (struct rcd_var * rcd, union value * v, size_t *max_dst_width) if (toklen > max) max = toklen; v->c = xmalloc (max + 1); - st_pad_copy (v->c, ds_value (&tokstr), max + 1); + st_pad_copy (v->c, ds_c_str (&tokstr), max + 1); flags = RCD_DEST_STRING; *max_dst_width = max; lex_get (); @@ -624,7 +624,7 @@ parse_src_spec (struct rcd_var * rcd, int type, size_t max_src_width) if (!lex_force_string ()) return 0; c->f1.c = xmalloc (max_src_width + 1); - st_pad_copy (c->f1.c, ds_value (&tokstr), max_src_width + 1); + st_pad_copy (c->f1.c, ds_c_str (&tokstr), max_src_width + 1); lex_get (); } } @@ -810,7 +810,7 @@ recode_trns_proc (struct trns_header * t, struct ccase * c, c->data[v->src->fv].s, v->dest->width, v->src->width); else - memcpy (c->data[v->dest->fv].s, cp->t.c, v->dest->width); + memmove (c->data[v->dest->fv].s, cp->t.c, v->dest->width); } } diff --git a/src/repeat.c b/src/repeat.c index dad14549..e992b29f 
100644 --- a/src/repeat.c +++ b/src/repeat.c @@ -245,7 +245,7 @@ internal_cmd_do_repeat (void) command names must appear on a single line--they can't be spread out. */ { - char *cp = ds_value (&getl_buf); + char *cp = ds_c_str (&getl_buf); /* Skip leading indentors and any whitespace. */ if (*cp == '+' || *cp == '-' || *cp == '.') @@ -291,7 +291,7 @@ internal_cmd_do_repeat (void) line_buf_tail->len = ds_length (&getl_buf); line_buf_tail->line = xmalloc (ds_length (&getl_buf) + 1); memcpy (line_buf_tail->line, - ds_value (&getl_buf), ds_length (&getl_buf) + 1); + ds_c_str (&getl_buf), ds_length (&getl_buf) + 1); } } @@ -539,7 +539,7 @@ perform_DO_REPEAT_substitutions (void) /* Terminal dot. */ int dot = 0; - ds_init (NULL, &output, ds_size (&getl_buf)); + ds_init (&output, ds_capacity (&getl_buf)); /* Strip trailing whitespace, check for & remove terminal dot. */ while (ds_length (&getl_buf) > 0 @@ -551,7 +551,7 @@ perform_DO_REPEAT_substitutions (void) ds_truncate (&getl_buf, ds_length (&getl_buf) - 1); } - for (cp = ds_value (&getl_buf); cp < ds_end (&getl_buf); ) + for (cp = ds_c_str (&getl_buf); cp < ds_end (&getl_buf); ) { if (*cp == '\'' && !in_quote) in_apos ^= 1; @@ -560,7 +560,7 @@ perform_DO_REPEAT_substitutions (void) if (in_quote || in_apos || !CHAR_IS_ID1 (*cp)) { - ds_putchar (&output, *cp++); + ds_putc (&output, *cp++); continue; } @@ -580,16 +580,16 @@ perform_DO_REPEAT_substitutions (void) substitution = find_DO_REPEAT_substitution (name); if (!substitution) { - ds_concat_buffer (&output, start, cp - start); + ds_concat (&output, start, cp - start); continue; } /* Force output buffer size, copy substitution. 
*/ - ds_concat (&output, substitution); + ds_puts (&output, substitution); } } if (dot) - ds_putchar (&output, get_endcmd() ); + ds_putc (&output, get_endcmd() ); ds_destroy (&getl_buf); getl_buf = output; diff --git a/src/set.q b/src/set.q index 8d9afd61..3c1b38d5 100644 --- a/src/set.q +++ b/src/set.q @@ -158,7 +158,7 @@ static int set_ccx (const char *cc_string, struct set_cust_currency * cc, listing=custom; log=custom; lowres=lores:auto/on/off; - lpi=integer "x>0" "% must be greater than 0"; + lpi=integer "x>0" "%s must be greater than 0"; menus=menus:standard/extended; messages=messages:on/off/terminal/listing/both/none; mexpand=mexp:on/off; @@ -595,7 +595,7 @@ stc_custom_pager (struct cmd_set *cmd UNUSED) return 0; if (set_pager) free (set_pager); - set_pager = xstrdup (ds_value (&tokstr)); + set_pager = xstrdup (ds_c_str (&tokstr)); lex_get (); } return 1; @@ -776,7 +776,7 @@ stc_custom_journal (struct cmd_set *cmd UNUSED) set_journaling = 0; if (token == T_STRING) { - set_journal = xstrdup (ds_value (&tokstr)); + set_journal = xstrdup (ds_c_str (&tokstr)); lex_get (); } return 1; diff --git a/src/str.c b/src/str.c index 056ef6fc..9d2d0b3c 100644 --- a/src/str.c +++ b/src/str.c @@ -163,10 +163,10 @@ void st_bare_pad_len_copy (char *dest, const char *src, size_t n, size_t len) { if (len >= n) - memcpy (dest, src, n); + memmove (dest, src, n); else { - memcpy (dest, src, len); + memmove (dest, src, len); memset (&dest[len], ' ', n - len); } } @@ -195,30 +195,26 @@ st_pad_copy (char *dest, const char *src, size_t n) } } -/* Initializes ST inside pool POOL (which may be a null pointer) with - initial contents S. */ +/* Initializes ST with initial contents S. 
*/ void -ds_create (struct pool *pool, struct string *st, const char *s) +ds_create (struct string *st, const char *s) { - st->pool = pool; st->length = strlen (s); - st->size = 8 + st->length * 2; - st->string = pool_malloc (pool, st->size + 1); + st->capacity = 8 + st->length * 2; + st->string = xmalloc (st->capacity + 1); strcpy (st->string, s); } -/* Initializes ST inside POOL (which may be null), making room for at - least SIZE characters. */ +/* Initializes ST, making room for at least CAPACITY characters. */ void -ds_init (struct pool *pool, struct string *st, size_t size) +ds_init (struct string *st, size_t capacity) { - st->pool = pool; st->length = 0; - if (size > 8) - st->size = size; + if (capacity > 8) + st->capacity = capacity; else - st->size = 8; - st->string = pool_malloc (pool, st->size + 1); + st->capacity = 8; + st->string = xmalloc (st->capacity + 1); } /* Replaces the contents of ST with STRING. STRING may overlap with @@ -226,17 +222,28 @@ ds_init (struct pool *pool, struct string *st, size_t size) void ds_replace (struct string *st, const char *string) { - char *s = st->string; - st->string = NULL; - ds_create (st->pool, st, string); - pool_free (st->pool, s); + size_t new_length = strlen (string); + if (new_length > st->capacity) + { + /* The new length is longer than the allocated length, so + there can be no overlap. */ + st->length = 0; + ds_concat (st, string, new_length); + } + else + { + /* Overlap is possible, but the new string will fit in the + allocated space, so we can just copy data. */ + st->length = new_length; + memmove (st->string, string, st->length); + } } /* Frees ST. */ void ds_destroy (struct string *st) { - pool_free (st->pool, st->string); + free (st->string); } /* Truncates ST to zero length. 
*/ @@ -246,29 +253,45 @@ ds_clear (struct string *st) st->length = 0; } -/* Ensures that ST can hold at least MIN_SIZE characters plus a null +/* Pad ST on the right with copies of PAD until ST is at least + LENGTH characters in size. If ST is initially LENGTH + characters or longer, this is a no-op. */ +void +ds_rpad (struct string *st, size_t length, char pad) +{ + assert (st != NULL); + if (st->length < length) + { + if (st->capacity < length) + ds_extend (st, length); + memset (&st->string[st->length], pad, length - st->length); + st->length = length; + } +} + +/* Ensures that ST can hold at least MIN_CAPACITY characters plus a null terminator. */ void -ds_extend (struct string *st, size_t min_size) +ds_extend (struct string *st, size_t min_capacity) { - if (min_size > st->size) + if (min_capacity > st->capacity) { - st->size *= 2; - if (st->size < min_size) - st->size = min_size * 2; + st->capacity *= 2; + if (st->capacity < min_capacity) + st->capacity = min_capacity * 2; - st->string = pool_realloc (st->pool, st->string, st->size + 1); + st->string = xrealloc (st->string, st->capacity + 1); } } -/* Shrink ST to the minimum size need to contain its content. */ +/* Shrink ST to the minimum capacity need to contain its content. */ void ds_shrink (struct string *st) { - if (st->size != st->length) + if (st->capacity != st->length) { - st->size = st->length; - st->string = pool_realloc (st->pool, st->string, st->size + 1); + st->capacity = st->length; + st->string = xrealloc (st->string, st->capacity + 1); } } @@ -290,21 +313,21 @@ ds_length (const struct string *st) /* Returns the allocation size of ST. */ size_t -ds_size (const struct string *st) +ds_capacity (const struct string *st) { - return st->size; + return st->capacity; } /* Returns the value of ST as a null-terminated string. 
*/ char * -ds_value (const struct string *st) +ds_c_str (const struct string *st) { ((char *) st->string)[st->length] = '\0'; return st->string; } /* Returns a pointer to the null terminator ST. - This might not be an actual null character unless ds_value() has + This might not be an actual null character unless ds_c_str() has been called since the last modification to ST. */ char * ds_end (const struct string *st) @@ -314,7 +337,7 @@ ds_end (const struct string *st) /* Concatenates S onto ST. */ void -ds_concat (struct string *st, const char *s) +ds_puts (struct string *st, const char *s) { size_t s_len; @@ -328,7 +351,7 @@ ds_concat (struct string *st, const char *s) /* Concatenates LEN characters from BUF onto ST. */ void -ds_concat_buffer (struct string *st, const char *buf, size_t len) +ds_concat (struct string *st, const char *buf, size_t len) { ds_extend (st, st->length + len); memcpy (st->string + st->length, buf, len); @@ -361,7 +384,7 @@ ds_vprintf (struct string *st, const char *format, va_list args) int avail, needed; - avail = st->size - st->length + 1; + avail = st->capacity - st->length + 1; needed = vsnprintf (st->string + st->length, avail, format, args); @@ -374,8 +397,8 @@ ds_vprintf (struct string *st, const char *format, va_list args) else while (needed == -1) { - ds_extend (st, (st->size + 1) * 2); - avail = st->size - st->length + 1; + ds_extend (st, (st->capacity + 1) * 2); + avail = st->capacity - st->length + 1; needed = vsnprintf (st->string + st->length, avail, format, args); @@ -386,21 +409,21 @@ ds_vprintf (struct string *st, const char *format, va_list args) /* Appends character CH to ST. */ void -ds_putchar (struct string *st, int ch) +ds_putc (struct string *st, int ch) { - if (st->length == st->size) + if (st->length == st->capacity) ds_extend (st, st->length + 1); st->string[st->length++] = ch; } -/* Reads a newline-terminated line from STREAM into ST. +/* Appends to ST a newline-terminated line read from STREAM. 
Newline is the last character of ST on return, unless an I/O error or end of file is encountered after reading some characters. Returns 1 if a line is successfully read, or 0 if no characters at all were read before an I/O error or end of file was encountered. */ int -ds_getline (struct string *st, FILE *stream) +ds_gets (struct string *st, FILE *stream) { int c; @@ -410,7 +433,7 @@ ds_getline (struct string *st, FILE *stream) for (;;) { - ds_putchar (st, c); + ds_putc (st, c); if (c == '\n') return 1; @@ -438,7 +461,7 @@ ds_get_config_line (FILE *stream, struct string *st, struct file_locator *where) /* Read the first line. */ ds_clear (st); where->line_number++; - if (!ds_getline (st, stream)) + if (!ds_gets (st, stream)) return 0; /* Read additional lines, if any. */ @@ -446,7 +469,7 @@ ds_get_config_line (FILE *stream, struct string *st, struct file_locator *where) { /* Remove trailing whitespace. */ { - char *s = ds_value (st); + char *s = ds_c_str (st); size_t len = ds_length (st); while (len > 0 && isspace ((unsigned char) s[len - 1])) @@ -455,13 +478,13 @@ ds_get_config_line (FILE *stream, struct string *st, struct file_locator *where) } /* Check for trailing \. Remove if found, bail otherwise. */ - if (ds_length (st) == 0 || ds_value (st)[ds_length (st) - 1] != '\\') + if (ds_length (st) == 0 || ds_c_str (st)[ds_length (st) - 1] != '\\') break; ds_truncate (st, ds_length (st) - 1); /* Append another line and go around again. 
*/ { - int success = ds_getline (st, stream); + int success = ds_gets (st, stream); where->line_number++; if (!success) return 1; @@ -473,7 +496,7 @@ ds_get_config_line (FILE *stream, struct string *st, struct file_locator *where) char *cp; int quote = 0; - for (cp = ds_value (st); *cp; cp++) + for (cp = ds_c_str (st); *cp; cp++) if (quote) { if (*cp == quote) @@ -485,7 +508,7 @@ ds_get_config_line (FILE *stream, struct string *st, struct file_locator *where) quote = *cp; else if (*cp == '#') { - ds_truncate (st, cp - ds_value (st)); + ds_truncate (st, cp - ds_c_str (st)); break; } } @@ -495,24 +518,24 @@ ds_get_config_line (FILE *stream, struct string *st, struct file_locator *where) /* Lengthed strings. */ -/* Creates a new lengthed string LS in POOL with contents as a copy of +/* Creates a new lengthed string LS with contents as a copy of S. */ void -ls_create (struct pool *pool, struct len_string *ls, const char *s) +ls_create (struct len_string *ls, const char *s) { ls->length = strlen (s); - ls->string = pool_alloc (pool, ls->length + 1); + ls->string = xmalloc (ls->length + 1); memcpy (ls->string, s, ls->length + 1); } -/* Creates a new lengthed string LS in POOL with contents as a copy of +/* Creates a new lengthed string LS with contents as a copy of BUFFER with length LEN. */ void -ls_create_buffer (struct pool *pool, struct len_string *ls, +ls_create_buffer (struct len_string *ls, const char *buffer, size_t len) { ls->length = len; - ls->string = pool_malloc (pool, len + 1); + ls->string = xmalloc (len + 1); memcpy (ls->string, buffer, len); ls->string[len] = '\0'; } @@ -532,11 +555,11 @@ ls_shallow_copy (struct len_string *dst, const struct len_string *src) *dst = *src; } -/* Frees the memory in POOL backing LS. */ +/* Frees the memory backing LS. */ void -ls_destroy (struct pool *pool, struct len_string *ls) +ls_destroy (struct len_string *ls) { - pool_free (pool, ls->string); + free (ls->string); } /* Sets LS to a null pointer value. 
*/ @@ -569,7 +592,7 @@ ls_length (const struct len_string *ls) /* Returns a pointer to the character string in LS. */ char * -ls_value (const struct len_string *ls) +ls_c_str (const struct len_string *ls) { return (char *) ls->string; } diff --git a/src/str.h b/src/str.h index 3520e280..b1a44e22 100644 --- a/src/str.h +++ b/src/str.h @@ -134,61 +134,90 @@ struct len_string size_t length; }; -struct pool; -void ls_create (struct pool *, struct len_string *, const char *); -void ls_create_buffer (struct pool *, struct len_string *, +void ls_create (struct len_string *, const char *); +void ls_create_buffer (struct len_string *, const char *, size_t len); void ls_init (struct len_string *, const char *, size_t); void ls_shallow_copy (struct len_string *, const struct len_string *); -void ls_destroy (struct pool *, struct len_string *); +void ls_destroy (struct len_string *); void ls_null (struct len_string *); int ls_null_p (const struct len_string *); int ls_empty_p (const struct len_string *); size_t ls_length (const struct len_string *); -char *ls_value (const struct len_string *); +char *ls_c_str (const struct len_string *); char *ls_end (const struct len_string *); + +#if __GNUC__ > 1 +extern inline size_t +ls_length (const struct len_string *st) +{ + return st->length; +} + +extern inline char * +ls_c_str (const struct len_string *st) +{ + return st->string; +} + +extern inline char * +ls_end (const struct len_string *st) +{ + return st->string + st->length; +} +#endif /* Dynamic strings. */ struct string { - struct pool *pool; - size_t length; - size_t size; - char *string; + size_t length; /* Length, not including a null terminator. */ + size_t capacity; /* Allocated capacity, not including one + extra byte allocated for null terminator. */ + char *string; /* String data, not necessarily null + terminated. 
*/ }; -void ds_create (struct pool *, struct string *, const char *); -void ds_init (struct pool *, struct string *, size_t size); -void ds_replace (struct string *, const char *); +/* Constructors, destructors. */ +void ds_create (struct string *, const char *); +void ds_init (struct string *, size_t); void ds_destroy (struct string *); + +/* Copy, shrink, extend. */ +void ds_replace (struct string *, const char *); void ds_clear (struct string *); -void ds_extend (struct string *, size_t min_size); +void ds_extend (struct string *, size_t); void ds_shrink (struct string *); -void ds_truncate (struct string *, size_t length); +void ds_truncate (struct string *, size_t); +void ds_rpad (struct string *, size_t length, char pad); +/* Inspectors. */ size_t ds_length (const struct string *); -char *ds_value (const struct string *); +char *ds_c_str (const struct string *); +char *ds_data (const struct string *); char *ds_end (const struct string *); -size_t ds_size (const struct string *); +size_t ds_capacity (const struct string *); +/* File input. */ struct file_locator; -int ds_getline (struct string *st, FILE *stream); +int ds_gets (struct string *, FILE *); int ds_get_config_line (FILE *, struct string *, struct file_locator *); -void ds_putchar (struct string *, int ch); -void ds_concat (struct string *, const char *); -void ds_concat_buffer (struct string *, const char *buf, size_t len); -void ds_vprintf (struct string *st, const char *format, va_list args); + +/* Append. */ +void ds_putc (struct string *, int ch); +void ds_puts (struct string *, const char *); +void ds_concat (struct string *, const char *, size_t); +void ds_vprintf (struct string *st, const char *, va_list); void ds_printf (struct string *, const char *, ...) 
PRINTF_FORMAT (2, 3); #if __GNUC__ > 1 extern inline void -ds_putchar (struct string *st, int ch) +ds_putc (struct string *st, int ch) { - if (st->length == st->size) + if (st->length == st->capacity) ds_extend (st, st->length + 1); st->string[st->length++] = ch; } @@ -200,12 +229,18 @@ ds_length (const struct string *st) } extern inline char * -ds_value (const struct string *st) +ds_c_str (const struct string *st) { ((char *) st->string)[st->length] = '\0'; return st->string; } +extern inline char * +ds_data (const struct string *st) +{ + return st->string; +} + extern inline char * ds_end (const struct string *st) { diff --git a/src/t-test.q b/src/t-test.q index 4c4135ba..376477c3 100644 --- a/src/t-test.q +++ b/src/t-test.q @@ -532,7 +532,7 @@ parse_value (union value * v, int type ) { if (!lex_force_string ()) return 0; - strncpy (v->s, ds_value (&tokstr), ds_length (&tokstr)); + strncpy (v->s, ds_c_str (&tokstr), ds_length (&tokstr)); } lex_get (); diff --git a/src/tab.c b/src/tab.c index 82df4cd2..084c5873 100644 --- a/src/tab.c +++ b/src/tab.c @@ -433,7 +433,8 @@ text_format (struct tab_table *table, int opt, const char *text, va_list args, else len = strlen (text); - ls_create_buffer (table->container, s, text, len); + ls_create_buffer (s, text, len); + pool_register (table->container, free, s->string); if (opt & TAT_PRINTF) local_free (text); @@ -1162,7 +1163,7 @@ tabi_title (int x, int y) cp = stpcpy (cp, ". 
"); if (!ls_empty_p (&t->title)) { - memcpy (cp, ls_value (&t->title), ls_length (&t->title)); + memcpy (cp, ls_c_str (&t->title), ls_length (&t->title)); cp += ls_length (&t->title); } *cp = 0; @@ -1337,7 +1338,7 @@ render_strip (int x, int y, int r, int c1, int c2, int r1 UNUSED, int r2) } } else { struct tab_joined_cell *j = - (struct tab_joined_cell *) ls_value (&t->cc[index]); + (struct tab_joined_cell *) ls_c_str (&t->cc[index]); if (j->hit != tab_hit) { diff --git a/src/title.c b/src/title.c index cb56f84d..e0191acc 100644 --- a/src/title.c +++ b/src/title.c @@ -60,7 +60,7 @@ get_title (const char *cmd, char **title) return CMD_FAILURE; if (*title) free (*title); - *title = xstrdup (ds_value (&tokstr)); + *title = xstrdup (ds_c_str (&tokstr)); lex_get (); if (token != '.') { diff --git a/src/val-labs.c b/src/val-labs.c index 9d05e77f..1af46a2c 100644 --- a/src/val-labs.c +++ b/src/val-labs.c @@ -151,7 +151,7 @@ get_label (struct variable **vars, int var_cnt) lex_error (_("expecting string")); return 0; } - st_bare_pad_copy (value.s, ds_value (&tokstr), MAX_SHORT_STRING); + st_bare_pad_copy (value.s, ds_c_str (&tokstr), MAX_SHORT_STRING); } else { @@ -174,7 +174,7 @@ get_label (struct variable **vars, int var_cnt) msg (SW, _("Truncating value label to 60 characters.")); ds_truncate (&tokstr, 60); } - label = ds_value (&tokstr); + label = ds_c_str (&tokstr); for (i = 0; i < var_cnt; i++) val_labs_replace (vars[i]->val_labs, value, label); diff --git a/src/var-labs.c b/src/var-labs.c index 48ef56a0..a62ec95e 100644 --- a/src/var-labs.c +++ b/src/var-labs.c @@ -61,7 +61,7 @@ cmd_variable_labels (void) { if (v[i]->label) free (v[i]->label); - v[i]->label = xstrdup (ds_value (&tokstr)); + v[i]->label = xstrdup (ds_c_str (&tokstr)); } lex_get (); diff --git a/tests/ChangeLog b/tests/ChangeLog index 22053d6a..cf91b148 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,11 @@ +Sun May 30 19:18:26 2004 Ben Pfaff + + * command/tabs.sh: Default tab width is 
now 4. + + * command/data-list.sh: New test. + + * Makefile.am: (TESTS) Add command/data-list.sh. + Sun Apr 11 14:21:16 2004 Ben Pfaff * stats/moments.sh: Now that our one-pass moments algorithm is diff --git a/tests/Makefile.am b/tests/Makefile.am index 8020127f..ac622c1b 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -7,6 +7,7 @@ TESTS = \ command/beg-data.sh \ command/bignum.sh \ command/count.sh \ + command/data-list.sh \ command/erase.sh \ command/file-label.sh \ command/filter.sh \ diff --git a/tests/command/data-list.sh b/tests/command/data-list.sh new file mode 100755 index 00000000..814a9d22 --- /dev/null +++ b/tests/command/data-list.sh @@ -0,0 +1,128 @@ +#!/bin/sh + +# This program tests the DATA LIST input program. + +TEMPDIR=/tmp/pspp-tst-$$ + +here=`pwd`; + +# ensure that top_srcdir is absolute +cd $top_srcdir; top_srcdir=`pwd` + +export STAT_CONFIG_PATH=$top_srcdir/config + + +cleanup() +{ + rm -rf $TEMPDIR +} + + +fail() +{ + echo $activity + echo FAILED + cleanup; + exit 1; +} + + +no_result() +{ + echo $activity + echo NO RESULT; + cleanup; + exit 2; +} + +pass() +{ + cleanup; + exit 0; +} + +mkdir -p $TEMPDIR + +cd $TEMPDIR + +# Create command file. +activity="create program" +cat > $TEMPDIR/data-list.stat << EOF +data list free/A B C D. +begin data. +,1,2,3 +,4,,5 +6 +7, +8 9 +0,1,,, +,,,, +2 + +3 +4 +5 +end data. +list. + +data list free (tab)/A B C D. +begin data. +1 2 3 4 +1 2 3 +1 2 4 +1 2 +1 3 4 +1 3 +1 4 +1 + 2 3 4 + 2 3 + 2 4 + 2 + 3 4 + 3 + 4 + +end data. +list. +EOF +if [ $? -ne 0 ] ; then no_result ; fi + + +activity="run program" +$SUPERVISOR $here/../src/pspp --testing-mode -o raw-ascii --testing-mode $TEMPDIR/data-list.stat # > $TEMPDIR/errs +if [ $? -ne 0 ] ; then fail ; fi + +activity="compare output" +diff -b -B $TEMPDIR/pspp.list - << EOF + A B C D +-------- -------- -------- -------- + . 1.00 2.00 3.00 + . 4.00 . 5.00 + 6.00 7.00 8.00 9.00 + .00 1.00 . . + . . . . 
+ 2.00 3.00 4.00 5.00 + + A B C D +-------- -------- -------- -------- + 1.00 2.00 3.00 4.00 + 1.00 2.00 3.00 . + 1.00 2.00 . 4.00 + 1.00 2.00 . . + 1.00 . 3.00 4.00 + 1.00 . 3.00 . + 1.00 . . 4.00 + 1.00 . . . + . 2.00 3.00 4.00 + . 2.00 3.00 . + . 2.00 . 4.00 + . 2.00 . . + . . 3.00 4.00 + . . 3.00 . + . . . 4.00 + . . . . +EOF +if [ $? -ne 0 ] ; then fail ; fi + +pass; diff --git a/tests/command/tabs.sh b/tests/command/tabs.sh index 9d57c008..05178199 100755 --- a/tests/command/tabs.sh +++ b/tests/command/tabs.sh @@ -53,7 +53,7 @@ EOF if [ $? -ne 0 ] ; then no_result ; fi activity="create program 2" -printf "\t1\t12\t123\t1234\t12345\t123456\t\t1234567\t12345678\tasdf\tjkl\n" >> $TEMPDIR/tabs.stat +printf "\t1\t12\t123\t1234\t12345\n" >> $TEMPDIR/tabs.stat if [ $? -ne 0 ] ; then no_result ; fi @@ -78,7 +78,7 @@ diff -B -b $TEMPDIR/pspp.list - << EOF #========#======#=======#======# |X | 1| 1- 80|A80 | +--------+------+-------+------+ - 1 12 123 1234 12345 123456 1234567 12345678 + 1 12 123 1234 12345 EOF if [ $? -ne 0 ] ; then fail ; fi -- 2.30.2