From b74d09af5e07f954c18e7cdb8aca3af47fa10208 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 9 Jun 2006 22:51:23 +0000 Subject: [PATCH] Reform string library. --- src/data/ChangeLog | 7 + src/data/data-in.c | 12 +- src/data/file-name.c | 184 ++--- src/data/file-name.h | 6 +- src/data/por-file-reader.c | 10 +- src/data/sys-file-reader.c | 6 +- src/data/sys-file-writer.c | 14 +- src/language/command.c | 16 +- src/language/control/repeat.c | 24 +- src/language/data-io/ChangeLog | 26 + src/language/data-io/data-list.c | 205 ++---- src/language/data-io/data-reader.c | 90 ++- src/language/data-io/data-reader.h | 8 +- src/language/data-io/file-handle.q | 4 +- src/language/data-io/list.q | 28 +- src/language/data-io/matrix-data.c | 143 ++-- src/language/data-io/print.c | 2 +- src/language/dictionary/value-labels.c | 4 +- src/language/dictionary/variable-label.c | 2 +- src/language/expressions/ChangeLog | 7 + src/language/expressions/evaluate.c | 12 +- src/language/expressions/generate.pl | 2 +- src/language/expressions/helpers.c | 12 +- src/language/expressions/helpers.h | 10 +- src/language/expressions/operations.def | 24 +- src/language/expressions/optimize.c | 12 +- src/language/expressions/parse.c | 48 +- src/language/expressions/private.h | 8 +- src/language/lexer/ChangeLog | 7 + src/language/lexer/format-parser.c | 8 +- src/language/lexer/lexer.c | 67 +- src/language/lexer/q2c.c | 2 +- src/language/line-buffer.c | 18 +- src/language/stats/aggregate.c | 4 +- src/language/stats/crosstabs.q | 4 +- src/language/stats/t-test.q | 2 +- src/language/utilities/echo.c | 2 +- src/language/utilities/include.c | 2 +- src/language/utilities/permissions.c | 2 +- src/language/utilities/title.c | 2 +- src/language/xforms/count.c | 2 +- src/libpspp/ChangeLog | 114 +++ src/libpspp/str.c | 900 +++++++++++++++-------- src/libpspp/str.h | 200 ++--- src/output/ChangeLog | 21 + src/output/afm.c | 40 +- src/output/ascii.c | 38 +- src/output/html.c | 27 +- src/output/htmlP.h | 2 +- 
src/output/output.c | 215 +++--- src/output/output.h | 6 +- src/output/postscript.c | 34 +- src/output/table.c | 58 +- src/output/table.h | 8 +- src/ui/terminal/msg-ui.c | 16 +- src/ui/terminal/read-line.c | 4 +- 56 files changed, 1544 insertions(+), 1187 deletions(-) diff --git a/src/data/ChangeLog b/src/data/ChangeLog index 6538ebf4..238601c6 100644 --- a/src/data/ChangeLog +++ b/src/data/ChangeLog @@ -1,3 +1,10 @@ +Fri Jun 9 12:20:09 2006 Ben Pfaff + + Reform string library. + + * file-name.c (fn_interp_vars): Change interface to take a + substring as input. Updated all users. + Fri Jun 9 12:11:24 2006 Ben Pfaff * format.c (measure_is_valid): Really return false when m >= diff --git a/src/data/data-in.c b/src/data/data-in.c index 4e2e8fcb..462f8557 100644 --- a/src/data/data-in.c +++ b/src/data/data-in.c @@ -54,18 +54,18 @@ vdls_error (const struct data_in *i, const char *format, va_list args) if (i->flags & DI_IGNORE_ERROR) return; - ds_init (&text); + ds_init_empty (&text); if (i->f1 == i->f2) - ds_printf (&text, _("(column %d"), i->f1); + ds_put_format (&text, _("(column %d"), i->f1); else - ds_printf (&text, _("(columns %d-%d"), i->f1, i->f2); - ds_printf (&text, _(", field type %s) "), fmt_to_string (&i->format)); - ds_vprintf (&text, format, args); + ds_put_format (&text, _("(columns %d-%d"), i->f1, i->f2); + ds_put_format (&text, _(", field type %s) "), fmt_to_string (&i->format)); + ds_put_vformat (&text, format, args); m.category = MSG_DATA; m.severity = MSG_ERROR; msg_location (&m.where); - m.text = ds_c_str (&text); + m.text = ds_cstr (&text); msg_emit (&m); } diff --git a/src/data/file-name.c b/src/data/file-name.c index ea950020..8211e55d 100644 --- a/src/data/file-name.c +++ b/src/data/file-name.c @@ -27,6 +27,7 @@ #include #include "intprops.h" +#include "minmax.h" #include "settings.h" #include "xreadlink.h" @@ -73,90 +74,49 @@ fn_init (void) /* Functions for performing operations on file names. 
*/ -/* Substitutes $variables as defined by GETENV into TARGET. - TARGET must be a string containing the text for which substitution - is required. - Supports $var and ${var} syntaxes; - $$ substitutes as $. -*/ +/* Substitutes $variables in SRC, putting the result in DST, + properly handling the case where SRC is a substring of DST. + Variables are as defined by GETENV. Supports $var and ${var} + syntaxes; $$ substitutes as $. */ void -fn_interp_vars (struct string *target, - const char *(*getenv) (const char *)) +fn_interp_vars (struct substring src, const char *(*getenv) (const char *), + struct string *dst_) { - char *input ; - char *s ; - - assert (target); - - input = xmalloc(ds_length(target) + 1); - s = input; - - strcpy(input, ds_c_str(target)); - - if (NULL == strchr (ds_c_str(target), '$')) - goto done; - - ds_clear(target); - - for (;;) - { - switch (*s) - { - case '\0': - goto done ; - - case '$': - s++; - - if (*s == '$') - { - ds_putc (target, '$'); - s++; - } - else - { - int stop; - int start; - const char *value; - - start = ds_length (target); - - if (*s == '(') - { - stop = ')'; - s++; - } - else if (*s == '{') - { - stop = '}'; - s++; - } - else - stop = 0; - - while (*s && *s != stop - && (stop || isalpha ((unsigned char) *s))) - { - ds_putc (target, *s++); - } - - value = getenv (ds_c_str (target) + start); - ds_truncate (target, start); - ds_puts (target, value); - - if (stop && *s == stop) - s++; - } - break; - - default: - ds_putc (target, *s++); - } - } - - done: - free(input); - return; + struct string dst = DS_EMPTY_INITIALIZER; + int c; + + while ((c = ss_get_char (&src)) != EOF) + if (c != '$') + ds_put_char (&dst, c); + else + { + if (ss_match_char (&src, '$') || ss_is_empty (src)) + ds_put_char (&dst, '$'); + else + { + struct substring var_name; + size_t start; + const char *value; + + if (ss_match_char (&src, '(')) + ss_get_until (&src, ')', &var_name); + else if (ss_match_char (&src, '{')) + ss_get_until (&src, '}', &var_name); + 
else + ss_get_chars (&src, MAX (1, ss_span (src, ss_cstr (CC_ALNUM))), + &var_name); + + start = ds_length (&dst); + ds_put_substring (&dst, var_name); + value = getenv (ds_cstr (&dst) + start); + ds_truncate (&dst, start); + + ds_put_cstr (&dst, value); + } + } + + ds_swap (&dst, dst_); + ds_destroy (&dst); } #ifdef unix @@ -165,7 +125,7 @@ fn_interp_vars (struct string *target, char * fn_tilde_expand (const char *input) { - struct string output = DS_INITIALIZER; + struct string output = DS_EMPTY_INITIALIZER; if (input[0] == '~') { const char *home = NULL; @@ -177,11 +137,13 @@ fn_tilde_expand (const char *input) } else { - struct string user_name = DS_INITIALIZER; + struct string user_name = DS_EMPTY_INITIALIZER; struct passwd *pwd; - ds_assign_buffer (&user_name, input + 1, strcspn (input + 1, "/")); - pwd = getpwnam (ds_c_str (&user_name)); + ds_assign_substring (&user_name, + ss_buffer (input + 1, + strcspn (input + 1, "/"))); + pwd = getpwnam (ds_cstr (&user_name)); if (pwd != NULL && pwd->pw_dir[0] != '\0') { home = pwd->pw_dir; @@ -192,14 +154,14 @@ fn_tilde_expand (const char *input) if (home != NULL) { - ds_puts (&output, home); + ds_put_cstr (&output, home); if (*remainder != '\0') - ds_puts (&output, remainder); + ds_put_cstr (&output, remainder); } } if (ds_is_empty (&output)) - ds_puts (&output, input); - return ds_c_str (&output); + ds_put_cstr (&output, input); + return ds_cstr (&output); } #else /* !unix */ char * @@ -222,55 +184,57 @@ char * fn_search_path (const char *base_name, const char *path_, const char *prefix) { struct string path; - struct string dir = DS_INITIALIZER; - struct string file = DS_INITIALIZER; + struct substring dir_; + struct string file = DS_EMPTY_INITIALIZER; size_t save_idx = 0; if (fn_is_absolute (base_name)) return fn_tilde_expand (base_name); /* Interpolate environment variables. 
*/ - ds_create (&path, path_); - fn_interp_vars (&path, fn_getenv); + ds_init_cstr (&path, path_); + fn_interp_vars (ds_ss (&path), fn_getenv, &path); verbose_msg (2, _("searching for \"%s\" in path \"%s\""), - base_name, ds_c_str (&path)); - while (ds_separate (&path, &dir, ":", &save_idx)) + base_name, ds_cstr (&path)); + while (ds_separate (&path, ss_cstr (":"), &save_idx, &dir_)) { + struct string dir; + /* Do tilde expansion. */ + ds_init_substring (&dir, dir_); if (ds_first (&dir) == '~') { - char *tmp_str = fn_tilde_expand (ds_c_str (&dir)); - ds_assign_c_str (&dir, tmp_str); + char *tmp_str = fn_tilde_expand (ds_cstr (&dir)); + ds_assign_cstr (&dir, tmp_str); free (tmp_str); } /* Construct file name. */ ds_clear (&file); - if (prefix != NULL && !fn_is_absolute (ds_c_str (&dir))) + if (prefix != NULL && !fn_is_absolute (ds_cstr (&dir))) { - ds_puts (&file, prefix); - ds_putc (&file, '/'); + ds_put_cstr (&file, prefix); + ds_put_char (&file, '/'); } - ds_puts (&file, ds_c_str (&dir)); - if (ds_length (&dir) && ds_last (&file) != '/') - ds_putc (&file, '/'); - ds_puts (&file, base_name); + ds_put_cstr (&file, ds_cstr (&dir)); + if (!ds_is_empty (&file) && ds_last (&file) != '/') + ds_put_char (&file, '/'); + ds_put_cstr (&file, base_name); + ds_destroy (&dir); /* Check whether file exists. */ - if (fn_exists (ds_c_str (&file))) + if (fn_exists (ds_cstr (&file))) { - verbose_msg (2, _("...found \"%s\""), ds_c_str (&file)); + verbose_msg (2, _("...found \"%s\""), ds_cstr (&file)); ds_destroy (&path); - ds_destroy (&dir); - return ds_c_str (&file); + return ds_cstr (&file); } } /* Failure. 
*/ verbose_msg (2, _("...not found")); ds_destroy (&path); - ds_destroy (&dir); ds_destroy (&file); return NULL; } diff --git a/src/data/file-name.h b/src/data/file-name.h index 427fa8dc..f38d0f3e 100644 --- a/src/data/file-name.h +++ b/src/data/file-name.h @@ -28,8 +28,10 @@ extern const char *config_path; void fn_init (void); struct string; -void fn_interp_vars (struct string *target, - const char *(*getenv) (const char *)); +struct substring; +void fn_interp_vars (struct substring src, + const char *(*getenv) (const char *), + struct string *dst); char *fn_tilde_expand (const char *fn); char *fn_search_path (const char *base_name, const char *path, const char *prefix); diff --git a/src/data/por-file-reader.c b/src/data/por-file-reader.c index 377241ef..fa3af602 100644 --- a/src/data/por-file-reader.c +++ b/src/data/por-file-reader.c @@ -89,18 +89,18 @@ error (struct pfm_reader *r, const char *msg, ...) struct string text; va_list args; - ds_init (&text); - ds_printf (&text, _("portable file %s corrupt at offset %ld: "), - fh_get_file_name (r->fh), ftell (r->file)); + ds_init_empty (&text); + ds_put_format (&text, _("portable file %s corrupt at offset %ld: "), + fh_get_file_name (r->fh), ftell (r->file)); va_start (args, msg); - ds_vprintf (&text, msg, args); + ds_put_vformat (&text, msg, args); va_end (args); m.category = MSG_GENERAL; m.severity = MSG_ERROR; m.where.file_name = NULL; m.where.line_number = 0; - m.text = ds_c_str (&text); + m.text = ds_cstr (&text); msg_emit (&m); diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 296d53b8..98e1a062 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -134,16 +134,16 @@ corrupt_msg (int class, const char *format,...) 
va_list args; struct string text; - ds_create (&text, _("corrupt system file: ")); + ds_init_cstr (&text, _("corrupt system file: ")); va_start (args, format); - ds_vprintf (&text, format, args); + ds_put_vformat (&text, format, args); va_end (args); m.category = msg_class_to_category (class); m.severity = msg_class_to_severity (class); m.where.file_name = NULL; m.where.line_number = 0; - m.text = ds_c_str (&text); + m.text = ds_cstr (&text); msg_emit (&m); } diff --git a/src/data/sys-file-writer.c b/src/data/sys-file-writer.c index dda351b9..2639c358 100644 --- a/src/data/sys-file-writer.c +++ b/src/data/sys-file-writer.c @@ -686,7 +686,7 @@ write_vls_length_table (struct sfm_writer *w, struct string vls_length_map; - ds_init (&vls_length_map); + ds_init_empty (&vls_length_map); vls_hdr.rec_type = 7; vls_hdr.subtype = 14; @@ -700,9 +700,9 @@ write_vls_length_table (struct sfm_writer *w, if ( v->width <= MAX_LONG_STRING ) continue; - ds_printf (&vls_length_map, "%s=%05d", v->short_name, v->width); - ds_putc (&vls_length_map, '\0'); - ds_putc (&vls_length_map, '\t'); + ds_put_format (&vls_length_map, "%s=%05d", v->short_name, v->width); + ds_put_char (&vls_length_map, '\0'); + ds_put_char (&vls_length_map, '\t'); } vls_hdr.n_elem = ds_length (&vls_length_map); @@ -732,14 +732,14 @@ write_longvar_table (struct sfm_writer *w, const struct dictionary *dict) struct string long_name_map; size_t i; - ds_init (&long_name_map); + ds_init_empty (&long_name_map); for (i = 0; i < dict_get_var_cnt (dict); i++) { struct variable *v = dict_get_var (dict, i); if (i) - ds_putc (&long_name_map, '\t'); - ds_printf (&long_name_map, "%s=%s", v->short_name, v->name); + ds_put_char (&long_name_map, '\t'); + ds_put_format (&long_name_map, "%s=%s", v->short_name, v->name); } lv_hdr.rec_type = 7; diff --git a/src/language/command.c b/src/language/command.c index 7b903ab1..ed043d21 100644 --- a/src/language/command.c +++ b/src/language/command.c @@ -430,15 +430,15 @@ unknown_command_error 
(char *const words[], size_t word_cnt) struct string s; size_t i; - ds_init (&s); + ds_init_empty (&s); for (i = 0; i < word_cnt; i++) { if (i != 0) - ds_putc (&s, ' '); - ds_puts (&s, words[i]); + ds_put_char (&s, ' '); + ds_put_cstr (&s, words[i]); } - msg (SE, _("Unknown command %s."), ds_c_str (&s)); + msg (SE, _("Unknown command %s."), ds_cstr (&s)); ds_destroy (&s); } @@ -467,7 +467,7 @@ parse_command_name (void) assert (word_cnt < sizeof words / sizeof *words); if (token == T_ID) { - words[word_cnt] = xstrdup (ds_c_str (&tokstr)); + words[word_cnt] = ds_xstrdup (&tokstr); str_uppercase (words[word_cnt]); } else if (token == '-') @@ -686,10 +686,10 @@ cmd_erase (void) if (!lex_force_string ()) return CMD_FAILURE; - if (remove (ds_c_str (&tokstr)) == -1) + if (remove (ds_cstr (&tokstr)) == -1) { msg (SW, _("Error removing `%s': %s."), - ds_c_str (&tokstr), strerror (errno)); + ds_cstr (&tokstr), strerror (errno)); return CMD_FAILURE; } @@ -767,7 +767,7 @@ run_command (void) lex_get (); if (!lex_force_string ()) return CMD_FAILURE; - cmd = ds_c_str (&tokstr); + cmd = ds_cstr (&tokstr); string = 1; } else diff --git a/src/language/control/repeat.c b/src/language/control/repeat.c index b5efdc99..d2d894a9 100644 --- a/src/language/control/repeat.c +++ b/src/language/control/repeat.c @@ -288,11 +288,11 @@ parse_lines (struct repeat_block *block) || !strcmp (cur_file_name, previous_file_name)) previous_file_name = pool_strdup (block->pool, cur_file_name); - ds_rtrim_spaces (&getl_buf); + ds_rtrim (&getl_buf, ss_cstr (CC_SPACES)); dot = ds_chomp (&getl_buf, get_endcmd ()); - if (recognize_do_repeat (ds_c_str (&getl_buf))) + if (recognize_do_repeat (ds_cstr (&getl_buf))) nesting_level++; - else if (recognize_end_repeat (ds_c_str (&getl_buf), &block->print)) + else if (recognize_end_repeat (ds_cstr (&getl_buf), &block->print)) { if (nesting_level-- == 0) { @@ -301,13 +301,13 @@ parse_lines (struct repeat_block *block) } } if (dot) - ds_putc (&getl_buf, get_endcmd ()); 
+ ds_put_char (&getl_buf, get_endcmd ()); line = *last_line = pool_alloc (block->pool, sizeof *line); line->next = NULL; line->file_name = previous_file_name; line->line_number = cur_line_number; - line->line = pool_strdup (block->pool, ds_c_str (&getl_buf)); + line->line = pool_strdup (block->pool, ds_cstr (&getl_buf)); last_line = &line->next; } @@ -469,7 +469,7 @@ do_repeat_filter (struct string *line, void *block_) struct string output; bool dot; - ds_init (&output); + ds_init_empty (&output); /* Strip trailing whitespace, check for & remove terminal dot. */ while (isspace (ds_last (line))) @@ -477,7 +477,7 @@ do_repeat_filter (struct string *line, void *block_) dot = ds_chomp (line, get_endcmd ()); in_apos = in_quote = false; - for (cp = ds_c_str (line); cp < ds_end (line); ) + for (cp = ds_cstr (line); cp < ds_end (line); ) { if (*cp == '\'' && !in_quote) in_apos = !in_apos; @@ -485,7 +485,7 @@ do_repeat_filter (struct string *line, void *block_) in_quote = !in_quote; if (in_quote || in_apos || !lex_is_id1 (*cp)) - ds_putc (&output, *cp++); + ds_put_char (&output, *cp++); else { const char *start = cp; @@ -493,14 +493,14 @@ do_repeat_filter (struct string *line, void *block_) const char *substitution = find_substitution (block, start, end - start); if (substitution != NULL) - ds_puts (&output, substitution); + ds_put_cstr (&output, substitution); else - ds_concat (&output, start, end - start); + ds_put_substring (&output, ss_buffer (start, end - start)); cp = end; } } if (dot) - ds_putc (&output, get_endcmd ()); + ds_put_char (&output, get_endcmd ()); ds_swap (line, &output); ds_destroy (&output); @@ -526,7 +526,7 @@ do_repeat_read (struct string *output, char **file_name, int *line_number, } line = block->cur_line; - ds_assign_c_str (output, line->line); + ds_assign_cstr (output, line->line); *file_name = line->file_name; *line_number = -line->line_number; block->cur_line = line->next; diff --git a/src/language/data-io/ChangeLog 
b/src/language/data-io/ChangeLog index 23f215cf..31e809e5 100644 --- a/src/language/data-io/ChangeLog +++ b/src/language/data-io/ChangeLog @@ -1,3 +1,29 @@ +Fri Jun 9 13:56:00 2006 Ben Pfaff + + Reform string library. + + * matrix-data.c (context): Use dynamic string. + (another_token) Deal with changed dfm_get_record() interface. + (mget_token) Ditto. + (force_eol) Ditto. + + * data-list.c (struct data_list_pgm) Delete delims, delim_cnt + members, replacing them by struct string delims. Update all + references to use struct string functions. + (cut_field) Change interface to avoid needing "end_blank", by + getting the data-reader to remember that state for us. Change + internals to use substring. Update both callers. + + * data-reader.c (read_file_record): Use ds_read_stream(). + (dfm_get_record) Change interface to return substring. Updated + all callers. + (dfm_expand_tabs) Use ds_find_char(). Now maintain position + relative to end-of-line. Use ds_swap(). + (dfm_reread_record) Don't limit position by line length. + (dfm_column_start) Make parameter const. + (dfm_columns_past_end) New function. + (dfm_get_column) New function. + Thu May 25 18:26:26 WST 2006 John Darrington * print.c (print_trns_free): Made the code agree with the comment, by diff --git a/src/language/data-io/data-list.c b/src/language/data-io/data-list.c index 89505dbc..92ad0793 100644 --- a/src/language/data-io/data-list.c +++ b/src/language/data-io/data-list.c @@ -47,6 +47,8 @@ #include #include +#include "size_max.h" + #include "gettext.h" #define _(msgid) gettext (msgid) @@ -90,8 +92,7 @@ struct data_list_pgm struct variable *end; /* Variable specified on END subcommand. */ int rec_cnt; /* Number of records. */ size_t case_size; /* Case size in bytes. */ - char *delims; /* Delimiters if any; not null-terminated. */ - size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */ + struct string delims; /* Field delimiters. 
*/ }; static const struct case_source_class data_list_source_class; @@ -122,8 +123,7 @@ cmd_data_list (void) dls->type = -1; dls->end = NULL; dls->rec_cnt = 0; - dls->delims = NULL; - dls->delim_cnt = 0; + ds_init_empty (&dls->delims); dls->first = dls->last = NULL; while (token != '/') @@ -199,9 +199,9 @@ cmd_data_list (void) if (lex_match_id ("TAB")) delim = '\t'; - else if (token == T_STRING && tokstr.length == 1) + else if (token == T_STRING && ds_length (&tokstr) == 1) { - delim = tokstr.string[0]; + delim = ds_first (&tokstr); lex_get(); } else @@ -210,8 +210,7 @@ cmd_data_list (void) goto error; } - dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1); - dls->delims[dls->delim_cnt++] = delim; + ds_put_char (&dls->delims, delim); lex_match (','); } @@ -903,109 +902,73 @@ dump_free_table (const struct data_list_pgm *dls, /* Extracts a field from the current position in the current record. Fields can be unquoted or quoted with single- or - double-quote characters. *FIELD is set to the field content. + double-quote characters. + + *FIELD is set to the field content. The caller must not + modify or destroy this constant string. + After parsing the field, sets the current position in the record to just past the field and any trailing delimiter. - END_BLANK is used internally; it should be initialized by the - caller to 0 and left alone afterward. Returns 0 on failure or - a 1-based column number indicating the beginning of the field - on success. */ -static int -cut_field (const struct data_list_pgm *dls, struct fixed_string *field, - int *end_blank) + Returns true if a field was extracted, false on failure (end of + file or no more fields on the current record). 
*/ +static bool +cut_field (const struct data_list_pgm *dls, struct substring *field) { - struct fixed_string line; - char *cp; - size_t column_start; + struct substring line, p; if (dfm_eof (dls->reader)) - return 0; - if (dls->delim_cnt == 0) + return false; + if (ds_is_empty (&dls->delims)) dfm_expand_tabs (dls->reader); - dfm_get_record (dls->reader, &line); + line = p = dfm_get_record (dls->reader); - cp = ls_c_str (&line); - if (dls->delim_cnt == 0) + if (ds_is_empty (&dls->delims)) { + bool missing_quote = false; + /* Skip leading whitespace. */ - while (cp < ls_end (&line) && isspace ((unsigned char) *cp)) - cp++; - if (cp >= ls_end (&line)) - return 0; + ss_ltrim (&p, ss_cstr (CC_SPACES)); + if (ss_is_empty (p)) + return false; /* Handle actual data, whether quoted or unquoted. */ - if (*cp == '\'' || *cp == '"') - { - int quote = *cp; - - field->string = ++cp; - while (cp < ls_end (&line) && *cp != quote) - cp++; - field->length = cp - field->string; - if (cp < ls_end (&line)) - cp++; - else - msg (SW, _("Quoted string missing terminating `%c'."), quote); - } + if (ss_match_char (&p, '\'')) + missing_quote = !ss_get_until (&p, '\'', field); + else if (ss_match_char (&p, '"')) + missing_quote = !ss_get_until (&p, '"', field); else - { - field->string = cp; - while (cp < ls_end (&line) - && !isspace ((unsigned char) *cp) && *cp != ',') - cp++; - field->length = cp - field->string; - } + ss_get_chars (&p, ss_cspan (p, ss_cstr ("," CC_SPACES)), field); + if (missing_quote) + msg (SW, _("Quoted string extends beyond end of line.")); /* Skip trailing whitespace and a single comma if present. 
*/ - while (cp < ls_end (&line) && isspace ((unsigned char) *cp)) - cp++; - if (cp < ls_end (&line) && *cp == ',') - cp++; + ss_ltrim (&p, ss_cstr (CC_SPACES)); + ss_match_char (&p, ','); + + dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p)); } else { - if (cp >= ls_end (&line)) + if (!ss_is_empty (p)) + ss_get_chars (&p, ss_cspan (p, ds_ss (&dls->delims)), field); + else if (dfm_columns_past_end (dls->reader) == 0) { - int column = dfm_column_start (dls->reader); - /* A blank line or a line that ends in \t has a + /* A blank line or a line that ends in a delimiter has a trailing blank field. */ - if (column == 1 || (column > 1 && cp[-1] == '\t')) - { - if (*end_blank == 0) - { - *end_blank = 1; - field->string = ls_end (&line); - field->length = 0; - dfm_forward_record (dls->reader); - return column; - } - else - { - *end_blank = 0; - return 0; - } - } - else - return 0; + *field = p; } else - { - field->string = cp; - while (cp < ls_end (&line) - && memchr (dls->delims, *cp, dls->delim_cnt) == NULL) - cp++; - field->length = cp - field->string; - if (cp < ls_end (&line)) - cp++; - } + return false; + + /* Advance past the field. + + Also advance past a trailing delimiter, regardless of + whether one actually existed. If we "skip" a delimiter + that was not actually there, then we will return + end-of-line on our next call, which is what we want. 
*/ + dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p) + 1); } - - dfm_forward_columns (dls->reader, field->string - line.string); - column_start = dfm_column_start (dls->reader); - - dfm_forward_columns (dls->reader, cp - field->string); - - return column_start; + return true; } static bool read_from_data_list_fixed (const struct data_list_pgm *, @@ -1055,7 +1018,7 @@ read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c) return false; for (i = 1; i <= dls->rec_cnt; i++) { - struct fixed_string line; + struct substring line; if (dfm_eof (dls->reader)) { @@ -1065,13 +1028,13 @@ read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c) return false; } dfm_expand_tabs (dls->reader); - dfm_get_record (dls->reader, &line); + line = dfm_get_record (dls->reader); for (; var_spec && i == var_spec->rec; var_spec = var_spec->next) { struct data_in di; - data_in_finite_line (&di, ls_c_str (&line), ls_length (&line), + data_in_finite_line (&di, ss_data (line), ss_length (line), var_spec->fc, var_spec->lc); di.v = case_data_rw (c, var_spec->fv); di.flags = DI_IMPLIED_DECIMALS; @@ -1094,20 +1057,15 @@ static bool read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c) { struct dls_var_spec *var_spec; - int end_blank = 0; for (var_spec = dls->first; var_spec; var_spec = var_spec->next) { - struct fixed_string field; - int column; + struct substring field; + struct data_in di; /* Cut out a field and read in a new record if necessary. 
*/ - for (;;) + while (!cut_field (dls, &field)) { - column = cut_field (dls, &field, &end_blank); - if (column != 0) - break; - if (!dfm_eof (dls->reader)) dfm_forward_record (dls->reader); if (dfm_eof (dls->reader)) @@ -1119,17 +1077,13 @@ read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c) } } - { - struct data_in di; - - di.s = ls_c_str (&field); - di.e = ls_end (&field); - di.v = case_data_rw (c, var_spec->fv); - di.flags = 0; - di.f1 = column; - di.format = var_spec->input; - data_in (&di); - } + di.s = ss_data (field); + di.e = ss_end (field); + di.v = case_data_rw (c, var_spec->fv); + di.flags = 0; + di.f1 = dfm_get_column (dls->reader, ss_data (field)); + di.format = var_spec->input; + data_in (&di); } return true; } @@ -1141,19 +1095,16 @@ static bool read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c) { struct dls_var_spec *var_spec; - int end_blank = 0; if (dfm_eof (dls->reader)) return false; for (var_spec = dls->first; var_spec; var_spec = var_spec->next) { - struct fixed_string field; - int column; + struct substring field; + struct data_in di; - /* Cut out a field and check for end-of-line. */ - column = cut_field (dls, &field, &end_blank); - if (column == 0) + if (!cut_field (dls, &field)) { if (get_undefined ()) msg (SW, _("Missing value(s) for all variables from %s onward. 
" @@ -1171,17 +1122,13 @@ read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c) break; } - { - struct data_in di; - - di.s = ls_c_str (&field); - di.e = ls_end (&field); - di.v = case_data_rw (c, var_spec->fv); - di.flags = 0; - di.f1 = column; - di.format = var_spec->input; - data_in (&di); - } + di.s = ss_data (field); + di.e = ss_end (field); + di.v = case_data_rw (c, var_spec->fv); + di.flags = 0; + di.f1 = dfm_get_column (dls->reader, ss_data (field)); + di.format = var_spec->input; + data_in (&di); } dfm_forward_record (dls->reader); @@ -1208,7 +1155,7 @@ static bool data_list_trns_free (void *dls_) { struct data_list_pgm *dls = dls_; - free (dls->delims); + ds_destroy (&dls->delims); destroy_dls_var_spec (dls->first); dfm_close_reader (dls->reader); free (dls); diff --git a/src/language/data-io/data-reader.c b/src/language/data-io/data-reader.c index e26941b0..c2da5504 100644 --- a/src/language/data-io/data-reader.c +++ b/src/language/data-io/data-reader.c @@ -37,6 +37,9 @@ #include #include +#include "minmax.h" +#include "size_max.h" + #include "gettext.h" #define _(msgid) gettext (msgid) @@ -120,8 +123,8 @@ dfm_open_reader (struct file_handle *fh) r = xmalloc (sizeof *r); r->fh = fh; - ds_init (&r->line); - ds_init (&r->scratch); + ds_init_empty (&r->line); + ds_init_empty (&r->scratch); r->flags = DFM_ADVANCE; r->eof_cnt = 0; if (fh != fh_inline_file ()) @@ -177,9 +180,9 @@ read_inline_record (struct dfm_reader *r) } if (ds_length (&getl_buf) >= 8 - && !strncasecmp (ds_c_str (&getl_buf), "end data", 8)) + && !strncasecmp (ds_cstr (&getl_buf), "end data", 8)) { - lex_set_prog (ds_c_str (&getl_buf) + ds_length (&getl_buf)); + lex_set_prog (ds_end (&getl_buf)); return false; } @@ -193,10 +196,10 @@ static bool read_file_record (struct dfm_reader *r) { assert (r->fh != fh_inline_file ()); + ds_clear (&r->line); if (fh_get_mode (r->fh) == FH_MODE_TEXT) { - ds_clear (&r->line); - if (!ds_gets (&r->line, r->file)) + if (!ds_read_line 
(&r->line, r->file)) { if (ferror (r->file)) msg (ME, _("Error reading file %s: %s."), @@ -207,12 +210,7 @@ read_file_record (struct dfm_reader *r) else if (fh_get_mode (r->fh) == FH_MODE_BINARY) { size_t record_width = fh_get_record_width (r->fh); - size_t amt; - - if (ds_length (&r->line) < record_width) - ds_rpad (&r->line, record_width, 0); - - amt = fread (ds_c_str (&r->line), 1, record_width, r->file); + size_t amt = ds_read_stream (&r->line, 1, record_width, r->file); if (record_width != amt) { if (ferror (r->file)) @@ -282,18 +280,14 @@ dfm_eof (struct dfm_reader *r) /* Returns the current record in the file corresponding to HANDLE. Aborts if reading from the file is necessary or at - end of file, so call dfm_eof() first. Sets *LINE to the line, - which is not null-terminated. The caller must not free or - modify the returned string. */ -void -dfm_get_record (struct dfm_reader *r, struct fixed_string *line) + end of file, so call dfm_eof() first. */ +struct substring +dfm_get_record (struct dfm_reader *r) { assert ((r->flags & DFM_ADVANCE) == 0); assert (r->eof_cnt == 0); - assert (r->pos <= ds_length (&r->line)); - line->string = ds_data (&r->line) + r->pos; - line->length = ds_length (&r->line) - r->pos; + return ds_substr (&r->line, r->pos, SIZE_MAX); } /* Expands tabs in the current line into the equivalent number of @@ -303,12 +297,10 @@ dfm_get_record (struct dfm_reader *r, struct fixed_string *line) void dfm_expand_tabs (struct dfm_reader *r) { - struct string temp; size_t ofs, new_pos, tab_width; assert ((r->flags & DFM_ADVANCE) == 0); assert (r->eof_cnt == 0); - assert (r->pos <= ds_length (&r->line)); if (r->flags & DFM_TABS_EXPANDED) return; @@ -317,14 +309,14 @@ dfm_expand_tabs (struct dfm_reader *r) if (r->fh != fh_inline_file () && (fh_get_mode (r->fh) == FH_MODE_BINARY || fh_get_tab_width (r->fh) == 0 - || memchr (ds_c_str (&r->line), '\t', ds_length (&r->line)) == NULL)) + || ds_find_char (&r->line, '\t') == SIZE_MAX)) return; /* Expand tabs 
from r->line into r->scratch, and figure out new value for r->pos. */ tab_width = fh_get_tab_width (r->fh); ds_clear (&r->scratch); - new_pos = 0; + new_pos = SIZE_MAX; for (ofs = 0; ofs < ds_length (&r->line); ofs++) { unsigned char c; @@ -332,26 +324,34 @@ dfm_expand_tabs (struct dfm_reader *r) if (ofs == r->pos) new_pos = ds_length (&r->scratch); - c = ds_c_str (&r->line)[ofs]; + c = ds_data (&r->line)[ofs]; if (c != '\t') - ds_putc (&r->scratch, c); + ds_put_char (&r->scratch, c); else { do - ds_putc (&r->scratch, ' '); + ds_put_char (&r->scratch, ' '); while (ds_length (&r->scratch) % tab_width != 0); } } + if (new_pos == SIZE_MAX) + { + /* Maintain the same relationship between position and line + length that we had before. DATA LIST uses a + beyond-the-end position to deal with an empty field at + the end of the line. */ + assert (r->pos >= ds_length (&r->line)); + new_pos = (r->pos - ds_length (&r->line)) + ds_length (&r->scratch); + } /* Swap r->line and r->scratch and set new r->pos. */ - temp = r->line; - r->line = r->scratch; - r->scratch = temp; + ds_swap (&r->line, &r->scratch); r->pos = new_pos; } -/* Causes dfm_get_record() to read in the next record the next time it - is executed on file HANDLE. */ +/* Causes dfm_get_record() or dfm_get_whole_record() to read in + the next record the next time it is executed on file + HANDLE. */ void dfm_forward_record (struct dfm_reader *r) { @@ -365,12 +365,7 @@ void dfm_reread_record (struct dfm_reader *r, size_t column) { r->flags &= ~DFM_ADVANCE; - if (column < 1) - r->pos = 0; - else if (column > ds_length (&r->line)) - r->pos = ds_length (&r->line); - else - r->pos = column - 1; + r->pos = MAX (column, 1) - 1; } /* Sets the current line to begin COLUMNS characters following @@ -385,11 +380,28 @@ dfm_forward_columns (struct dfm_reader *r, size_t columns) is set. Unless dfm_reread_record() or dfm_forward_columns() have been called, this is 1. 
*/ size_t -dfm_column_start (struct dfm_reader *r) +dfm_column_start (const struct dfm_reader *r) { return r->pos + 1; } +/* Returns the number of columns we are currently beyond the end + of the line. At or before end-of-line, this is 0; one column + after end-of-line, this is 1; and so on. */ +size_t +dfm_columns_past_end (const struct dfm_reader *r) +{ + return r->pos < ds_length (&r->line) ? 0 : r->pos - ds_length (&r->line); +} + +/* Returns the 1-based column within the current line that P + designates. */ +size_t +dfm_get_column (const struct dfm_reader *r, const char *p) +{ + return ds_pointer_to_position (&r->line, p) + 1; +} + /* Pushes the file name and line number on the fn/ln stack. */ void dfm_push (struct dfm_reader *r) diff --git a/src/language/data-io/data-reader.h b/src/language/data-io/data-reader.h index bbf5c52b..e7f4cd69 100644 --- a/src/language/data-io/data-reader.h +++ b/src/language/data-io/data-reader.h @@ -30,21 +30,23 @@ #include struct file_handle; -struct fixed_string; +struct string; /* Input. */ struct dfm_reader *dfm_open_reader (struct file_handle *); void dfm_close_reader (struct dfm_reader *); bool dfm_reader_error (const struct dfm_reader *); unsigned dfm_eof (struct dfm_reader *); -void dfm_get_record (struct dfm_reader *, struct fixed_string *); +struct substring dfm_get_record (struct dfm_reader *); void dfm_expand_tabs (struct dfm_reader *); /* Line control. */ void dfm_forward_record (struct dfm_reader *); void dfm_reread_record (struct dfm_reader *, size_t column); void dfm_forward_columns (struct dfm_reader *, size_t columns); -size_t dfm_column_start (struct dfm_reader *); +size_t dfm_column_start (const struct dfm_reader *); +size_t dfm_columns_past_end (const struct dfm_reader *); +size_t dfm_get_column (const struct dfm_reader *, const char *); /* File stack.
*/ void dfm_push (struct dfm_reader *); diff --git a/src/language/data-io/file-handle.q b/src/language/data-io/file-handle.q index 94430627..5e5ddbad 100644 --- a/src/language/data-io/file-handle.q +++ b/src/language/data-io/file-handle.q @@ -181,12 +181,12 @@ fh_parse (enum fh_referent referent_mask) if (token == T_ID) handle = fh_from_name (tokid); if (handle == NULL) - handle = fh_from_file_name (ds_c_str (&tokstr)); + handle = fh_from_file_name (ds_cstr (&tokstr)); if (handle == NULL) { if (token != T_ID || tokid[0] != '#' || get_syntax () != ENHANCED) { - char *file_name = ds_c_str (&tokstr); + char *file_name = ds_cstr (&tokstr); char *handle_name = xasprintf ("\"%s\"", file_name); handle = fh_create_file (handle_name, file_name, fh_default_properties ()); diff --git a/src/language/data-io/list.q b/src/language/data-io/list.q index 84b1ac81..e377cb41 100644 --- a/src/language/data-io/list.q +++ b/src/language/data-io/list.q @@ -118,7 +118,7 @@ write_line (struct outp_driver *d, const char *s) assert (d->cp_y + d->font_height <= d->length); text.font = OUTP_FIXED; text.justification = OUTP_LEFT; - ls_init (&text.string, s, strlen (s)); + text.string = ss_cstr (s); text.x = d->cp_x; text.y = d->cp_y; text.h = text.v = INT_MAX; @@ -411,7 +411,7 @@ write_varname (struct outp_driver *d, char *string, int indent) text.font = OUTP_FIXED; text.justification = OUTP_LEFT; - ls_init (&text.string, string, strlen (string)); + text.string = ss_cstr (string); text.x = d->cp_x; text.y = d->cp_y; text.h = text.v = INT_MAX; @@ -448,7 +448,7 @@ write_fallback_headers (struct outp_driver *d) sprintf (leader, "%s %d:", Line, ++line_number); text.font = OUTP_FIXED; text.justification = OUTP_LEFT; - ls_init (&text.string, leader, strlen (leader)); + text.string = ss_cstr (leader); text.x = 0; text.y = d->cp_y; text.h = text.v = INT_MAX; @@ -604,7 +604,7 @@ determine_layout (void) d->cp_y += d->font_height; } - ds_init (&line_buffer); + ds_init_empty (&line_buffer); } /* Writes 
case C to output. */ @@ -627,7 +627,7 @@ list_cases (const struct ccase *c, void *aux UNUSED) if (!prc->header_rows) { - ds_printf(&line_buffer, "%8s: ", cmd.v_variables[0]->name); + ds_put_format(&line_buffer, "%8s: ", cmd.v_variables[0]->name); } @@ -650,34 +650,34 @@ list_cases (const struct ccase *c, void *aux UNUSED) write_header (d); } - write_line (d, ds_c_str(&line_buffer)); + write_line (d, ds_cstr (&line_buffer)); ds_clear(&line_buffer); if (!prc->header_rows) { - ds_printf (&line_buffer, "%8s: ", v->name); + ds_put_format (&line_buffer, "%8s: ", v->name); } } if (width > v->print.w) { - ds_putc_multiple(&line_buffer, ' ', width - v->print.w); + ds_put_char_multiple(&line_buffer, ' ', width - v->print.w); } if ((formats[v->print.type].cat & FCAT_STRING) || v->fv != -1) { - data_out (ds_append_uninit(&line_buffer, v->print.w), + data_out (ds_put_uninit(&line_buffer, v->print.w), &v->print, case_data (c, v->fv)); } else { union value case_idx_value; case_idx_value.f = case_idx; - data_out (ds_append_uninit(&line_buffer,v->print.w), + data_out (ds_put_uninit(&line_buffer,v->print.w), &v->print, &case_idx_value); } - ds_putc(&line_buffer, ' '); + ds_put_char(&line_buffer, ' '); } if (!n_lines_remaining (d)) @@ -686,7 +686,7 @@ list_cases (const struct ccase *c, void *aux UNUSED) write_header (d); } - write_line (d, ds_c_str(&line_buffer)); + write_line (d, ds_cstr (&line_buffer)); ds_clear(&line_buffer); } else if (d->class == &html_class) @@ -700,7 +700,6 @@ list_cases (const struct ccase *c, void *aux UNUSED) { struct variable *v = cmd.v_variables[column]; char buf[256]; - struct fixed_string s; if ((formats[v->print.type].cat & FCAT_STRING) || v->fv != -1) data_out (buf, &v->print, case_data (c, v->fv)); @@ -711,9 +710,8 @@ list_cases (const struct ccase *c, void *aux UNUSED) data_out (buf, &v->print, &case_idx_value); } - ls_init (&s, buf, v->print.w); fputs (" ", x->file); - html_put_cell_contents (d, TAB_FIX, &s); + html_put_cell_contents (d, TAB_FIX, 
ss_buffer (buf, v->print.w)); fputs ("\n", x->file); } diff --git a/src/language/data-io/matrix-data.c b/src/language/data-io/matrix-data.c index d5b489c3..c725ed4a 100644 --- a/src/language/data-io/matrix-data.c +++ b/src/language/data-io/matrix-data.c @@ -42,6 +42,8 @@ #include #include +#include "size_max.h" + #include "gettext.h" #define _(msgid) gettext (msgid) @@ -769,38 +771,25 @@ mget_token_dump (struct matrix_token *token, struct dfm_reader *reader) static const char * context (struct dfm_reader *reader) { - static char buf[32]; + static struct string buf = DS_EMPTY_INITIALIZER; + ds_clear (&buf); if (dfm_eof (reader)) - strcpy (buf, "at end of file"); + ds_assign_cstr (&buf, "at end of file"); else { - struct fixed_string line; - const char *sp; + struct substring p; - dfm_get_record (reader, &line); - sp = ls_c_str (&line); - while (sp < ls_end (&line) && isspace ((unsigned char) *sp)) - sp++; - if (sp >= ls_end (&line)) - strcpy (buf, "at end of line"); + p = dfm_get_record (reader); + ss_ltrim (&p, ss_cstr (CC_SPACES)); + if (ss_is_empty (p)) + ds_assign_cstr (&buf, "at end of line"); else - { - char *dp; - size_t copy_cnt = 0; - - dp = stpcpy (buf, "before `"); - while (sp < ls_end (&line) && !isspace ((unsigned char) *sp) - && copy_cnt < 10) - { - *dp++ = *sp++; - copy_cnt++; - } - strcpy (dp, "'"); - } + ds_put_format (&buf, "before `%.*s'", + (int) ss_cspan (p, ss_cstr (CC_SPACES)), ss_data (p)); } - return buf; + return ds_cstr (&buf); } /* Is there at least one token left in the data file? 
*/ @@ -809,20 +798,17 @@ another_token (struct dfm_reader *reader) { for (;;) { - struct fixed_string line; - const char *cp; + struct substring p; + size_t space_cnt; if (dfm_eof (reader)) return 0; - dfm_get_record (reader, &line); - cp = ls_c_str (&line); - while (isspace ((unsigned char) *cp) && cp < ls_end (&line)) - cp++; - - if (cp < ls_end (&line)) + p = dfm_get_record (reader); + space_cnt = ss_span (p, ss_cstr (CC_SPACES)); + if (space_cnt < ss_length (p)) { - dfm_forward_columns (reader, cp - ls_c_str (&line)); + dfm_forward_columns (reader, space_cnt); return 1; } @@ -834,73 +820,65 @@ another_token (struct dfm_reader *reader) static int (mget_token) (struct matrix_token *token, struct dfm_reader *reader) { - struct fixed_string line; - int first_column; - char *cp; + struct substring line, p; + struct substring s; + int c; if (!another_token (reader)) return 0; - dfm_get_record (reader, &line); - first_column = dfm_column_start (reader); + line = p = dfm_get_record (reader); /* Three types of fields: quoted with ', quoted with ", unquoted. 
*/ - cp = ls_c_str (&line); - if (*cp == '\'' || *cp == '"') + c = ss_first (p); + if (c == '\'' || c == '"') { - int quote = *cp; - - token->type = MSTR; - token->string = ++cp; - while (cp < ls_end (&line) && *cp != quote) - cp++; - token->length = cp - token->string; - if (cp < ls_end (&line)) - cp++; - else - msg (SW, _("Scope of string exceeds line.")); + ss_get_char (&p); + if (!ss_get_until (&p, c, &s)) + msg (SW, _("Scope of string exceeds line.")); } else { - int is_num = isdigit ((unsigned char) *cp) || *cp == '.'; - - token->string = cp++; - while (cp < ls_end (&line) - && !isspace ((unsigned char) *cp) && *cp != ',' - && *cp != '-' && *cp != '+') - { - if (isdigit ((unsigned char) *cp)) - is_num = 1; - - if ((tolower ((unsigned char) *cp) == 'd' - || tolower ((unsigned char) *cp) == 'e') - && (cp[1] == '+' || cp[1] == '-')) - cp += 2; - else - cp++; - } + bool is_num = isdigit (c) || c == '.'; + const char *start = ss_data (p); - token->length = cp - token->string; - assert (token->length); + for (;;) + { + c = ss_first (p); + if (ss_is_empty (p) + || (ss_data (p) > start && strchr (CC_SPACES ",-+", c) != NULL)) + break; + if (isdigit (c)) + is_num = true; + if (strchr ("deDE", c) && strchr ("+-", ss_at (p, 1))) + { + is_num = true; + ss_advance (&p, 2); + } + else + ss_advance (&p, 1); + } + s = ss_buffer (start, ss_data (p) - start); if (is_num) { struct data_in di; - di.s = token->string; - di.e = token->string + token->length; + di.s = ss_data (s); + di.e = ss_end (s); di.v = (union value *) &token->number; - di.f1 = first_column; + di.f1 = dfm_get_column (reader, di.s); - di.format = make_output_format (FMT_F, token->length, 0); + di.format = make_output_format (FMT_F, ss_length (s), 0); - if (!data_in (&di)) - return 0; + data_in (&di); } else token->type = MSTR; } - - dfm_forward_columns (reader, cp - ls_c_str (&line)); + token->string = ss_data (s); + token->length = ss_length (s); + /* Resume at P, i.e. after everything consumed for this token. */ + dfm_reread_record (reader, dfm_get_column (reader, ss_data (p))); return 1; } @@ -910,18 +888,13 @@ static int static int force_eol (struct dfm_reader *reader, const
char *content) { - struct fixed_string line; - const char *cp; + struct substring p; if (dfm_eof (reader)) return 0; - dfm_get_record (reader, &line); - cp = ls_c_str (&line); - while (isspace ((unsigned char) *cp) && cp < ls_end (&line)) - cp++; - - if (cp < ls_end (&line)) + p = dfm_get_record (reader); + if (ss_span (p, ss_cstr (CC_SPACES)) != ss_length (p)) { msg (SE, _("End of line expected %s while reading %s."), context (reader), content); diff --git a/src/language/data-io/print.c b/src/language/data-io/print.c index b96ee13b..34cca99a 100644 --- a/src/language/data-io/print.c +++ b/src/language/data-io/print.c @@ -350,7 +350,7 @@ parse_string_argument (void) { fx.spec.type = PRT_CONST; fx.spec.fc = fx.sc - 1; - fx.spec.u.c = xstrdup (ds_c_str (&tokstr)); + fx.spec.u.c = ds_xstrdup (&tokstr); lex_get (); /* Parse the included column range. */ diff --git a/src/language/dictionary/value-labels.c b/src/language/dictionary/value-labels.c index 89e0160a..dec757c3 100644 --- a/src/language/dictionary/value-labels.c +++ b/src/language/dictionary/value-labels.c @@ -157,7 +157,7 @@ get_label (struct variable **vars, size_t var_cnt) lex_error (_("expecting string")); return 0; } - buf_copy_str_rpad (value.s, MAX_SHORT_STRING, ds_c_str (&tokstr)); + buf_copy_str_rpad (value.s, MAX_SHORT_STRING, ds_cstr (&tokstr)); } else { @@ -180,7 +180,7 @@ get_label (struct variable **vars, size_t var_cnt) msg (SW, _("Truncating value label to 60 characters.")); ds_truncate (&tokstr, 60); } - label = ds_c_str (&tokstr); + label = ds_cstr (&tokstr); for (i = 0; i < var_cnt; i++) val_labs_replace (vars[i]->val_labs, value, label); diff --git a/src/language/dictionary/variable-label.c b/src/language/dictionary/variable-label.c index 7380d3d6..dcd607fe 100644 --- a/src/language/dictionary/variable-label.c +++ b/src/language/dictionary/variable-label.c @@ -61,7 +61,7 @@ cmd_variable_labels (void) { if (v[i]->label) free (v[i]->label); - v[i]->label = xstrdup (ds_c_str (&tokstr)); + 
v[i]->label = ds_xstrdup (&tokstr); } lex_get (); diff --git a/src/language/expressions/ChangeLog b/src/language/expressions/ChangeLog index 791fa194..82421672 100644 --- a/src/language/expressions/ChangeLog +++ b/src/language/expressions/ChangeLog @@ -1,3 +1,10 @@ +Fri Jun 9 13:59:15 2006 Ben Pfaff + + Reform string library. + + * generate.pl (init_all_types): `struct fixed_string' is now + `struct substring'. + Sun May 7 10:05:42 WST 2006 John Darrington * evaluate.c: Removed call to copy_mangle, and replaced with the diff --git a/src/language/expressions/evaluate.c b/src/language/expressions/evaluate.c index 39b1ac4d..9398b41c 100644 --- a/src/language/expressions/evaluate.c +++ b/src/language/expressions/evaluate.c @@ -34,7 +34,7 @@ expr_evaluate (struct expression *e, const struct ccase *c, int case_idx, union operation_data *op = e->ops; double *ns = e->number_stack; - struct fixed_string *ss = e->string_stack; + struct substring *ss = e->string_stack; assert ((c != NULL) == (e->dict != NULL)); pool_clear (e->eval_pool); @@ -51,7 +51,7 @@ expr_evaluate (struct expression *e, const struct ccase *c, int case_idx, case OP_string: { - const struct fixed_string *s = &op++->string; + const struct substring *s = &op++->string; *ss++ = copy_string (e, s->string, s->length); } break; @@ -61,7 +61,7 @@ expr_evaluate (struct expression *e, const struct ccase *c, int case_idx, return; case OP_return_string: - *(struct fixed_string *) result = ss[-1]; + *(struct substring *) result = ss[-1]; return; #include "evaluate.inc" @@ -88,7 +88,7 @@ void expr_evaluate_str (struct expression *e, const struct ccase *c, int case_idx, char *dst, size_t dst_size) { - struct fixed_string s; + struct substring s; assert (e->type == OP_string); assert ((dst == NULL) == (dst_size == 0)); @@ -140,7 +140,7 @@ cmd_debug_evaluate (void) else if (token == T_STRING) { width = ds_length (&tokstr); - fprintf (stderr, "(%s = \"%.2s\")", name, ds_c_str (&tokstr)); + fprintf (stderr, "(%s = 
\"%.2s\")", name, ds_cstr (&tokstr)); } else { @@ -223,7 +223,7 @@ cmd_debug_evaluate (void) case OP_string: { - struct fixed_string s; + struct substring s; expr_evaluate (expr, c, 0, &s); fputc ('"', stderr); diff --git a/src/language/expressions/generate.pl b/src/language/expressions/generate.pl index 4ad1764b..8216521a 100644 --- a/src/language/expressions/generate.pl +++ b/src/language/expressions/generate.pl @@ -68,7 +68,7 @@ sub init_all_types { init_type ('number', 'any', C_TYPE => 'double', ATOM => 'number', MANGLE => 'n', HUMAN_NAME => 'num', STACK => 'ns', MISSING_VALUE => 'SYSMIS'); - init_type ('string', 'any', C_TYPE => 'struct fixed_string', + init_type ('string', 'any', C_TYPE => 'struct substring', ATOM => 'string', MANGLE => 's', HUMAN_NAME => 'string', STACK => 'ss', MISSING_VALUE => 'empty_string'); init_type ('boolean', 'any', C_TYPE => 'double', diff --git a/src/language/expressions/helpers.c b/src/language/expressions/helpers.c index 4dd3cf3d..fd78e734 100644 --- a/src/language/expressions/helpers.c +++ b/src/language/expressions/helpers.c @@ -5,7 +5,7 @@ #include #include "private.h" -const struct fixed_string empty_string = {NULL, 0}; +const struct substring empty_string = {NULL, 0}; static void expr_error (void *aux UNUSED, const char *format, ...) 
@@ -121,7 +121,7 @@ expr_yrmoda (double year, double month, double day) } int -compare_string (const struct fixed_string *a, const struct fixed_string *b) +compare_string (const struct substring *a, const struct substring *b) { size_t i; @@ -149,19 +149,19 @@ count_valid (double *d, size_t d_cnt) return valid_cnt; } -struct fixed_string +struct substring alloc_string (struct expression *e, size_t length) { - struct fixed_string s; + struct substring s; s.length = length; s.string = pool_alloc (e->eval_pool, length); return s; } -struct fixed_string +struct substring copy_string (struct expression *e, const char *old, size_t length) { - struct fixed_string s = alloc_string (e, length); + struct substring s = alloc_string (e, length); memcpy (s.string, old, length); return s; } diff --git a/src/language/expressions/helpers.h b/src/language/expressions/helpers.h index b6157f70..0f44b35f 100644 --- a/src/language/expressions/helpers.h +++ b/src/language/expressions/helpers.h @@ -44,9 +44,9 @@ static inline double check_errno (double x) #define MIN_S 60. /* Seconds per minute. */ #define WEEK_DAY 7. /* Days per week. 
*/ -extern const struct fixed_string empty_string; +extern const struct substring empty_string; -int compare_string (const struct fixed_string *, const struct fixed_string *); +int compare_string (const struct substring *, const struct substring *); double expr_ymd_to_date (double year, double month, double day); double expr_ymd_to_ofs (double year, double month, double day); @@ -54,9 +54,9 @@ double expr_wkyr_to_date (double wk, double yr); double expr_yrday_to_date (double yr, double day); double expr_yrmoda (double year, double month, double day); -struct fixed_string alloc_string (struct expression *, size_t length); -struct fixed_string copy_string (struct expression *, - const char *, size_t length); +struct substring alloc_string (struct expression *, size_t length); +struct substring copy_string (struct expression *, + const char *, size_t length); static inline bool is_valid (double d) diff --git a/src/language/expressions/operations.def b/src/language/expressions/operations.def index 717bc15e..fe3bc5de 100644 --- a/src/language/expressions/operations.def +++ b/src/language/expressions/operations.def @@ -133,7 +133,7 @@ function MAX.1 (a[n]) string function MAX (string a[n]) { - struct fixed_string *max; + struct substring *max; size_t i; max = &a[0]; @@ -164,7 +164,7 @@ function MIN.1 (a[n]) string function MIN (string a[n]) { - struct fixed_string *min; + struct substring *min; size_t i; min = &a[0]; @@ -220,8 +220,8 @@ boolean function RANGE (string x, string a[n*2]) for (i = 0; i < n; i++) { - struct fixed_string *w = &a[2 * i]; - struct fixed_string *y = &a[2 * i + 1]; + struct substring *w = &a[2 * i]; + struct substring *y = &a[2 * i + 1]; if (compare_string (w, &x) <= 0 && compare_string (&x, y) <= 0) return 1.; } @@ -303,14 +303,14 @@ function XDATE.YEAR (date >= DAY_S) = calendar_offset_to_year (date / DAY_S); string function CONCAT (string a[n]) expression e; { - struct fixed_string dst; + struct substring dst; size_t i; dst = alloc_string (e, 
MAX_STRING); dst.length = 0; for (i = 0; i < n; i++) { - struct fixed_string *src = &a[i]; + struct substring *src = &a[i]; size_t copy_len; copy_len = src->length; @@ -446,7 +446,7 @@ absorb_miss string function LPAD (string s, n) return s; else { - struct fixed_string t = alloc_string (e, n); + struct substring t = alloc_string (e, n); memset (t.string, ' ', n - s.length); memcpy (&t.string[(int) n - s.length], s.string, s.length); return t; @@ -462,7 +462,7 @@ absorb_miss string function LPAD (string s, n, string c) return s; else { - struct fixed_string t = alloc_string (e, n); + struct substring t = alloc_string (e, n); memset (t.string, c.string[0], n - s.length); memcpy (&t.string[(int) n - s.length], s.string, s.length); return t; @@ -478,7 +478,7 @@ absorb_miss string function RPAD (string s, n) return s; else { - struct fixed_string t = alloc_string (e, n); + struct substring t = alloc_string (e, n); memcpy (t.string, s.string, s.length); memset (&t.string[s.length], ' ', n - s.length); return t; @@ -494,7 +494,7 @@ absorb_miss string function RPAD (string s, n, string c) return s; else { - struct fixed_string t = alloc_string (e, n); + struct substring t = alloc_string (e, n); memcpy (t.string, s.string, s.length); memset (&t.string[s.length], c.string[0], n - s.length); return t; @@ -563,7 +563,7 @@ absorb_miss string function STRING (x, no_format f) expression e; { union value v; - struct fixed_string dst; + struct substring dst; v.f = x; dst = alloc_string (e, f->w); @@ -951,7 +951,7 @@ no_opt string operator STR_VAR () expression e; str_var v; { - struct fixed_string s = alloc_string (e, v->width); + struct substring s = alloc_string (e, v->width); memcpy (s.string, case_str (c, v->fv), v->width); return s; } diff --git a/src/language/expressions/optimize.c b/src/language/expressions/optimize.c index 9f9626bc..c5ba6224 100644 --- a/src/language/expressions/optimize.c +++ b/src/language/expressions/optimize.c @@ -141,9 +141,9 @@ static double 
get_number_arg (struct composite_node *, size_t arg_idx); static double *get_number_args (struct composite_node *, size_t arg_idx, size_t arg_cnt, struct expression *); -static struct fixed_string get_string_arg (struct composite_node *, +static struct substring get_string_arg (struct composite_node *, size_t arg_idx); -static struct fixed_string *get_string_args (struct composite_node *, +static struct substring *get_string_args (struct composite_node *, size_t arg_idx, size_t arg_cnt, struct expression *); static const struct fmt_spec *get_format_arg (struct composite_node *, @@ -187,7 +187,7 @@ get_number_args (struct composite_node *c, size_t arg_idx, size_t arg_cnt, return d; } -static struct fixed_string +static struct substring get_string_arg (struct composite_node *c, size_t arg_idx) { assert (arg_idx < c->arg_cnt); @@ -195,11 +195,11 @@ get_string_arg (struct composite_node *c, size_t arg_idx) return c->args[arg_idx]->string.s; } -static struct fixed_string * +static struct substring * get_string_args (struct composite_node *c, size_t arg_idx, size_t arg_cnt, struct expression *e) { - struct fixed_string *s; + struct substring *s; size_t i; s = pool_alloc (e->expr_pool, sizeof *s * arg_cnt); @@ -236,7 +236,7 @@ emit_number (struct expression *e, double n) } static void -emit_string (struct expression *e, struct fixed_string s) +emit_string (struct expression *e, struct substring s) { allocate_aux (e, OP_string)->string = s; } diff --git a/src/language/expressions/parse.c b/src/language/expressions/parse.c index 81ed4da8..f220509a 100644 --- a/src/language/expressions/parse.c +++ b/src/language/expressions/parse.c @@ -829,8 +829,8 @@ parse_primary (struct expression *e) case T_STRING: { - union any_node *node = expr_allocate_string_buffer (e, ds_c_str (&tokstr), - ds_length (&tokstr)); + union any_node *node = expr_allocate_string_buffer ( + e, ds_cstr (&tokstr), ds_length (&tokstr)); lex_get (); return node; } @@ -1094,14 +1094,14 @@ put_invocation (struct 
string *s, { size_t i; - ds_printf (s, "%s(", func_name); + ds_put_format (s, "%s(", func_name); for (i = 0; i < arg_cnt; i++) { if (i > 0) - ds_puts (s, ", "); - ds_puts (s, operations[expr_node_returns (args[i])].prototype); + ds_put_cstr (s, ", "); + ds_put_cstr (s, operations[expr_node_returns (args[i])].prototype); } - ds_putc (s, ')'); + ds_put_char (s, ')'); } static void @@ -1112,25 +1112,25 @@ no_match (const char *func_name, struct string s; const struct operation *f; - ds_init (&s); + ds_init_empty (&s); if (last - first == 1) { - ds_printf (&s, _("Type mismatch invoking %s as "), first->prototype); + ds_put_format (&s, _("Type mismatch invoking %s as "), first->prototype); put_invocation (&s, func_name, args, arg_cnt); } else { - ds_puts (&s, _("Function invocation ")); + ds_put_cstr (&s, _("Function invocation ")); put_invocation (&s, func_name, args, arg_cnt); - ds_puts (&s, _(" does not match any known function. Candidates are:")); + ds_put_cstr (&s, _(" does not match any known function. 
Candidates are:")); for (f = first; f < last; f++) - ds_printf (&s, "\n%s", f->prototype); + ds_put_format (&s, "\n%s", f->prototype); } - ds_putc (&s, '.'); + ds_put_char (&s, '.'); - msg (SE, "%s", ds_c_str (&s)); + msg (SE, "%s", ds_cstr (&s)); ds_destroy (&s); } @@ -1145,23 +1145,23 @@ parse_function (struct expression *e) int arg_cnt = 0; int arg_cap = 0; - struct fixed_string func_name; + struct string func_name; union any_node *n; - ls_create (&func_name, ds_c_str (&tokstr)); - min_valid = extract_min_valid (ds_c_str (&tokstr)); - if (!lookup_function (ds_c_str (&tokstr), &first, &last)) + ds_init_string (&func_name, &tokstr); + min_valid = extract_min_valid (ds_cstr (&tokstr)); + if (!lookup_function (ds_cstr (&tokstr), &first, &last)) { - msg (SE, _("No function or vector named %s."), ds_c_str (&tokstr)); - ls_destroy (&func_name); + msg (SE, _("No function or vector named %s."), ds_cstr (&tokstr)); + ds_destroy (&func_name); return NULL; } lex_get (); if (!lex_force_match ('(')) { - ls_destroy (&func_name); + ds_destroy (&func_name); return NULL; } @@ -1206,7 +1206,7 @@ parse_function (struct expression *e) break; if (f >= last) { - no_match (ls_c_str (&func_name), args, arg_cnt, first, last); + no_match (ds_cstr (&func_name), args, arg_cnt, first, last); goto fail; } @@ -1246,12 +1246,12 @@ parse_function (struct expression *e) } free (args); - ls_destroy (&func_name); + ds_destroy (&func_name); return n; fail: free (args); - ls_destroy (&func_name); + ds_destroy (&func_name); return NULL; } @@ -1428,7 +1428,7 @@ expr_allocate_string_buffer (struct expression *e, } union any_node * -expr_allocate_string (struct expression *e, struct fixed_string s) +expr_allocate_string (struct expression *e, struct substring s) { union any_node *n = pool_alloc (e->expr_pool, sizeof n->string); n->type = OP_string; diff --git a/src/language/expressions/private.h b/src/language/expressions/private.h index 0d81c22c..3fa108d5 100644 --- a/src/language/expressions/private.h 
+++ b/src/language/expressions/private.h @@ -92,7 +92,7 @@ struct number_node struct string_node { operation_type type; /* OP_string. */ - struct fixed_string s; + struct substring s; }; struct variable_node @@ -145,7 +145,7 @@ union operation_data { operation_type operation; double number; - struct fixed_string string; + struct substring string; struct variable *variable; const struct vector *vector; struct fmt_spec *format; @@ -164,7 +164,7 @@ struct expression size_t op_cnt, op_cap; /* Number of ops, amount of allocated space. */ double *number_stack; /* Evaluation stack: numerics, Booleans. */ - struct fixed_string *string_stack; /* Evaluation stack: strings. */ + struct substring *string_stack; /* Evaluation stack: strings. */ struct pool *eval_pool; /* Pool for evaluation temporaries. */ }; @@ -190,7 +190,7 @@ union any_node *expr_allocate_pos_int (struct expression *e, int); union any_node *expr_allocate_string_buffer (struct expression *e, const char *string, size_t length); union any_node *expr_allocate_string (struct expression *e, - struct fixed_string); + struct substring); union any_node *expr_allocate_variable (struct expression *e, struct variable *); union any_node *expr_allocate_format (struct expression *e, diff --git a/src/language/lexer/ChangeLog b/src/language/lexer/ChangeLog index d9634c22..081aa8de 100644 --- a/src/language/lexer/ChangeLog +++ b/src/language/lexer/ChangeLog @@ -1,3 +1,10 @@ +Fri Jun 9 14:02:19 2006 Ben Pfaff + + Reform string library. + + * q2c.c (dump_subcommand): ds_c_str() became ds_cstr(), in + generated code. + Wed Apr 26 13:30:41 2006 Ben Pfaff * lexer.c: (lex_discard_rest_of_command) New function. 
diff --git a/src/language/lexer/format-parser.c b/src/language/lexer/format-parser.c index b539432e..aa4729b0 100644 --- a/src/language/lexer/format-parser.c +++ b/src/language/lexer/format-parser.c @@ -45,7 +45,7 @@ parse_format_specifier_name (const char **cp, enum fmt_parse_flags flags) char *sp, *ep; int idx; - sp = ep = ds_c_str (&tokstr); + sp = ep = ds_cstr (&tokstr); while (isalpha ((unsigned char) *ep)) ep++; @@ -72,7 +72,7 @@ parse_format_specifier_name (const char **cp, enum fmt_parse_flags flags) /* No match. */ if (!(flags & FMTP_SUPPRESS_ERRORS)) msg (SE, _("%.*s is not a valid data format."), - (int) (ep - sp), ds_c_str (&tokstr)); + (int) (ep - sp), ds_cstr (&tokstr)); idx = -1; } } @@ -125,7 +125,7 @@ parse_format_specifier (struct fmt_spec *input, enum fmt_parse_flags flags) { if (!(flags & FMTP_SUPPRESS_ERRORS)) msg (SE, _("Data format %s does not specify a width."), - ds_c_str (&tokstr)); + ds_cstr (&tokstr)); return 0; } if ( w > MAX_STRING ) @@ -147,7 +147,7 @@ parse_format_specifier (struct fmt_spec *input, enum fmt_parse_flags flags) if (*cp) { if (!(flags & FMTP_SUPPRESS_ERRORS)) - msg (SE, _("Data format %s is not valid."), ds_c_str (&tokstr)); + msg (SE, _("Data format %s is not valid."), ds_cstr (&tokstr)); return 0; } lex_get (); diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c index 750f3053..c9e71b9f 100644 --- a/src/language/lexer/lexer.c +++ b/src/language/lexer/lexer.c @@ -91,7 +91,6 @@ enum string_type HEX_STRING /* Hexadecimal digits. 
*/ }; -static void convert_numeric_string_to_char_string (enum string_type); static int parse_string (enum string_type); #if DUMP_TOKENS @@ -104,8 +103,8 @@ static void dump_token (void); void lex_init (void) { - ds_init (&tokstr); - ds_init (&put_tokstr); + ds_init_empty (&tokstr); + ds_init_empty (&put_tokstr); if (!lex_get_line ()) eof = true; } @@ -128,7 +127,7 @@ restore_token (void) assert (put_token != 0); token = put_token; ds_assign_string (&tokstr, &put_tokstr); - str_copy_trunc (tokid, sizeof tokid, ds_c_str (&tokstr)); + str_copy_trunc (tokid, sizeof tokid, ds_cstr (&tokstr)); tokval = put_tokval; put_token = 0; } @@ -225,7 +224,7 @@ lex_get (void) negative numbers into two tokens. */ if (*prog == '-') { - ds_putc (&tokstr, *prog++); + ds_put_char (&tokstr, *prog++); while (isspace ((unsigned char) *prog)) prog++; @@ -241,32 +240,32 @@ lex_get (void) /* Parse the number, copying it into tokstr. */ while (isdigit ((unsigned char) *prog)) - ds_putc (&tokstr, *prog++); + ds_put_char (&tokstr, *prog++); if (*prog == '.') { - ds_putc (&tokstr, *prog++); + ds_put_char (&tokstr, *prog++); while (isdigit ((unsigned char) *prog)) - ds_putc (&tokstr, *prog++); + ds_put_char (&tokstr, *prog++); } if (*prog == 'e' || *prog == 'E') { - ds_putc (&tokstr, *prog++); + ds_put_char (&tokstr, *prog++); if (*prog == '+' || *prog == '-') - ds_putc (&tokstr, *prog++); + ds_put_char (&tokstr, *prog++); while (isdigit ((unsigned char) *prog)) - ds_putc (&tokstr, *prog++); + ds_put_char (&tokstr, *prog++); } /* Parse as floating point. 
*/ - tokval = strtod (ds_c_str (&tokstr), &tail); + tokval = strtod (ds_cstr (&tokstr), &tail); if (*tail) { msg (SE, _("%s does not form a valid number."), - ds_c_str (&tokstr)); + ds_cstr (&tokstr)); tokval = 0.0; ds_clear (&tokstr); - ds_putc (&tokstr, '0'); + ds_put_char (&tokstr, '0'); } break; @@ -388,9 +387,9 @@ parse_id (void) const char *start = prog; prog = lex_skip_identifier (start); - ds_concat (&tokstr, start, prog - start); - str_copy_trunc (tokid, sizeof tokid, ds_c_str (&tokstr)); - return lex_id_to_token (ds_c_str (&tokstr), ds_length (&tokstr)); + ds_put_substring (&tokstr, ss_buffer (start, prog - start)); + str_copy_trunc (tokid, sizeof tokid, ds_cstr (&tokstr)); + return lex_id_to_token (ds_cstr (&tokstr), ds_length (&tokstr)); } /* Reports an error to the effect that subcommand SBC may only be @@ -695,8 +694,8 @@ lex_put_back_id (const char *id) assert (lex_id_to_token (id, strlen (id)) == T_ID); save_token (); token = T_ID; - ds_assign_c_str (&tokstr, id); - str_copy_trunc (tokid, sizeof tokid, ds_c_str (&tokstr)); + ds_assign_cstr (&tokstr, id); + str_copy_trunc (tokid, sizeof tokid, ds_cstr (&tokstr)); } /* Weird line processing functions. */ @@ -705,7 +704,7 @@ lex_put_back_id (const char *id) const char * lex_entire_line (void) { - return ds_c_str (&getl_buf); + return ds_cstr (&getl_buf); } /* As lex_entire_line(), but only returns the part of the current line @@ -769,7 +768,7 @@ strip_comments (struct string *string) in_comment = false; quote = EOF; - for (cp = ds_c_str (string); *cp; ) + for (cp = ds_cstr (string); *cp; ) { /* If we're not in a comment, check for quote marks. */ if (!in_comment) @@ -819,7 +818,7 @@ lex_get_line (void) return false; strip_comments (line); - ds_rtrim_spaces (line); + ds_rtrim (line, ss_cstr (CC_SPACES)); /* Check for and remove terminal dot. 
*/ dot = (ds_chomp (line, get_endcmd ()) @@ -837,7 +836,7 @@ lex_get_line (void) put_token = '.'; } - prog = ds_c_str (line); + prog = ds_cstr (line); return true; } @@ -873,7 +872,7 @@ lex_token_representation (void) case T_ID: case T_POS_NUM: case T_NEG_NUM: - return xstrdup (ds_c_str (&tokstr)); + return ds_xstrdup (&tokstr); break; case T_STRING: @@ -881,7 +880,7 @@ lex_token_representation (void) int hexstring = 0; char *sp, *dp; - for (sp = ds_c_str (&tokstr); sp < ds_end (&tokstr); sp++) + for (sp = ds_cstr (&tokstr); sp < ds_end (&tokstr); sp++) if (!isprint ((unsigned char) *sp)) { hexstring = 1; @@ -896,14 +895,14 @@ lex_token_representation (void) *dp++ = '\''; if (!hexstring) - for (sp = ds_c_str (&tokstr); *sp; ) + for (sp = ds_cstr (&tokstr); *sp; ) { if (*sp == '\'') *dp++ = '\''; *dp++ = (unsigned char) *sp++; } else - for (sp = ds_c_str (&tokstr); sp < ds_end (&tokstr); sp++) + for (sp = ds_cstr (&tokstr); sp < ds_end (&tokstr); sp++) { *dp++ = (((unsigned char) *sp) >> 4)["0123456789ABCDEF"]; *dp++ = (((unsigned char) *sp) & 15)["0123456789ABCDEF"]; @@ -951,7 +950,7 @@ lex_negative_to_dash (void) { token = T_POS_NUM; tokval = -tokval; - ds_assign_substring (&tokstr, &tokstr, 1, SIZE_MAX); + ds_assign_substring (&tokstr, ds_substr (&tokstr, 1, SIZE_MAX)); save_token (); token = '-'; } @@ -1028,7 +1027,7 @@ convert_numeric_string_to_char_string (enum string_type type) "multiple of %d."), base_name, ds_length (&tokstr), chars_per_byte); - p = ds_c_str (&tokstr); + p = ds_cstr (&tokstr); for (i = 0; i < byte_cnt; i++) { int value; @@ -1058,7 +1057,7 @@ convert_numeric_string_to_char_string (enum string_type type) value = value * base + v; } - ds_c_str (&tokstr)[i] = (unsigned char) value; + ds_cstr (&tokstr)[i] = (unsigned char) value; } ds_truncate (&tokstr, byte_cnt); @@ -1097,7 +1096,7 @@ parse_string (enum string_type type) break; } - ds_putc (&tokstr, *prog++); + ds_put_char (&tokstr, *prog++); } prog++; @@ -1170,7 +1169,7 @@ finish: int warned = 
0; for (i = 0; i < ds_length (&tokstr); i++) - if (ds_c_str (&tokstr)[i] == 0) + if (ds_cstr (&tokstr)[i] == 0) { if (!warned) { @@ -1178,7 +1177,7 @@ finish: "characters. Replacing with spaces.")); warned = 1; } - ds_c_str (&tokstr)[i] = ' '; + ds_cstr (&tokstr)[i] = ' '; } } @@ -1212,7 +1211,7 @@ dump_token (void) break; case T_STRING: - fprintf (stderr, "STRING\t\"%s\"\n", ds_c_str (&tokstr)); + fprintf (stderr, "STRING\t\"%s\"\n", ds_cstr (&tokstr)); break; case T_STOP: diff --git a/src/language/lexer/q2c.c b/src/language/lexer/q2c.c index 1b12c269..2919bd5b 100644 --- a/src/language/lexer/q2c.c +++ b/src/language/lexer/q2c.c @@ -1624,7 +1624,7 @@ dump_subcommand (const subcommand *sbc) outdent (); } dump (0, "free(p->s_%s);", st_lower(sbc->name) ); - dump (0, "p->s_%s = xstrdup (ds_c_str (&tokstr));", + dump (0, "p->s_%s = ds_xstrdup (&tokstr);", st_lower (sbc->name)); dump (0, "lex_get ();"); if (sbc->restriction) diff --git a/src/language/line-buffer.c b/src/language/line-buffer.c index 709bc2f0..1b48f538 100644 --- a/src/language/line-buffer.c +++ b/src/language/line-buffer.c @@ -109,9 +109,9 @@ static const char *get_prompt (void); void getl_initialize (void) { - ds_create (&getl_include_path, - fn_getenv_default ("STAT_INCLUDE_PATH", include_path)); - ds_init (&getl_buf); + ds_init_cstr (&getl_include_path, + fn_getenv_default ("STAT_INCLUDE_PATH", include_path)); + ds_init_empty (&getl_buf); init_prompts (); } @@ -127,9 +127,9 @@ void getl_add_include_dir (const char *path) { if (ds_length (&getl_include_path)) - ds_putc (&getl_include_path, ':'); + ds_put_char (&getl_include_path, ':'); - ds_puts (&getl_include_path, path); + ds_put_cstr (&getl_include_path, path); } /* Appends source S to the list of source files. 
*/ @@ -239,7 +239,7 @@ getl_include_syntax_file (const char *fn) { if (cur_source != NULL) { - char *found_fn = fn_search_path (fn, ds_c_str (&getl_include_path), + char *found_fn = fn_search_path (fn, ds_cstr (&getl_include_path), fn_dir_name (cur_source->fn)); if (found_fn != NULL) { @@ -457,7 +457,7 @@ read_syntax_file (struct string *line, struct getl_source *s) do { s->ln++; - if (!ds_gets (line, s->u.syntax_file)) + if (!ds_read_line (line, s->u.syntax_file)) { if (ferror (s->u.syntax_file)) msg (ME, _("Reading `%s': %s."), s->fn, strerror (errno)); @@ -465,11 +465,11 @@ read_syntax_file (struct string *line, struct getl_source *s) } ds_chomp (line, '\n'); } - while (s->ln == 1 && !memcmp (ds_c_str (line), "#!", 2)); + while (s->ln == 1 && !memcmp (ds_cstr (line), "#!", 2)); /* Echo to listing file, if configured to do so. */ if (get_echo ()) - tab_output_text (TAB_LEFT | TAB_FIX, ds_c_str (line)); + tab_output_text (TAB_LEFT | TAB_FIX, ds_cstr (line)); return true; } diff --git a/src/language/stats/aggregate.c b/src/language/stats/aggregate.c index 2497d064..aef4bbbf 100644 --- a/src/language/stats/aggregate.c +++ b/src/language/stats/aggregate.c @@ -404,7 +404,7 @@ parse_aggregate_functions (struct agr_proc *agr) if (token == T_STRING) { ds_truncate (&tokstr, 255); - dest_label[n_dest - 1] = xstrdup (ds_c_str (&tokstr)); + dest_label[n_dest - 1] = ds_xstrdup (&tokstr); lex_get (); } } @@ -472,7 +472,7 @@ parse_aggregate_functions (struct agr_proc *agr) lex_match (','); if (token == T_STRING) { - arg[i].c = xstrdup (ds_c_str (&tokstr)); + arg[i].c = ds_xstrdup (&tokstr); type = ALPHA; } else if (lex_is_number ()) diff --git a/src/language/stats/crosstabs.q b/src/language/stats/crosstabs.q index d7828bd5..ee729920 100644 --- a/src/language/stats/crosstabs.q +++ b/src/language/stats/crosstabs.q @@ -1661,7 +1661,7 @@ static void table_value_missing (struct tab_table *table, int c, int r, unsigned char opt, const union value *v, const struct variable *var) { - 
struct fixed_string s; + struct substring s; const char *label = val_labs_find (var->val_labs, *v); if (label) @@ -1708,7 +1708,7 @@ format_cell_entry (struct tab_table *table, int c, int r, double value, { const struct fmt_spec f = {FMT_F, 10, 1}; union value v; - struct fixed_string s; + struct substring s; s.length = 10; s.string = tab_alloc (table, 16); diff --git a/src/language/stats/t-test.q b/src/language/stats/t-test.q index 7c2352c4..7685bd82 100644 --- a/src/language/stats/t-test.q +++ b/src/language/stats/t-test.q @@ -582,7 +582,7 @@ parse_value (union value * v, int type ) { if (!lex_force_string ()) return 0; - strncpy (v->s, ds_c_str (&tokstr), ds_length (&tokstr)); + strncpy (v->s, ds_cstr (&tokstr), ds_length (&tokstr)); } lex_get (); diff --git a/src/language/utilities/echo.c b/src/language/utilities/echo.c index 09c42382..e4d8e685 100644 --- a/src/language/utilities/echo.c +++ b/src/language/utilities/echo.c @@ -41,7 +41,7 @@ cmd_echo(void) tab_dim (tab, tab_natural_dimensions); tab_flags (tab, SOMF_NO_TITLE ); - tab_text(tab, 0, 0, 0, tokstr.string); + tab_text(tab, 0, 0, 0, ds_cstr (&tokstr)); tab_submit(tab); diff --git a/src/language/utilities/include.c b/src/language/utilities/include.c index 42614f1b..19f5c2e2 100644 --- a/src/language/utilities/include.c +++ b/src/language/utilities/include.c @@ -43,7 +43,7 @@ cmd_include (void) lex_error (_("expecting file name")); return CMD_CASCADING_FAILURE; } - getl_include_syntax_file (ds_c_str (&tokstr)); + getl_include_syntax_file (ds_cstr (&tokstr)); lex_get (); return lex_end_of_command (); diff --git a/src/language/utilities/permissions.c b/src/language/utilities/permissions.c index 9dd911a1..03b2241a 100644 --- a/src/language/utilities/permissions.c +++ b/src/language/utilities/permissions.c @@ -51,7 +51,7 @@ cmd_permissions (void) if (lex_match_id ("FILE")) lex_match ('='); - fn = strdup(ds_c_str(&tokstr)); + fn = ds_xstrdup (&tokstr); lex_force_match(T_STRING); diff --git 
a/src/language/utilities/title.c b/src/language/utilities/title.c index 9fa1309d..18e4bb27 100644 --- a/src/language/utilities/title.c +++ b/src/language/utilities/title.c @@ -63,7 +63,7 @@ get_title (const char *cmd, char **title) return CMD_FAILURE; if (*title) free (*title); - *title = xstrdup (ds_c_str (&tokstr)); + *title = ds_xstrdup (&tokstr); lex_get (); if (token != '.') { diff --git a/src/language/xforms/count.c b/src/language/xforms/count.c index 8673c6de..284cdeeb 100644 --- a/src/language/xforms/count.c +++ b/src/language/xforms/count.c @@ -254,7 +254,7 @@ parse_string_criteria (struct pool *pool, struct criteria *crit) return false; cur = &crit->values.str[crit->value_cnt++]; *cur = pool_alloc (pool, len + 1); - str_copy_rpad (*cur, len + 1, ds_c_str (&tokstr)); + str_copy_rpad (*cur, len + 1, ds_cstr (&tokstr)); lex_get (); lex_match (','); diff --git a/src/libpspp/ChangeLog b/src/libpspp/ChangeLog index 852b71b1..ec8a40de 100644 --- a/src/libpspp/ChangeLog +++ b/src/libpspp/ChangeLog @@ -1,3 +1,117 @@ +Fri Jun 9 14:03:29 2006 Ben Pfaff + + Reform string library. + + * str.c (ss_empty): New function. Replaces some uses of ls_init() + or ls_null(). + (ss_cstr) New function. Replaces some uses of ls_init(). + (ss_buffer) New function. Replaces some uses of ls_init(). + (ss_substr) New function. + (ss_head) New function. + (ss_tail) New function. + (ss_alloc_substring) New function. Replaces use of ls_create(). + (ss_alloc_uninit) New function. + (ss_dealloc) New function. Replaces use of ls_destroy(). + (ss_truncate) New function. + (ss_rtrim) New function. + (ss_ltrim) New function. + (ss_trim) New function. + (ss_chomp) New function. + (ss_separate) New function. + (ss_tokenize) New function. + (ss_advance) New function. + (ds_create) Renamed ds_init_cstr(). Updated all callers. + (ss_match_char) New function. + (ss_get_char) New function. + (ss_get_until) New function. + (ss_get_chars) New function. + (ss_is_empty) New function. 
+ (ss_length) New function. Replaces ls_length(). + (ss_data) New function. Replaces many uses of ls_c_str(). + (ss_end) New function. Replaces ls_end(). + (ss_at) New function. + (ss_first) New function. + (ss_last) New function. + (ss_span) New function. + (ss_cspan) New function. + (ss_compare) New function. + (ss_pointer_to_position) New function. + (ss_xstrdup) New function. + (ds_init) Renamed ds_init_empty(). All callers updated. + (ds_init_string) New function. + (ds_init_substring) Changed interface to take a struct substring. + Updated all callers. + (ds_init_cstr) New function. Replaces ds_create(). All callers + updated. + (ds_assign_substring) Changed interface to take a struct + substring. Updated all callers. + (ds_assign_buffer) Removed. Changed all callers to use + ds_assign_substring(). + (ds_assign_c_str) Renamed ds_assign_cstr(). All callers updated. + (ds_ss) New function. + (ds_substr) New function. + (ds_head) New function. + (ds_tail) New function. + (ds_rtrim) New function. Replaces ds_rtrim_spaces(). All callers + updated. + (ds_ltrim) New function. Replaces ds_ltrim_spaces(). All callers + updated. + (ds_trim) New function. Replaces ds_trim_spaces(). All callers + updated. + (ds_rtrim_spaces) Removed. + (ds_ltrim_spaces) Removed. + (ds_trim_spaces) Removed. + (ds_separate) Changed interface to use substrings. All callers + updated. + (ds_tokenize) Changed interface to use substrings. All callers + updated. + (ds_c_str) Renamed ds_cstr(). All callers updated. + (ds_span) Changed interface to use substring for SKIP_SET and + dropped OFS. All callers updated. + (ds_cspan) Changed interface to use substring for STOP_SET and + dropped OFS. All callers updated. + (ds_find_char) New function. + (ds_compare) New function. + (ds_pointer_to_position) New function. + (ds_xstrdup) New function. Replaced all users of + xstrdup(ds_c_str(s)) by a call to this function. + (ds_gets) Renamed ds_read_line(). All callers updated. 
+ (ds_get_config_line) Renamed ds_read_config_line(). All callers + updated. + (ds_puts) Renamed ds_put_cstr(). All callers updated. + (ds_put_substring) New function. Replaces ds_concat(). All + callers updated. + (ds_concat) Removed. + (ds_append_uninit) Renamed ds_put_uninit(). All callers updated. + (ds_printf) Renamed ds_put_format(). All callers updated. + (ds_vprintf) Renamed ds_put_vformat(). All callers updated. + (ds_putc) Renamed ds_put_char(). All callers updated. + (ds_putc_multiple) Renamed ds_put_char_multiple(). All callers + updated. + (ls_create) Removed. Replaced by ss_alloc_substring(). + (ls_create_buffer) Removed. Replaced by ss_alloc_substring(). + (ls_init) Removed. Replaced by ss_buffer(). + (ls_shallow_copy) Removed. Just use assignment. + (ls_destroy) Removed. Replaced by ss_dealloc(). + (ls_null) Removed. + (ls_null_p) Removed. + (ls_empty_p) Removed. Replaced by ss_is_empty(). + (ls_c_str) Removed. + (ls_end) Removed. Replaced by ss_end(). + + * str.h (struct fixed_string): Renamed struct substring, updated + all users. + (CC_SPACES) New macro. + (CC_DIGITS) Ditto. + (CC_XDIGITS) Ditto. + (CC_LETTERS) Ditto. + (CC_ALNUM) Ditto. + (SS_EMPTY_INITIALIZER) Ditto. + (SS_LITERAL_INITIALIZER) Ditto. + (struct string) Removed string, length members. Add substring + member. + (DS_INITIALIZER) Rename DS_EMPTY_INITIALIZER. + Tue May 30 19:45:12 WST 2006 John Darrington * i18n.c i18n.h: New files. diff --git a/src/libpspp/str.c b/src/libpspp/str.c index 655774d6..67d189dd 100644 --- a/src/libpspp/str.c +++ b/src/libpspp/str.c @@ -30,6 +30,7 @@ #include "minmax.h" #include "size_max.h" +#include "xsize.h" /* Reverses the order of NBYTES bytes at address P, thus converting between little- and big-endian byte orders. */ @@ -252,23 +253,405 @@ spprintf (char *dst, const char *format, ...) return dst + count; } -/* Initializes ST with initial contents S. */ +/* Substrings. */ + +/* Returns an empty substring. 
*/ +struct substring +ss_empty (void) +{ + struct substring ss; + ss.string = NULL; + ss.length = 0; + return ss; +} + +/* Returns a substring whose contents are the given C-style + string CSTR. */ +struct substring +ss_cstr (const char *cstr) +{ + return ss_buffer (cstr, strlen (cstr)); +} + +/* Returns a substring whose contents are the CNT characters in + BUFFER. */ +struct substring +ss_buffer (const char *buffer, size_t cnt) +{ + struct substring ss; + ss.string = (char *) buffer; + ss.length = cnt; + return ss; +} + +/* Returns a substring whose contents are the CNT characters + starting at the (0-based) position START in SS. */ +struct substring +ss_substr (struct substring ss, size_t start, size_t cnt) +{ + if (start < ss.length) + return ss_buffer (ss.string + start, MIN (cnt, ss.length - start)); + else + return ss_buffer (ss.string + ss.length, 0); +} + +/* Returns a substring whose contents are the first CNT + characters in SS. */ +struct substring +ss_head (struct substring ss, size_t cnt) +{ + return ss_buffer (ss.string, MIN (cnt, ss.length)); +} + +/* Returns a substring whose contents are the last CNT characters + in SS. */ +struct substring +ss_tail (struct substring ss, size_t cnt) +{ + if (cnt < ss.length) + return ss_buffer (ss.string + (ss.length - cnt), cnt); + else + return ss; +} + +/* Makes a malloc()'d copy of the contents of OLD + and stores it in NEW. */ +void +ss_alloc_substring (struct substring *new, struct substring old) +{ + new->string = xmalloc (old.length); + new->length = old.length; + memcpy (new->string, old.string, old.length); +} + +/* Allocates room for a CNT-character string in NEW. */ +void +ss_alloc_uninit (struct substring *new, size_t cnt) +{ + new->string = xmalloc (cnt); + new->length = cnt; +} + +/* Frees the string that SS points to. */ +void +ss_dealloc (struct substring *ss) +{ + free (ss->string); +} + +/* Truncates SS to at most CNT characters in length. 
*/ +void +ss_truncate (struct substring *ss, size_t cnt) +{ + if (ss->length > cnt) + ss->length = cnt; +} + +/* Removes trailing characters in TRIM_SET from SS. + Returns number of characters removed. */ +size_t +ss_rtrim (struct substring *ss, struct substring trim_set) +{ + size_t cnt = 0; + while (cnt < ss->length + && ss_find_char (trim_set, + ss->string[ss->length - cnt - 1]) != SIZE_MAX) + cnt++; + ss->length -= cnt; + return cnt; +} + +/* Removes leading characters in TRIM_SET from SS. + Returns number of characters removed. */ +size_t +ss_ltrim (struct substring *ss, struct substring trim_set) +{ + size_t cnt = ss_span (*ss, trim_set); + ss_advance (ss, cnt); + return cnt; +} + +/* Trims leading and trailing characters in TRIM_SET from SS. */ void -ds_create (struct string *st, const char *s) +ss_trim (struct substring *ss, struct substring trim_set) { - st->length = strlen (s); - st->capacity = MAX (8, st->length * 2); - st->string = xmalloc (st->capacity + 1); - strcpy (st->string, s); + ss_ltrim (ss, trim_set); + ss_rtrim (ss, trim_set); } +/* If the last character in SS is C, removes it and returns true. + Otherwise, returns false without changing the string. */ +bool +ss_chomp (struct substring *ss, char c) +{ + if (ss_last (*ss) == c) + { + ss->length--; + return true; + } + else + return false; +} + +/* Divides SS into tokens separated by any of the DELIMITERS. + Each call replaces TOKEN by the next token in SS, or by an + empty string if no tokens remain. Returns true if a token was + obtained, false otherwise. + + Before the first call, initialize *SAVE_IDX to 0. Do not + modify *SAVE_IDX between calls. + + SS divides into exactly one more token than it contains + delimiters. That is, a delimiter at the start or end of SS or + a pair of adjacent delimiters yields an empty token, and the + empty string contains a single token. 
*/ +bool +ss_separate (struct substring ss, struct substring delimiters, + size_t *save_idx, struct substring *token) +{ + if (*save_idx <= ss_length (ss)) + { + struct substring tmp = ss_substr (ss, *save_idx, SIZE_MAX); + size_t length = ss_cspan (tmp, delimiters); + *token = ss_head (tmp, length); + *save_idx += length + 1; + return true; + } + else + { + *token = ss_empty (); + return false; + } +} + +/* Divides SS into tokens separated by any of the DELIMITERS, + merging adjacent delimiters so that the empty string is never + produced as a token. Each call replaces TOKEN by the next + token in SS, or by an empty string if no tokens remain. + Returns true if a token was obtained, false otherwise. + + Before the first call, initialize *SAVE_IDX to 0. Do not + modify *SAVE_IDX between calls. */ +bool +ss_tokenize (struct substring ss, struct substring delimiters, + size_t *save_idx, struct substring *token) +{ + ss_advance (&ss, *save_idx); + *save_idx += ss_ltrim (&ss, delimiters); + *save_idx += ss_get_chars (&ss, ss_cspan (ss, delimiters), token); + return ss_length (*token) > 0; +} + +/* Removes the first CNT characters from SS. */ +void +ss_advance (struct substring *ss, size_t cnt) +{ + if (cnt > ss->length) + cnt = ss->length; + ss->string += cnt; + ss->length -= cnt; +} + +/* If the first character in SS is C, removes it and returns true. + Otherwise, returns false without changing the string. */ +bool +ss_match_char (struct substring *ss, char c) +{ + if (ss_first (*ss) == c) + { + ss->string++; + ss->length--; + return true; + } + else + return false; +} + +/* Removes the first character from SS and returns it. + If SS is empty, returns EOF without modifying SS. */ +int +ss_get_char (struct substring *ss) +{ + int c = ss_first (*ss); + if (c != EOF) + { + ss->string++; + ss->length--; + } + return c; +} + +/* Stores the prefix of SS up to the first DELIMITER in OUT (if + any). Trims those same characters from SS. 
DELIMITER is + removed from SS but not made part of OUT. Returns true if + DELIMITER was found (and removed), false otherwise. */ +bool +ss_get_until (struct substring *ss, char delimiter, struct substring *out) +{ + ss_get_chars (ss, ss_cspan (*ss, ss_buffer (&delimiter, 1)), out); + return ss_match_char (ss, delimiter); +} + +/* Stores the first CNT characters in SS in OUT (or fewer, if SS + is shorter than CNT characters). Trims the same characters + from the beginning of SS. */ +size_t +ss_get_chars (struct substring *ss, size_t cnt, struct substring *out) +{ + *out = ss_head (*ss, cnt); + ss_advance (ss, cnt); + return cnt; +} + +/* Returns true if SS is empty (contains no characters), + false otherwise. */ +bool +ss_is_empty (struct substring ss) +{ + return ss.length == 0; +} + +/* Returns the number of characters in SS. */ +size_t +ss_length (struct substring ss) +{ + return ss.length; +} + +/* Returns a pointer to the characters in SS. */ +char * +ss_data (struct substring ss) +{ + return ss.string; +} + +/* Returns a pointer just past the last character in SS. */ +char * +ss_end (struct substring ss) +{ + return ss.string + ss.length; +} + +/* Returns the character in position IDX in SS, as a value in the + range of unsigned char. Returns EOF if IDX is out of the + range of indexes for SS. */ +int +ss_at (struct substring ss, size_t idx) +{ + return idx < ss.length ? (unsigned char) ss.string[idx] : EOF; +} + +/* Returns the first character in SS as a value in the range of + unsigned char. Returns EOF if SS is the empty string. */ +int +ss_first (struct substring ss) +{ + return ss_at (ss, 0); +} + +/* Returns the last character in SS as a value in the range of + unsigned char. Returns EOF if SS is the empty string. */ +int +ss_last (struct substring ss) +{ + return ss.length > 0 ? (unsigned char) ss.string[ss.length - 1] : EOF; +} + +/* Returns the number of contiguous characters at the beginning + of SS that are in SKIP_SET. 
*/ +size_t +ss_span (struct substring ss, struct substring skip_set) +{ + size_t i; + for (i = 0; i < ss.length; i++) + if (ss_find_char (skip_set, ss.string[i]) == SIZE_MAX) + break; + return i; +} + +/* Returns the number of contiguous characters at the beginning + of SS that are not in STOP_SET. */ +size_t +ss_cspan (struct substring ss, struct substring stop_set) +{ + size_t i; + for (i = 0; i < ss.length; i++) + if (ss_find_char (stop_set, ss.string[i]) != SIZE_MAX) + break; + return i; +} + +/* Returns the offset in SS of the first instance of C, + or SIZE_MAX if C does not occur in SS. */ +size_t +ss_find_char (struct substring ss, char c) +{ + const char *p = memchr (ss.string, c, ss.length); + return p != NULL ? p - ss.string : SIZE_MAX; +} + +/* Compares A and B and returns a strcmp()-type comparison + result. */ +int +ss_compare (struct substring a, struct substring b) +{ + int retval = memcmp (a.string, b.string, MIN (a.length, b.length)); + if (retval == 0) + retval = a.length < b.length ? -1 : a.length > b.length; + return retval; +} + +/* Returns the position in SS that the character at P occupies. + P must point within SS or one past its end. */ +size_t +ss_pointer_to_position (struct substring ss, const char *p) +{ + size_t pos = p - ss.string; + assert (pos <= ss.length); + return pos; +} + +/* Allocates and returns a null-terminated string that contains + SS. */ +char * +ss_xstrdup (struct substring ss) +{ + char *s = xmalloc (ss.length + 1); + memcpy (s, ss.string, ss.length); + s[ss.length] = '\0'; + return s; +} + /* Initializes ST as an empty string. */ void -ds_init (struct string *st) +ds_init_empty (struct string *st) { - st->length = 0; + st->ss = ss_empty (); st->capacity = 0; - st->string = NULL; +} + +/* Initializes ST with initial contents S. */ +void +ds_init_string (struct string *st, const struct string *s) +{ + ds_init_substring (st, ds_ss (s)); +} + +/* Initializes ST with initial contents SS. 
*/ +void +ds_init_substring (struct string *st, struct substring ss) +{ + st->capacity = MAX (8, ss.length * 2); + st->ss.string = xmalloc (st->capacity + 1); + memcpy (st->ss.string, ss.string, ss.length); + st->ss.length = ss.length; +} + +/* Initializes ST with initial contents S. */ +void +ds_init_cstr (struct string *st, const char *s) +{ + ds_init_substring (st, ss_cstr (s)); } /* Frees ST. */ @@ -277,9 +660,9 @@ ds_destroy (struct string *st) { if (st != NULL) { - free (st->string); - st->string = NULL; - st->length = 0; + ss_dealloc (&st->ss); + st->ss.string = NULL; + st->ss.length = 0; st->capacity = 0; } } @@ -293,61 +676,75 @@ ds_swap (struct string *a, struct string *b) *b = tmp; } -/* Initializes DST with the CNT characters from SRC starting at - position IDX. */ -void -ds_init_substring (struct string *dst, - const struct string *src, size_t idx, size_t cnt) -{ - assert (dst != src); - ds_init (dst); - ds_assign_substring (dst, src, idx, cnt); -} - /* Copies SRC into DST. DST and SRC may be the same string. */ void ds_assign_string (struct string *dst, const struct string *src) { - ds_assign_buffer (dst, ds_data (src), ds_length (src)); + ds_assign_substring (dst, ds_ss (src)); } -/* Replaces DST by CNT characters from SRC starting at position - IDX. - DST and SRC may be the same string. */ +/* Replaces DST by SS. + SS may be a substring of DST. */ void -ds_assign_substring (struct string *dst, - const struct string *src, size_t idx, size_t cnt) +ds_assign_substring (struct string *dst, struct substring ss) { - if (idx < src->length) - ds_assign_buffer (dst, src->string + idx, MIN (cnt, src->length - idx)); - else - ds_clear (dst); -} - -/* Replaces DST by the LENGTH characters in SRC. - SRC may be a substring within DST. 
*/ -void -ds_assign_buffer (struct string *dst, const char *src, size_t length) -{ - dst->length = length; - ds_extend (dst, length); - memmove (dst->string, src, length); + dst->ss.length = ss.length; + ds_extend (dst, ss.length); + memmove (dst->ss.string, ss.string, ss.length); } /* Replaces DST by null-terminated string SRC. SRC may overlap with DST. */ void -ds_assign_c_str (struct string *dst, const char *src) +ds_assign_cstr (struct string *dst, const char *src) { - ds_assign_buffer (dst, src, strlen (src)); + ds_assign_substring (dst, ss_cstr (src)); } /* Truncates ST to zero length. */ void ds_clear (struct string *st) { - st->length = 0; + st->ss.length = 0; +} + +/* Returns a substring that contains ST. */ +struct substring +ds_ss (const struct string *st) +{ + return st->ss; +} + +/* Returns a substring that contains CNT characters from ST + starting at position START. + + If START is greater than or equal to the length of ST, then + the substring will be the empty string. If START + CNT + exceeds the length of ST, then the substring will only be + ds_length(ST) - START characters long. */ +struct substring +ds_substr (const struct string *st, size_t start, size_t cnt) +{ + return ss_substr (ds_ss (st), start, cnt); +} + +/* Returns a substring that contains the first CNT characters in + ST. If CNT exceeds the length of ST, then the substring will + contain all of ST. */ +struct substring +ds_head (const struct string *st, size_t cnt) +{ + return ss_head (ds_ss (st), cnt); +} + +/* Returns a substring that contains the last CNT characters in + ST. If CNT exceeds the length of ST, then the substring will + contain all of ST. 
*/ +struct substring +ds_tail (const struct string *st, size_t cnt) +{ + return ss_tail (ds_ss (st), cnt); } /* Ensures that ST can hold at least MIN_CAPACITY characters plus a null @@ -361,7 +758,7 @@ ds_extend (struct string *st, size_t min_capacity) if (st->capacity < min_capacity) st->capacity = 2 * min_capacity; - st->string = xrealloc (st->string, st->capacity + 1); + st->ss.string = xrealloc (st->ss.string, st->capacity + 1); } } @@ -369,10 +766,10 @@ ds_extend (struct string *st, size_t min_capacity) void ds_shrink (struct string *st) { - if (st->capacity != st->length) + if (st->capacity != st->ss.length) { - st->capacity = st->length; - st->string = xrealloc (st->string, st->capacity + 1); + st->capacity = st->ss.length; + st->ss.string = xrealloc (st->ss.string, st->capacity + 1); } } @@ -380,68 +777,43 @@ ds_shrink (struct string *st) void ds_truncate (struct string *st, size_t length) { - if (st->length > length) - st->length = length; -} - -/* Pad ST on the right with copies of PAD until ST is at least - LENGTH characters in size. If ST is initially LENGTH - characters or longer, this is a no-op. */ -void -ds_rpad (struct string *st, size_t length, char pad) -{ - if (length > st->length) - ds_putc_multiple (st, pad, length - st->length); + ss_truncate (&st->ss, length); } -/* Removes trailing spaces from ST. - Returns number of spaces removed. */ -int -ds_rtrim_spaces (struct string *st) +/* Removes trailing characters in TRIM_SET from ST. + Returns number of characters removed. */ +size_t +ds_rtrim (struct string *st, struct substring trim_set) { - int cnt = 0; - while (isspace (ds_last (st))) - { - st->length--; - cnt++; - } - return cnt; + return ss_rtrim (&st->ss, trim_set); } -/* Removes leading spaces from ST. - Returns number of spaces removed. */ -int -ds_ltrim_spaces (struct string *st) +/* Removes leading characters in TRIM_SET from ST. + Returns number of characters removed. 
*/ +size_t +ds_ltrim (struct string *st, struct substring trim_set) { - size_t cnt = 0; - while (isspace (ds_at (st, cnt))) - cnt++; + size_t cnt = ds_span (st, trim_set); if (cnt > 0) - ds_assign_substring (st, st, cnt, SIZE_MAX); + ds_assign_substring (st, ds_substr (st, cnt, SIZE_MAX)); return cnt; } -/* Trims leading and trailing spaces from ST. */ -void -ds_trim_spaces (struct string *st) +/* Trims leading and trailing characters in TRIM_SET from ST. + Returns number of characters removed. */ +size_t +ds_trim (struct string *st, struct substring trim_set) { - ds_rtrim_spaces (st); - ds_ltrim_spaces (st); + size_t cnt = ds_rtrim (st, trim_set); + return cnt + ds_ltrim (st, trim_set); } /* If the last character in ST is C, removes it and returns true. Otherwise, returns false without modifying ST. */ bool -ds_chomp (struct string *st, char c_) +ds_chomp (struct string *st, char c) { - unsigned char c = c_; - if (ds_last (st) == c) - { - st->length--; - return true; - } - else - return false; + return ss_chomp (&st->ss, c); } /* Divides ST into tokens separated by any of the DELIMITERS. @@ -457,18 +829,10 @@ ds_chomp (struct string *st, char c_) a pair of adjacent delimiters yields an empty token, and the empty string contains a single token. */ bool -ds_separate (const struct string *st, struct string *token, - const char *delimiters, size_t *save_idx) +ds_separate (const struct string *st, struct substring delimiters, + size_t *save_idx, struct substring *token) { - if (*save_idx <= ds_length (st)) - { - size_t length = ds_cspan (st, *save_idx, delimiters); - ds_assign_substring (token, st, *save_idx, length); - *save_idx += length + 1; - return true; - } - else - return false; + return ss_separate (ds_ss (st), delimiters, save_idx, token); } /* Divides ST into tokens separated by any of the DELIMITERS, @@ -480,46 +844,41 @@ ds_separate (const struct string *st, struct string *token, Before the first call, initialize *SAVE_IDX to 0. 
Do not modify *SAVE_IDX between calls. */ bool -ds_tokenize (const struct string *st, struct string *token, - const char *delimiters, size_t *save_idx) +ds_tokenize (const struct string *st, struct substring delimiters, + size_t *save_idx, struct substring *token) { - size_t start = *save_idx + ds_span (st, *save_idx, delimiters); - size_t length = ds_cspan (st, start, delimiters); - ds_assign_substring (token, st, start, length); - *save_idx = start + length; - return length > 0; + return ss_tokenize (ds_ss (st), delimiters, save_idx, token); +} + +/* Pad ST on the right with copies of PAD until ST is at least + LENGTH characters in size. If ST is initially LENGTH + characters or longer, this is a no-op. */ +void +ds_rpad (struct string *st, size_t length, char pad) +{ + if (length > st->ss.length) + ds_put_char_multiple (st, pad, length - st->ss.length); } /* Returns true if ST is empty, false otherwise. */ bool ds_is_empty (const struct string *st) { - return st->length == 0; + return ss_is_empty (st->ss); } /* Returns the length of ST. */ size_t ds_length (const struct string *st) { - return st->length; -} - -/* Returns the value of ST as a null-terminated string. */ -char * -ds_c_str (const struct string *st_) -{ - struct string *st = (struct string *) st_; - if (st->string == NULL) - ds_extend (st, 1); - st->string[st->length] = '\0'; - return st->string; + return ss_length (ds_ss (st)); } /* Returns the string data inside ST. */ char * ds_data (const struct string *st) { - return st->string; + return ss_data (ds_ss (st)); } /* Returns a pointer to the null terminator ST. @@ -528,14 +887,7 @@ ds_data (const struct string *st) char * ds_end (const struct string *st) { - return st->string + st->length; -} - -/* Returns the allocation size of ST. 
*/ -size_t -ds_capacity (const struct string *st) -{ - return st->capacity; + return ss_end (ds_ss (st)); } /* Returns the character in position IDX in ST, as a value in the @@ -544,7 +896,7 @@ ds_capacity (const struct string *st) int ds_at (const struct string *st, size_t idx) { - return idx < st->length ? (unsigned char) st->string[idx] : EOF; + return ss_at (ds_ss (st), idx); } /* Returns the first character in ST as a value in the range of @@ -552,7 +904,7 @@ ds_at (const struct string *st, size_t idx) int ds_first (const struct string *st) { - return ds_at (st, 0); + return ss_first (ds_ss (st)); } /* Returns the last character in ST as a value in the range of @@ -560,39 +912,74 @@ ds_first (const struct string *st) int ds_last (const struct string *st) { - return st->length > 0 ? (unsigned char) st->string[st->length - 1] : EOF; + return ss_last (ds_ss (st)); } -/* Returns the number of consecutive characters starting at OFS - in ST that are in SKIP_SET. (The null terminator is not - considered to be part of SKIP_SET.) */ +/* Returns the number of consecutive characters at the beginning + of ST that are in SKIP_SET. */ size_t -ds_span (const struct string *st, size_t ofs, const char skip_set[]) +ds_span (const struct string *st, struct substring skip_set) { - size_t i; - for (i = ofs; i < st->length; i++) - { - int c = st->string[i]; - if (strchr (skip_set, c) == NULL || c == '\0') - break; - } - return i - ofs; + return ss_span (ds_ss (st), skip_set); } -/* Returns the number of consecutive characters starting at OFS - in ST that are not in STOP_SET. (The null terminator is not - considered to be part of STOP_SET.) */ +/* Returns the number of consecutive characters at the beginning + of ST that are not in STOP_SET. 
*/ size_t -ds_cspan (const struct string *st, size_t ofs, const char stop_set[]) +ds_cspan (const struct string *st, struct substring stop_set) { - size_t i; - for (i = ofs; i < st->length; i++) - { - int c = st->string[i]; - if (strchr (stop_set, c) != NULL) - break; - } - return i - ofs; + return ss_cspan (ds_ss (st), stop_set); +} + +/* Returns the position of the first occurrence of character C in + ST, or SIZE_MAX if there is no such + occurrence. */ +size_t +ds_find_char (const struct string *st, char c) +{ + return ss_find_char (ds_ss (st), c); +} + +/* Compares A and B and returns a strcmp()-type comparison + result. */ +int +ds_compare (const struct string *a, const struct string *b) +{ + return ss_compare (ds_ss (a), ds_ss (b)); +} + +/* Returns the position in ST that the character at P occupies. + P must point within ST or one past its end. */ +size_t +ds_pointer_to_position (const struct string *st, const char *p) +{ + return ss_pointer_to_position (ds_ss (st), p); +} + +/* Allocates and returns a null-terminated string that contains + ST. */ +char * +ds_xstrdup (const struct string *st) +{ + return ss_xstrdup (ds_ss (st)); +} + +/* Returns the allocation size of ST. */ +size_t +ds_capacity (const struct string *st) +{ + return st->capacity; +} + +/* Returns the value of ST as a null-terminated string. */ +char * +ds_cstr (const struct string *st_) +{ + struct string *st = (struct string *) st_; + if (st->ss.string == NULL) + ds_extend (st, 1); + st->ss.string[st->ss.length] = '\0'; + return st->ss.string; } /* Appends to ST a newline-terminated line read from STREAM. @@ -602,7 +989,7 @@ ds_cspan (const struct string *st, size_t ofs, const char stop_set[]) all were read before an I/O error or end of file was encountered. 
*/ bool -ds_gets (struct string *st, FILE *stream) +ds_read_line (struct string *st, FILE *stream) { int c; @@ -612,7 +999,7 @@ ds_gets (struct string *st, FILE *stream) for (;;) { - ds_putc (st, c); + ds_put_char (st, c); if (c == '\n') return true; @@ -630,7 +1017,7 @@ remove_comment (struct string *st) char *cp; int quote = 0; - for (cp = ds_c_str (st); cp < ds_end (st); cp++) + for (cp = ds_data (st); cp < ds_end (st); cp++) if (quote) { if (*cp == quote) @@ -642,7 +1029,7 @@ remove_comment (struct string *st) quote = *cp; else if (*cp == '#') { - ds_truncate (st, cp - ds_c_str (st)); + ds_truncate (st, cp - ds_cstr (st)); break; } } @@ -660,15 +1047,15 @@ remove_comment (struct string *st) failure. If LINE_NUMBER is non-null, then *LINE_NUMBER is incremented by the number of lines read. */ bool -ds_get_config_line (FILE *stream, struct string *st, int *line_number) +ds_read_config_line (struct string *st, int *line_number, FILE *stream) { ds_clear (st); do { - if (!ds_gets (st, stream)) + if (!ds_read_line (st, stream)) return false; (*line_number)++; - ds_rtrim_spaces (st); + ds_rtrim (st, ss_cstr (CC_SPACES)); } while (ds_chomp (st, '\\')); @@ -676,73 +1063,79 @@ ds_get_config_line (FILE *stream, struct string *st, int *line_number) return true; } +/* Attempts to read SIZE * CNT bytes from STREAM and append them + to ST. + Returns number of bytes actually read. */ +size_t +ds_read_stream (struct string *st, size_t size, size_t cnt, FILE *stream) +{ + if (size != 0) + { + size_t try_bytes = xtimes (cnt, size); + if (size_in_bounds_p (xsum (ds_length (st), try_bytes))) + { + char *buffer = ds_put_uninit (st, try_bytes); + size_t got_bytes = fread (buffer, size, cnt, stream); + ds_truncate (st, ds_length (st) - (try_bytes - got_bytes)); + return got_bytes; + } + } + return 0; +} + /* Concatenates S onto ST. 
*/ void -ds_puts (struct string *st, const char *s) +ds_put_cstr (struct string *st, const char *s) { - size_t s_len; - - if (!s) return; - - s_len = strlen (s); - ds_extend (st, st->length + s_len); - strcpy (st->string + st->length, s); - st->length += s_len; + if (s != NULL) + ds_put_substring (st, ss_cstr (s)); } -/* Concatenates LEN characters from BUF onto ST. */ +/* Concatenates SS to ST. */ void -ds_concat (struct string *st, const char *buf, size_t len) +ds_put_substring (struct string *st, struct substring ss) { - ds_extend (st, st->length + len); - memcpy (st->string + st->length, buf, len); - st->length += len; + memcpy (ds_put_uninit (st, ss_length (ss)), ss_data (ss), ss_length (ss)); } /* Returns ds_end(ST) and THEN increases the length by INCR. */ char * -ds_append_uninit(struct string *st, size_t incr) +ds_put_uninit (struct string *st, size_t incr) { char *end; - - ds_extend(st, ds_length(st) + incr); - - end = ds_end(st); - - st->length += incr; - + ds_extend (st, ds_length (st) + incr); + end = ds_end (st); + st->ss.length += incr; return end; } /* Formats FORMAT as a printf string and appends the result to ST. */ void -ds_printf (struct string *st, const char *format, ...) +ds_put_format (struct string *st, const char *format, ...) { va_list args; va_start (args, format); - ds_vprintf(st, format, args); + ds_put_vformat (st, format, args); va_end (args); } /* Formats FORMAT as a printf string and appends the result to ST. */ void -ds_vprintf (struct string *st, const char *format, va_list args_) +ds_put_vformat (struct string *st, const char *format, va_list args_) { int avail, needed; va_list args; va_copy (args, args_); - avail = st->string != NULL ? st->capacity - st->length + 1 : 0; - needed = vsnprintf (st->string + st->length, avail, format, args); + avail = st->ss.string != NULL ? 
st->capacity - st->ss.length + 1 : 0; + needed = vsnprintf (st->ss.string + st->ss.length, avail, format, args); va_end (args); if (needed >= avail) { - ds_extend (st, st->length + needed); - va_copy (args, args_); - vsprintf (st->string + st->length, format, args); + vsprintf (ds_put_uninit (st, needed), format, args); va_end (args); } else @@ -752,121 +1145,26 @@ ds_vprintf (struct string *st, const char *format, va_list args_) while (needed == -1) { ds_extend (st, (st->capacity + 1) * 2); - avail = st->capacity - st->length + 1; + avail = st->capacity - st->ss.length + 1; va_copy (args, args_); - needed = vsnprintf (st->string + st->length, avail, format, args); + needed = vsnprintf (ds_end (st), avail, format, args); va_end (args); } + st->ss.length += needed; } - - st->length += needed; } /* Appends character CH to ST. */ void -ds_putc (struct string *st, int ch) +ds_put_char (struct string *st, int ch) { - if (st->length >= st->capacity) - ds_extend (st, st->length + 1); - st->string[st->length++] = ch; + ds_put_uninit (st, 1)[0] = ch; } /* Appends CNT copies of character CH to ST. */ void -ds_putc_multiple (struct string *st, int ch, size_t cnt) -{ - ds_extend (st, st->length + cnt); - memset (&st->string[st->length], ch, cnt); - st->length += cnt; -} - - -/* Lengthed strings. */ - -/* Creates a new lengthed string LS with contents as a copy of - S. */ -void -ls_create (struct fixed_string *ls, const char *s) -{ - ls->length = strlen (s); - ls->string = xmalloc (ls->length + 1); - memcpy (ls->string, s, ls->length + 1); -} - -/* Creates a new lengthed string LS with contents as a copy of - BUFFER with length LEN. */ -void -ls_create_buffer (struct fixed_string *ls, - const char *buffer, size_t len) -{ - ls->length = len; - ls->string = xmalloc (len + 1); - memcpy (ls->string, buffer, len); - ls->string[len] = '\0'; -} - -/* Sets the fields of LS to the specified values. 
*/ -void -ls_init (struct fixed_string *ls, const char *string, size_t length) -{ - ls->string = (char *) string; - ls->length = length; -} - -/* Copies the fields of SRC to DST. */ -void -ls_shallow_copy (struct fixed_string *dst, const struct fixed_string *src) -{ - *dst = *src; -} - -/* Frees the memory backing LS. */ -void -ls_destroy (struct fixed_string *ls) -{ - free (ls->string); -} - -/* Sets LS to a null pointer value. */ -void -ls_null (struct fixed_string *ls) -{ - ls->string = NULL; -} - -/* Returns nonzero only if LS has a null pointer value. */ -int -ls_null_p (const struct fixed_string *ls) -{ - return ls->string == NULL; -} - -/* Returns nonzero only if LS is a null pointer or has length 0. */ -int -ls_empty_p (const struct fixed_string *ls) -{ - return ls->string == NULL || ls->length == 0; -} - -/* Returns the length of LS, which must not be null. */ -size_t -ls_length (const struct fixed_string *ls) -{ - return ls->length; -} - -/* Returns a pointer to the character string in LS. */ -char * -ls_c_str (const struct fixed_string *ls) -{ - return (char *) ls->string; -} - -/* Returns a pointer to the null terminator of the character string in - LS. */ -char * -ls_end (const struct fixed_string *ls) +ds_put_char_multiple (struct string *st, int ch, size_t cnt) { - return (char *) (ls->string + ls->length); + memset (ds_put_uninit (st, cnt), ch, cnt); } diff --git a/src/libpspp/str.h b/src/libpspp/str.h index cd7cdb05..c3c008a6 100644 --- a/src/libpspp/str.h +++ b/src/libpspp/str.h @@ -64,74 +64,97 @@ void str_lowercase (char *); char *spprintf (char *dst, const char *format, ...); -/* Fixed-length strings. */ -struct fixed_string +/* Common character classes for use with substring and string functions. */ + +#define CC_SPACES " \t\v\r\n" +#define CC_DIGITS "0123456789" +#define CC_XDIGITS "0123456789abcdefABCDEF" +#define CC_LETTERS "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +#define CC_ALNUM CC_LETTERS CC_DIGITS + +/* Substrings. 
*/ +struct substring { char *string; size_t length; }; -void ls_create (struct fixed_string *, const char *); -void ls_create_buffer (struct fixed_string *, - const char *, size_t len); -void ls_init (struct fixed_string *, const char *, size_t); -void ls_shallow_copy (struct fixed_string *, const struct fixed_string *); -void ls_destroy (struct fixed_string *); - -void ls_null (struct fixed_string *); -int ls_null_p (const struct fixed_string *); -int ls_empty_p (const struct fixed_string *); - -size_t ls_length (const struct fixed_string *); -char *ls_c_str (const struct fixed_string *); -char *ls_end (const struct fixed_string *); - -#if __GNUC__ > 1 -extern inline size_t -ls_length (const struct fixed_string *st) -{ - return st->length; -} - -extern inline char * -ls_c_str (const struct fixed_string *st) -{ - return st->string; -} - -extern inline char * -ls_end (const struct fixed_string *st) -{ - return st->string + st->length; -} -#endif +#define SS_EMPTY_INITIALIZER {NULL, 0} +#define SS_LITERAL_INITIALIZER(LITERAL) {LITERAL, (sizeof LITERAL) - 1} + +/* Constructors. + These functions do not allocate any memory, so the substrings + they create should not normally be destroyed. */ +struct substring ss_empty (void); +struct substring ss_cstr (const char *); +struct substring ss_buffer (const char *, size_t); +struct substring ss_substr (struct substring, size_t start, size_t); +struct substring ss_head (struct substring, size_t); +struct substring ss_tail (struct substring, size_t); + +/* Constructors and destructor that allocate and deallocate + memory. */ +void ss_alloc_substring (struct substring *, struct substring); +void ss_alloc_uninit (struct substring *, size_t); +void ss_dealloc (struct substring *); + +/* Mutators. + Functions that advance the beginning of a string should not be + used if a substring is to be deallocated. 
*/ +void ss_truncate (struct substring *, size_t); +size_t ss_rtrim (struct substring *, struct substring trim_set); +size_t ss_ltrim (struct substring *, struct substring trim_set); +void ss_trim (struct substring *, struct substring trim_set); +bool ss_chomp (struct substring *, char); +bool ss_separate (struct substring src, struct substring delimiters, + size_t *save_idx, struct substring *token); +bool ss_tokenize (struct substring src, struct substring delimiters, + size_t *save_idx, struct substring *token); +void ss_advance (struct substring *, size_t); +bool ss_match_char (struct substring *, char); +int ss_get_char (struct substring *); +size_t ss_get_chars (struct substring *, size_t cnt, struct substring *); +bool ss_get_until (struct substring *, char delimiter, struct substring *); + +/* Inspectors. */ +bool ss_is_empty (struct substring); +size_t ss_length (struct substring); +char *ss_data (struct substring); +char *ss_end (struct substring); +int ss_at (struct substring, size_t idx); +int ss_first (struct substring); +int ss_last (struct substring); +size_t ss_span (struct substring, struct substring skip_set); +size_t ss_cspan (struct substring, struct substring stop_set); +size_t ss_find_char (struct substring, char); +int ss_compare (struct substring, struct substring); +size_t ss_pointer_to_position (struct substring, const char *); +char *ss_xstrdup (struct substring); /* Variable length strings. */ struct string { - char *string; /* String data, not necessarily null terminated. */ - size_t length; /* Length, not including a null terminator. */ + struct substring ss; + size_t capacity; /* Allocated capacity, not including one extra byte allocated for null terminator. */ }; -#define DS_INITIALIZER {NULL, 0, 0} +#define DS_EMPTY_INITIALIZER {SS_EMPTY_INITIALIZER, 0} /* Constructors, destructors. 
*/ -void ds_init (struct string *); -void ds_init_substring (struct string *, - const struct string *src, size_t start, size_t cnt); -void ds_create (struct string *, const char *); +void ds_init_empty (struct string *); +void ds_init_string (struct string *, const struct string *); +void ds_init_substring (struct string *, struct substring); +void ds_init_cstr (struct string *, const char *); void ds_destroy (struct string *); void ds_swap (struct string *, struct string *); /* Replacement. */ void ds_assign_string (struct string *, const struct string *); -void ds_assign_substring (struct string *, - const struct string *, size_t start, size_t cnt); -void ds_assign_buffer (struct string *, const char *, size_t); -void ds_assign_c_str (struct string *, const char *); +void ds_assign_substring (struct string *, struct substring); +void ds_assign_cstr (struct string *, const char *); /* Shrink, extend. */ void ds_clear (struct string *); @@ -140,69 +163,54 @@ void ds_shrink (struct string *); void ds_truncate (struct string *, size_t); /* Padding, trimming. 
*/ -void ds_rpad (struct string *, size_t length, char pad); -int ds_rtrim_spaces (struct string *); -int ds_ltrim_spaces (struct string *); -void ds_trim_spaces (struct string *); +size_t ds_rtrim (struct string *, struct substring trim_set); +size_t ds_ltrim (struct string *, struct substring trim_set); +size_t ds_trim (struct string *, struct substring trim_set); bool ds_chomp (struct string *, char); -bool ds_separate (const struct string *src, struct string *token, - const char *delimiters, size_t *save_idx); -bool ds_tokenize (const struct string *src, struct string *token, - const char *delimiters, size_t *save_idx); +bool ds_separate (const struct string *src, struct substring delimiters, + size_t *save_idx, struct substring *token); +bool ds_tokenize (const struct string *src, struct substring delimiters, + size_t *save_idx, struct substring *token); +void ds_rpad (struct string *, size_t length, char pad); + +/* Extracting substrings. */ +struct substring ds_ss (const struct string *); +struct substring ds_substr (const struct string *, size_t start, size_t); +struct substring ds_head (const struct string *, size_t); +struct substring ds_tail (const struct string *, size_t); /* Inspectors. 
*/ bool ds_is_empty (const struct string *); size_t ds_length (const struct string *); -char *ds_c_str (const struct string *); char *ds_data (const struct string *); char *ds_end (const struct string *); -size_t ds_capacity (const struct string *); int ds_at (const struct string *, size_t idx); int ds_first (const struct string *); int ds_last (const struct string *); -size_t ds_span (const struct string *st, size_t ofs, const char skip_set[]); -size_t ds_cspan (const struct string *st, size_t ofs, const char stop_set[]); +size_t ds_span (const struct string *, struct substring skip_set); +size_t ds_cspan (const struct string *, struct substring stop_set); +size_t ds_find_char (const struct string *, char); +int ds_compare (const struct string *, const struct string *); +size_t ds_pointer_to_position (const struct string *, const char *); +char *ds_xstrdup (const struct string *); + +size_t ds_capacity (const struct string *); +char *ds_cstr (const struct string *); /* File input. */ -bool ds_gets (struct string *, FILE *); -bool ds_get_config_line (FILE *, struct string *, int *line_number); +bool ds_read_line (struct string *, FILE *); +bool ds_read_config_line (struct string *, int *line_number, FILE *); +size_t ds_read_stream (struct string *, size_t size, size_t cnt, FILE *stream); /* Append. */ -void ds_putc (struct string *, int ch); -void ds_putc_multiple (struct string *, int ch, size_t); -void ds_puts (struct string *, const char *); -void ds_concat (struct string *, const char *, size_t); -void ds_vprintf (struct string *st, const char *, va_list); -void ds_printf (struct string *, const char *, ...) 
+void ds_put_char (struct string *, int ch); +void ds_put_char_multiple (struct string *, int ch, size_t); +void ds_put_cstr (struct string *, const char *); +void ds_put_substring (struct string *, struct substring); +void ds_put_vformat (struct string *st, const char *, va_list) + PRINTF_FORMAT (2, 0); +void ds_put_format (struct string *, const char *, ...) PRINTF_FORMAT (2, 3); -char *ds_append_uninit (struct string *st, size_t incr); - -#if __GNUC__ > 1 -extern inline void -ds_putc (struct string *st, int ch) -{ - if (st->length == st->capacity) - ds_extend (st, st->length + 1); - st->string[st->length++] = ch; -} - -extern inline size_t -ds_length (const struct string *st) -{ - return st->length; -} - -extern inline char * -ds_data (const struct string *st) -{ - return st->string; -} - -extern inline char * -ds_end (const struct string *st) -{ - return st->string + st->length; -} -#endif +char *ds_put_uninit (struct string *st, size_t incr); #endif /* str_h */ diff --git a/src/output/ChangeLog b/src/output/ChangeLog index 04d1dd1a..2b88fec4 100644 --- a/src/output/ChangeLog +++ b/src/output/ChangeLog @@ -1,3 +1,24 @@ +Fri Jun 9 14:42:35 2006 Ben Pfaff + + Reform string library. + + * output.c (init_default_drivers): Update call to + configure_driver() to new interface. + (get_option_token) Use a modifiable substring in interface instead + of a separate position parameter. Use ss_get_*() functions. + (configure_driver) Take and work with substrings. + (configure_driver_line) Update call to configure_driver() to new + interface. + (outp_get_paper_size) Use substrings. + + * output.h (struct outp_class): Change open_driver interface to + use substring. Update all implementations. + + * table.c (text_format): Change to return substring. + (tab_title) Use xvasprintf(). + + * table.h (struct tab_table): Change title member to char *. + Thu May 25 18:02:53 WST 2006 John Darrington * table.c: Removed redundant extern declaration. 
diff --git a/src/output/afm.c b/src/output/afm.c index 643ce870..fedd9315 100644 --- a/src/output/afm.c +++ b/src/output/afm.c @@ -720,11 +720,11 @@ get_word (struct parser *p, char **word) struct string s; int c; - ds_init (&s); + ds_init_empty (&s); while (!isspace (c = getc (p->file)) && c != EOF) - ds_putc (&s, c); + ds_put_char (&s, c); ungetc (c, p->file); - *word = ds_c_str (&s); + *word = ds_cstr (&s); pool_register (p->pool, free, *word); return true; } @@ -756,7 +756,7 @@ force_get_word (struct parser *p) static bool get_string (struct parser *p, char **string) { - struct string s = DS_INITIALIZER; + struct string s = DS_EMPTY_INITIALIZER; skip_spaces (p); for (;;) @@ -764,14 +764,14 @@ get_string (struct parser *p, char **string) int c = getc (p->file); if (c == EOF || c == '\n') break; - ds_putc (&s, c); + ds_put_char (&s, c); } ungetc ('\n', p->file); - ds_rtrim_spaces (&s); + ds_rtrim (&s, ss_cstr (CC_SPACES)); if (!ds_is_empty (&s)) { - *string = ds_c_str (&s); + *string = ds_cstr (&s); pool_register (p->pool, free, *string); return true; } @@ -874,7 +874,7 @@ static size_t encode_one_byte (const struct afm_character **s, size_t n, struct string *out) { - ds_putc (out, '('); + ds_put_char (out, '('); for (; n > 0; s++, n--) { uint8_t code = (*s)->code; @@ -882,13 +882,13 @@ encode_one_byte (const struct afm_character **s, size_t n, break; if (code == '(' || code == ')' || code == '\\') - ds_printf (out, "\\%c", code); + ds_put_format (out, "\\%c", code); else if (!c_isprint (code)) - ds_printf (out, "\\%03o", code); + ds_put_format (out, "\\%03o", code); else - ds_putc (out, code); + ds_put_char (out, code); } - ds_putc (out, ')'); + ds_put_char (out, ')'); return n; } @@ -935,7 +935,7 @@ append_ascii85_block (unsigned b, size_t n, struct string *out) c[i] = value_to_ascii85 (b % 85); b /= 85; } - ds_concat (out, c, n); + ds_put_substring (out, ss_buffer (c, n)); } /* Encodes BYTE with encoder E. 
*/ @@ -947,12 +947,12 @@ binary_put (struct binary_encoder *e, uint8_t byte) if (e->n % 4 == 0) { if (e->n == 4) - ds_puts (e->out, "<~"); + ds_put_cstr (e->out, "<~"); if (e->b != 0) append_ascii85_block (e->b, 5, e->out); else - ds_putc (e->out, 'z'); + ds_put_char (e->out, 'z'); } } @@ -967,7 +967,7 @@ binary_finish (struct binary_encoder *e) size_t n = e->n % 4; if (n > 0) append_ascii85_block (e->b << 8 * (4 - n), n + 1, e->out); - ds_puts (e->out, "~>"); + ds_put_cstr (e->out, "~>"); } else if (e->n > 0) { @@ -976,19 +976,19 @@ binary_finish (struct binary_encoder *e) uint32_t b; size_t i; - ds_puts (e->out, "<"); + ds_put_cstr (e->out, "<"); b = e->b << 8 * (4 - e->n); for (i = 0; i < e->n; i++) { - ds_printf (e->out, "%02x", b >> 24); + ds_put_format (e->out, "%02x", b >> 24); b <<= 8; } - ds_puts (e->out, ">"); + ds_put_cstr (e->out, ">"); } else { /* Empty string. */ - ds_puts (e->out, "()"); + ds_put_cstr (e->out, "()"); } } diff --git a/src/output/ascii.c b/src/output/ascii.c index 8f960fe3..e4b10270 100644 --- a/src/output/ascii.c +++ b/src/output/ascii.c @@ -122,7 +122,7 @@ static bool handle_option (struct outp_driver *this, const char *key, const struct string *val); static bool -ascii_open_driver (struct outp_driver *this, const struct string *options) +ascii_open_driver (struct outp_driver *this, struct substring options) { struct ascii_driver_ext *x; int i; @@ -274,7 +274,7 @@ handle_option (struct outp_driver *this, const char *key, int subcat; const char *value; - value = ds_c_str (val); + value = ds_cstr (val); if (!strncmp (key, "box[", 4)) { char *tail; @@ -537,7 +537,7 @@ delineate (struct outp_driver *this, const struct outp_text *text, bool draw, int max_width; int height_left; - const char *cp = ls_c_str (&text->string); + const char *cp = ss_data (text->string); max_width = 0; height_left = text->v; @@ -549,7 +549,7 @@ delineate (struct outp_driver *this, const struct outp_text *text, bool draw, const char *end; /* Initially the line is 
up to text->h characters long. */ - chars_left = ls_end (&text->string) - cp; + chars_left = ss_end (text->string) - cp; if (chars_left == 0) break; line_len = MIN (chars_left, text->h); @@ -560,7 +560,7 @@ delineate (struct outp_driver *this, const struct outp_text *text, bool draw, line_len = end - cp; /* Don't cut off words if it can be avoided. */ - if (cp + line_len < ls_end (&text->string)) + if (cp + line_len < ss_end (text->string)) { size_t space_len = line_len; while (space_len > 0 && !isspace ((unsigned char) cp[space_len])) @@ -584,7 +584,7 @@ delineate (struct outp_driver *this, const struct outp_text *text, bool draw, /* Next line. */ cp += line_len; - if (cp < ls_end (&text->string) && isspace ((unsigned char) *cp)) + if (cp < ss_end (text->string) && isspace ((unsigned char) *cp)) cp++; } @@ -622,20 +622,20 @@ output_line (struct outp_driver *this, const struct line *line, for (length = line->char_cnt; length-- > 0; s++) if (*s & ATTR_BOX) - ds_puts (out, ext->box[*s & 0xff]); + ds_put_cstr (out, ext->box[*s & 0xff]); else { if (*s & ATTR_EMPHASIS) { if (ext->emphasis == EMPH_BOLD) { - ds_putc (out, *s); - ds_putc (out, '\b'); + ds_put_char (out, *s); + ds_put_char (out, '\b'); } else if (ext->emphasis == EMPH_UNDERLINE) - ds_puts (out, "_\b"); + ds_put_cstr (out, "_\b"); } - ds_putc (out, *s); + ds_put_char (out, *s); } } @@ -643,7 +643,7 @@ static void append_lr_justified (struct string *out, int width, const char *left, const char *right) { - ds_putc_multiple (out, ' ', width); + ds_put_char_multiple (out, ' ', width); if (left != NULL) { size_t length = MIN (strlen (left), width); @@ -654,7 +654,7 @@ append_lr_justified (struct string *out, int width, size_t length = MIN (strlen (right), width); memcpy (ds_end (out) - length, right, length); } - ds_putc (out, '\n'); + ds_put_char (out, '\n'); } static void @@ -672,9 +672,9 @@ ascii_close_page (struct outp_driver *this) struct string out; int line_num; - ds_init (&out); + ds_init_empty (&out); - 
ds_putc_multiple (&out, '\n', x->top_margin); + ds_put_char_multiple (&out, '\n', x->top_margin); if (x->headers) { char *r1, *r2; @@ -684,7 +684,7 @@ ascii_close_page (struct outp_driver *this) append_lr_justified (&out, this->width, outp_title, r1); append_lr_justified (&out, this->width, outp_subtitle, r2); - ds_putc (&out, '\n'); + ds_put_char (&out, '\n'); free (r1); free (r2); @@ -708,13 +708,13 @@ ascii_close_page (struct outp_driver *this) if (line_num < x->line_cap) output_line (this, &x->lines[line_num], &out); - ds_putc (&out, '\n'); + ds_put_char (&out, '\n'); dump_output (this, &out); } - ds_putc_multiple (&out, '\n', x->bottom_margin); + ds_put_char_multiple (&out, '\n', x->bottom_margin); if (x->paginate) - ds_putc (&out, '\f'); + ds_put_char (&out, '\f'); dump_output (this, &out); ds_destroy (&out); diff --git a/src/output/html.c b/src/output/html.c index d5b4f5d4..95120871 100644 --- a/src/output/html.c +++ b/src/output/html.c @@ -50,7 +50,7 @@ static void print_title_tag (FILE *file, const char *name, const char *content); static bool -html_open_driver (struct outp_driver *this, const struct string *options) +html_open_driver (struct outp_driver *this, struct substring options) { struct html_driver_ext *x; @@ -165,7 +165,7 @@ handle_option (struct outp_driver *this, break; case string_arg: free (x->file_name); - x->file_name = xstrdup (ds_c_str (val)); + x->file_name = ds_xstrdup (val); break; default: abort (); @@ -233,7 +233,7 @@ escape_string (FILE *file, TEXT. 
*/ void html_put_cell_contents (struct outp_driver *this, - unsigned int opts, struct fixed_string *text) + unsigned int opts, const struct substring text) { struct html_driver_ext *x = this->ext; @@ -244,15 +244,15 @@ html_put_cell_contents (struct outp_driver *this, if (opts & TAB_FIX) { fputs ("", x->file); - escape_string (x->file, ls_c_str (text), ls_length (text), " "); + escape_string (x->file, ss_data (text), ss_length (text), " "); fputs ("", x->file); } else { - size_t initial_spaces = strspn (ls_c_str (text), " \t"); + size_t initial_spaces = ss_span (text, ss_cstr (CC_SPACES)); escape_string (x->file, - ls_c_str (text) + initial_spaces, - ls_length (text) - initial_spaces, + ss_data (text) + initial_spaces, + ss_length (text) - initial_spaces, " "); } if (opts & TAB_EMPH) @@ -269,7 +269,7 @@ output_tab_table (struct outp_driver *this, struct tab_table *t) if (t->nr == 1 && t->nc == 1) { fputs ("

", x->file); - html_put_cell_contents (this, t->ct[0], t->cc); + html_put_cell_contents (this, t->ct[0], *t->cc); fputs ("

\n", x->file); return; @@ -277,11 +277,10 @@ output_tab_table (struct outp_driver *this, struct tab_table *t) fputs ("\n", x->file); - if (!ls_empty_p (&t->title)) + if (t->title != NULL) { fprintf (x->file, " \n", x->file); } @@ -296,14 +295,14 @@ output_tab_table (struct outp_driver *this, struct tab_table *t) fputs (" \n", x->file); for (c = 0; c < t->nc; c++, ct++) { - struct fixed_string *cc; + struct substring *cc; const char *tag; struct tab_joined_cell *j = NULL; cc = t->cc + c + r * t->nc; if (*ct & TAB_JOIN) { - j = (struct tab_joined_cell *) ls_c_str (cc); + j = (struct tab_joined_cell *) ss_data (*cc); cc = &j->contents; if (j->x1 != c || j->y1 != r) continue; @@ -327,7 +326,7 @@ output_tab_table (struct outp_driver *this, struct tab_table *t) putc ('>', x->file); /* Output cell contents. */ - html_put_cell_contents (this, *ct, cc); + html_put_cell_contents (this, *ct, *cc); /* Output or . */ fprintf (x->file, "\n", tag); diff --git a/src/output/htmlP.h b/src/output/htmlP.h index 1a29caeb..da292ab0 100644 --- a/src/output/htmlP.h +++ b/src/output/htmlP.h @@ -33,6 +33,6 @@ extern struct outp_class html_class; struct outp_driver; void html_put_cell_contents (struct outp_driver *this, - unsigned int opts, struct fixed_string *text); + unsigned int opts, struct substring text); #endif /* !htmlP_h */ diff --git a/src/output/output.c b/src/output/output.c index 19761e75..a8ed0eab 100644 --- a/src/output/output.c +++ b/src/output/output.c @@ -85,9 +85,9 @@ char *outp_subtitle; static int disabled_devices; static void destroy_driver (struct outp_driver *); -static void configure_driver_line (struct string *); -static void configure_driver (const struct string *, const struct string *, - const struct string *, const struct string *); +static void configure_driver_line (struct substring); +static void configure_driver (const struct substring, const struct substring, + const struct substring, const struct substring); /* Add a class to the class list. 
*/ static void @@ -206,7 +206,7 @@ find_defn_value (const char *key) for (d = outp_macros; d; d = d->next) if (!strcmp (key, d->key)) - return ds_c_str(&d->value); + return ds_cstr (&d->value); if (!strcmp (key, "viewwidth")) { sprintf (buf, "%d", get_viewwidth ()); @@ -256,15 +256,11 @@ delete_macros (void) static void init_default_drivers (void) { - struct string s; - error (0, 0, _("using default output driver configuration")); - - ds_create (&s, - "list:ascii:listing:" - "length=66 width=79 output-file=\"pspp.list\""); - configure_driver_line (&s); - ds_destroy (&s); + configure_driver (ss_cstr ("list"), + ss_cstr ("ascii"), + ss_cstr ("listing"), + ss_cstr ("length=66 width=79 output-file=\"pspp.list\"")); } /* Reads the initialization file; initializes @@ -286,7 +282,7 @@ outp_read_devices (void) config_path), NULL); - ds_init (&line); + ds_init_empty (&line); if (init_fn == NULL) { @@ -307,13 +303,13 @@ outp_read_devices (void) { char *cp; - if (!ds_get_config_line (f, &line, &line_number)) + if (!ds_read_config_line (&line, &line_number, f)) { if (ferror (f)) error (0, errno, _("reading \"%s\""), init_fn); break; } - for (cp = ds_c_str (&line); isspace ((unsigned char) *cp); cp++); + for (cp = ds_cstr (&line); isspace ((unsigned char) *cp); cp++); if (!strncmp ("define", cp, 6) && isspace ((unsigned char) cp[6])) outp_configure_macro (&cp[7]); else if (*cp) @@ -327,7 +323,7 @@ outp_read_devices (void) struct outp_names *n = search_names (cp, ep); if (n) { - configure_driver_line (&line); + configure_driver_line (ds_ss (&line)); delete_name (n); } } @@ -415,8 +411,8 @@ outp_configure_macro (char *bp) while (isspace ((unsigned char) *ep)) ep++; - ds_create(&d->value, ep); - fn_interp_vars(&d->value, find_defn_value); + ds_init_cstr (&d->value, ep); + fn_interp_vars (ds_ss (&d->value), find_defn_value, &d->value); d->next = outp_macros; d->prev = NULL; if (outp_macros) @@ -485,20 +481,20 @@ outp_list_classes (void) putc('\n', stdout); } -/* Obtains a token 
from S starting at position *POS, which is - updated. Errors are reported against the given DRIVER_NAME. +/* Obtains a token from S and advances its position. Errors are + reported against the given DRIVER_NAME. The token is stored in TOKEN. Returns true if successful, false on syntax error. Caller is responsible for skipping leading spaces. */ static bool -get_option_token (const struct string *s, const char *driver_name, - size_t *pos, struct string *token) +get_option_token (struct substring *s, const char *driver_name, + struct string *token) { int c; ds_clear (token); - c = ds_at (s, *pos); + c = ss_get_char (s); if (c == EOF) { error (0, 0, _("syntax error parsing options for \"%s\" driver"), @@ -509,10 +505,9 @@ get_option_token (const struct string *s, const char *driver_name, { int quote = c; - ++*pos; for (;;) { - c = ds_at (s, (*pos)++); + c = ss_get_char (s); if (c == quote) break; else if (c == EOF) @@ -524,12 +519,13 @@ get_option_token (const struct string *s, const char *driver_name, return false; } else if (c != '\\') - ds_putc (token, c); + ds_put_char (token, c); else { int out; - - switch (ds_at (s, *pos)) + + c = ss_get_char (s); + switch (c) { case '\'': out = '\''; @@ -570,19 +566,15 @@ get_option_token (const struct string *s, const char *driver_name, case '6': case '7': out = c - '0'; - while (ds_at (s, *pos) >= '0' && ds_at (s, *pos) <= '7') - out = c * 8 + ds_at (s, (*pos)++) - '0'; + while (ss_first (*s) >= '0' && ss_first (*s) <= '7') + out = c * 8 + (ss_get_char (s) - '0'); break; case 'x': case 'X': out = 0; - while (isxdigit (ds_at (s, *pos))) + while (isxdigit (ss_first (*s))) { - c = ds_at (s, *pos); - if (!isxdigit (c)) - break; - (*pos)++; - + c = ss_get_char (s); out *= 16; if (isdigit (c)) out += c - '0'; @@ -596,58 +588,60 @@ get_option_token (const struct string *s, const char *driver_name, driver_name); return false; } - ds_putc (token, out); + ds_put_char (token, out); } } } else { - do + for (;;) { - ds_putc (token, c); - 
c = ds_at (s, ++*pos); + ds_put_char (token, c); + + c = ss_first (*s); + if (c == EOF || c == '=' || isspace (c)) + break; + ss_advance (s, 1); } - while (c != EOF && c != '=' && !isspace (c)); } return 1; } bool -outp_parse_options (const struct string *options, +outp_parse_options (struct substring options, bool (*callback) (struct outp_driver *, const char *key, const struct string *value), struct outp_driver *driver) { - struct string key = DS_INITIALIZER; - struct string value = DS_INITIALIZER; - size_t pos = 0; + struct string key = DS_EMPTY_INITIALIZER; + struct string value = DS_EMPTY_INITIALIZER; + struct substring left = options; bool ok = true; do { - pos += ds_span (options, pos, " \t"); - if (ds_at (options, pos) == EOF) + ss_ltrim (&left, ss_cstr (CC_SPACES)); + if (ss_is_empty (left)) break; - if (!get_option_token (options, driver->name, &pos, &key)) + if (!get_option_token (&left, driver->name, &key)) break; - pos += ds_span (options, pos, " \t"); - if (ds_at (options, pos) != '=') + ss_ltrim (&left, ss_cstr (CC_SPACES)); + if (!ss_match_char (&left, '=')) { error (0, 0, _("syntax error expecting `=' " "parsing options for driver \"%s\""), driver->name); break; } - pos++; - - pos += ds_span (options, pos, " \t"); - if (!get_option_token (options, driver->name, &pos, &value)) + + ss_ltrim (&left, ss_cstr (CC_SPACES)); + if (!get_option_token (&left, driver->name, &value)) break; - ok = callback (driver, ds_c_str (&key), &value); + ok = callback (driver, ds_cstr (&key), &value); } while (ok); @@ -669,58 +663,48 @@ find_driver (char *name) return NULL; } -/* String S is in format: - DRIVERNAME:CLASSNAME:DEVICETYPE:OPTIONS - Adds a driver to outp_driver_list pursuant to the specification - provided. */ +/* Adds a driver to outp_driver_list pursuant to the + specification provided. 
*/ static void -configure_driver (const struct string *driver_name, - const struct string *class_name, - const struct string *device_type, - const struct string *options) +configure_driver (struct substring driver_name, struct substring class_name, + struct substring device_type, struct substring options) { struct outp_driver *d, *iter; struct outp_driver_class_list *c; + + struct substring token; + size_t save_idx = 0; int device; /* Find class. */ for (c = outp_class_list; c; c = c->next) - if (!strcmp (c->class->name, ds_c_str (class_name))) + if (!ss_compare (ss_cstr (c->class->name), class_name)) break; if (c == NULL) { - error (0, 0, _("unknown output driver class `%s'"), - ds_c_str (class_name)); + error (0, 0, _("unknown output driver class `%.*s'"), + (int) ss_length (class_name), ss_data (class_name)); return; } /* Parse device type. */ device = 0; - if (device_type != NULL) - { - struct string token = DS_INITIALIZER; - size_t save_idx = 0; - - while (ds_tokenize (device_type, &token, " \t\r\v", &save_idx)) - { - const char *type = ds_c_str (&token); - if (!strcmp (type, "listing")) - device |= OUTP_DEV_LISTING; - else if (!strcmp (type, "screen")) - device |= OUTP_DEV_SCREEN; - else if (!strcmp (type, "printer")) - device |= OUTP_DEV_PRINTER; - else - error (0, 0, _("unknown device type `%s'"), type); - } - ds_destroy (&token); - } + while (ss_tokenize (device_type, ss_cstr (CC_SPACES), &save_idx, &token)) + if (!ss_compare (token, ss_cstr ("listing"))) + device |= OUTP_DEV_LISTING; + else if (!ss_compare (token, ss_cstr ("screen"))) + device |= OUTP_DEV_SCREEN; + else if (!ss_compare (token, ss_cstr ("printer"))) + device |= OUTP_DEV_PRINTER; + else + error (0, 0, _("unknown device type `%.*s'"), + (int) ss_length (token), ss_data (token)); /* Open the device. 
*/ d = xmalloc (sizeof *d); d->next = d->prev = NULL; d->class = c->class; - d->name = xstrdup (ds_c_str (driver_name)); + d->name = ss_xstrdup (driver_name); d->page_open = false; d->device = OUTP_DEV_NONE; d->cp_x = d->cp_y = 0; @@ -755,31 +739,30 @@ configure_driver (const struct string *driver_name, Adds a driver to outp_driver_list pursuant to the specification provided. */ static void -configure_driver_line (struct string *line) +configure_driver_line (struct substring line_) { - struct string tokens[4]; + struct string line = DS_EMPTY_INITIALIZER; + struct substring tokens[4]; size_t save_idx; size_t i; - fn_interp_vars (line, find_defn_value); + fn_interp_vars (line_, find_defn_value, &line); save_idx = 0; for (i = 0; i < 4; i++) { - struct string *token = &tokens[i]; - ds_init (token); - ds_separate (line, token, i < 3 ? ":" : "", &save_idx); - ds_trim_spaces (token); + struct substring *token = &tokens[i]; + ds_separate (&line, ss_cstr (i < 3 ? ":" : ""), &save_idx, token); + ss_trim (token, ss_cstr (CC_SPACES)); } - if (!ds_is_empty (&tokens[0]) && !ds_is_empty (&tokens[1])) - configure_driver (&tokens[0], &tokens[1], &tokens[2], &tokens[3]); + if (!ss_is_empty (tokens[0]) && !ss_is_empty (tokens[1])) + configure_driver (tokens[0], tokens[1], tokens[2], tokens[3]); else error (0, 0, _("driver definition line missing driver name or class name")); - for (i = 0; i < 4; i++) - ds_destroy (&tokens[i]); + ds_destroy (&line); } /* Destroys output driver D. 
*/ @@ -1003,7 +986,7 @@ outp_get_paper_size (char *size, int *h, int *v) struct string line; int line_number = 0; - int free_it = 0; + bool free_it = false; int result = 0; char *ep; @@ -1032,7 +1015,7 @@ outp_get_paper_size (char *size, int *h, int *v) config_path), NULL); - ds_init (&line); + ds_init_empty (&line); if (pprsz_fn == NULL) { @@ -1049,35 +1032,33 @@ outp_get_paper_size (char *size, int *h, int *v) for (;;) { - char *cp, *bp, *ep; + struct substring p, name; - if (!ds_get_config_line (f, &line, &line_number)) + if (!ds_read_config_line (&line, &line_number, f)) { if (ferror (f)) error (0, errno, _("error reading \"%s\""), pprsz_fn); break; } - for (cp = ds_c_str (&line); isspace ((unsigned char) *cp); cp++); - if (*cp == 0) - continue; - if (*cp != '"') - goto lex_error; - for (bp = ep = cp + 1; *ep && *ep != '"'; ep++); - if (!*ep) + + p = ds_ss (&line); + ss_ltrim (&p, ss_cstr (CC_SPACES)); + if (!ss_match_char (&p, '"') || !ss_get_until (&p, '"', &name)) goto lex_error; - *ep = 0; - if (0 != strcasecmp (bp, size)) + if (ss_compare (name, ss_cstr (size))) continue; - for (cp = ep + 1; isspace ((unsigned char) *cp); cp++); - if (*cp == '=') + ss_ltrim (&p, ss_cstr (CC_SPACES)); + if (ss_match_char (&p, '=')) { - size = xmalloc (ep - bp + 1); - strcpy (size, bp); - free_it = 1; + if (free_it) + free (size); + ss_trim (&p, ss_cstr (CC_SPACES)); + size = ss_xstrdup (p); + free_it = true; continue; } - size = &ep[1]; + size = ss_data (p); break; lex_error: @@ -1182,7 +1163,7 @@ outp_string_width (struct outp_driver *d, const char *s, enum outp_font font) text.font = font; text.justification = OUTP_LEFT; - ls_init (&text.string, (char *) s, strlen (s)); + text.string = ss_cstr (s); text.h = text.v = INT_MAX; d->class->text_metrics (d, &text, &width, NULL); diff --git a/src/output/output.h b/src/output/output.h index 8f44b1d9..a61161ab 100644 --- a/src/output/output.h +++ b/src/output/output.h @@ -55,7 +55,7 @@ struct outp_text { enum outp_font font; enum 
outp_justification justification; - struct fixed_string string; + struct substring string; int h, v; /* Horizontal, vertical size. */ int x, y; /* Position. */ }; @@ -70,7 +70,7 @@ struct outp_class const char *name; /* Name of this driver class. */ int special; /* Boolean value. */ - bool (*open_driver) (struct outp_driver *, const struct string *options); + bool (*open_driver) (struct outp_driver *, struct substring options); bool (*close_driver) (struct outp_driver *); void (*open_page) (struct outp_driver *); @@ -148,7 +148,7 @@ void outp_list_classes (void); void outp_enable_device (int enable, int device); struct outp_driver *outp_drivers (struct outp_driver *); -bool outp_parse_options (const struct string *options, +bool outp_parse_options (struct substring options, bool (*) (struct outp_driver *, const char *key, const struct string *value), struct outp_driver *); diff --git a/src/output/postscript.c b/src/output/postscript.c index 919d1bb7..d65162da 100644 --- a/src/output/postscript.c +++ b/src/output/postscript.c @@ -129,7 +129,7 @@ static void setup_font (struct outp_driver *this, struct font *, int index); /* Driver initialization. 
*/ static bool -ps_open_driver (struct outp_driver *this, const struct string *options) +ps_open_driver (struct outp_driver *this, struct substring options) { struct ps_driver_ext *x; size_t i; @@ -300,7 +300,7 @@ handle_option (struct outp_driver *this, const char *key, { struct ps_driver_ext *x = this->ext; int subcat; - char *value = ds_c_str (val); + char *value = ds_cstr (val); switch (outp_match_keyword (key, option_tab, &subcat)) { @@ -542,15 +542,15 @@ quote_ps_name (const char *string) if (!isalpha (c) && strchr ("^_|!$&:;.,-+", c) == NULL && (cp == string || !isdigit (c))) { - struct string out = DS_INITIALIZER; - ds_putc (&out, '<'); + struct string out = DS_EMPTY_INITIALIZER; + ds_put_char (&out, '<'); for (cp = string; *cp != '\0'; cp++) { c = *cp; - ds_printf (&out, "%02x", c); + ds_put_format (&out, "%02x", c); } - ds_puts (&out, ">cvn"); - return ds_c_str (&out); + ds_put_cstr (&out, ">cvn"); + return ds_cstr (&out); } } return xasprintf ("/%s", string); @@ -784,7 +784,7 @@ draw_text (struct outp_driver *this, text.font = OUTP_PROPORTIONAL; text.justification = justification; - ls_init (&text.string, (char *) string, strlen (string)); + text.string = ss_cstr (string); text.h = max_width; text.v = this->font_height; text.x = x; @@ -874,7 +874,7 @@ write_text (struct outp_driver *this, fprintf (ext->file, "F%d setfont\n", font); } - ds_init (&out); + ds_init_empty (&out); for (i = 0; i < char_cnt; i = j) { for (j = i + 1; j < char_cnt; j++) @@ -888,7 +888,7 @@ write_text (struct outp_driver *this, size_t encoded = afm_encode_string (afm, chars + i, j - i, &out); if (encoded > 0) { - fprintf (ext->file, "%sS\n", ds_c_str (&out)); + fprintf (ext->file, "%sS\n", ds_cstr (&out)); ds_clear (&out); i += encoded; } @@ -990,8 +990,8 @@ text (struct outp_driver *this, const struct outp_text *text, bool draw, s.max_width = 0; - cp = ls_c_str (&s.text->string); - while (s.height_left >= this->font_height && cp < ls_end (&s.text->string)) + cp = ss_data 
(s.text->string); + while (s.height_left >= this->font_height && cp < ss_end (s.text->string)) { const struct afm_character *cur; int char_width; @@ -1006,7 +1006,7 @@ text (struct outp_driver *this, const struct outp_text *text, bool draw, /* Get character and resolve ligatures. */ cur = afm_get_character (afm, *cp); - while (++cp < ls_end (&s.text->string)) + while (++cp < ss_end (s.text->string)) { const struct afm_character *next = afm_get_character (afm, *cp); const struct afm_character *ligature = afm_get_ligature (cur, next); @@ -1024,7 +1024,7 @@ text (struct outp_driver *this, const struct outp_text *text, bool draw, kern_adjust = 0; /* Record the current status if this is a space character. */ - if (cur->code == ' ' && cp > ls_c_str (&s.text->string)) + if (cur->code == ' ' && cp > ss_data (s.text->string)) { s.space_char = cp; s.space_glyph_cnt = s.glyph_cnt; @@ -1382,8 +1382,8 @@ reencode_font (struct outp_driver *this, struct font *font) line_number = 0; - ds_init (&line); - while (ds_get_config_line (file, &line, &line_number)) + ds_init_empty (&line); + while (ds_read_config_line (&line, &line_number, file)) { char *pschar, *code; char *save_ptr, *tail; @@ -1392,7 +1392,7 @@ reencode_font (struct outp_driver *this, struct font *font) if (ds_is_empty (&line) == 0) continue; - pschar = strtok_r (ds_c_str (&line), " \t\r\n", &save_ptr); + pschar = strtok_r (ds_cstr (&line), " \t\r\n", &save_ptr); code = strtok_r (NULL, " \t\r\n", &save_ptr); if (pschar == NULL || code == NULL) continue; diff --git a/src/output/table.c b/src/output/table.c index eac8d90f..cb06a3f1 100644 --- a/src/output/table.c +++ b/src/output/table.c @@ -58,7 +58,7 @@ tab_create (int nc, int nr, int reallocable UNUSED) t = pool_create_container (struct tab_table, container); t->col_style = TAB_COL_NONE; t->col_group = 0; - ls_null (&t->title); + t->title = NULL; t->flags = SOMF_NONE; t->nr = nr; t->nc = t->cf = nc; @@ -143,7 +143,7 @@ tab_realloc (struct tab_table *t, int nc, int nr) 
int mr1 = min (nr, t->nr); int mc1 = min (nc, t->nc); - struct fixed_string *new_cc; + struct substring *new_cc; unsigned char *new_ct; int r; @@ -372,23 +372,16 @@ tab_box (struct tab_table *t, int f_h, int f_v, int i_h, int i_v, } } -/* Formats text TEXT and arguments ARGS as indicated in OPT and sets - the resultant string into S in TABLE's pool. */ -static void -text_format (struct tab_table *table, int opt, const char *text, va_list args, - struct fixed_string *s) +/* Formats text TEXT and arguments ARGS as indicated in OPT in + TABLE's pool and returns the resultant string. */ +static struct substring +text_format (struct tab_table *table, int opt, const char *text, va_list args) { - char *tmp = NULL; - - assert (table != NULL && text != NULL && s != NULL); - - if (opt & TAT_PRINTF) - text = tmp = xvasprintf (text, args); + assert (table != NULL && text != NULL); - ls_create_buffer (s, text, strlen (text)); - pool_register (table->container, free, s->string); - - free (tmp); + return ss_cstr (opt & TAT_PRINTF + ? pool_vasprintf (table->container, text, args) + : pool_strdup (table->container, text)); } /* Set the title of table T to TITLE, which is formatted as if @@ -400,7 +393,7 @@ tab_title (struct tab_table *t, const char *title, ...) 
assert (t != NULL && title != NULL); va_start (args, title); - text_format (t, TAT_PRINTF, title, args, &t->title); + t->title = xvasprintf (title, args); va_end (args); } @@ -435,7 +428,6 @@ tab_natural_width (struct tab_table *t, struct outp_driver *d, int c) continue; text.string = t->cc[c + r * t->cf]; - assert (!ls_null_p (&text.string)); text.justification = OUTP_LEFT; text.font = options_to_font (opt); text.h = text.v = INT_MAX; @@ -488,7 +480,6 @@ tab_natural_height (struct tab_table *t, struct outp_driver *d, int r) continue; text.string = t->cc[c + r * t->cf]; - assert (!ls_null_p (&text.string)); text.justification = OUTP_LEFT; text.font = options_to_font (opt); text.h = t->w[c]; @@ -546,7 +537,7 @@ tab_value (struct tab_table *table, int c, int r, unsigned char opt, #endif contents = pool_alloc (table->container, f->w); - ls_init (&table->cc[c + r * table->cf], contents, f->w); + table->cc[c + r * table->cf] = ss_buffer (contents, f->w); table->ct[c + r * table->cf] = opt; data_out (contents, f, v); @@ -596,7 +587,7 @@ tab_float (struct tab_table *table, int c, int r, unsigned char opt, f.w = w - (cp - buf); contents = pool_alloc (table->container, f.w); - ls_init (&table->cc[c + r * table->cf], contents, f.w); + table->cc[c + r * table->cf] = ss_buffer (contents, f.w); table->ct[c + r * table->cf] = opt; memcpy (contents, cp, f.w); } @@ -631,7 +622,7 @@ tab_text (struct tab_table *table, int c, int r, unsigned opt, const char *text, #endif va_start (args, text); - text_format (table, opt, text, args, &table->cc[c + r * table->cf]); + table->cc[c + r * table->cf] = text_format (table, opt, text, args); table->ct[c + r * table->cf] = opt; va_end (args); } @@ -683,14 +674,14 @@ tab_joint_text (struct tab_table *table, int x1, int y1, int x2, int y2, va_list args; va_start (args, text); - text_format (table, opt, text, args, &j->contents); + j->contents = text_format (table, opt, text, args); va_end (args); } opt |= TAB_JOIN; { - struct fixed_string *cc = 
&table->cc[x1 + y1 * table->cf]; + struct substring *cc = &table->cc[x1 + y1 * table->cf]; unsigned char *ct = &table->ct[x1 + y1 * table->cf]; const int ofs = table->cf - (x2 - x1); @@ -702,7 +693,7 @@ tab_joint_text (struct tab_table *table, int x1, int y1, int x2, int y2, for (x = x1; x < x2; x++) { - ls_init (cc++, (char *) j, 0); + *cc++ = ss_buffer ((char *) j, 0); *ct++ = opt; } @@ -715,7 +706,7 @@ tab_joint_text (struct tab_table *table, int x1, int y1, int x2, int y2, /* Sets cell (C,R) in TABLE, with options OPT, to contents STRING. */ void tab_raw (struct tab_table *table, int c, int r, unsigned opt, - struct fixed_string *string) + struct substring *string) { assert (table != NULL && string != NULL); @@ -1169,10 +1160,11 @@ tabi_title (int x, int y) if (command_name != NULL) cp = spprintf (cp, " %s", command_name); cp = stpcpy (cp, ". "); - if (!ls_empty_p (&t->title)) + if (t->title != NULL) { - memcpy (cp, ls_c_str (&t->title), ls_length (&t->title)); - cp += ls_length (&t->title); + size_t length = strlen (t->title); + memcpy (cp, t->title, length); + cp += length; } *cp = 0; @@ -1181,7 +1173,7 @@ tabi_title (int x, int y) text.font = OUTP_PROPORTIONAL; text.justification = OUTP_LEFT; - ls_init (&text.string, buf, cp - buf); + text.string = ss_buffer (buf, cp - buf); text.h = d->width; text.v = d->font_height; text.x = 0; @@ -1393,7 +1385,7 @@ render_cell (int x, int y, int c, int r, int c1, int r1) { const int index = c + (r * t->cf); unsigned char type = t->ct[index]; - struct fixed_string *content = &t->cc[index]; + struct substring *content = &t->cc[index]; if (!(type & TAB_JOIN)) { @@ -1413,7 +1405,7 @@ render_cell (int x, int y, int c, int r, int c1, int r1) else { struct tab_joined_cell *j - = (struct tab_joined_cell *) ls_c_str (content); + = (struct tab_joined_cell *) ss_data (*content); if (j->hit != tab_hit) { diff --git a/src/output/table.h b/src/output/table.h index 6245d12b..93f4d1e9 100644 --- a/src/output/table.h +++ 
b/src/output/table.h @@ -65,7 +65,7 @@ struct tab_joined_cell int x1, y1; int x2, y2; int hit; - struct fixed_string contents; + struct substring contents; }; struct outp_driver; @@ -80,12 +80,12 @@ struct tab_table /* Contents. */ int col_style; /* Columns: One of TAB_COL_*. */ int col_group; /* Number of rows per column group. */ - struct fixed_string title; /* Table title. */ + char *title; /* Table title. */ unsigned flags; /* SOMF_*. */ int nc, nr; /* Number of columns, rows. */ int cf; /* Column factor for indexing purposes. */ int l, r, t, b; /* Number of header rows on each side. */ - struct fixed_string *cc; /* Cell contents; fixed_string *[nr][nc]. */ + struct substring *cc; /* Cell contents; substring *[nr][nc]. */ unsigned char *ct; /* Cell types; unsigned char[nr][nc]. */ unsigned char *rh; /* Horiz rules; unsigned char[nr+1][nc]. */ unsigned char *rv; /* Vert rules; unsigned char[nr][nc+1]. */ @@ -170,7 +170,7 @@ void tab_joint_text (struct tab_table *, int x1, int y1, int x2, int y2, /* Cell low-level access. */ #define tab_alloc(TABLE, AMT) pool_alloc ((TABLE)->container, (AMT)) void tab_raw (struct tab_table *, int c, int r, unsigned opt, - struct fixed_string *); + struct substring *); /* Editing. 
*/ void tab_offset (struct tab_table *, int col, int row); diff --git a/src/ui/terminal/msg-ui.c b/src/ui/terminal/msg-ui.c index 3422da02..3e354a7b 100644 --- a/src/ui/terminal/msg-ui.c +++ b/src/ui/terminal/msg-ui.c @@ -119,30 +119,30 @@ handle_msg (const struct msg *m) const struct category *category = &categories[m->category]; const struct severity *severity = &severities[m->severity]; - struct string string = DS_INITIALIZER; + struct string string = DS_EMPTY_INITIALIZER; if (category->show_file_location && m->where.file_name) { - ds_printf (&string, "%s:", m->where.file_name); + ds_put_format (&string, "%s:", m->where.file_name); if (m->where.line_number != -1) - ds_printf (&string, "%d:", m->where.line_number); - ds_putc (&string, ' '); + ds_put_format (&string, "%d:", m->where.line_number); + ds_put_char (&string, ' '); } if (severity->name != NULL) - ds_printf (&string, "%s: ", gettext (severity->name)); + ds_put_format (&string, "%s: ", gettext (severity->name)); if (severity->count != NULL) ++*severity->count; if (category->show_command_name && msg_get_command_name () != NULL) - ds_printf (&string, "%s: ", msg_get_command_name ()); + ds_put_format (&string, "%s: ", msg_get_command_name ()); - ds_puts (&string, m->text); + ds_put_cstr (&string, m->text); /* FIXME: Check set_messages and set_errors to determine where to send errors and messages. 
*/ - dump_message (ds_c_str (&string), get_viewwidth (), 8, stdout); + dump_message (ds_cstr (&string), get_viewwidth (), 8, stdout); ds_destroy (&string); } diff --git a/src/ui/terminal/read-line.c b/src/ui/terminal/read-line.c index a89a953b..a87adfcf 100644 --- a/src/ui/terminal/read-line.c +++ b/src/ui/terminal/read-line.c @@ -133,14 +133,14 @@ readln_read (struct string *line, const char *prompt) { if (string[0]) add_history (string); - ds_assign_c_str (line, string); + ds_assign_cstr (line, string); free (string); return true; } #else fputs (prompt, stdout); fflush (stdout); - if (ds_gets (line, stdin)) + if (ds_read_line (line, stdin)) { ds_chomp (line, '\n'); return true; -- 2.30.2
"); - escape_string (x->file, ls_c_str (&t->title), ls_length (&t->title), - " "); + escape_string (x->file, t->title, strlen (t->title), " "); fputs ("