X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fstr.c;h=ccd7739c647b87d989b9cbc867810d915839770c;hb=c9b92e317e7426db24fce2636134e1e46eb05d40;hp=b8e55344d3a4ae0bc663d00db6735eba3c78c4d5;hpb=ebf3b993687a25782fe72f45bf3e72aa4bee7c95;p=pspp-builds.git diff --git a/src/libpspp/str.c b/src/libpspp/str.c index b8e55344..ccd7739c 100644 --- a/src/libpspp/str.c +++ b/src/libpspp/str.c @@ -1,30 +1,35 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. - Written by Ben Pfaff . +/* PSPP - a program for statistical analysis. + Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . 
*/ #include + #include "str.h" -#include "message.h" + #include -#include +#include +#include #include -#include "alloc.h" -#include "message.h" + +#include +#include + +#include +#include "minmax.h" +#include "xalloc.h" +#include "xsize.h" /* Reverses the order of NBYTES bytes at address P, thus converting between little- and big-endian byte orders. */ @@ -64,12 +69,12 @@ buf_compare_case (const char *a_, const char *b_, size_t size) const unsigned char *a = (unsigned char *) a_; const unsigned char *b = (unsigned char *) b_; - while (size-- > 0) + while (size-- > 0) { unsigned char ac = toupper (*a++); unsigned char bc = toupper (*b++); - if (ac != bc) + if (ac != bc) return ac > bc ? 1 : -1; } @@ -89,17 +94,17 @@ buf_compare_rpad (const char *a, size_t a_len, const char *b, size_t b_len) result = memcmp (a, b, min_len); if (result != 0) return result; - else + else { size_t idx; - - if (a_len < b_len) + + if (a_len < b_len) { for (idx = min_len; idx < b_len; idx++) if (' ' != b[idx]) return ' ' > b[idx] ? 1 : -1; } - else + else { for (idx = min_len; idx < a_len; idx++) if (a[idx] != ' ') @@ -120,9 +125,9 @@ str_compare_rpad (const char *a, const char *b) /* Copies string SRC to buffer DST, of size DST_SIZE bytes. DST is truncated to DST_SIZE bytes or padded on the right with - spaces as needed. */ + copies of PAD as needed. */ void -buf_copy_str_rpad (char *dst, size_t dst_size, const char *src) +buf_copy_str_rpad (char *dst, size_t dst_size, const char *src, char pad) { size_t src_len = strlen (src); if (src_len >= dst_size) @@ -130,15 +135,15 @@ buf_copy_str_rpad (char *dst, size_t dst_size, const char *src) else { memcpy (dst, src, src_len); - memset (&dst[src_len], ' ', dst_size - src_len); + memset (&dst[src_len], pad, dst_size - src_len); } } /* Copies string SRC to buffer DST, of size DST_SIZE bytes. DST is truncated to DST_SIZE bytes or padded on the left with - spaces as needed. */ + copies of PAD as needed. 
*/ void -buf_copy_str_lpad (char *dst, size_t dst_size, const char *src) +buf_copy_str_lpad (char *dst, size_t dst_size, const char *src, char pad) { size_t src_len = strlen (src); if (src_len >= dst_size) @@ -146,24 +151,42 @@ buf_copy_str_lpad (char *dst, size_t dst_size, const char *src) else { size_t pad_cnt = dst_size - src_len; - memset (&dst[0], ' ', pad_cnt); + memset (&dst[0], pad, pad_cnt); memcpy (dst + pad_cnt, src, src_len); } } +/* Copies buffer SRC, of SRC_SIZE bytes, to DST, of DST_SIZE bytes. + DST is truncated to DST_SIZE bytes or padded on the left with + copies of PAD as needed. */ +void +buf_copy_lpad (char *dst, size_t dst_size, + const char *src, size_t src_size, + char pad) +{ + if (src_size >= dst_size) + memmove (dst, src, dst_size); + else + { + memset (dst, pad, dst_size - src_size); + memmove (&dst[dst_size - src_size], src, src_size); + } +} + /* Copies buffer SRC, of SRC_SIZE bytes, to DST, of DST_SIZE bytes. DST is truncated to DST_SIZE bytes or padded on the right with - spaces as needed. */ + copies of PAD as needed. 
*/ void buf_copy_rpad (char *dst, size_t dst_size, - const char *src, size_t src_size) + const char *src, size_t src_size, + char pad) { if (src_size >= dst_size) memmove (dst, src, dst_size); else { memmove (dst, src, src_size); - memset (&dst[src_size], ' ', dst_size - src_size); + memset (&dst[src_size], pad, dst_size - src_size); } } @@ -174,27 +197,30 @@ buf_copy_rpad (char *dst, size_t dst_size, void str_copy_rpad (char *dst, size_t dst_size, const char *src) { - size_t src_len = strlen (src); - if (src_len < dst_size - 1) + if (dst_size > 0) { - memcpy (dst, src, src_len); - memset (&dst[src_len], ' ', dst_size - 1 - src_len); + size_t src_len = strlen (src); + if (src_len < dst_size - 1) + { + memcpy (dst, src, src_len); + memset (&dst[src_len], ' ', dst_size - 1 - src_len); + } + else + memcpy (dst, src, dst_size - 1); + dst[dst_size - 1] = 0; } - else - memcpy (dst, src, dst_size - 1); - dst[dst_size - 1] = 0; } /* Copies SRC to DST, which is in a buffer DST_SIZE bytes long. Truncates DST to DST_SIZE - 1 characters, if necessary. */ void -str_copy_trunc (char *dst, size_t dst_size, const char *src) +str_copy_trunc (char *dst, size_t dst_size, const char *src) { size_t src_len = strlen (src); assert (dst_size > 0); if (src_len + 1 < dst_size) memcpy (dst, src, src_len + 1); - else + else { memcpy (dst, src, dst_size - 1); dst[dst_size - 1] = '\0'; @@ -206,7 +232,7 @@ str_copy_trunc (char *dst, size_t dst_size, const char *src) Truncates DST to DST_SIZE - 1 characters, if necessary. */ void str_copy_buf_trunc (char *dst, size_t dst_size, - const char *src, size_t src_size) + const char *src, size_t src_size) { size_t dst_len; assert (dst_size > 0); @@ -218,7 +244,7 @@ str_copy_buf_trunc (char *dst, size_t dst_size, /* Converts each character in S to uppercase. 
*/ void -str_uppercase (char *s) +str_uppercase (char *s) { for (; *s != '\0'; s++) *s = toupper ((unsigned char) *s); @@ -226,160 +252,698 @@ str_uppercase (char *s) /* Converts each character in S to lowercase. */ void -str_lowercase (char *s) +str_lowercase (char *s) { for (; *s != '\0'; s++) *s = tolower ((unsigned char) *s); } + +/* Converts NUMBER into a string in 26-adic notation in BUFFER, + which has room for SIZE bytes. Returns true if successful, + false if NUMBER, plus a trailing null, is too large to fit in + the available space. + + 26-adic notation is "spreadsheet column numbering": 1 = A, 2 = + B, 3 = C, ... 26 = Z, 27 = AA, 28 = AB, 29 = AC, ... + + 26-adic notation is the special case of a k-adic numeration + system (aka bijective base-k numeration) with k=26. In k-adic + numeration, the digits are {1, 2, 3, ..., k} (there is no + digit 0), and integer 0 is represented by the empty string. + For more information, see + http://en.wikipedia.org/wiki/Bijective_numeration. */ +bool +str_format_26adic (unsigned long int number, char buffer[], size_t size) +{ + size_t length = 0; + + while (number-- > 0) + { + if (length >= size) + return false; + buffer[length++] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[number % 26]; + number /= 26; + } + + if (length >= size) + return false; + buffer[length] = '\0'; + + buf_reverse (buffer, length); + return true; +} + +/* Formats FORMAT into DST, as with sprintf(), and returns the + address of the terminating null written to DST. */ +char * +spprintf (char *dst, const char *format, ...) +{ + va_list args; + int count; + + va_start (args, format); + count = vsprintf (dst, format, args); + va_end (args); + + return dst + count; +} + +/* Sets the SIZE bytes starting at BLOCK to C, + and returns the byte following BLOCK. */ +void * +mempset (void *block, int c, size_t size) +{ + memset (block, c, size); + return (char *) block + size; +} -/* Initializes ST with initial contents S. */ +/* Substrings. 
*/ + +/* Returns an empty substring. */ +struct substring +ss_empty (void) +{ + struct substring ss; + ss.string = NULL; + ss.length = 0; + return ss; +} + +/* Returns a substring whose contents are the given C-style + string CSTR. */ +struct substring +ss_cstr (const char *cstr) +{ + return ss_buffer (cstr, strlen (cstr)); +} + +/* Returns a substring whose contents are the CNT characters in + BUFFER. */ +struct substring +ss_buffer (const char *buffer, size_t cnt) +{ + struct substring ss; + ss.string = (char *) buffer; + ss.length = cnt; + return ss; +} + +/* Returns a substring whose contents are the CNT characters + starting at the (0-based) position START in SS. */ +struct substring +ss_substr (struct substring ss, size_t start, size_t cnt) +{ + if (start < ss.length) + return ss_buffer (ss.string + start, MIN (cnt, ss.length - start)); + else + return ss_buffer (ss.string + ss.length, 0); +} + +/* Returns a substring whose contents are the first CNT + characters in SS. */ +struct substring +ss_head (struct substring ss, size_t cnt) +{ + return ss_buffer (ss.string, MIN (cnt, ss.length)); +} + +/* Returns a substring whose contents are the last CNT characters + in SS. */ +struct substring +ss_tail (struct substring ss, size_t cnt) +{ + if (cnt < ss.length) + return ss_buffer (ss.string + (ss.length - cnt), cnt); + else + return ss; +} + +/* Makes a malloc()'d copy of the contents of OLD + and stores it in NEW. */ void -ds_create (struct string *st, const char *s) +ss_alloc_substring (struct substring *new, struct substring old) { - st->length = strlen (s); - st->capacity = 8 + st->length * 2; - st->string = xmalloc (st->capacity + 1); - strcpy (st->string, s); + new->string = xmalloc (old.length); + new->length = old.length; + memcpy (new->string, old.string, old.length); } -/* Initializes DST with the contents of SRC between characters FIRST and LAST - inclusive */ +/* Allocates room for a CNT-character string in NEW. 
*/ void -ds_create_substr(struct string *dst, const struct string *src, - int first, int last) +ss_alloc_uninit (struct substring *new, size_t cnt) { - assert(last >= first); - dst->length = last - first + 1; - dst->capacity = 8 + dst->length * 2; - dst->string = xmalloc (dst->capacity + 1); + new->string = xmalloc (cnt); + new->length = cnt; +} - memcpy (dst->string, &src->string[first], dst->length); +/* Makes a pool_alloc_unaligned()'d copy of the contents of OLD + in POOL, and stores it in NEW. */ +void +ss_alloc_substring_pool (struct substring *new, struct substring old, + struct pool *pool) +{ + new->string = pool_alloc_unaligned (pool, old.length); + new->length = old.length; + memcpy (new->string, old.string, old.length); } +/* Allocates room for a CNT-character string in NEW in POOL. */ +void +ss_alloc_uninit_pool (struct substring *new, size_t cnt, struct pool *pool) +{ + new->string = pool_alloc_unaligned (pool, cnt); + new->length = cnt; +} -/* Initializes ST, making room for at least CAPACITY characters. */ +/* Frees the string that SS points to. */ void -ds_init (struct string *st, size_t capacity) +ss_dealloc (struct substring *ss) { - st->length = 0; - if (capacity > 8) - st->capacity = capacity; - else - st->capacity = 8; - st->string = xmalloc (st->capacity + 1); + free (ss->string); } -/* Replaces the contents of ST with STRING. STRING may overlap with - ST. */ +/* Truncates SS to at most CNT characters in length. */ void -ds_replace (struct string *st, const char *string) +ss_truncate (struct substring *ss, size_t cnt) { - size_t new_length = strlen (string); - if (new_length > st->capacity) + if (ss->length > cnt) + ss->length = cnt; +} + +/* Removes trailing characters in TRIM_SET from SS. + Returns number of characters removed. 
*/ +size_t +ss_rtrim (struct substring *ss, struct substring trim_set) +{ + size_t cnt = 0; + while (cnt < ss->length + && ss_find_char (trim_set, + ss->string[ss->length - cnt - 1]) != SIZE_MAX) + cnt++; + ss->length -= cnt; + return cnt; +} + +/* Removes leading characters in TRIM_SET from SS. + Returns number of characters removed. */ +size_t +ss_ltrim (struct substring *ss, struct substring trim_set) +{ + size_t cnt = ss_span (*ss, trim_set); + ss_advance (ss, cnt); + return cnt; +} + +/* Trims leading and trailing characters in TRIM_SET from SS. */ +void +ss_trim (struct substring *ss, struct substring trim_set) +{ + ss_ltrim (ss, trim_set); + ss_rtrim (ss, trim_set); +} + +/* If the last character in SS is C, removes it and returns true. + Otherwise, returns false without changing the string. */ +bool +ss_chomp (struct substring *ss, char c) +{ + if (ss_last (*ss) == (unsigned char) c) { - /* The new length is longer than the allocated length, so - there can be no overlap. */ - st->length = 0; - ds_concat (st, string, new_length); + ss->length--; + return true; } else + return false; +} + +/* Divides SS into tokens separated by any of the DELIMITERS. + Each call replaces TOKEN by the next token in SS, or by an + empty string if no tokens remain. Returns true if a token was + obtained, false otherwise. + + Before the first call, initialize *SAVE_IDX to 0. Do not + modify *SAVE_IDX between calls. + + SS divides into exactly one more token than it contains + delimiters. That is, a delimiter at the start or end of SS or + a pair of adjacent delimiters yields an empty token, and the + empty string contains a single token. */ +bool +ss_separate (struct substring ss, struct substring delimiters, + size_t *save_idx, struct substring *token) +{ + if (*save_idx <= ss_length (ss)) { - /* Overlap is possible, but the new string will fit in the - allocated space, so we can just copy data. 
*/ - st->length = new_length; - memmove (st->string, string, st->length); + struct substring tmp = ss_substr (ss, *save_idx, SIZE_MAX); + size_t length = ss_cspan (tmp, delimiters); + *token = ss_head (tmp, length); + *save_idx += length + 1; + return true; + } + else + { + *token = ss_empty (); + return false; } } -/* Frees ST. */ -void -ds_destroy (struct string *st) +/* Divides SS into tokens separated by any of the DELIMITERS, + merging adjacent delimiters so that the empty string is never + produced as a token. Each call replaces TOKEN by the next + token in SS, or by an empty string if no tokens remain, and + then skips past the first delimiter following the token. + Returns true if a token was obtained, false otherwise. + + Before the first call, initialize *SAVE_IDX to 0. Do not + modify *SAVE_IDX between calls. */ +bool +ss_tokenize (struct substring ss, struct substring delimiters, + size_t *save_idx, struct substring *token) { - free (st->string); - st->string = NULL; + ss_advance (&ss, *save_idx); + *save_idx += ss_ltrim (&ss, delimiters); + ss_get_chars (&ss, ss_cspan (ss, delimiters), token); + *save_idx += ss_length (*token) + 1; + return ss_length (*token) > 0; } -/* Swaps the contents of strings A and B. */ +/* Removes the first CNT characters from SS. */ void -ds_swap (struct string *a, struct string *b) +ss_advance (struct substring *ss, size_t cnt) { - struct string tmp = *a; - *a = *b; - *b = tmp; + if (cnt > ss->length) + cnt = ss->length; + ss->string += cnt; + ss->length -= cnt; } -/* Truncates ST to zero length. */ -void -ds_clear (struct string *st) +/* If the first character in SS is C, removes it and returns true. + Otherwise, returns false without changing the string. 
*/ +bool +ss_match_char (struct substring *ss, char c) { - st->length = 0; + if (ss_first (*ss) == (unsigned char) c) + { + ss->string++; + ss->length--; + return true; + } + else + return false; } -/* Pad ST on the right with copies of PAD until ST is at least - LENGTH characters in size. If ST is initially LENGTH - characters or longer, this is a no-op. */ -void -ds_rpad (struct string *st, size_t length, char pad) +/* If the first character in SS is in MATCH, removes it and + returns it as a value in the range of unsigned char. + Otherwise, returns EOF without changing the string. */ +int +ss_match_char_in (struct substring *ss, struct substring match) +{ + int c = EOF; + if (ss->length > 0 + && memchr (match.string, ss->string[0], match.length) != NULL) + { + c = (unsigned char) ss->string[0]; + ss->string++; + ss->length--; + } + return c; +} + +/* If SS begins with TARGET, removes it and returns true. + Otherwise, returns false without changing SS. */ +bool +ss_match_string (struct substring *ss, const struct substring target) { - assert (st != NULL); - if (st->length < length) + size_t length = ss_length (target); + if (ss_equals (ss_head (*ss, length), target)) { - if (st->capacity < length) - ds_extend (st, length); - memset (&st->string[st->length], pad, length - st->length); - st->length = length; + ss_advance (ss, length); + return true; } + else + return false; } -/* Removes trailing spaces from ST. - Returns number of spaces removed. */ +/* Removes the first character from SS and returns it. + If SS is empty, returns EOF without modifying SS. */ int -ds_rtrim_spaces (struct string *st) +ss_get_char (struct substring *ss) { - int cnt = 0; - while (isspace (ds_last (st))) + int c = ss_first (*ss); + if (c != EOF) { - st->length--; - cnt++; + ss->string++; + ss->length--; } + return c; +} + +/* Stores the prefix of SS up to the first DELIMITER in OUT (if + any). Trims those same characters from SS. DELIMITER is + removed from SS but not made part of OUT. 
Returns true if + DELIMITER was found (and removed), false otherwise. */ +bool +ss_get_until (struct substring *ss, char delimiter, struct substring *out) +{ + ss_get_chars (ss, ss_cspan (*ss, ss_buffer (&delimiter, 1)), out); + return ss_match_char (ss, delimiter); +} + +/* Stores the first CNT characters in SS in OUT (or fewer, if SS + is shorter than CNT characters). Trims the same characters + from the beginning of SS. Returns CNT. */ +size_t +ss_get_chars (struct substring *ss, size_t cnt, struct substring *out) +{ + *out = ss_head (*ss, cnt); + ss_advance (ss, cnt); return cnt; } -/* Removes leading spaces from ST. - Returns number of spaces removed. */ -int -ds_ltrim_spaces (struct string *st) +/* Parses and removes an optionally signed decimal integer from + the beginning of SS. Returns 0 if an error occurred, + otherwise the number of characters removed from SS. Stores + the integer's value into *VALUE. */ +size_t +ss_get_long (struct substring *ss, long *value) { - int idx = ds_n_find(st, "\t "); - if (0 == idx) - return 0; + char tmp[64]; + size_t length; - if (idx < 0 ) + length = ss_span (*ss, ss_cstr ("+-")); + length += ss_span (ss_substr (*ss, length, SIZE_MAX), ss_cstr (CC_DIGITS)); + if (length > 0 && length < sizeof tmp) { - int len = ds_length(st); - ds_clear(st); - return len; + char *tail; + + memcpy (tmp, ss_data (*ss), length); + tmp[length] = '\0'; + + *value = strtol (tmp, &tail, 10); + if (tail - tmp == length) + { + ss_advance (ss, length); + return length; + } } - - ds_replace(st, &ds_c_str(st)[idx]); - - return idx; + *value = 0; + return 0; } - -/* If the last character in ST is C, removes it and returns true. - Otherwise, returns false without modifying ST. */ +/* Returns true if SS is empty (contains no characters), + false otherwise. */ bool -ds_chomp (struct string *st, char c_) +ss_is_empty (struct substring ss) +{ + return ss.length == 0; +} + +/* Returns the number of characters in SS. 
*/ +size_t +ss_length (struct substring ss) +{ + return ss.length; +} + +/* Returns a pointer to the characters in SS. */ +char * +ss_data (struct substring ss) +{ + return ss.string; +} + +/* Returns a pointer just past the last character in SS. */ +char * +ss_end (struct substring ss) +{ + return ss.string + ss.length; +} + +/* Returns the character in position IDX in SS, as a value in the + range of unsigned char. Returns EOF if IDX is out of the + range of indexes for SS. */ +int +ss_at (struct substring ss, size_t idx) +{ + return idx < ss.length ? (unsigned char) ss.string[idx] : EOF; +} + +/* Returns the first character in SS as a value in the range of + unsigned char. Returns EOF if SS is the empty string. */ +int +ss_first (struct substring ss) +{ + return ss_at (ss, 0); +} + +/* Returns the last character in SS as a value in the range of + unsigned char. Returns EOF if SS is the empty string. */ +int +ss_last (struct substring ss) +{ + return ss.length > 0 ? (unsigned char) ss.string[ss.length - 1] : EOF; +} + +/* Returns the number of contiguous characters at the beginning + of SS that are in SKIP_SET. */ +size_t +ss_span (struct substring ss, struct substring skip_set) +{ + size_t i; + for (i = 0; i < ss.length; i++) + if (ss_find_char (skip_set, ss.string[i]) == SIZE_MAX) + break; + return i; +} + +/* Returns the number of contiguous characters at the beginning + of SS that are not in STOP_SET. */ +size_t +ss_cspan (struct substring ss, struct substring stop_set) +{ + size_t i; + for (i = 0; i < ss.length; i++) + if (ss_find_char (stop_set, ss.string[i]) != SIZE_MAX) + break; + return i; +} + +/* Returns the offset in SS of the first instance of C, + or SIZE_MAX if C does not occur in SS. */ +size_t +ss_find_char (struct substring ss, char c) +{ + const char *p = memchr (ss.string, c, ss.length); + return p != NULL ? p - ss.string : SIZE_MAX; +} + +/* Compares A and B and returns a strcmp()-type comparison + result. 
*/ +int +ss_compare (struct substring a, struct substring b) { - unsigned char c = c_; - if (ds_last (st) == c) + int retval = memcmp (a.string, b.string, MIN (a.length, b.length)); + if (retval == 0) + retval = a.length < b.length ? -1 : a.length > b.length; + return retval; +} + +/* Compares A and B case-insensitively and returns a + strcmp()-type comparison result. */ +int +ss_compare_case (struct substring a, struct substring b) +{ + int retval = memcasecmp (a.string, b.string, MIN (a.length, b.length)); + if (retval == 0) + retval = a.length < b.length ? -1 : a.length > b.length; + return retval; +} + +/* Compares A and B and returns true if their contents are + identical, false otherwise. */ +int +ss_equals (struct substring a, struct substring b) +{ + return a.length == b.length && !memcmp (a.string, b.string, a.length); +} + +/* Compares A and B and returns true if their contents are + identical except possibly for case differences, false + otherwise. */ +int +ss_equals_case (struct substring a, struct substring b) +{ + return a.length == b.length && !memcasecmp (a.string, b.string, a.length); +} + +/* Returns the position in SS that the character at P occupies. + P must point within SS or one past its end. */ +size_t +ss_pointer_to_position (struct substring ss, const char *p) +{ + size_t pos = p - ss.string; + assert (pos <= ss.length); + return pos; +} + +/* Allocates and returns a null-terminated string that contains + SS. */ +char * +ss_xstrdup (struct substring ss) +{ + char *s = xmalloc (ss.length + 1); + memcpy (s, ss.string, ss.length); + s[ss.length] = '\0'; + return s; +} + +/* Initializes ST as an empty string. */ +void +ds_init_empty (struct string *st) +{ + st->ss = ss_empty (); + st->capacity = 0; +} + +/* Initializes ST with initial contents S. */ +void +ds_init_string (struct string *st, const struct string *s) +{ + ds_init_substring (st, ds_ss (s)); +} + +/* Initializes ST with initial contents SS. 
*/ +void +ds_init_substring (struct string *st, struct substring ss) +{ + st->capacity = MAX (8, ss.length * 2); + st->ss.string = xmalloc (st->capacity + 1); + memcpy (st->ss.string, ss.string, ss.length); + st->ss.length = ss.length; +} + +/* Initializes ST with initial contents S. */ +void +ds_init_cstr (struct string *st, const char *s) +{ + ds_init_substring (st, ss_cstr (s)); +} + +/* Frees ST. */ +void +ds_destroy (struct string *st) +{ + if (st != NULL) { - st->length--; - return true; + ss_dealloc (&st->ss); + st->ss.string = NULL; + st->ss.length = 0; + st->capacity = 0; } - else - return false; +} + +/* Swaps the contents of strings A and B. */ +void +ds_swap (struct string *a, struct string *b) +{ + struct string tmp = *a; + *a = *b; + *b = tmp; +} + +/* Helper function for ds_register_pool. */ +static void +free_string (void *st_) +{ + struct string *st = st_; + ds_destroy (st); +} + +/* Arranges for ST to be destroyed automatically as part of + POOL. */ +void +ds_register_pool (struct string *st, struct pool *pool) +{ + pool_register (pool, free_string, st); +} + +/* Cancels the arrangement for ST to be destroyed automatically + as part of POOL. */ +void +ds_unregister_pool (struct string *st, struct pool *pool) +{ + pool_unregister (pool, st); +} + +/* Copies SRC into DST. + DST and SRC may be the same string. */ +void +ds_assign_string (struct string *dst, const struct string *src) +{ + ds_assign_substring (dst, ds_ss (src)); +} + +/* Replaces DST by SS. + SS may be a substring of DST. */ +void +ds_assign_substring (struct string *dst, struct substring ss) +{ + dst->ss.length = ss.length; + ds_extend (dst, ss.length); + memmove (dst->ss.string, ss.string, ss.length); +} + +/* Replaces DST by null-terminated string SRC. SRC may overlap + with DST. */ +void +ds_assign_cstr (struct string *dst, const char *src) +{ + ds_assign_substring (dst, ss_cstr (src)); +} + +/* Truncates ST to zero length. 
*/ +void +ds_clear (struct string *st) +{ + st->ss.length = 0; +} + +/* Returns a substring that contains ST. */ +struct substring +ds_ss (const struct string *st) +{ + return st->ss; +} + +/* Returns a substring that contains CNT characters from ST + starting at position START. + + If START is greater than or equal to the length of ST, then + the substring will be the empty string. If START + CNT + exceeds the length of ST, then the substring will only be + ds_length(ST) - START characters long. */ +struct substring +ds_substr (const struct string *st, size_t start, size_t cnt) +{ + return ss_substr (ds_ss (st), start, cnt); +} + +/* Returns a substring that contains the first CNT characters in + ST. If CNT exceeds the length of ST, then the substring will + contain all of ST. */ +struct substring +ds_head (const struct string *st, size_t cnt) +{ + return ss_head (ds_ss (st), cnt); +} + +/* Returns a substring that contains the last CNT characters in + ST. If CNT exceeds the length of ST, then the substring will + contain all of ST. 
*/ +struct substring +ds_tail (const struct string *st, size_t cnt) +{ + return ss_tail (ds_ss (st), cnt); } /* Ensures that ST can hold at least MIN_CAPACITY characters plus a null @@ -391,9 +955,9 @@ ds_extend (struct string *st, size_t min_capacity) { st->capacity *= 2; if (st->capacity < min_capacity) - st->capacity = min_capacity * 2; - - st->string = xrealloc (st->string, st->capacity + 1); + st->capacity = 2 * min_capacity; + + st->ss.string = xrealloc (st->ss.string, st->capacity + 1); } } @@ -401,10 +965,10 @@ ds_extend (struct string *st, size_t min_capacity) void ds_shrink (struct string *st) { - if (st->capacity != st->length) + if (st->capacity != st->ss.length) { - st->capacity = st->length; - st->string = xrealloc (st->string, st->capacity + 1); + st->capacity = st->ss.length; + st->ss.string = xrealloc (st->ss.string, st->capacity + 1); } } @@ -412,114 +976,148 @@ ds_shrink (struct string *st) void ds_truncate (struct string *st, size_t length) { - if (length >= st->length) - return; - st->length = length; + ss_truncate (&st->ss, length); } -/* Returns true if ST is empty, false otherwise. */ -bool -ds_is_empty (const struct string *st) +/* Removes trailing characters in TRIM_SET from ST. + Returns number of characters removed. */ +size_t +ds_rtrim (struct string *st, struct substring trim_set) { - return st->length == 0; + return ss_rtrim (&st->ss, trim_set); } -/* Returns the length of ST. */ +/* Removes leading characters in TRIM_SET from ST. + Returns number of characters removed. */ size_t -ds_length (const struct string *st) +ds_ltrim (struct string *st, struct substring trim_set) { - return st->length; + size_t cnt = ds_span (st, trim_set); + if (cnt > 0) + ds_assign_substring (st, ds_substr (st, cnt, SIZE_MAX)); + return cnt; } -/* Returns the allocation size of ST. */ +/* Trims leading and trailing characters in TRIM_SET from ST. + Returns number of characters removed. 
*/ size_t -ds_capacity (const struct string *st) +ds_trim (struct string *st, struct substring trim_set) { - return st->capacity; + size_t cnt = ds_rtrim (st, trim_set); + return cnt + ds_ltrim (st, trim_set); +} + +/* If the last character in ST is C, removes it and returns true. + Otherwise, returns false without modifying ST. */ +bool +ds_chomp (struct string *st, char c) +{ + return ss_chomp (&st->ss, c); } +/* Divides ST into tokens separated by any of the DELIMITERS. + Each call replaces TOKEN by the next token in ST, or by an + empty string if no tokens remain. Returns true if a token was + obtained, false otherwise. -/* Returns the index of the first character in ST which - is an element of the set CS. - Returns -1 if no characters are found. -*/ -int -ds_find(const struct string *st, const char cs[]) + Before the first call, initialize *SAVE_IDX to 0. Do not + modify *SAVE_IDX between calls. + + ST divides into exactly one more tokens than it contains + delimiters. That is, a delimiter at the start or end of ST or + a pair of adjacent delimiters yields an empty token, and the + empty string contains a single token. */ +bool +ds_separate (const struct string *st, struct substring delimiters, + size_t *save_idx, struct substring *token) { - int i; - int j; - for(i = 0; i < st->length ; ++i) - { - if ('\0' == st->string[i]) - break; - for (j = 0 ; j < strlen(cs) ; ++j) - { - if ( st->string[i] == cs[j]) - return i; - } - } - return -1; + return ss_separate (ds_ss (st), delimiters, save_idx, token); } -/* Returns the index of the first character in ST which - is NOT an element of the set CS. - Returns -1 if no such character is found. -*/ -int -ds_n_find(const struct string *st, const char cs[]) +/* Divides ST into tokens separated by any of the DELIMITERS, + merging adjacent delimiters so that the empty string is never + produced as a token. Each call replaces TOKEN by the next + token in ST, or by an empty string if no tokens remain. 
+ Returns true if a token was obtained, false otherwise. + + Before the first call, initialize *SAVE_IDX to 0. Do not + modify *SAVE_IDX between calls. */ +bool +ds_tokenize (const struct string *st, struct substring delimiters, + size_t *save_idx, struct substring *token) { - int i; - int j; - for(i = 0; i < st->length ; ++i) - { - bool found = false; - if ('\0' == st->string[i]) - break; - for (j = 0 ; j < strlen(cs) ; ++j) - { - if ( st->string[i] == cs[j]) - { - found = true; - break; - } - } - if ( !found ) - return i; - } - return -1; + return ss_tokenize (ds_ss (st), delimiters, save_idx, token); } +/* Pad ST on the right with copies of PAD until ST is at least + LENGTH characters in size. If ST is initially LENGTH + characters or longer, this is a no-op. */ +void +ds_rpad (struct string *st, size_t length, char pad) +{ + if (length > st->ss.length) + ds_put_char_multiple (st, pad, length - st->ss.length); +} +/* Sets the length of ST to exactly NEW_LENGTH, + either by truncating characters from the end, + or by padding on the right with PAD. */ +void +ds_set_length (struct string *st, size_t new_length, char pad) +{ + if (st->ss.length < new_length) + ds_rpad (st, new_length, pad); + else + st->ss.length = new_length; +} -/* Returns the first character in ST as a value in the range of - unsigned char. Returns EOF if ST is the empty string. */ -int -ds_first (const struct string *st) +/* Removes N characters from ST starting at offset START. */ +void +ds_remove (struct string *st, size_t start, size_t n) { - return st->length > 0 ? (unsigned char) st->string[0] : EOF; + if (n > 0 && start < st->ss.length) + { + if (st->ss.length - start <= n) + { + /* All characters at or beyond START are deleted. */ + st->ss.length = start; + } + else + { + /* Some characters remain and must be shifted into + position. 
*/ + memmove (st->ss.string + st->ss.length, + st->ss.string + st->ss.length + n, + st->ss.length - start - n); + st->ss.length -= n; + } + } + else + { + /* There are no characters to delete or no characters at or + beyond START, hence deletion is a no-op. */ + } } -/* Returns the last character in ST as a value in the range of - unsigned char. Returns EOF if ST is the empty string. */ -int -ds_last (const struct string *st) +/* Returns true if ST is empty, false otherwise. */ +bool +ds_is_empty (const struct string *st) { - return st->length > 0 ? (unsigned char) st->string[st->length - 1] : EOF; + return ss_is_empty (st->ss); } -/* Returns the value of ST as a null-terminated string. */ -char * -ds_c_str (const struct string *st) +/* Returns the length of ST. */ +size_t +ds_length (const struct string *st) { - ((char *) st->string)[st->length] = '\0'; - return st->string; + return ss_length (ds_ss (st)); } /* Returns the string data inside ST. */ char * ds_data (const struct string *st) { - return st->string; + return ss_data (ds_ss (st)); } /* Returns a pointer to the null terminator ST. @@ -528,122 +1126,162 @@ ds_data (const struct string *st) char * ds_end (const struct string *st) { - return st->string + st->length; + return ss_end (ds_ss (st)); } -/* Concatenates S onto ST. */ -void -ds_puts (struct string *st, const char *s) +/* Returns the character in position IDX in ST, as a value in the + range of unsigned char. Returns EOF if IDX is out of the + range of indexes for ST. */ +int +ds_at (const struct string *st, size_t idx) { - size_t s_len; + return ss_at (ds_ss (st), idx); +} - if (!s) return; +/* Returns the first character in ST as a value in the range of + unsigned char. Returns EOF if ST is the empty string. 
*/ +int +ds_first (const struct string *st) +{ + return ss_first (ds_ss (st)); +} - s_len = strlen (s); - ds_extend (st, st->length + s_len); - strcpy (st->string + st->length, s); - st->length += s_len; +/* Returns the last character in ST as a value in the range of + unsigned char. Returns EOF if ST is the empty string. */ +int +ds_last (const struct string *st) +{ + return ss_last (ds_ss (st)); } -/* Concatenates LEN characters from BUF onto ST. */ -void -ds_concat (struct string *st, const char *buf, size_t len) +/* Returns the number of consecutive characters at the beginning + of ST that are in SKIP_SET. */ +size_t +ds_span (const struct string *st, struct substring skip_set) { - ds_extend (st, st->length + len); - memcpy (st->string + st->length, buf, len); - st->length += len; + return ss_span (ds_ss (st), skip_set); } -void ds_vprintf (struct string *st, const char *format, va_list args); +/* Returns the number of consecutive characters at the beginning + of ST that are not in STOP_SET. */ +size_t +ds_cspan (const struct string *st, struct substring stop_set) +{ + return ss_cspan (ds_ss (st), stop_set); +} +/* Returns the position of the first occurrence of character C in + ST at or after position OFS, or SIZE_MAX if there is no such + occurrence. */ +size_t +ds_find_char (const struct string *st, char c) +{ + return ss_find_char (ds_ss (st), c); +} -/* Formats FORMAT as a printf string and appends the result to ST. */ -void -ds_printf (struct string *st, const char *format, ...) +/* Compares A and B and returns a strcmp()-type comparison + result. */ +int +ds_compare (const struct string *a, const struct string *b) { - va_list args; + return ss_compare (ds_ss (a), ds_ss (b)); +} - va_start (args, format); - ds_vprintf(st,format,args); - va_end (args); +/* Returns the position in ST that the character at P occupies. + P must point within ST or one past its end. 
*/ +size_t +ds_pointer_to_position (const struct string *st, const char *p) +{ + return ss_pointer_to_position (ds_ss (st), p); +} +/* Allocates and returns a null-terminated string that contains + ST. */ +char * +ds_xstrdup (const struct string *st) +{ + return ss_xstrdup (ds_ss (st)); } -/* Formats FORMAT as a printf string and appends the result to ST. */ -void -ds_vprintf (struct string *st, const char *format, va_list args) +/* Returns the allocation size of ST. */ +size_t +ds_capacity (const struct string *st) { - /* Fscking glibc silently changed behavior between 2.0 and 2.1. - Fsck fsck fsck. Before, it returned -1 on buffer overflow. Now, - it returns the number of characters (not bytes) that would have - been written. */ + return st->capacity; +} - int avail, needed; - va_list a1; +/* Returns the value of ST as a null-terminated string. */ +char * +ds_cstr (const struct string *st_) +{ + struct string *st = (struct string *) st_; + if (st->ss.string == NULL) + ds_extend (st, 1); + st->ss.string[st->ss.length] = '\0'; + return st->ss.string; +} - va_copy(a1, args); - avail = st->capacity - st->length + 1; - needed = vsnprintf (st->string + st->length, avail, format, args); +/* Reads characters from STREAM and appends them to ST, stopping + after MAX_LENGTH characters, after appending a newline, or + after an I/O error or end of file was encountered, whichever + comes first. Returns true if at least one character was added + to ST, false if no characters were read before an I/O error or + end of file (or if MAX_LENGTH was 0). + This function accepts LF, CR LF, and CR sequences as new-line, + and translates each of them to a single '\n' new-line + character in ST. 
*/ +bool +ds_read_line (struct string *st, FILE *stream, size_t max_length) +{ + size_t length; - if (needed >= avail) + for (length = 0; length < max_length; length++) { - ds_extend (st, st->length + needed); - - vsprintf (st->string + st->length, format, a1); - } - else - while (needed == -1) - { - va_list a2; - va_copy(a2, a1); - - ds_extend (st, (st->capacity + 1) * 2); - avail = st->capacity - st->length + 1; - - needed = vsnprintf (st->string + st->length, avail, format, a2); - va_end(a2); - - } - va_end(a1); + int c = getc (stream); + if (c == EOF) + break; - st->length += needed; -} + if (c == '\r') + { + c = getc (stream); + if (c != '\n') + { + ungetc (c, stream); + c = '\n'; + } + } + ds_put_char (st, c); + if (c == '\n') + return true; + } -/* Appends character CH to ST. */ -void -ds_putc (struct string *st, int ch) -{ - if (st->length == st->capacity) - ds_extend (st, st->length + 1); - st->string[st->length++] = ch; + return length > 0; } -/* Appends to ST a newline-terminated line read from STREAM. - Newline is the last character of ST on return, unless an I/O error - or end of file is encountered after reading some characters. - Returns 1 if a line is successfully read, or 0 if no characters at - all were read before an I/O error or end of file was - encountered. */ -int -ds_gets (struct string *st, FILE *stream) +/* Removes a comment introduced by `#' from ST, + ignoring occurrences inside quoted strings. 
*/ +static void +remove_comment (struct string *st) { - int c; + char *cp; + int quote = 0; - c = getc (stream); - if (c == EOF) - return 0; - - for (;;) - { - ds_putc (st, c); - if (c == '\n') - return 1; - - c = getc (stream); - if (c == EOF) - return 1; - } + for (cp = ds_data (st); cp < ds_end (st); cp++) + if (quote) + { + if (*cp == quote) + quote = 0; + else if (*cp == '\\') + cp++; + } + else if (*cp == '\'' || *cp == '"') + quote = *cp; + else if (*cp == '#') + { + ds_truncate (st, cp - ds_cstr (st)); + break; + } } /* Reads a line from STREAM into ST, then preprocesses as follows: @@ -653,157 +1291,153 @@ ds_gets (struct string *st, FILE *stream) - Deletes comments introduced by `#' outside of single or double quotes. - - Trailing whitespace will be deleted. - - Increments cust_ln as appropriate. + - Deletes trailing white space. - Returns nonzero only if a line was successfully read. */ -int -ds_get_config_line (FILE *stream, struct string *st, struct file_locator *where) + Returns true if a line was successfully read, false on + failure. If LINE_NUMBER is non-null, then *LINE_NUMBER is + incremented by the number of lines read. */ +bool +ds_read_config_line (struct string *st, int *line_number, FILE *stream) { - /* Read the first line. */ ds_clear (st); - where->line_number++; - if (!ds_gets (st, stream)) - return 0; - - /* Read additional lines, if any. */ - for (;;) + do { - /* Remove trailing whitespace. */ - { - char *s = ds_c_str (st); - size_t len = ds_length (st); - - while (len > 0 && isspace ((unsigned char) s[len - 1])) - len--; - ds_truncate (st, len); - } - - /* Check for trailing \. Remove if found, bail otherwise. */ - if (ds_length (st) == 0 || ds_c_str (st)[ds_length (st) - 1] != '\\') - break; - ds_truncate (st, ds_length (st) - 1); - - /* Append another line and go around again. 
*/ - { - int success = ds_gets (st, stream); - where->line_number++; - if (!success) - return 1; - } + if (!ds_read_line (st, stream, SIZE_MAX)) + return false; + (*line_number)++; + ds_rtrim (st, ss_cstr (CC_SPACES)); } + while (ds_chomp (st, '\\')); - /* Find a comment and remove. */ - { - char *cp; - int quote = 0; - - for (cp = ds_c_str (st); *cp; cp++) - if (quote) - { - if (*cp == quote) - quote = 0; - else if (*cp == '\\') - cp++; - } - else if (*cp == '\'' || *cp == '"') - quote = *cp; - else if (*cp == '#') - { - ds_truncate (st, cp - ds_c_str (st)); - break; - } - } - - return 1; + remove_comment (st); + return true; } - -/* Lengthed strings. */ -/* Creates a new lengthed string LS with contents as a copy of - S. */ -void -ls_create (struct fixed_string *ls, const char *s) +/* Attempts to read SIZE * CNT bytes from STREAM and append them + to ST. + Returns true if all the requested data was read, false otherwise. */ +bool +ds_read_stream (struct string *st, size_t size, size_t cnt, FILE *stream) { - ls->length = strlen (s); - ls->string = xmalloc (ls->length + 1); - memcpy (ls->string, s, ls->length + 1); + if (size != 0) + { + size_t try_bytes = xtimes (cnt, size); + if (size_in_bounds_p (xsum (ds_length (st), try_bytes))) + { + char *buffer = ds_put_uninit (st, try_bytes); + size_t got_bytes = fread (buffer, 1, try_bytes, stream); + ds_truncate (st, ds_length (st) - (try_bytes - got_bytes)); + return got_bytes == try_bytes; + } + else + { + errno = ENOMEM; + return false; + } + } + else + return true; } -/* Creates a new lengthed string LS with contents as a copy of - BUFFER with length LEN. */ +/* Concatenates S onto ST. 
*/ void -ls_create_buffer (struct fixed_string *ls, - const char *buffer, size_t len) +ds_put_cstr (struct string *st, const char *s) { - ls->length = len; - ls->string = xmalloc (len + 1); - memcpy (ls->string, buffer, len); - ls->string[len] = '\0'; + if (s != NULL) + ds_put_substring (st, ss_cstr (s)); } -/* Sets the fields of LS to the specified values. */ +/* Concatenates SS to ST. */ void -ls_init (struct fixed_string *ls, const char *string, size_t length) +ds_put_substring (struct string *st, struct substring ss) { - ls->string = (char *) string; - ls->length = length; + memcpy (ds_put_uninit (st, ss_length (ss)), ss_data (ss), ss_length (ss)); } -/* Copies the fields of SRC to DST. */ -void -ls_shallow_copy (struct fixed_string *dst, const struct fixed_string *src) +/* Returns ds_end(ST) and THEN increases the length by INCR. */ +char * +ds_put_uninit (struct string *st, size_t incr) { - *dst = *src; + char *end; + ds_extend (st, ds_length (st) + incr); + end = ds_end (st); + st->ss.length += incr; + return end; } -/* Frees the memory backing LS. */ +/* Formats FORMAT as a printf string and appends the result to ST. */ void -ls_destroy (struct fixed_string *ls) +ds_put_format (struct string *st, const char *format, ...) { - free (ls->string); + va_list args; + + va_start (args, format); + ds_put_vformat (st, format, args); + va_end (args); } -/* Sets LS to a null pointer value. */ +/* Formats FORMAT as a printf string and appends the result to ST. */ void -ls_null (struct fixed_string *ls) +ds_put_vformat (struct string *st, const char *format, va_list args_) { - ls->string = NULL; -} + int avail, needed; + va_list args; -/* Returns nonzero only if LS has a null pointer value. */ -int -ls_null_p (const struct fixed_string *ls) -{ - return ls->string == NULL; -} + va_copy (args, args_); + avail = st->ss.string != NULL ? 
st->capacity - st->ss.length + 1 : 0; + needed = vsnprintf (st->ss.string + st->ss.length, avail, format, args); + va_end (args); -/* Returns nonzero only if LS is a null pointer or has length 0. */ -int -ls_empty_p (const struct fixed_string *ls) -{ - return ls->string == NULL || ls->length == 0; + if (needed >= avail) + { + va_copy (args, args_); + vsprintf (ds_put_uninit (st, needed), format, args); + va_end (args); + } + else + { + /* Some old libc's returned -1 when the destination string + was too short. */ + while (needed == -1) + { + ds_extend (st, (st->capacity + 1) * 2); + avail = st->capacity - st->ss.length + 1; + + va_copy (args, args_); + needed = vsnprintf (ds_end (st), avail, format, args); + va_end (args); + } + st->ss.length += needed; + } } -/* Returns the length of LS, which must not be null. */ -size_t -ls_length (const struct fixed_string *ls) +/* Appends character CH to ST. */ +void +ds_put_char (struct string *st, int ch) { - return ls->length; + ds_put_uninit (st, 1)[0] = ch; } -/* Returns a pointer to the character string in LS. */ -char * -ls_c_str (const struct fixed_string *ls) +/* Appends CNT copies of character CH to ST. */ +void +ds_put_char_multiple (struct string *st, int ch, size_t cnt) { - return (char *) ls->string; + memset (ds_put_uninit (st, cnt), ch, cnt); } -/* Returns a pointer to the null terminator of the character string in - LS. */ -char * -ls_end (const struct fixed_string *ls) + +/* If relocation has been enabled, replace ST, + with its relocated version */ +void +ds_relocate (struct string *st) { - return (char *) (ls->string + ls->length); + const char *orig = ds_cstr (st); + const char *rel = relocate (orig); + + if ( orig != rel) + { + ds_clear (st); + ds_put_cstr (st, rel); + free ((char *) rel); + } }