X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fstr.c;h=afe32de9f2049bfcc5a80ac7a95b36b348be4cb7;hb=8830c95bb9e8d72621787866141a27fc22e8c786;hp=c8497c34af4fbf1dd57ace97d18d1ad72e60116c;hpb=a621b4a73d06d73a753c9f7207c03393dfb1b81e;p=pspp-builds.git diff --git a/src/libpspp/str.c b/src/libpspp/str.c index c8497c34..afe32de9 100644 --- a/src/libpspp/str.c +++ b/src/libpspp/str.c @@ -1,32 +1,35 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. - Written by Ben Pfaff . +/* PSPP - a program for statistical analysis. + Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . 
*/ #include + #include "str.h" -#include "message.h" + #include -#include +#include +#include #include -#include "alloc.h" -#include "message.h" + +#include +#include + +#include #include "minmax.h" -#include "size_max.h" +#include "xalloc.h" +#include "xsize.h" /* Reverses the order of NBYTES bytes at address P, thus converting between little- and big-endian byte orders. */ @@ -66,12 +69,12 @@ buf_compare_case (const char *a_, const char *b_, size_t size) const unsigned char *a = (unsigned char *) a_; const unsigned char *b = (unsigned char *) b_; - while (size-- > 0) + while (size-- > 0) { unsigned char ac = toupper (*a++); unsigned char bc = toupper (*b++); - if (ac != bc) + if (ac != bc) return ac > bc ? 1 : -1; } @@ -91,17 +94,17 @@ buf_compare_rpad (const char *a, size_t a_len, const char *b, size_t b_len) result = memcmp (a, b, min_len); if (result != 0) return result; - else + else { size_t idx; - - if (a_len < b_len) + + if (a_len < b_len) { for (idx = min_len; idx < b_len; idx++) if (' ' != b[idx]) return ' ' > b[idx] ? 1 : -1; } - else + else { for (idx = min_len; idx < a_len; idx++) if (a[idx] != ' ') @@ -122,9 +125,9 @@ str_compare_rpad (const char *a, const char *b) /* Copies string SRC to buffer DST, of size DST_SIZE bytes. DST is truncated to DST_SIZE bytes or padded on the right with - spaces as needed. */ + copies of PAD as needed. */ void -buf_copy_str_rpad (char *dst, size_t dst_size, const char *src) +buf_copy_str_rpad (char *dst, size_t dst_size, const char *src, char pad) { size_t src_len = strlen (src); if (src_len >= dst_size) @@ -132,15 +135,15 @@ buf_copy_str_rpad (char *dst, size_t dst_size, const char *src) else { memcpy (dst, src, src_len); - memset (&dst[src_len], ' ', dst_size - src_len); + memset (&dst[src_len], pad, dst_size - src_len); } } /* Copies string SRC to buffer DST, of size DST_SIZE bytes. DST is truncated to DST_SIZE bytes or padded on the left with - spaces as needed. */ + copies of PAD as needed. 
*/ void -buf_copy_str_lpad (char *dst, size_t dst_size, const char *src) +buf_copy_str_lpad (char *dst, size_t dst_size, const char *src, char pad) { size_t src_len = strlen (src); if (src_len >= dst_size) @@ -148,24 +151,42 @@ buf_copy_str_lpad (char *dst, size_t dst_size, const char *src) else { size_t pad_cnt = dst_size - src_len; - memset (&dst[0], ' ', pad_cnt); + memset (&dst[0], pad, pad_cnt); memcpy (dst + pad_cnt, src, src_len); } } +/* Copies buffer SRC, of SRC_SIZE bytes, to DST, of DST_SIZE bytes. + DST is truncated to DST_SIZE bytes or padded on the left with + copies of PAD as needed. */ +void +buf_copy_lpad (char *dst, size_t dst_size, + const char *src, size_t src_size, + char pad) +{ + if (src_size >= dst_size) + memmove (dst, src, dst_size); + else + { + memset (dst, pad, dst_size - src_size); + memmove (&dst[dst_size - src_size], src, src_size); + } +} + /* Copies buffer SRC, of SRC_SIZE bytes, to DST, of DST_SIZE bytes. DST is truncated to DST_SIZE bytes or padded on the right with - spaces as needed. */ + copies of PAD as needed. 
*/ void buf_copy_rpad (char *dst, size_t dst_size, - const char *src, size_t src_size) + const char *src, size_t src_size, + char pad) { if (src_size >= dst_size) memmove (dst, src, dst_size); else { memmove (dst, src, src_size); - memset (&dst[src_size], ' ', dst_size - src_size); + memset (&dst[src_size], pad, dst_size - src_size); } } @@ -176,27 +197,30 @@ buf_copy_rpad (char *dst, size_t dst_size, void str_copy_rpad (char *dst, size_t dst_size, const char *src) { - size_t src_len = strlen (src); - if (src_len < dst_size - 1) + if (dst_size > 0) { - memcpy (dst, src, src_len); - memset (&dst[src_len], ' ', dst_size - 1 - src_len); + size_t src_len = strlen (src); + if (src_len < dst_size - 1) + { + memcpy (dst, src, src_len); + memset (&dst[src_len], ' ', dst_size - 1 - src_len); + } + else + memcpy (dst, src, dst_size - 1); + dst[dst_size - 1] = 0; } - else - memcpy (dst, src, dst_size - 1); - dst[dst_size - 1] = 0; } /* Copies SRC to DST, which is in a buffer DST_SIZE bytes long. Truncates DST to DST_SIZE - 1 characters, if necessary. */ void -str_copy_trunc (char *dst, size_t dst_size, const char *src) +str_copy_trunc (char *dst, size_t dst_size, const char *src) { size_t src_len = strlen (src); assert (dst_size > 0); if (src_len + 1 < dst_size) memcpy (dst, src, src_len + 1); - else + else { memcpy (dst, src, dst_size - 1); dst[dst_size - 1] = '\0'; @@ -208,7 +232,7 @@ str_copy_trunc (char *dst, size_t dst_size, const char *src) Truncates DST to DST_SIZE - 1 characters, if necessary. */ void str_copy_buf_trunc (char *dst, size_t dst_size, - const char *src, size_t src_size) + const char *src, size_t src_size) { size_t dst_len; assert (dst_size > 0); @@ -220,7 +244,7 @@ str_copy_buf_trunc (char *dst, size_t dst_size, /* Converts each character in S to uppercase. 
*/ void -str_uppercase (char *s) +str_uppercase (char *s) { for (; *s != '\0'; s++) *s = toupper ((unsigned char) *s); @@ -228,108 +252,698 @@ str_uppercase (char *s) /* Converts each character in S to lowercase. */ void -str_lowercase (char *s) +str_lowercase (char *s) { for (; *s != '\0'; s++) *s = tolower ((unsigned char) *s); } + +/* Converts NUMBER into a string in 26-adic notation in BUFFER, + which has room for SIZE bytes. Returns true if successful, + false if NUMBER, plus a trailing null, is too large to fit in + the available space. + + 26-adic notation is "spreadsheet column numbering": 1 = A, 2 = + B, 3 = C, ... 26 = Z, 27 = AA, 28 = AB, 29 = AC, ... + + 26-adic notation is the special case of a k-adic numeration + system (aka bijective base-k numeration) with k=26. In k-adic + numeration, the digits are {1, 2, 3, ..., k} (there is no + digit 0), and integer 0 is represented by the empty string. + For more information, see + http://en.wikipedia.org/wiki/Bijective_numeration. */ +bool +str_format_26adic (unsigned long int number, char buffer[], size_t size) +{ + size_t length = 0; + + while (number-- > 0) + { + if (length >= size) + return false; + buffer[length++] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[number % 26]; + number /= 26; + } + + if (length >= size) + return false; + buffer[length] = '\0'; + + buf_reverse (buffer, length); + return true; +} + +/* Formats FORMAT into DST, as with sprintf(), and returns the + address of the terminating null written to DST. */ +char * +spprintf (char *dst, const char *format, ...) +{ + va_list args; + int count; + + va_start (args, format); + count = vsprintf (dst, format, args); + va_end (args); + + return dst + count; +} + +/* Sets the SIZE bytes starting at BLOCK to C, + and returns the byte following BLOCK. */ +void * +mempset (void *block, int c, size_t size) +{ + memset (block, c, size); + return (char *) block + size; +} + +/* Substrings. */ + +/* Returns an empty substring. 
*/ +struct substring +ss_empty (void) +{ + struct substring ss; + ss.string = NULL; + ss.length = 0; + return ss; +} + +/* Returns a substring whose contents are the given C-style + string CSTR. */ +struct substring +ss_cstr (const char *cstr) +{ + return ss_buffer (cstr, strlen (cstr)); +} + +/* Returns a substring whose contents are the CNT characters in + BUFFER. */ +struct substring +ss_buffer (const char *buffer, size_t cnt) +{ + struct substring ss; + ss.string = (char *) buffer; + ss.length = cnt; + return ss; +} + +/* Returns a substring whose contents are the CNT characters + starting at the (0-based) position START in SS. */ +struct substring +ss_substr (struct substring ss, size_t start, size_t cnt) +{ + if (start < ss.length) + return ss_buffer (ss.string + start, MIN (cnt, ss.length - start)); + else + return ss_buffer (ss.string + ss.length, 0); +} + +/* Returns a substring whose contents are the first CNT + characters in SS. */ +struct substring +ss_head (struct substring ss, size_t cnt) +{ + return ss_buffer (ss.string, MIN (cnt, ss.length)); +} + +/* Returns a substring whose contents are the last CNT characters + in SS. */ +struct substring +ss_tail (struct substring ss, size_t cnt) +{ + if (cnt < ss.length) + return ss_buffer (ss.string + (ss.length - cnt), cnt); + else + return ss; +} + +/* Makes a malloc()'d copy of the contents of OLD + and stores it in NEW. */ +void +ss_alloc_substring (struct substring *new, struct substring old) +{ + new->string = xmalloc (old.length); + new->length = old.length; + memcpy (new->string, old.string, old.length); +} + +/* Allocates room for a CNT-character string in NEW. */ +void +ss_alloc_uninit (struct substring *new, size_t cnt) +{ + new->string = xmalloc (cnt); + new->length = cnt; +} + +/* Makes a pool_alloc_unaligned()'d copy of the contents of OLD + in POOL, and stores it in NEW. 
*/ +void +ss_alloc_substring_pool (struct substring *new, struct substring old, + struct pool *pool) +{ + new->string = pool_alloc_unaligned (pool, old.length); + new->length = old.length; + memcpy (new->string, old.string, old.length); +} + +/* Allocates room for a CNT-character string in NEW in POOL. */ +void +ss_alloc_uninit_pool (struct substring *new, size_t cnt, struct pool *pool) +{ + new->string = pool_alloc_unaligned (pool, cnt); + new->length = cnt; +} + +/* Frees the string that SS points to. */ +void +ss_dealloc (struct substring *ss) +{ + free (ss->string); +} + +/* Truncates SS to at most CNT characters in length. */ +void +ss_truncate (struct substring *ss, size_t cnt) +{ + if (ss->length > cnt) + ss->length = cnt; +} + +/* Removes trailing characters in TRIM_SET from SS. + Returns number of characters removed. */ +size_t +ss_rtrim (struct substring *ss, struct substring trim_set) +{ + size_t cnt = 0; + while (cnt < ss->length + && ss_find_char (trim_set, + ss->string[ss->length - cnt - 1]) != SIZE_MAX) + cnt++; + ss->length -= cnt; + return cnt; +} + +/* Removes leading characters in TRIM_SET from SS. + Returns number of characters removed. */ +size_t +ss_ltrim (struct substring *ss, struct substring trim_set) +{ + size_t cnt = ss_span (*ss, trim_set); + ss_advance (ss, cnt); + return cnt; +} + +/* Trims leading and trailing characters in TRIM_SET from SS. */ +void +ss_trim (struct substring *ss, struct substring trim_set) +{ + ss_ltrim (ss, trim_set); + ss_rtrim (ss, trim_set); +} + +/* If the last character in SS is C, removes it and returns true. + Otherwise (including when SS is empty), returns false without changing the string. C is compared as an unsigned char because ss_last() yields an unsigned char value or EOF. */ +bool +ss_chomp (struct substring *ss, char c) +{ + if (ss_last (*ss) == (unsigned char) c) + { + ss->length--; + return true; + } + else + return false; +} + +/* Divides SS into tokens separated by any of the DELIMITERS. + Each call replaces TOKEN by the next token in SS, or by an + empty string if no tokens remain. 
Returns true if a token was + obtained, false otherwise. + + Before the first call, initialize *SAVE_IDX to 0. Do not + modify *SAVE_IDX between calls. + + SS divides into exactly one more token than it contains + delimiters. That is, a delimiter at the start or end of SS or + a pair of adjacent delimiters yields an empty token, and the + empty string contains a single token. */ +bool +ss_separate (struct substring ss, struct substring delimiters, + size_t *save_idx, struct substring *token) +{ + if (*save_idx <= ss_length (ss)) + { + struct substring tmp = ss_substr (ss, *save_idx, SIZE_MAX); + size_t length = ss_cspan (tmp, delimiters); + *token = ss_head (tmp, length); + *save_idx += length + 1; + return true; + } + else + { + *token = ss_empty (); + return false; + } +} + +/* Divides SS into tokens separated by any of the DELIMITERS, + merging adjacent delimiters so that the empty string is never + produced as a token. Each call replaces TOKEN by the next + token in SS, or by an empty string if no tokens remain, and + then skips past the first delimiter following the token. + Returns true if a token was obtained, false otherwise. + + Before the first call, initialize *SAVE_IDX to 0. Do not + modify *SAVE_IDX between calls. */ +bool +ss_tokenize (struct substring ss, struct substring delimiters, + size_t *save_idx, struct substring *token) +{ + ss_advance (&ss, *save_idx); + *save_idx += ss_ltrim (&ss, delimiters); + ss_get_chars (&ss, ss_cspan (ss, delimiters), token); + *save_idx += ss_length (*token) + 1; + return ss_length (*token) > 0; +} + +/* Removes the first CNT characters from SS, or all of them if SS has fewer than CNT. */ +void +ss_advance (struct substring *ss, size_t cnt) +{ + if (cnt > ss->length) + cnt = ss->length; + ss->string += cnt; + ss->length -= cnt; +} + +/* If the first character in SS is C, removes it and returns true. + Otherwise, returns false without changing the string. 
*/ +bool +ss_match_char (struct substring *ss, char c) +{ + if (ss_first (*ss) == (unsigned char) c) + { + ss->string++; + ss->length--; + return true; + } + else + return false; +} + +/* If the first character in SS is in MATCH, removes it and + returns the character that was removed, as a value in the range of + unsigned char. Otherwise, returns EOF without changing the string. */ +int +ss_match_char_in (struct substring *ss, struct substring match) +{ + int c = EOF; + if (ss->length > 0 + && memchr (match.string, ss->string[0], match.length) != NULL) + { + c = (unsigned char) ss->string[0]; + ss->string++; + ss->length--; + } + return c; +} + +/* If SS begins with TARGET, removes it and returns true. + Otherwise, returns false without changing SS. */ +bool +ss_match_string (struct substring *ss, const struct substring target) +{ + size_t length = ss_length (target); + if (ss_equals (ss_head (*ss, length), target)) + { + ss_advance (ss, length); + return true; + } + else + return false; +} + +/* Removes the first character from SS and returns it. + If SS is empty, returns EOF without modifying SS. */ +int +ss_get_char (struct substring *ss) +{ + int c = ss_first (*ss); + if (c != EOF) + { + ss->string++; + ss->length--; + } + return c; +} + +/* Stores the prefix of SS up to the first DELIMITER in OUT (if + any). Trims those same characters from SS. DELIMITER is + removed from SS but not made part of OUT. Returns true if + DELIMITER was found (and removed), false otherwise. */ +bool +ss_get_until (struct substring *ss, char delimiter, struct substring *out) +{ + ss_get_chars (ss, ss_cspan (*ss, ss_buffer (&delimiter, 1)), out); + return ss_match_char (ss, delimiter); +} + +/* Stores the first CNT characters in SS in OUT (or fewer, if SS + is shorter than CNT characters). Trims the same characters + from the beginning of SS. Returns CNT. 
*/ +size_t +ss_get_chars (struct substring *ss, size_t cnt, struct substring *out) +{ + *out = ss_head (*ss, cnt); + ss_advance (ss, cnt); + return cnt; +} + +/* Parses and removes an optionally signed decimal integer from + the beginning of SS. Returns 0 if an error occurred, + otherwise the number of characters removed from SS. Stores + the integer's value into *VALUE. */ +size_t +ss_get_long (struct substring *ss, long *value) +{ + char tmp[64]; + size_t length; + + length = ss_span (*ss, ss_cstr ("+-")); + length += ss_span (ss_substr (*ss, length, SIZE_MAX), ss_cstr (CC_DIGITS)); + if (length > 0 && length < sizeof tmp) + { + char *tail; + + memcpy (tmp, ss_data (*ss), length); + tmp[length] = '\0'; + + *value = strtol (tmp, &tail, 10); + if (tail - tmp == length) + { + ss_advance (ss, length); + return length; + } + } + *value = 0; + return 0; +} + +/* Returns true if SS is empty (contains no characters), + false otherwise. */ +bool +ss_is_empty (struct substring ss) +{ + return ss.length == 0; +} + +/* Returns the number of characters in SS. */ +size_t +ss_length (struct substring ss) +{ + return ss.length; +} + +/* Returns a pointer to the characters in SS. */ +char * +ss_data (struct substring ss) +{ + return ss.string; +} + +/* Returns a pointer just past the last character in SS. */ +char * +ss_end (struct substring ss) +{ + return ss.string + ss.length; +} + +/* Returns the character in position IDX in SS, as a value in the + range of unsigned char. Returns EOF if IDX is out of the + range of indexes for SS. */ +int +ss_at (struct substring ss, size_t idx) +{ + return idx < ss.length ? (unsigned char) ss.string[idx] : EOF; +} + +/* Returns the first character in SS as a value in the range of + unsigned char. Returns EOF if SS is the empty string. */ +int +ss_first (struct substring ss) +{ + return ss_at (ss, 0); +} + +/* Returns the last character in SS as a value in the range of + unsigned char. Returns EOF if SS is the empty string. 
*/ +int +ss_last (struct substring ss) +{ + return ss.length > 0 ? (unsigned char) ss.string[ss.length - 1] : EOF; +} + +/* Returns the number of contiguous characters at the beginning + of SS that are in SKIP_SET. */ +size_t +ss_span (struct substring ss, struct substring skip_set) +{ + size_t i; + for (i = 0; i < ss.length; i++) + if (ss_find_char (skip_set, ss.string[i]) == SIZE_MAX) + break; + return i; +} + +/* Returns the number of contiguous characters at the beginning + of SS that are not in STOP_SET. */ +size_t +ss_cspan (struct substring ss, struct substring stop_set) +{ + size_t i; + for (i = 0; i < ss.length; i++) + if (ss_find_char (stop_set, ss.string[i]) != SIZE_MAX) + break; + return i; +} + +/* Returns the offset in SS of the first instance of C, + or SIZE_MAX if C does not occur in SS. */ +size_t +ss_find_char (struct substring ss, char c) +{ + const char *p = memchr (ss.string, c, ss.length); + return p != NULL ? p - ss.string : SIZE_MAX; +} + +/* Compares A and B and returns a strcmp()-type comparison + result. */ +int +ss_compare (struct substring a, struct substring b) +{ + int retval = memcmp (a.string, b.string, MIN (a.length, b.length)); + if (retval == 0) + retval = a.length < b.length ? -1 : a.length > b.length; + return retval; +} + +/* Compares A and B case-insensitively and returns a + strcmp()-type comparison result. */ +int +ss_compare_case (struct substring a, struct substring b) +{ + int retval = memcasecmp (a.string, b.string, MIN (a.length, b.length)); + if (retval == 0) + retval = a.length < b.length ? -1 : a.length > b.length; + return retval; +} + +/* Compares A and B and returns true if their contents are + identical, false otherwise. */ +int +ss_equals (struct substring a, struct substring b) +{ + return a.length == b.length && !memcmp (a.string, b.string, a.length); +} + +/* Compares A and B and returns true if their contents are + identical except possibly for case differences, false + otherwise. 
*/ +int +ss_equals_case (struct substring a, struct substring b) +{ + return a.length == b.length && !memcasecmp (a.string, b.string, a.length); +} + +/* Returns the position in SS that the character at P occupies. + P must point within SS or one past its end. */ +size_t +ss_pointer_to_position (struct substring ss, const char *p) +{ + size_t pos = p - ss.string; + assert (pos <= ss.length); + return pos; +} + +/* Allocates and returns a null-terminated string that contains + SS. */ +char * +ss_xstrdup (struct substring ss) +{ + char *s = xmalloc (ss.length + 1); + memcpy (s, ss.string, ss.length); + s[ss.length] = '\0'; + return s; +} +/* Initializes ST as an empty string. */ +void +ds_init_empty (struct string *st) +{ + st->ss = ss_empty (); + st->capacity = 0; +} + /* Initializes ST with initial contents S. */ void -ds_create (struct string *st, const char *s) +ds_init_string (struct string *st, const struct string *s) +{ + ds_init_substring (st, ds_ss (s)); +} + +/* Initializes ST with initial contents SS. */ +void +ds_init_substring (struct string *st, struct substring ss) { - st->length = strlen (s); - st->capacity = MAX (8, st->length * 2); - st->string = xmalloc (st->capacity + 1); - strcpy (st->string, s); + st->capacity = MAX (8, ss.length * 2); + st->ss.string = xmalloc (st->capacity + 1); + memcpy (st->ss.string, ss.string, ss.length); + st->ss.length = ss.length; } -/* Initializes ST, making room for at least CAPACITY characters. */ +/* Initializes ST with initial contents S. */ void -ds_init (struct string *st, size_t capacity) +ds_init_cstr (struct string *st, const char *s) { - st->length = 0; - st->capacity = MAX (8, capacity); - st->string = xmalloc (st->capacity + 1); + ds_init_substring (st, ss_cstr (s)); } /* Frees ST. 
*/ void ds_destroy (struct string *st) { - if (st != NULL) + if (st != NULL) { - free (st->string); - st->string = NULL; - st->length = 0; - st->capacity = 0; + ss_dealloc (&st->ss); + st->ss.string = NULL; + st->ss.length = 0; + st->capacity = 0; } } /* Swaps the contents of strings A and B. */ void -ds_swap (struct string *a, struct string *b) +ds_swap (struct string *a, struct string *b) { struct string tmp = *a; *a = *b; *b = tmp; } -/* Initializes DST with the CNT characters from SRC starting at - position IDX. */ +/* Helper function for ds_register_pool. */ +static void +free_string (void *st_) +{ + struct string *st = st_; + ds_destroy (st); +} + +/* Arranges for ST to be destroyed automatically as part of + POOL. */ void -ds_init_substring (struct string *dst, - const struct string *src, size_t idx, size_t cnt) +ds_register_pool (struct string *st, struct pool *pool) { - assert (dst != src); - ds_init (dst, cnt); - ds_assign_substring (dst, src, idx, cnt); + pool_register (pool, free_string, st); } -/* Copies SRC into DST. - DST and SRC may be the same string. */ +/* Cancels the arrangement for ST to be destroyed automatically + as part of POOL. */ void -ds_assign_string (struct string *dst, const struct string *src) +ds_unregister_pool (struct string *st, struct pool *pool) { - ds_assign_buffer (dst, ds_data (src), ds_length (src)); + pool_unregister (pool, st); } -/* Replaces DST by CNT characters from SRC starting at position - IDX. +/* Copies SRC into DST. DST and SRC may be the same string. */ void -ds_assign_substring (struct string *dst, - const struct string *src, size_t idx, size_t cnt) +ds_assign_string (struct string *dst, const struct string *src) { - if (idx < src->length) - ds_assign_buffer (dst, src->string + idx, MIN (cnt, src->length - idx)); - else - ds_clear (dst); + ds_assign_substring (dst, ds_ss (src)); } -/* Replaces DST by the LENGTH characters in SRC. - SRC may be a substring within DST. */ +/* Replaces DST by SS. 
+ SS may be a substring of DST. */ void -ds_assign_buffer (struct string *dst, const char *src, size_t length) +ds_assign_substring (struct string *dst, struct substring ss) { - dst->length = length; - ds_extend (dst, length); - memmove (dst->string, src, length); + dst->ss.length = ss.length; + ds_extend (dst, ss.length); + memmove (dst->ss.string, ss.string, ss.length); } /* Replaces DST by null-terminated string SRC. SRC may overlap with DST. */ void -ds_assign_c_str (struct string *dst, const char *src) +ds_assign_cstr (struct string *dst, const char *src) { - ds_assign_buffer (dst, src, strlen (src)); + ds_assign_substring (dst, ss_cstr (src)); } /* Truncates ST to zero length. */ void ds_clear (struct string *st) { - st->length = 0; + st->ss.length = 0; +} + +/* Returns a substring that contains ST. */ +struct substring +ds_ss (const struct string *st) +{ + return st->ss; +} + +/* Returns a substring that contains CNT characters from ST + starting at position START. + + If START is greater than or equal to the length of ST, then + the substring will be the empty string. If START + CNT + exceeds the length of ST, then the substring will only be + ds_length(ST) - START characters long. */ +struct substring +ds_substr (const struct string *st, size_t start, size_t cnt) +{ + return ss_substr (ds_ss (st), start, cnt); +} + +/* Returns a substring that contains the first CNT characters in + ST. If CNT exceeds the length of ST, then the substring will + contain all of ST. */ +struct substring +ds_head (const struct string *st, size_t cnt) +{ + return ss_head (ds_ss (st), cnt); +} + +/* Returns a substring that contains the last CNT characters in + ST. If CNT exceeds the length of ST, then the substring will + contain all of ST. 
*/ +struct substring +ds_tail (const struct string *st, size_t cnt) +{ + return ss_tail (ds_ss (st), cnt); } /* Ensures that ST can hold at least MIN_CAPACITY characters plus a null @@ -343,7 +957,7 @@ ds_extend (struct string *st, size_t min_capacity) if (st->capacity < min_capacity) st->capacity = 2 * min_capacity; - st->string = xrealloc (st->string, st->capacity + 1); + st->ss.string = xrealloc (st->ss.string, st->capacity + 1); } } @@ -351,10 +965,10 @@ ds_extend (struct string *st, size_t min_capacity) void ds_shrink (struct string *st) { - if (st->capacity != st->length) + if (st->capacity != st->ss.length) { - st->capacity = st->length; - st->string = xrealloc (st->string, st->capacity + 1); + st->capacity = st->ss.length; + st->ss.string = xrealloc (st->ss.string, st->capacity + 1); } } @@ -362,68 +976,43 @@ ds_shrink (struct string *st) void ds_truncate (struct string *st, size_t length) { - if (st->length > length) - st->length = length; + ss_truncate (&st->ss, length); } -/* Pad ST on the right with copies of PAD until ST is at least - LENGTH characters in size. If ST is initially LENGTH - characters or longer, this is a no-op. */ -void -ds_rpad (struct string *st, size_t length, char pad) +/* Removes trailing characters in TRIM_SET from ST. + Returns number of characters removed. */ +size_t +ds_rtrim (struct string *st, struct substring trim_set) { - if (length > st->length) - ds_putc_multiple (st, pad, length - st->length); + return ss_rtrim (&st->ss, trim_set); } -/* Removes trailing spaces from ST. - Returns number of spaces removed. */ -int -ds_rtrim_spaces (struct string *st) -{ - int cnt = 0; - while (isspace (ds_last (st))) - { - st->length--; - cnt++; - } - return cnt; -} - -/* Removes leading spaces from ST. - Returns number of spaces removed. */ -int -ds_ltrim_spaces (struct string *st) +/* Removes leading characters in TRIM_SET from ST. + Returns number of characters removed. 
*/ +size_t +ds_ltrim (struct string *st, struct substring trim_set) { - size_t cnt = 0; - while (isspace (ds_at (st, cnt))) - cnt++; + size_t cnt = ds_span (st, trim_set); if (cnt > 0) - ds_assign_substring (st, st, cnt, SIZE_MAX); + ds_assign_substring (st, ds_substr (st, cnt, SIZE_MAX)); return cnt; } -/* Trims leading and trailing spaces from ST. */ -void -ds_trim_spaces (struct string *st) +/* Trims leading and trailing characters in TRIM_SET from ST. + Returns the total number of characters removed. */ +size_t +ds_trim (struct string *st, struct substring trim_set) { - ds_rtrim_spaces (st); - ds_ltrim_spaces (st); + size_t cnt = ds_rtrim (st, trim_set); + return cnt + ds_ltrim (st, trim_set); } /* If the last character in ST is C, removes it and returns true. Otherwise, returns false without modifying ST. */ bool -ds_chomp (struct string *st, char c_) +ds_chomp (struct string *st, char c) { - unsigned char c = c_; - if (ds_last (st) == c) - { - st->length--; - return true; - } - else - return false; + return ss_chomp (&st->ss, c); } /* Divides ST into tokens separated by any of the DELIMITERS. @@ -439,18 +1028,10 @@ ds_chomp (struct string *st, char c_) a pair of adjacent delimiters yields an empty token, and the empty string contains a single token. */ bool -ds_separate (const struct string *st, struct string *token, - const char *delimiters, size_t *save_idx) +ds_separate (const struct string *st, struct substring delimiters, + size_t *save_idx, struct substring *token) { - if (*save_idx <= ds_length (st)) - { - size_t length = ds_cspan (st, *save_idx, delimiters); - ds_assign_substring (token, st, *save_idx, length); - *save_idx += length + 1; - return true; - } - else - return false; + return ss_separate (ds_ss (st), delimiters, save_idx, token); } /* Divides ST into tokens separated by any of the DELIMITERS, @@ -462,46 +1043,81 @@ ds_separate (const struct string *st, struct string *token, Before the first call, initialize *SAVE_IDX to 0. 
Do not modify *SAVE_IDX between calls. */ bool -ds_tokenize (const struct string *st, struct string *token, - const char *delimiters, size_t *save_idx) +ds_tokenize (const struct string *st, struct substring delimiters, + size_t *save_idx, struct substring *token) { - size_t start = *save_idx + ds_span (st, *save_idx, delimiters); - size_t length = ds_cspan (st, start, delimiters); - ds_assign_substring (token, st, start, length); - *save_idx = start + length; - return length > 0; + return ss_tokenize (ds_ss (st), delimiters, save_idx, token); +} + +/* Pad ST on the right with copies of PAD until ST is at least + LENGTH characters in size. If ST is initially LENGTH + characters or longer, this is a no-op. */ +void +ds_rpad (struct string *st, size_t length, char pad) +{ + if (length > st->ss.length) + ds_put_char_multiple (st, pad, length - st->ss.length); +} + +/* Sets the length of ST to exactly NEW_LENGTH, + either by truncating characters from the end, + or by padding on the right with PAD. */ +void +ds_set_length (struct string *st, size_t new_length, char pad) +{ + if (st->ss.length < new_length) + ds_rpad (st, new_length, pad); + else + st->ss.length = new_length; +} + +/* Removes N characters from ST starting at offset START, or every character from START onward if fewer than N follow it. */ +void +ds_remove (struct string *st, size_t start, size_t n) +{ + if (n > 0 && start < st->ss.length) + { + if (st->ss.length - start <= n) + { + /* All characters at or beyond START are deleted. */ + st->ss.length = start; + } + else + { + /* Some characters remain; shift the tail that follows the + removed range down so that it begins at START. */ + memmove (st->ss.string + start, + st->ss.string + start + n, + st->ss.length - start - n); + st->ss.length -= n; + } + } + else + { + /* There are no characters to delete or no characters at or + beyond START, hence deletion is a no-op. */ + } } /* Returns true if ST is empty, false otherwise. 
*/ bool -ds_is_empty (const struct string *st) +ds_is_empty (const struct string *st) { - return st->length == 0; + return ss_is_empty (st->ss); } /* Returns the length of ST. */ size_t ds_length (const struct string *st) { - return st->length; -} - -/* Returns the value of ST as a null-terminated string. */ -char * -ds_c_str (const struct string *st_) -{ - struct string *st = (struct string *) st_; - if (st->string == NULL) - ds_extend (st, 1); - st->string[st->length] = '\0'; - return st->string; + return ss_length (ds_ss (st)); } /* Returns the string data inside ST. */ char * ds_data (const struct string *st) { - return st->string; + return ss_data (ds_ss (st)); } /* Returns a pointer to the null terminator ST. @@ -510,98 +1126,137 @@ ds_data (const struct string *st) char * ds_end (const struct string *st) { - return st->string + st->length; -} - -/* Returns the allocation size of ST. */ -size_t -ds_capacity (const struct string *st) -{ - return st->capacity; + return ss_end (ds_ss (st)); } /* Returns the character in position IDX in ST, as a value in the range of unsigned char. Returns EOF if IDX is out of the range of indexes for ST. */ int -ds_at (const struct string *st, size_t idx) +ds_at (const struct string *st, size_t idx) { - return idx < st->length ? (unsigned char) st->string[idx] : EOF; + return ss_at (ds_ss (st), idx); } /* Returns the first character in ST as a value in the range of unsigned char. Returns EOF if ST is the empty string. */ int -ds_first (const struct string *st) +ds_first (const struct string *st) { - return ds_at (st, 0); + return ss_first (ds_ss (st)); } /* Returns the last character in ST as a value in the range of unsigned char. Returns EOF if ST is the empty string. */ int -ds_last (const struct string *st) +ds_last (const struct string *st) { - return st->length > 0 ? 
(unsigned char) st->string[st->length - 1] : EOF; + return ss_last (ds_ss (st)); } -/* Returns the number of consecutive characters starting at OFS - in ST that are in SKIP_SET. (The null terminator is not - considered to be part of SKIP_SET.) */ +/* Returns the number of consecutive characters at the beginning + of ST that are in SKIP_SET. */ size_t -ds_span (const struct string *st, size_t ofs, const char skip_set[]) +ds_span (const struct string *st, struct substring skip_set) { - size_t i; - for (i = ofs; i < st->length; i++) - { - int c = st->string[i]; - if (strchr (skip_set, c) == NULL || c == '\0') - break; - } - return i - ofs; + return ss_span (ds_ss (st), skip_set); } -/* Returns the number of consecutive characters starting at OFS - in ST that are not in STOP_SET. (The null terminator is not - considered to be part of STOP_SET.) */ +/* Returns the number of consecutive characters at the beginning + of ST that are not in STOP_SET. */ size_t -ds_cspan (const struct string *st, size_t ofs, const char stop_set[]) +ds_cspan (const struct string *st, struct substring stop_set) { - size_t i; - for (i = ofs; i < st->length; i++) - { - int c = st->string[i]; - if (strchr (stop_set, c) != NULL) - break; - } - return i - ofs; + return ss_cspan (ds_ss (st), stop_set); } -/* Appends to ST a newline-terminated line read from STREAM. - Newline is the last character of ST on return, unless an I/O error - or end of file is encountered after reading some characters. - Returns true if a line is successfully read, false if no characters at - all were read before an I/O error or end of file was - encountered. */ -bool -ds_gets (struct string *st, FILE *stream) +/* Returns the position of the first occurrence of character C in + ST, or SIZE_MAX if there is no such + occurrence. 
*/ +size_t +ds_find_char (const struct string *st, char c) { - int c; + return ss_find_char (ds_ss (st), c); +} - c = getc (stream); - if (c == EOF) - return false; +/* Compares A and B and returns a strcmp()-type comparison + result. */ +int +ds_compare (const struct string *a, const struct string *b) +{ + return ss_compare (ds_ss (a), ds_ss (b)); +} - for (;;) - { - ds_putc (st, c); - if (c == '\n') - return true; +/* Returns the position in ST that the character at P occupies. + P must point within ST or one past its end. */ +size_t +ds_pointer_to_position (const struct string *st, const char *p) +{ + return ss_pointer_to_position (ds_ss (st), p); +} + +/* Allocates and returns a null-terminated string that contains + ST. */ +char * +ds_xstrdup (const struct string *st) +{ + return ss_xstrdup (ds_ss (st)); +} + +/* Returns the allocation size of ST. */ +size_t +ds_capacity (const struct string *st) +{ + return st->capacity; +} + +/* Returns the value of ST as a null-terminated string. + The terminator is stored in the buffer that belongs to ST, just + past its current contents, and the returned pointer refers to + that buffer rather than to a copy. The length of ST is not + changed. */ +char * +ds_cstr (const struct string *st_) +{ + /* Cast away const: the terminator must be written even though + callers pass a const string. */ + struct string *st = (struct string *) st_; + if (st->ss.string == NULL) + ds_extend (st, 1); + st->ss.string[st->ss.length] = '\0'; + return st->ss.string; +} + +/* Reads characters from STREAM and appends them to ST, stopping + after MAX_LENGTH characters, after appending a newline, or + after an I/O error or end of file was encountered, whichever + comes first. Returns true if at least one character was added + to ST, false if no characters were read before an I/O error or + end of file (or if MAX_LENGTH was 0). + + This function accepts LF, CR LF, and CR sequences as new-line, + and translates each of them to a single '\n' new-line + character in ST. 
*/ +bool +ds_read_line (struct string *st, FILE *stream, size_t max_length) +{ + size_t length; - c = getc (stream); + for (length = 0; length < max_length; length++) + { + int c = getc (stream); if (c == EOF) - return true; + break; + + if (c == '\r') + { + c = getc (stream); + if (c != '\n') + { + ungetc (c, stream); + c = '\n'; + } + } + ds_put_char (st, c); + if (c == '\n') + return true; } + + return length > 0; } /* Removes a comment introduced by `#' from ST, @@ -611,8 +1266,8 @@ remove_comment (struct string *st) { char *cp; int quote = 0; - - for (cp = ds_c_str (st); cp < ds_end (st); cp++) + + for (cp = ds_data (st); cp < ds_end (st); cp++) if (quote) { if (*cp == quote) @@ -624,7 +1279,7 @@ remove_comment (struct string *st) quote = *cp; else if (*cp == '#') { - ds_truncate (st, cp - ds_c_str (st)); + ds_truncate (st, cp - ds_cstr (st)); break; } } @@ -636,208 +1291,175 @@ remove_comment (struct string *st) - Deletes comments introduced by `#' outside of single or double quotes. - - Deletes trailing white space. + - Deletes trailing white space. Returns true if a line was successfully read, false on failure. If LINE_NUMBER is non-null, then *LINE_NUMBER is incremented by the number of lines read. */ bool -ds_get_config_line (FILE *stream, struct string *st, int *line_number) +ds_read_config_line (struct string *st, int *line_number, FILE *stream) { ds_clear (st); do { - if (!ds_gets (st, stream)) + if (!ds_read_line (st, stream, SIZE_MAX)) return false; /* LINE_NUMBER is optional, so only count when the caller supplied it. */ if (line_number != NULL) (*line_number)++; - ds_rtrim_spaces (st); + ds_rtrim (st, ss_cstr (CC_SPACES)); } while (ds_chomp (st, '\\')); - + remove_comment (st); return true; } +/* Attempts to read SIZE * CNT bytes from STREAM and append them + to ST. + Returns true if all the requested data was read, false otherwise. 
*/ +bool +ds_read_stream (struct string *st, size_t size, size_t cnt, FILE *stream) +{ + if (size != 0) + { + size_t try_bytes = xtimes (cnt, size); + if (size_in_bounds_p (xsum (ds_length (st), try_bytes))) + { + char *buffer = ds_put_uninit (st, try_bytes); + size_t got_bytes = fread (buffer, 1, try_bytes, stream); + /* Drop the part of the reserved space that fread did not + fill, so that ST holds only the bytes actually read. */ + ds_truncate (st, ds_length (st) - (try_bytes - got_bytes)); + return got_bytes == try_bytes; + } + else + { + errno = ENOMEM; + return false; + } + } + else + return true; +} + /* Concatenates S onto ST. Does nothing if S is null. */ void -ds_puts (struct string *st, const char *s) +ds_put_cstr (struct string *st, const char *s) { - size_t s_len; - - if (!s) return; - - s_len = strlen (s); - ds_extend (st, st->length + s_len); - strcpy (st->string + st->length, s); - st->length += s_len; + if (s != NULL) + ds_put_substring (st, ss_cstr (s)); } -/* Concatenates LEN characters from BUF onto ST. */ +/* Concatenates SS to ST. */ void -ds_concat (struct string *st, const char *buf, size_t len) +ds_put_substring (struct string *st, struct substring ss) { - ds_extend (st, st->length + len); - memcpy (st->string + st->length, buf, len); - st->length += len; + memcpy (ds_put_uninit (st, ss_length (ss)), ss_data (ss), ss_length (ss)); +} + +/* Extends ST by INCR bytes and returns a pointer to the first + added byte, that is, the value ds_end(ST) had before the length + was increased. The added bytes are not initialized here; the + caller is expected to fill them. */ +char * +ds_put_uninit (struct string *st, size_t incr) +{ + char *end; + ds_extend (st, ds_length (st) + incr); + end = ds_end (st); + st->ss.length += incr; + return end; } /* Formats FORMAT as a printf string and appends the result to ST. */ void -ds_printf (struct string *st, const char *format, ...) +ds_put_format (struct string *st, const char *format, ...) { va_list args; va_start (args, format); - ds_vprintf(st, format, args); + ds_put_vformat (st, format, args); va_end (args); } /* Formats FORMAT as a printf string and appends the result to ST. 
*/ void -ds_vprintf (struct string *st, const char *format, va_list args_) +ds_put_vformat (struct string *st, const char *format, va_list args_) { int avail, needed; va_list args; -#ifndef va_copy -#define va_copy(DST, SRC) (DST) = (SRC) -#endif - /* Work on a copy of ARGS_ so that the arguments can be formatted a second time if the first attempt does not fit. */ va_copy (args, args_); - avail = st->string != NULL ? st->capacity - st->length + 1 : 0; - needed = vsnprintf (st->string + st->length, avail, format, args); + /* NOTE(review): the + 1 presumably counts a byte that ds_extend keeps + in reserve for a null terminator -- confirm against ds_extend. */ + avail = st->ss.string != NULL ? st->capacity - st->ss.length + 1 : 0; + needed = vsnprintf (st->ss.string + st->ss.length, avail, format, args); va_end (args); if (needed >= avail) { - ds_extend (st, st->length + needed); - /* The output did not fit (or ST had no buffer yet): grow ST by NEEDED bytes and format again from a fresh copy of ARGS_. */ va_copy (args, args_); - vsprintf (st->string + st->length, format, args); + vsprintf (ds_put_uninit (st, needed), format, args); va_end (args); } - else + else { /* Some old libc's returned -1 when the destination string was too short. */ while (needed == -1) { ds_extend (st, (st->capacity + 1) * 2); - avail = st->capacity - st->length + 1; + avail = st->capacity - st->ss.length + 1; va_copy (args, args_); - needed = vsnprintf (st->string + st->length, avail, format, args); + needed = vsnprintf (ds_end (st), avail, format, args); va_end (args); - } + } + /* The text is already in place past the old end of ST, so + only the length needs to be advanced. */ + st->ss.length += needed; } - - st->length += needed; } /* Appends character CH to ST. */ void -ds_putc (struct string *st, int ch) +ds_put_char (struct string *st, int ch) { - if (st->length >= st->capacity) - ds_extend (st, st->length + 1); - st->string[st->length++] = ch; + ds_put_uninit (st, 1)[0] = ch; } /* Appends CNT copies of character CH to ST. */ void -ds_putc_multiple (struct string *st, int ch, size_t cnt) +ds_put_char_multiple (struct string *st, int ch, size_t cnt) { - ds_extend (st, st->length + cnt); - memset (&st->string[st->length], ch, cnt); - st->length += cnt; + memset (ds_put_uninit (st, cnt), ch, cnt); } - -/* Lengthed strings. */ -/* Creates a new lengthed string LS with contents as a copy of - S. 
*/ +/* If relocation has been enabled, replaces the contents of ST + with the relocated version of the string it held. */ void -ls_create (struct fixed_string *ls, const char *s) +ds_relocate (struct string *st) { - ls->length = strlen (s); - ls->string = xmalloc (ls->length + 1); - memcpy (ls->string, s, ls->length + 1); -} + const char *orig = ds_cstr (st); + const char *rel = relocate (orig); -/* Creates a new lengthed string LS with contents as a copy of - BUFFER with length LEN. */ -void -ls_create_buffer (struct fixed_string *ls, - const char *buffer, size_t len) -{ - ls->length = len; - ls->string = xmalloc (len + 1); - memcpy (ls->string, buffer, len); - ls->string[len] = '\0'; + /* NOTE(review): relocate() presumably returns its argument + unchanged when nothing was relocated and a newly allocated + string otherwise -- confirm; the free below relies on it. */ + if ( orig != rel) + { + ds_clear (st); + ds_put_cstr (st, rel); + free ((char *) rel); + } } -/* Sets the fields of LS to the specified values. */ -void -ls_init (struct fixed_string *ls, const char *string, size_t length) -{ - ls->string = (char *) string; - ls->length = length; -} -/* Copies the fields of SRC to DST. */ -void -ls_shallow_copy (struct fixed_string *dst, const struct fixed_string *src) -{ - *dst = *src; -} + -/* Frees the memory backing LS. */ -void -ls_destroy (struct fixed_string *ls) -{ - free (ls->string); -} +/* Operations on uint8_t "strings" */ -/* Sets LS to a null pointer value. */ +/* Copies buffer SRC, of SRC_SIZE bytes, to DST, of DST_SIZE bytes. + DST is truncated to DST_SIZE bytes or padded on the right with + copies of PAD as needed. */ void -ls_null (struct fixed_string *ls) -{ - ls->string = NULL; -} - -/* Returns nonzero only if LS has a null pointer value. */ -int -ls_null_p (const struct fixed_string *ls) -{ - return ls->string == NULL; -} - -/* Returns nonzero only if LS is a null pointer or has length 0. */ -int -ls_empty_p (const struct fixed_string *ls) -{ - return ls->string == NULL || ls->length == 0; -} - -/* Returns the length of LS, which must not be null. 
*/ -size_t -ls_length (const struct fixed_string *ls) -{ - return ls->length; -} - -/* Returns a pointer to the character string in LS. */ -char * -ls_c_str (const struct fixed_string *ls) +u8_buf_copy_rpad (uint8_t *dst, size_t dst_size, + const uint8_t *src, size_t src_size, + char pad) { - return (char *) ls->string; -} - -/* Returns a pointer to the null terminator of the character string in - LS. */ -char * -ls_end (const struct fixed_string *ls) -{ - return (char *) (ls->string + ls->length); + /* SRC fills or overflows DST: copy only the DST_SIZE bytes that + fit. memmove is used, which tolerates overlapping buffers. */ + if (src_size >= dst_size) + memmove (dst, src, dst_size); + else + { + /* SRC is shorter than DST: copy all of it, then fill the + rest of DST with PAD. */ + memmove (dst, src, src_size); + memset (&dst[src_size], pad, dst_size - src_size); + } }