X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fstr.c;h=45187ef12bbc75a6d108ea4eaa3cbbaf6c58106f;hb=d8b3292a8c12564dbc67e59f24d626dcfbf2e274;hp=655774d63d57c8e1b9b146b9d513edc8abe2a387;hpb=f0e5634dd6fcd77bd9d4d2407cde2c4a3a330617;p=pspp diff --git a/src/libpspp/str.c b/src/libpspp/str.c index 655774d63d..45187ef12b 100644 --- a/src/libpspp/str.c +++ b/src/libpspp/str.c @@ -1,35 +1,42 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. - Written by Ben Pfaff . +/* PSPP - a program for statistical analysis. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2014, + 2020 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ #include #include "str.h" #include -#include +#include +#include #include - -#include -#include - -#include "minmax.h" -#include "size_max.h" +#include + +#include "libpspp/cast.h" +#include "libpspp/i18n.h" +#include "libpspp/message.h" +#include "libpspp/pool.h" + +#include "gl/c-ctype.h" +#include "gl/c-vasnprintf.h" +#include "gl/relocatable.h" +#include "gl/minmax.h" +#include "gl/xalloc.h" +#include "gl/xmemdup0.h" +#include "gl/xsize.h" /* Reverses the order of NBYTES bytes at address P, thus converting between little- and big-endian byte orders. */ @@ -48,19 +55,6 @@ buf_reverse (char *p, size_t nbytes) } } -/* Finds the last NEEDLE of length NEEDLE_LEN in a HAYSTACK of length - HAYSTACK_LEN. Returns a pointer to the needle found. */ -char * -buf_find_reverse (const char *haystack, size_t haystack_len, - const char *needle, size_t needle_len) -{ - int i; - for (i = haystack_len - needle_len; i >= 0; i--) - if (!memcmp (needle, &haystack[i], needle_len)) - return (char *) &haystack[i]; - return 0; -} - /* Compares the SIZE bytes in A to those in B, disregarding case, and returns a strcmp()-type result. */ int @@ -69,12 +63,12 @@ buf_compare_case (const char *a_, const char *b_, size_t size) const unsigned char *a = (unsigned char *) a_; const unsigned char *b = (unsigned char *) b_; - while (size-- > 0) + while (size-- > 0) { unsigned char ac = toupper (*a++); unsigned char bc = toupper (*b++); - if (ac != bc) + if (ac != bc) return ac > bc ? 1 : -1; } @@ -94,17 +88,17 @@ buf_compare_rpad (const char *a, size_t a_len, const char *b, size_t b_len) result = memcmp (a, b, min_len); if (result != 0) return result; - else + else { size_t idx; - - if (a_len < b_len) + + if (a_len < b_len) { for (idx = min_len; idx < b_len; idx++) if (' ' != b[idx]) return ' ' > b[idx] ? 1 : -1; } - else + else { for (idx = min_len; idx < a_len; idx++) if (a[idx] != ' ') @@ -125,9 +119,9 @@ str_compare_rpad (const char *a, const char *b) /* Copies string SRC to buffer DST, of size DST_SIZE bytes. DST is truncated to DST_SIZE bytes or padded on the right with - spaces as needed. */ + copies of PAD as needed. */ void -buf_copy_str_rpad (char *dst, size_t dst_size, const char *src) +buf_copy_str_rpad (char *dst, size_t dst_size, const char *src, char pad) { size_t src_len = strlen (src); if (src_len >= dst_size) @@ -135,222 +129,988 @@ buf_copy_str_rpad (char *dst, size_t dst_size, const char *src) else { memcpy (dst, src, src_len); - memset (&dst[src_len], ' ', dst_size - src_len); + memset (&dst[src_len], pad, dst_size - src_len); } } /* Copies string SRC to buffer DST, of size DST_SIZE bytes. DST is truncated to DST_SIZE bytes or padded on the left with - spaces as needed. */ + copies of PAD as needed. */ void -buf_copy_str_lpad (char *dst, size_t dst_size, const char *src) +buf_copy_str_lpad (char *dst, size_t dst_size, const char *src, char pad) { size_t src_len = strlen (src); if (src_len >= dst_size) memcpy (dst, src, dst_size); else { - size_t pad_cnt = dst_size - src_len; - memset (&dst[0], ' ', pad_cnt); - memcpy (dst + pad_cnt, src, src_len); + size_t n_pad = dst_size - src_len; + memset (&dst[0], pad, n_pad); + memcpy (dst + n_pad, src, src_len); + } +} + +/* Copies buffer SRC, of SRC_SIZE bytes, to DST, of DST_SIZE bytes. + DST is truncated to DST_SIZE bytes or padded on the left with + copies of PAD as needed. */ +void +buf_copy_lpad (char *dst, size_t dst_size, + const char *src, size_t src_size, + char pad) +{ + if (src_size >= dst_size) + memmove (dst, src, dst_size); + else + { + memset (dst, pad, dst_size - src_size); + memmove (&dst[dst_size - src_size], src, src_size); } } /* Copies buffer SRC, of SRC_SIZE bytes, to DST, of DST_SIZE bytes. DST is truncated to DST_SIZE bytes or padded on the right with - spaces as needed. */ + copies of PAD as needed. */ void buf_copy_rpad (char *dst, size_t dst_size, - const char *src, size_t src_size) + const char *src, size_t src_size, + char pad) { if (src_size >= dst_size) memmove (dst, src, dst_size); else { - memmove (dst, src, src_size); - memset (&dst[src_size], ' ', dst_size - src_size); + memmove (dst, src, src_size); + memset (&dst[src_size], pad, dst_size - src_size); + } +} + +/* Copies string SRC to string DST, which is in a buffer DST_SIZE + bytes long. + Truncates DST to DST_SIZE - 1 bytes or right-pads with + spaces to DST_SIZE - 1 bytes if necessary. */ +void +str_copy_rpad (char *dst, size_t dst_size, const char *src) +{ + if (dst_size > 0) + { + size_t src_len = strlen (src); + if (src_len < dst_size - 1) + { + memcpy (dst, src, src_len); + memset (&dst[src_len], ' ', dst_size - 1 - src_len); + } + else + memcpy (dst, src, dst_size - 1); + dst[dst_size - 1] = 0; + } +} + +/* Copies SRC to DST, which is in a buffer DST_SIZE bytes long. + Truncates DST to DST_SIZE - 1 bytes, if necessary. */ +void +str_copy_trunc (char *dst, size_t dst_size, const char *src) +{ + size_t src_len = strlen (src); + assert (dst_size > 0); + if (src_len + 1 < dst_size) + memcpy (dst, src, src_len + 1); + else + { + memcpy (dst, src, dst_size - 1); + dst[dst_size - 1] = '\0'; + } +} + +/* Copies buffer SRC, of SRC_LEN bytes, + to DST, which is in a buffer DST_SIZE bytes long. + Truncates DST to DST_SIZE - 1 bytes, if necessary. */ +void +str_copy_buf_trunc (char *dst, size_t dst_size, + const char *src, size_t src_size) +{ + size_t dst_len; + assert (dst_size > 0); + + dst_len = src_size < dst_size ? src_size : dst_size - 1; + memcpy (dst, src, dst_len); + dst[dst_len] = '\0'; +} + +/* Converts each byte in S to uppercase. + + This is suitable only for ASCII strings. Use utf8_to_upper() for UTF-8 + strings.*/ +void +str_uppercase (char *s) +{ + for (; *s != '\0'; s++) + *s = c_toupper ((unsigned char) *s); +} + +/* Converts each byte in S to lowercase. + + This is suitable only for ASCII strings. Use utf8_to_lower() for UTF-8 + strings.*/ +void +str_lowercase (char *s) +{ + for (; *s != '\0'; s++) + *s = c_tolower ((unsigned char) *s); +} + +/* Converts NUMBER into a string in 26-adic notation in BUFFER, + which has room for SIZE bytes. Uses uppercase if UPPERCASE is + true, otherwise lowercase, Returns true if successful, false + if NUMBER, plus a trailing null, is too large to fit in the + available space. + + 26-adic notation is "spreadsheet column numbering": 1 = A, 2 = + B, 3 = C, ... 26 = Z, 27 = AA, 28 = AB, 29 = AC, ... + + 26-adic notation is the special case of a k-adic numeration + system (aka bijective base-k numeration) with k=26. In k-adic + numeration, the digits are {1, 2, 3, ..., k} (there is no + digit 0), and integer 0 is represented by the empty string. + For more information, see + http://en.wikipedia.org/wiki/Bijective_numeration. */ +bool +str_format_26adic (unsigned long int number, bool uppercase, + char buffer[], size_t size) +{ + const char *alphabet + = uppercase ? "ABCDEFGHIJKLMNOPQRSTUVWXYZ" : "abcdefghijklmnopqrstuvwxyz"; + size_t length = 0; + + while (number-- > 0) + { + if (length >= size) + goto overflow; + buffer[length++] = alphabet[number % 26]; + number /= 26; + } + + if (length >= size) + goto overflow; + buffer[length] = '\0'; + + buf_reverse (buffer, length); + return true; + +overflow: + if (length > 0) + buffer[0] = '\0'; + return false; +} + +/* Copies IN to buffer OUT with size OUT_SIZE, appending a null terminator. If + IN is too long for OUT, or if IN contains a new-line, replaces the tail with + "...". + + OUT_SIZE must be at least 16. */ +void +str_ellipsize (struct substring in, char *out, size_t out_size) +{ + assert (out_size >= 16); + + size_t out_maxlen = out_size - 1; + if (in.length > out_maxlen - 3) + out_maxlen -= 3; + + size_t out_len = 0; + while (out_len < in.length + && in.string[out_len] != '\n' + && in.string[out_len] != '\0' + && (in.string[out_len] != '\r' + || out_len + 1 >= in.length + || in.string[out_len + 1] != '\n')) + { + int mblen = u8_mblen (CHAR_CAST (const uint8_t *, in.string + out_len), + in.length - out_len); + if (mblen < 0 || out_len + mblen > out_maxlen) + break; + out_len += mblen; + } + + memcpy (out, in.string, out_len); + strcpy (&out[out_len], out_len < in.length ? "..." : ""); +} + +/* Sets the SIZE bytes starting at BLOCK to C, + and returns the byte following BLOCK. */ +void * +mempset (void *block, int c, size_t size) +{ + memset (block, c, size); + return (char *) block + size; +} + +/* Substrings. */ + +/* Returns a substring whose contents are the N bytes + starting at the (0-based) position START in SS. */ +struct substring +ss_substr (struct substring ss, size_t start, size_t n) +{ + if (start < ss.length) + return ss_buffer (ss.string + start, MIN (n, ss.length - start)); + else + return ss_buffer (ss.string + ss.length, 0); +} + +/* Returns a substring whose contents are the first N + bytes in SS. */ +struct substring +ss_head (struct substring ss, size_t n) +{ + return ss_buffer (ss.string, MIN (n, ss.length)); +} + +/* Returns a substring whose contents are the last N bytes + in SS. */ +struct substring +ss_tail (struct substring ss, size_t n) +{ + if (n < ss.length) + return ss_buffer (ss.string + (ss.length - n), n); + else + return ss; +} + +/* Makes a malloc()'d, null-terminated copy of the contents of OLD + and stores it in NEW. */ +void +ss_alloc_substring (struct substring *new, struct substring old) +{ + new->string = xmemdup0 (old.string, old.length); + new->length = old.length; +} + +/* Allocates room for a N-byte string in NEW. */ +void +ss_alloc_uninit (struct substring *new, size_t n) +{ + new->string = xmalloc (n); + new->length = n; +} + +void +ss_realloc (struct substring *ss, size_t size) +{ + ss->string = xrealloc (ss->string, size); +} + +/* Makes a pool_alloc_unaligned()'d, null-terminated copy of the contents of + OLD in POOL, and stores it in NEW. */ +void +ss_alloc_substring_pool (struct substring *new, struct substring old, + struct pool *pool) +{ + new->string = pool_alloc_unaligned (pool, old.length + 1); + new->length = old.length; + memcpy (new->string, old.string, old.length); + new->string[old.length] = '\0'; +} + +/* Allocates room for a N-byte string in NEW in POOL. */ +void +ss_alloc_uninit_pool (struct substring *new, size_t n, struct pool *pool) +{ + new->string = pool_alloc_unaligned (pool, n); + new->length = n; +} + +/* Frees the string that SS points to. */ +void +ss_dealloc (struct substring *ss) +{ + free (ss->string); +} + +/* Exchanges the contents of A and B. */ +void +ss_swap (struct substring *a, struct substring *b) +{ + struct substring tmp = *a; + *a = *b; + *b = tmp; +} + +/* Truncates SS to at most N bytes in length. */ +void +ss_truncate (struct substring *ss, size_t n) +{ + if (ss->length > n) + ss->length = n; +} + +/* Removes trailing bytes in TRIM_SET from SS. + Returns number of bytes removed. */ +size_t +ss_rtrim (struct substring *ss, struct substring trim_set) +{ + size_t n = 0; + while (n < ss->length + && ss_find_byte (trim_set, + ss->string[ss->length - n - 1]) != SIZE_MAX) + n++; + ss->length -= n; + return n; +} + +/* Removes leading bytes in TRIM_SET from SS. + Returns number of bytes removed. */ +size_t +ss_ltrim (struct substring *ss, struct substring trim_set) +{ + size_t n = ss_span (*ss, trim_set); + ss_advance (ss, n); + return n; +} + +/* Trims leading and trailing bytes in TRIM_SET from SS. */ +void +ss_trim (struct substring *ss, struct substring trim_set) +{ + ss_ltrim (ss, trim_set); + ss_rtrim (ss, trim_set); +} + +/* If the last byte in SS is C, removes it and returns true. + Otherwise, returns false without changing the string. */ +bool +ss_chomp_byte (struct substring *ss, char c) +{ + if (ss_last (*ss) == c) + { + ss->length--; + return true; + } + else + return false; +} + +/* If SS ends with SUFFIX, removes it and returns true. + Otherwise, returns false without changing the string. */ +bool +ss_chomp (struct substring *ss, struct substring suffix) +{ + if (ss_ends_with (*ss, suffix)) + { + ss->length -= suffix.length; + return true; + } + else + return false; +} + +/* Divides SS into tokens separated by any of the DELIMITERS. + Each call replaces TOKEN by the next token in SS, or by an + empty string if no tokens remain. Returns true if a token was + obtained, false otherwise. + + Before the first call, initialize *SAVE_IDX to 0. Do not + modify *SAVE_IDX between calls. + + SS divides into exactly one more tokens than it contains + delimiters. That is, a delimiter at the start or end of SS or + a pair of adjacent delimiters yields an empty token, and the + empty string contains a single token. */ +bool +ss_separate (struct substring ss, struct substring delimiters, + size_t *save_idx, struct substring *token) +{ + if (*save_idx <= ss_length (ss)) + { + struct substring tmp = ss_substr (ss, *save_idx, SIZE_MAX); + size_t length = ss_cspan (tmp, delimiters); + *token = ss_head (tmp, length); + *save_idx += length + 1; + return true; + } + else + { + *token = ss_empty (); + return false; + } +} + +/* Divides SS into tokens separated by any of the DELIMITERS, + merging adjacent delimiters so that the empty string is never + produced as a token. Each call replaces TOKEN by the next + token in SS, or by an empty string if no tokens remain, and + then skips past the first delimiter following the token. + Returns true if a token was obtained, false otherwise. + + Before the first call, initialize *SAVE_IDX to 0. Do not + modify *SAVE_IDX between calls. */ +bool +ss_tokenize (struct substring ss, struct substring delimiters, + size_t *save_idx, struct substring *token) +{ + bool found_token; + + ss_advance (&ss, *save_idx); + *save_idx += ss_ltrim (&ss, delimiters); + ss_get_bytes (&ss, ss_cspan (ss, delimiters), token); + + found_token = ss_length (*token) > 0; + *save_idx += ss_length (*token) + (found_token?1:0); + return found_token; +} + +/* Removes the first N bytes from SS. */ +void +ss_advance (struct substring *ss, size_t n) +{ + if (n > ss->length) + n = ss->length; + ss->string += n; + ss->length -= n; +} + +/* If the first byte in SS is C, removes it and returns true. + Otherwise, returns false without changing the string. */ +bool +ss_match_byte (struct substring *ss, char c) +{ + if (ss_first (*ss) == c) + { + ss->string++; + ss->length--; + return true; + } + else + return false; +} + +/* If the first byte in SS is in MATCH, removes it and + returns the byte that was removed. + Otherwise, returns EOF without changing the string. */ +int +ss_match_byte_in (struct substring *ss, struct substring match) +{ + int c = EOF; + if (ss->length > 0 + && memchr (match.string, ss->string[0], match.length) != NULL) + { + c = ss->string[0]; + ss->string++; + ss->length--; + } + return c; +} + +/* If SS begins with TARGET, removes it and returns true. + Otherwise, returns false without changing SS. */ +bool +ss_match_string (struct substring *ss, const struct substring target) +{ + size_t length = ss_length (target); + if (ss_equals (ss_head (*ss, length), target)) + { + ss_advance (ss, length); + return true; + } + else + return false; +} + +/* If SS begins with TARGET, except possibly for case differences, removes it + and returns true. Otherwise, returns false without changing SS. */ +bool +ss_match_string_case (struct substring *ss, const struct substring target) +{ + size_t length = ss_length (target); + if (ss_equals_case (ss_head (*ss, length), target)) + { + ss_advance (ss, length); + return true; + } + else + return false; +} + +/* Removes the first byte from SS and returns it. + If SS is empty, returns EOF without modifying SS. */ +int +ss_get_byte (struct substring *ss) +{ + int c = ss_first (*ss); + if (c != EOF) + { + ss->string++; + ss->length--; + } + return c; +} + +/* Stores the prefix of SS up to the first DELIMITER in OUT (if + any). Trims those same bytes from SS. DELIMITER is + removed from SS but not made part of OUT. Returns true if + DELIMITER was found (and removed), false otherwise. */ +bool +ss_get_until (struct substring *ss, char delimiter, struct substring *out) +{ + ss_get_bytes (ss, ss_cspan (*ss, ss_buffer (&delimiter, 1)), out); + return ss_match_byte (ss, delimiter); +} + +/* Stores the first N bytes in SS in OUT (or fewer, if SS + is shorter than N bytes). Trims the same bytes + from the beginning of SS. Returns N. */ +size_t +ss_get_bytes (struct substring *ss, size_t n, struct substring *out) +{ + *out = ss_head (*ss, n); + ss_advance (ss, n); + return n; +} + +/* Parses and removes an optionally signed decimal integer from + the beginning of SS. Returns 0 if an error occurred, + otherwise the number of bytes removed from SS. Stores + the integer's value into *VALUE. */ +size_t +ss_get_long (struct substring *ss, long *value) +{ + char tmp[64]; + size_t length; + + length = ss_span (*ss, ss_cstr ("+-")); + length += ss_span (ss_substr (*ss, length, SIZE_MAX), ss_cstr (CC_DIGITS)); + if (length > 0 && length < sizeof tmp) + { + char *tail; + + memcpy (tmp, ss_data (*ss), length); + tmp[length] = '\0'; + + *value = strtol (tmp, &tail, 10); + if (tail - tmp == length) + { + ss_advance (ss, length); + return length; + } + } + *value = 0; + return 0; +} + +/* Returns true if SS is empty (has length 0 bytes), + false otherwise. */ +bool +ss_is_empty (struct substring ss) +{ + return ss.length == 0; +} + +/* Returns the number of bytes in SS. */ +size_t +ss_length (struct substring ss) +{ + return ss.length; +} + +/* Returns a pointer to the bytes in SS. */ +char * +ss_data (struct substring ss) +{ + return ss.string; +} + +/* Returns a pointer just past the last byte in SS. */ +char * +ss_end (struct substring ss) +{ + return ss.string + ss.length; +} + +/* Returns the byte in position IDX in SS, as a value in the + range of unsigned char. Returns EOF if IDX is out of the + range of indexes for SS. */ +int +ss_at (struct substring ss, size_t idx) +{ + return idx < ss.length ? (unsigned char) ss.string[idx] : EOF; +} + +/* Returns the first byte in SS as a value in the range of + unsigned char. Returns EOF if SS is the empty string. */ +int +ss_first (struct substring ss) +{ + return ss_at (ss, 0); +} + +/* Returns the last byte in SS as a value in the range of + unsigned char. Returns EOF if SS is the empty string. */ +int +ss_last (struct substring ss) +{ + return ss.length > 0 ? (unsigned char) ss.string[ss.length - 1] : EOF; +} + +/* Returns true if SS starts with PREFIX, false otherwise. */ +bool +ss_starts_with (struct substring ss, struct substring prefix) +{ + return (ss.length >= prefix.length + && !memcmp (ss.string, prefix.string, prefix.length)); +} + +/* Returns true if SS ends with SUFFIX, false otherwise. */ +bool +ss_ends_with (struct substring ss, struct substring suffix) +{ + return (ss.length >= suffix.length + && !memcmp (&ss.string[ss.length - suffix.length], suffix.string, + suffix.length)); +} + +/* Returns the number of contiguous bytes at the beginning + of SS that are in SKIP_SET. */ +size_t +ss_span (struct substring ss, struct substring skip_set) +{ + size_t i; + for (i = 0; i < ss.length; i++) + if (ss_find_byte (skip_set, ss.string[i]) == SIZE_MAX) + break; + return i; +} + +/* Returns the number of contiguous bytes at the beginning + of SS that are not in SKIP_SET. */ +size_t +ss_cspan (struct substring ss, struct substring stop_set) +{ + size_t i; + for (i = 0; i < ss.length; i++) + if (ss_find_byte (stop_set, ss.string[i]) != SIZE_MAX) + break; + return i; +} + +/* Returns the offset in SS of the first instance of C, + or SIZE_MAX if C does not occur in SS. */ +size_t +ss_find_byte (struct substring ss, char c) +{ + const char *p = memchr (ss.string, (int) c, ss.length); + return p != NULL ? p - ss.string : SIZE_MAX; +} + +/* Returns the offset in HAYSTACK of the first instance of NEEDLE, + or SIZE_MAX if NEEDLE does not occur in HAYSTACK. */ +size_t +ss_find_substring (struct substring haystack, struct substring needle) +{ + const char *p = memmem (haystack.string, haystack.length, + needle.string, needle.length); + return p != NULL ? p - haystack.string : SIZE_MAX; +} + +/* Compares A and B and returns a strcmp()-type comparison + result. */ +int +ss_compare (struct substring a, struct substring b) +{ + int retval = memcmp (a.string, b.string, MIN (a.length, b.length)); + if (retval == 0) + retval = a.length < b.length ? -1 : a.length > b.length; + return retval; +} + +/* Compares A and B case-insensitively and returns a + strcmp()-type comparison result. */ +int +ss_compare_case (struct substring a, struct substring b) +{ + int retval = memcasecmp (a.string, b.string, MIN (a.length, b.length)); + if (retval == 0) + retval = a.length < b.length ? -1 : a.length > b.length; + return retval; +} + +/* Compares A and B and returns true if their contents are + identical, false otherwise. */ +int +ss_equals (struct substring a, struct substring b) +{ + return a.length == b.length && !memcmp (a.string, b.string, a.length); +} + +/* Compares A and B and returns true if their contents are + identical except possibly for case differences, false + otherwise. */ +int +ss_equals_case (struct substring a, struct substring b) +{ + return a.length == b.length && !memcasecmp (a.string, b.string, a.length); +} + +/* Returns the position in SS that the byte at P occupies. + P must point within SS or one past its end. */ +size_t +ss_pointer_to_position (struct substring ss, const char *p) +{ + size_t pos = p - ss.string; + assert (pos <= ss.length); + return pos; +} + +/* Allocates and returns a null-terminated string that contains + SS. */ +char * +ss_xstrdup (struct substring ss) +{ + char *s = xmalloc (ss.length + 1); + memcpy (s, ss.string, ss.length); + s[ss.length] = '\0'; + return s; +} +/* UTF-8. */ + +/* Returns the character represented by the UTF-8 sequence at the start of S. + The return value is either a Unicode code point in the range 0 to 0x10ffff, + or UINT32_MAX if S is empty. */ +ucs4_t +ss_first_mb (struct substring s) +{ + return ss_at_mb (s, 0); +} + +/* Returns the number of bytes in the UTF-8 character at the beginning of S. + + The return value is 0 if S is empty, otherwise between 1 and 4. */ +int +ss_first_mblen (struct substring s) +{ + return ss_at_mblen (s, 0); +} + +/* Advances S past the UTF-8 character at its beginning. Returns the Unicode + code point that was skipped (in the range 0 to 0x10ffff), or UINT32_MAX if S + was not modified because it was initially empty. */ +ucs4_t +ss_get_mb (struct substring *s) +{ + if (s->length > 0) + { + ucs4_t uc; + int n; + + n = u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, s->string), s->length); + s->string += n; + s->length -= n; + return uc; } + else + return UINT32_MAX; } -/* Copies string SRC to string DST, which is in a buffer DST_SIZE - bytes long. - Truncates DST to DST_SIZE - 1 characters or right-pads with - spaces to DST_SIZE - 1 characters if necessary. */ -void -str_copy_rpad (char *dst, size_t dst_size, const char *src) +/* Returns the character represented by the UTF-8 sequence starting OFS bytes + into S. The return value is either a Unicode code point in the range 0 to + 0x10ffff, or UINT32_MAX if OFS is past the last byte in S. + + (Returns 0xfffd if OFS points into the middle, not the beginning, of a UTF-8 + sequence.) */ +ucs4_t +ss_at_mb (struct substring s, size_t ofs) { - size_t src_len = strlen (src); - if (src_len < dst_size - 1) + if (s.length > ofs) { - memcpy (dst, src, src_len); - memset (&dst[src_len], ' ', dst_size - 1 - src_len); + ucs4_t uc; + u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, s.string + ofs), + s.length - ofs); + return uc; } else - memcpy (dst, src, dst_size - 1); - dst[dst_size - 1] = 0; + return UINT32_MAX; } -/* Copies SRC to DST, which is in a buffer DST_SIZE bytes long. - Truncates DST to DST_SIZE - 1 characters, if necessary. */ -void -str_copy_trunc (char *dst, size_t dst_size, const char *src) +/* Returns the number of bytes represented by the UTF-8 sequence starting OFS + bytes into S. The return value is 0 if OFS is past the last byte in S, + otherwise between 1 and 4. */ +int +ss_at_mblen (struct substring s, size_t ofs) { - size_t src_len = strlen (src); - assert (dst_size > 0); - if (src_len + 1 < dst_size) - memcpy (dst, src, src_len + 1); - else + if (s.length > ofs) { - memcpy (dst, src, dst_size - 1); - dst[dst_size - 1] = '\0'; + ucs4_t uc; + return u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, s.string + ofs), + s.length - ofs); } + else + return 0; } -/* Copies buffer SRC, of SRC_LEN bytes, - to DST, which is in a buffer DST_SIZE bytes long. - Truncates DST to DST_SIZE - 1 characters, if necessary. */ -void -str_copy_buf_trunc (char *dst, size_t dst_size, - const char *src, size_t src_size) +size_t +ss_utf8_count_columns (struct substring s) { - size_t dst_len; - assert (dst_size > 0); - - dst_len = src_size < dst_size ? src_size : dst_size - 1; - memcpy (dst, src, dst_len); - dst[dst_len] = '\0'; + return utf8_count_columns (s.string, s.length); } -/* Converts each character in S to uppercase. */ -void -str_uppercase (char *s) +/* Returns a substring of S starting at 0-based display column START and + running for N display columns. */ +struct substring +ss_utf8_columns (struct substring s, size_t start, size_t n) { - for (; *s != '\0'; s++) - *s = toupper ((unsigned char) *s); + ss_advance (&s, utf8_columns_to_bytes (s.string, s.length, start)); + s.length = utf8_columns_to_bytes (s.string, s.length, n); + return s; } - -/* Converts each character in S to lowercase. */ + +/* Initializes ST as an empty string. */ void -str_lowercase (char *s) +ds_init_empty (struct string *st) { - for (; *s != '\0'; s++) - *s = tolower ((unsigned char) *s); + st->ss = ss_empty (); + st->capacity = 0; } -/* Formats FORMAT into DST, as with sprintf(), and returns the - address of the terminating null written to DST. */ -char * -spprintf (char *dst, const char *format, ...) +/* Initializes ST with initial contents S. */ +void +ds_init_string (struct string *st, const struct string *s) { - va_list args; - int count; - - va_start (args, format); - count = vsprintf (dst, format, args); - va_end (args); - - return dst + count; + ds_init_substring (st, ds_ss (s)); } - -/* Initializes ST with initial contents S. */ + +/* Initializes ST with initial contents SS. */ void -ds_create (struct string *st, const char *s) +ds_init_substring (struct string *st, struct substring ss) { - st->length = strlen (s); - st->capacity = MAX (8, st->length * 2); - st->string = xmalloc (st->capacity + 1); - strcpy (st->string, s); + st->capacity = MAX (8, ss.length * 2); + st->ss.string = xmalloc (st->capacity + 1); + memcpy (st->ss.string, ss.string, ss.length); + st->ss.length = ss.length; } -/* Initializes ST as an empty string. */ +/* Initializes ST with initial contents S. */ void -ds_init (struct string *st) +ds_init_cstr (struct string *st, const char *s) { - st->length = 0; - st->capacity = 0; - st->string = NULL; + ds_init_substring (st, ss_cstr (s)); } /* Frees ST. */ void ds_destroy (struct string *st) { - if (st != NULL) + if (st != NULL) { - free (st->string); - st->string = NULL; - st->length = 0; - st->capacity = 0; + ss_dealloc (&st->ss); + st->ss.string = NULL; + st->ss.length = 0; + st->capacity = 0; } } /* Swaps the contents of strings A and B. */ void -ds_swap (struct string *a, struct string *b) +ds_swap (struct string *a, struct string *b) { struct string tmp = *a; *a = *b; *b = tmp; } -/* Initializes DST with the CNT characters from SRC starting at - position IDX. */ +/* Helper function for ds_register_pool. */ +static void +free_string (void *st_) +{ + struct string *st = st_; + ds_destroy (st); +} + +/* Arranges for ST to be destroyed automatically as part of + POOL. */ void -ds_init_substring (struct string *dst, - const struct string *src, size_t idx, size_t cnt) +ds_register_pool (struct string *st, struct pool *pool) { - assert (dst != src); - ds_init (dst); - ds_assign_substring (dst, src, idx, cnt); + pool_register (pool, free_string, st); } -/* Copies SRC into DST. - DST and SRC may be the same string. */ +/* Cancels the arrangement for ST to be destroyed automatically + as part of POOL. */ void -ds_assign_string (struct string *dst, const struct string *src) +ds_unregister_pool (struct string *st, struct pool *pool) { - ds_assign_buffer (dst, ds_data (src), ds_length (src)); + pool_unregister (pool, st); } -/* Replaces DST by CNT characters from SRC starting at position - IDX. +/* Copies SRC into DST. DST and SRC may be the same string. */ void -ds_assign_substring (struct string *dst, - const struct string *src, size_t idx, size_t cnt) +ds_assign_string (struct string *dst, const struct string *src) { - if (idx < src->length) - ds_assign_buffer (dst, src->string + idx, MIN (cnt, src->length - idx)); - else - ds_clear (dst); + ds_assign_substring (dst, ds_ss (src)); } -/* Replaces DST by the LENGTH characters in SRC. - SRC may be a substring within DST. */ +/* Replaces DST by SS. + SS may be a substring of DST. */ void -ds_assign_buffer (struct string *dst, const char *src, size_t length) +ds_assign_substring (struct string *dst, struct substring ss) { - dst->length = length; - ds_extend (dst, length); - memmove (dst->string, src, length); + dst->ss.length = ss.length; + ds_extend (dst, ss.length); + memmove (dst->ss.string, ss.string, ss.length); } /* Replaces DST by null-terminated string SRC. SRC may overlap with DST. */ void -ds_assign_c_str (struct string *dst, const char *src) +ds_assign_cstr (struct string *dst, const char *src) { - ds_assign_buffer (dst, src, strlen (src)); + ds_assign_substring (dst, ss_cstr (src)); } /* Truncates ST to zero length. */ void ds_clear (struct string *st) { - st->length = 0; + st->ss.length = 0; +} + +/* Returns a substring that contains ST. */ +struct substring +ds_ss (const struct string *st) +{ + return st->ss; +} + +/* Returns a substring that contains N bytes from ST + starting at position START. + + If START is greater than or equal to the length of ST, then + the substring will be the empty string. If START + N + exceeds the length of ST, then the substring will only be + ds_length(ST) - START bytes long. */ +struct substring +ds_substr (const struct string *st, size_t start, size_t n) +{ + return ss_substr (ds_ss (st), start, n); +} + +/* Returns a substring that contains the first N bytes in + ST. If N exceeds the length of ST, then the substring will + contain all of ST. */ +struct substring +ds_head (const struct string *st, size_t n) +{ + return ss_head (ds_ss (st), n); +} + +/* Returns a substring that contains the last N bytes in + ST. If N exceeds the length of ST, then the substring will + contain all of ST. */ +struct substring +ds_tail (const struct string *st, size_t n) +{ + return ss_tail (ds_ss (st), n); } -/* Ensures that ST can hold at least MIN_CAPACITY characters plus a null +/* Ensures that ST can hold at least MIN_CAPACITY bytes plus a null terminator. */ void ds_extend (struct string *st, size_t min_capacity) @@ -361,7 +1121,7 @@ ds_extend (struct string *st, size_t min_capacity) if (st->capacity < min_capacity) st->capacity = 2 * min_capacity; - st->string = xrealloc (st->string, st->capacity + 1); + st->ss.string = xrealloc (st->ss.string, st->capacity + 1); } } @@ -369,79 +1129,62 @@ ds_extend (struct string *st, size_t min_capacity) void ds_shrink (struct string *st) { - if (st->capacity != st->length) + if (st->capacity != st->ss.length) { - st->capacity = st->length; - st->string = xrealloc (st->string, st->capacity + 1); + st->capacity = st->ss.length; + st->ss.string = xrealloc (st->ss.string, st->capacity + 1); } } -/* Truncates ST to at most LENGTH characters long. */ +/* Truncates ST to at most LENGTH bytes long. */ void ds_truncate (struct string *st, size_t length) { - if (st->length > length) - st->length = length; + ss_truncate (&st->ss, length); } -/* Pad ST on the right with copies of PAD until ST is at least - LENGTH characters in size. If ST is initially LENGTH - characters or longer, this is a no-op. */ -void -ds_rpad (struct string *st, size_t length, char pad) +/* Removes trailing bytes in TRIM_SET from ST. + Returns number of bytes removed. */ +size_t +ds_rtrim (struct string *st, struct substring trim_set) { - if (length > st->length) - ds_putc_multiple (st, pad, length - st->length); + return ss_rtrim (&st->ss, trim_set); } -/* Removes trailing spaces from ST. - Returns number of spaces removed. */ -int -ds_rtrim_spaces (struct string *st) +/* Removes leading bytes in TRIM_SET from ST. + Returns number of bytes removed. */ +size_t +ds_ltrim (struct string *st, struct substring trim_set) { - int cnt = 0; - while (isspace (ds_last (st))) - { - st->length--; - cnt++; - } - return cnt; + size_t n = ds_span (st, trim_set); + if (n > 0) + ds_assign_substring (st, ds_substr (st, n, SIZE_MAX)); + return n; } -/* Removes leading spaces from ST. - Returns number of spaces removed. */ -int -ds_ltrim_spaces (struct string *st) +/* Trims leading and trailing bytes in TRIM_SET from ST. + Returns number of bytes removed. */ +size_t +ds_trim (struct string *st, struct substring trim_set) { - size_t cnt = 0; - while (isspace (ds_at (st, cnt))) - cnt++; - if (cnt > 0) - ds_assign_substring (st, st, cnt, SIZE_MAX); - return cnt; + size_t n = ds_rtrim (st, trim_set); + return n + ds_ltrim (st, trim_set); } -/* Trims leading and trailing spaces from ST. */ -void -ds_trim_spaces (struct string *st) +/* If the last byte in ST is C, removes it and returns true. + Otherwise, returns false without modifying ST. */ +bool +ds_chomp_byte (struct string *st, char c) { - ds_rtrim_spaces (st); - ds_ltrim_spaces (st); + return ss_chomp_byte (&st->ss, c); } -/* If the last character in ST is C, removes it and returns true. +/* If ST ends with SUFFIX, removes it and returns true. Otherwise, returns false without modifying ST. */ bool -ds_chomp (struct string *st, char c_) +ds_chomp (struct string *st, struct substring suffix) { - unsigned char c = c_; - if (ds_last (st) == c) - { - st->length--; - return true; - } - else - return false; + return ss_chomp (&st->ss, suffix); } /* Divides ST into tokens separated by any of the DELIMITERS. @@ -457,18 +1200,10 @@ ds_chomp (struct string *st, char c_) a pair of adjacent delimiters yields an empty token, and the empty string contains a single token. */ bool -ds_separate (const struct string *st, struct string *token, - const char *delimiters, size_t *save_idx) +ds_separate (const struct string *st, struct substring delimiters, + size_t *save_idx, struct substring *token) { - if (*save_idx <= ds_length (st)) - { - size_t length = ds_cspan (st, *save_idx, delimiters); - ds_assign_substring (token, st, *save_idx, length); - *save_idx += length + 1; - return true; - } - else - return false; + return ss_separate (ds_ss (st), delimiters, save_idx, token); } /* Divides ST into tokens separated by any of the DELIMITERS, @@ -480,146 +1215,252 @@ ds_separate (const struct string *st, struct string *token, Before the first call, initialize *SAVE_IDX to 0. Do not modify *SAVE_IDX between calls. */ bool -ds_tokenize (const struct string *st, struct string *token, - const char *delimiters, size_t *save_idx) +ds_tokenize (const struct string *st, struct substring delimiters, + size_t *save_idx, struct substring *token) { - size_t start = *save_idx + ds_span (st, *save_idx, delimiters); - size_t length = ds_cspan (st, start, delimiters); - ds_assign_substring (token, st, start, length); - *save_idx = start + length; - return length > 0; + return ss_tokenize (ds_ss (st), delimiters, save_idx, token); +} + +/* Pad ST on the right with copies of PAD until ST is at least + LENGTH bytes in size. If ST is initially LENGTH + bytes or longer, this is a no-op. */ +void +ds_rpad (struct string *st, size_t length, char pad) +{ + if (length > st->ss.length) + ds_put_byte_multiple (st, pad, length - st->ss.length); +} + +/* Sets the length of ST to exactly NEW_LENGTH, + either by truncating bytes from the end, + or by padding on the right with PAD. */ +void +ds_set_length (struct string *st, size_t new_length, char pad) +{ + if (st->ss.length < new_length) + ds_rpad (st, new_length, pad); + else + st->ss.length = new_length; +} + +/* Removes N bytes from ST starting at offset START. */ +void +ds_remove (struct string *st, size_t start, size_t n) +{ + if (n > 0 && start < st->ss.length) + { + if (st->ss.length - start <= n) + { + /* All bytes at or beyond START are deleted. */ + st->ss.length = start; + } + else + { + /* Some bytes remain and must be shifted into + position. */ + memmove (st->ss.string + st->ss.length, + st->ss.string + st->ss.length + n, + st->ss.length - start - n); + st->ss.length -= n; + } + } + else + { + /* There are no bytes to delete or no bytes at or + beyond START, hence deletion is a no-op. */ + } } /* Returns true if ST is empty, false otherwise. */ bool -ds_is_empty (const struct string *st) +ds_is_empty (const struct string *st) { - return st->length == 0; + return ss_is_empty (st->ss); } /* Returns the length of ST. */ size_t ds_length (const struct string *st) { - return st->length; -} - -/* Returns the value of ST as a null-terminated string. */ -char * -ds_c_str (const struct string *st_) -{ - struct string *st = (struct string *) st_; - if (st->string == NULL) - ds_extend (st, 1); - st->string[st->length] = '\0'; - return st->string; + return ss_length (ds_ss (st)); } /* Returns the string data inside ST. */ char * ds_data (const struct string *st) { - return st->string; + return ss_data (ds_ss (st)); } /* Returns a pointer to the null terminator ST. - This might not be an actual null character unless ds_c_str() has + This might not be an actual null byte unless ds_c_str() has been called since the last modification to ST. */ char * ds_end (const struct string *st) { - return st->string + st->length; -} - -/* Returns the allocation size of ST. */ -size_t -ds_capacity (const struct string *st) -{ - return st->capacity; + return ss_end (ds_ss (st)); } -/* Returns the character in position IDX in ST, as a value in the +/* Returns the byte in position IDX in ST, as a value in the range of unsigned char. Returns EOF if IDX is out of the range of indexes for ST. */ int -ds_at (const struct string *st, size_t idx) +ds_at (const struct string *st, size_t idx) { - return idx < st->length ? (unsigned char) st->string[idx] : EOF; + return ss_at (ds_ss (st), idx); } -/* Returns the first character in ST as a value in the range of +/* Returns the first byte in ST as a value in the range of unsigned char. Returns EOF if ST is the empty string. */ int -ds_first (const struct string *st) +ds_first (const struct string *st) { - return ds_at (st, 0); + return ss_first (ds_ss (st)); } -/* Returns the last character in ST as a value in the range of +/* Returns the last byte in ST as a value in the range of unsigned char. Returns EOF if ST is the empty string. */ int -ds_last (const struct string *st) +ds_last (const struct string *st) +{ + return ss_last (ds_ss (st)); +} + +/* Returns true if ST ends with SUFFIX, false otherwise. */ +bool +ds_ends_with (const struct string *st, struct substring suffix) { - return st->length > 0 ? (unsigned char) st->string[st->length - 1] : EOF; + return ss_ends_with (st->ss, suffix); } -/* Returns the number of consecutive characters starting at OFS - in ST that are in SKIP_SET. (The null terminator is not - considered to be part of SKIP_SET.) */ +/* Returns the number of consecutive bytes at the beginning + of ST that are in SKIP_SET. */ size_t -ds_span (const struct string *st, size_t ofs, const char skip_set[]) +ds_span (const struct string *st, struct substring skip_set) { - size_t i; - for (i = ofs; i < st->length; i++) - { - int c = st->string[i]; - if (strchr (skip_set, c) == NULL || c == '\0') - break; - } - return i - ofs; + return ss_span (ds_ss (st), skip_set); } -/* Returns the number of consecutive characters starting at OFS - in ST that are not in STOP_SET. (The null terminator is not - considered to be part of STOP_SET.) */ +/* Returns the number of consecutive bytes at the beginning + of ST that are not in STOP_SET. */ size_t -ds_cspan (const struct string *st, size_t ofs, const char stop_set[]) +ds_cspan (const struct string *st, struct substring stop_set) { - size_t i; - for (i = ofs; i < st->length; i++) - { - int c = st->string[i]; - if (strchr (stop_set, c) != NULL) - break; - } - return i - ofs; + return ss_cspan (ds_ss (st), stop_set); } -/* Appends to ST a newline-terminated line read from STREAM. - Newline is the last character of ST on return, unless an I/O error - or end of file is encountered after reading some characters. - Returns true if a line is successfully read, false if no characters at - all were read before an I/O error or end of file was - encountered. */ -bool -ds_gets (struct string *st, FILE *stream) +/* Returns the position of the first occurrence of byte C in + ST at or after position OFS, or SIZE_MAX if there is no such + occurrence. */ +size_t +ds_find_byte (const struct string *st, char c) { - int c; + return ss_find_byte (ds_ss (st), c); +} - c = getc (stream); - if (c == EOF) - return false; +/* Compares A and B and returns a strcmp()-type comparison + result. */ +int +ds_compare (const struct string *a, const struct string *b) +{ + return ss_compare (ds_ss (a), ds_ss (b)); +} - for (;;) - { - ds_putc (st, c); - if (c == '\n') - return true; +/* Returns the position in ST that the byte at P occupies. + P must point within ST or one past its end. */ +size_t +ds_pointer_to_position (const struct string *st, const char *p) +{ + return ss_pointer_to_position (ds_ss (st), p); +} + +/* Allocates and returns a null-terminated string that contains + ST. */ +char * +ds_xstrdup (const struct string *st) +{ + return ss_xstrdup (ds_ss (st)); +} + +/* Returns the allocation size of ST. */ +size_t +ds_capacity (const struct string *st) +{ + return st->capacity; +} + +/* Returns the value of ST as a null-terminated string. */ +char * +ds_cstr (const struct string *st_) +{ + struct string *st = CONST_CAST (struct string *, st_); + if (st->ss.string == NULL) + ds_extend (st, 1); + st->ss.string[st->ss.length] = '\0'; + return st->ss.string; +} + +/* Returns the value of ST as a null-terminated string and then + reinitialized ST as an empty string. The caller must free the + returned string with free(). */ +char * +ds_steal_cstr (struct string *st) +{ + char *s = ds_cstr (st); + ds_init_empty (st); + return s; +} + +/* Reads bytes from STREAM and appends them to ST, stopping + after MAX_LENGTH bytes, after appending a newline, or + after an I/O error or end of file was encountered, whichever + comes first. Returns true if at least one byte was added + to ST, false if no bytes were read before an I/O error or + end of file (or if MAX_LENGTH was 0). + + This function treats LF and CR LF sequences as new-line, + translating each of them to a single '\n' in ST. */ +bool +ds_read_line (struct string *st, FILE *stream, size_t max_length) +{ + size_t length; - c = getc (stream); - if (c == EOF) - return true; + for (length = 0; length < max_length; length++) + { + int c = getc (stream); + switch (c) + { + case EOF: + return length > 0; + + case '\n': + ds_put_byte (st, c); + return true; + + case '\r': + c = getc (stream); + if (c == '\n') + { + /* CR followed by LF is special: translate to \n. */ + ds_put_byte (st, '\n'); + return true; + } + else + { + /* CR followed by anything else is just CR. */ + ds_put_byte (st, '\r'); + if (c == EOF) + return true; + ungetc (c, stream); + } + break; + + default: + ds_put_byte (st, c); + } } + + return length > 0; } /* Removes a comment introduced by `#' from ST, @@ -629,8 +1470,8 @@ remove_comment (struct string *st) { char *cp; int quote = 0; - - for (cp = ds_c_str (st); cp < ds_end (st); cp++) + + for (cp = ds_data (st); cp < ds_end (st); cp++) if (quote) { if (*cp == quote) @@ -642,7 +1483,7 @@ remove_comment (struct string *st) quote = *cp; else if (*cp == '#') { - ds_truncate (st, cp - ds_c_str (st)); + ds_truncate (st, cp - ds_cstr (st)); break; } } @@ -654,219 +1495,251 @@ remove_comment (struct string *st) - Deletes comments introduced by `#' outside of single or double quotes. - - Deletes trailing white space. + - Deletes trailing white space. Returns true if a line was successfully read, false on failure. If LINE_NUMBER is non-null, then *LINE_NUMBER is incremented by the number of lines read. */ bool -ds_get_config_line (FILE *stream, struct string *st, int *line_number) +ds_read_config_line (struct string *st, int *line_number, FILE *stream) { ds_clear (st); do { - if (!ds_gets (st, stream)) + if (!ds_read_line (st, stream, SIZE_MAX)) return false; (*line_number)++; - ds_rtrim_spaces (st); + ds_rtrim (st, ss_cstr (CC_SPACES)); } - while (ds_chomp (st, '\\')); - + while (ds_chomp_byte (st, '\\')); + remove_comment (st); return true; } +/* Attempts to read SIZE * N bytes from STREAM and append them + to ST. + Returns true if all the requested data was read, false otherwise. */ +bool +ds_read_stream (struct string *st, size_t size, size_t n, FILE *stream) +{ + if (size != 0) + { + size_t try_bytes = xtimes (n, size); + if (size_in_bounds_p (xsum (ds_length (st), try_bytes))) + { + char *buffer = ds_put_uninit (st, try_bytes); + size_t got_bytes = fread (buffer, 1, try_bytes, stream); + ds_truncate (st, ds_length (st) - (try_bytes - got_bytes)); + return got_bytes == try_bytes; + } + else + { + errno = ENOMEM; + return false; + } + } + else + return true; +} + /* Concatenates S onto ST. */ void -ds_puts (struct string *st, const char *s) +ds_put_cstr (struct string *st, const char *s) { - size_t s_len; - - if (!s) return; - - s_len = strlen (s); - ds_extend (st, st->length + s_len); - strcpy (st->string + st->length, s); - st->length += s_len; + if (s != NULL) + ds_put_substring (st, ss_cstr (s)); } -/* Concatenates LEN characters from BUF onto ST. */ +/* Concatenates SS to ST. */ void -ds_concat (struct string *st, const char *buf, size_t len) +ds_put_substring (struct string *st, struct substring ss) { - ds_extend (st, st->length + len); - memcpy (st->string + st->length, buf, len); - st->length += len; + if (ss.length) + memcpy (ds_put_uninit (st, ss_length (ss)), ss_data (ss), ss_length (ss)); } /* Returns ds_end(ST) and THEN increases the length by INCR. */ char * -ds_append_uninit(struct string *st, size_t incr) +ds_put_uninit (struct string *st, size_t incr) { char *end; + ds_extend (st, ds_length (st) + incr); + end = ds_end (st); + st->ss.length += incr; + return end; +} - ds_extend(st, ds_length(st) + incr); +/* Moves the bytes in ST following offset OFS + OLD_LEN in ST to offset OFS + + NEW_LEN and returns the byte at offset OFS. The first min(OLD_LEN, NEW_LEN) + bytes at the returned position are unchanged; if NEW_LEN > OLD_LEN then the + following NEW_LEN - OLD_LEN bytes are initially indeterminate. - end = ds_end(st); + The intention is that the caller should write NEW_LEN bytes at the returned + position, to effectively replace the OLD_LEN bytes previously at that + position. */ +char * +ds_splice_uninit (struct string *st, + size_t ofs, size_t old_len, size_t new_len) +{ + if (new_len != old_len) + { + if (new_len > old_len) + ds_extend (st, ds_length (st) + (new_len - old_len)); - st->length += incr; - - return end; + assert (ds_length (st) >= ofs + old_len); + + memmove (ds_data (st) + (ofs + new_len), + ds_data (st) + (ofs + old_len), + ds_length (st) - (ofs + old_len)); + st->ss.length += new_len - old_len; + } + return ds_data (st) + ofs; } /* Formats FORMAT as a printf string and appends the result to ST. */ void -ds_printf (struct string *st, const char *format, ...) +ds_put_format (struct string *st, const char *format, ...) +{ + va_list args; + + va_start (args, format); + ds_put_vformat (st, format, args); + va_end (args); +} + +/* Formats FORMAT as a printf string as if in the C locale and appends the result to ST. */ +void +ds_put_c_format (struct string *st, const char *format, ...) { va_list args; va_start (args, format); - ds_vprintf(st, format, args); + ds_put_c_vformat (st, format, args); va_end (args); } /* Formats FORMAT as a printf string and appends the result to ST. */ void -ds_vprintf (struct string *st, const char *format, va_list args_) +ds_put_vformat (struct string *st, const char *format, va_list args_) { int avail, needed; va_list args; va_copy (args, args_); - avail = st->string != NULL ? st->capacity - st->length + 1 : 0; - needed = vsnprintf (st->string + st->length, avail, format, args); + avail = st->ss.string != NULL ? st->capacity - st->ss.length + 1 : 0; + needed = vsnprintf (st->ss.string + st->ss.length, avail, format, args); va_end (args); if (needed >= avail) { - ds_extend (st, st->length + needed); - va_copy (args, args_); - vsprintf (st->string + st->length, format, args); + vsnprintf (ds_put_uninit (st, needed), needed + 1, format, args); va_end (args); } - else + else { /* Some old libc's returned -1 when the destination string was too short. */ while (needed == -1) { ds_extend (st, (st->capacity + 1) * 2); - avail = st->capacity - st->length + 1; + avail = st->capacity - st->ss.length + 1; va_copy (args, args_); - needed = vsnprintf (st->string + st->length, avail, format, args); + needed = vsnprintf (ds_end (st), avail, format, args); va_end (args); - } + } + st->ss.length += needed; } - - st->length += needed; -} - -/* Appends character CH to ST. */ -void -ds_putc (struct string *st, int ch) -{ - if (st->length >= st->capacity) - ds_extend (st, st->length + 1); - st->string[st->length++] = ch; -} - -/* Appends CNT copies of character CH to ST. */ -void -ds_putc_multiple (struct string *st, int ch, size_t cnt) -{ - ds_extend (st, st->length + cnt); - memset (&st->string[st->length], ch, cnt); - st->length += cnt; } - -/* Lengthed strings. */ - -/* Creates a new lengthed string LS with contents as a copy of - S. */ +/* Formats FORMAT as a printf string, as if in the C locale, + and appends the result to ST. */ void -ls_create (struct fixed_string *ls, const char *s) +ds_put_c_vformat (struct string *st, const char *format, va_list args) { - ls->length = strlen (s); - ls->string = xmalloc (ls->length + 1); - memcpy (ls->string, s, ls->length + 1); + char buf[128]; + size_t len = sizeof buf; + char *output = c_vasnprintf (buf, &len, format, args); + if (output) + { + ds_put_cstr (st, output); + if (output != buf) + free (output); + } } -/* Creates a new lengthed string LS with contents as a copy of - BUFFER with length LEN. */ +/* Appends byte CH to ST. */ void -ls_create_buffer (struct fixed_string *ls, - const char *buffer, size_t len) +ds_put_byte (struct string *st, int ch) { - ls->length = len; - ls->string = xmalloc (len + 1); - memcpy (ls->string, buffer, len); - ls->string[len] = '\0'; + ds_put_uninit (st, 1)[0] = ch; } -/* Sets the fields of LS to the specified values. */ +/* Appends N copies of byte CH to ST. */ void -ls_init (struct fixed_string *ls, const char *string, size_t length) +ds_put_byte_multiple (struct string *st, int ch, size_t n) { - ls->string = (char *) string; - ls->length = length; + memset (ds_put_uninit (st, n), ch, n); } -/* Copies the fields of SRC to DST. */ +/* Appends Unicode code point UC to ST in UTF-8 encoding. */ void -ls_shallow_copy (struct fixed_string *dst, const struct fixed_string *src) +ds_put_unichar (struct string *st, ucs4_t uc) { - *dst = *src; + ds_extend (st, ds_length (st) + 6); + st->ss.length += u8_uctomb (CHAR_CAST (uint8_t *, ds_end (st)), uc, 6); } -/* Frees the memory backing LS. */ +/* Appends N copies of S to ST. */ void -ls_destroy (struct fixed_string *ls) +ds_put_substring_multiple (struct string *dst, struct substring src, size_t n) { - free (ls->string); + char *p = ds_put_uninit (dst, n * src.length); + for (size_t i = 0; i < n; i++) + { + memcpy (p, src.string, src.length); + p += src.length; + } } -/* Sets LS to a null pointer value. */ +/* If relocation has been enabled, replace ST, + with its relocated version */ void -ls_null (struct fixed_string *ls) +ds_relocate (struct string *st) { - ls->string = NULL; -} + const char *orig = ds_cstr (st); + const char *rel = relocate (orig); -/* Returns nonzero only if LS has a null pointer value. */ -int -ls_null_p (const struct fixed_string *ls) -{ - return ls->string == NULL; + if (orig != rel) + { + ds_clear (st); + ds_put_cstr (st, rel); + /* The documentation for relocate says that casting away const + and then freeing is appropriate ... */ + free (CONST_CAST (char *, rel)); + } } -/* Returns nonzero only if LS is a null pointer or has length 0. */ -int -ls_empty_p (const struct fixed_string *ls) -{ - return ls->string == NULL || ls->length == 0; -} -/* Returns the length of LS, which must not be null. */ -size_t -ls_length (const struct fixed_string *ls) -{ - return ls->length; -} + -/* Returns a pointer to the character string in LS. */ -char * -ls_c_str (const struct fixed_string *ls) -{ - return (char *) ls->string; -} +/* Operations on uint8_t "strings" */ -/* Returns a pointer to the null terminator of the character string in - LS. */ -char * -ls_end (const struct fixed_string *ls) +/* Copies buffer SRC, of SRC_SIZE bytes, to DST, of DST_SIZE bytes. + DST is truncated to DST_SIZE bytes or padded on the right with + copies of PAD as needed. */ +void +u8_buf_copy_rpad (uint8_t *dst, size_t dst_size, + const uint8_t *src, size_t src_size, + char pad) { - return (char *) (ls->string + ls->length); + if (src_size >= dst_size) + memmove (dst, src, dst_size); + else + { + memmove (dst, src, src_size); + memset (&dst[src_size], pad, dst_size - src_size); + } }