+
+/* Converts NUMBER into a string in 26-adic notation in BUFFER,
+ which has room for SIZE bytes. Returns true if successful,
+ false if NUMBER, plus a trailing null, is too large to fit in
+ the available space.
+
+ 26-adic notation is "spreadsheet column numbering": 1 = A, 2 =
+ B, 3 = C, ... 26 = Z, 27 = AA, 28 = AB, 29 = AC, ...
+
+ 26-adic notation is the special case of a k-adic numeration
+ system (aka bijective base-k numeration) with k=26. In k-adic
+ numeration, the digits are {1, 2, 3, ..., k} (there is no
+ digit 0), and integer 0 is represented by the empty string.
+ For more information, see
+ http://en.wikipedia.org/wiki/Bijective_numeration. */
+bool
+str_format_26adic (unsigned long int number, char buffer[], size_t size)
+{
+ size_t length = 0;
+
+ while (number-- > 0)
+ {
+ if (length >= size)
+ return false;
+ buffer[length++] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[number % 26];
+ number /= 26;
+ }
+
+ if (length >= size)
+ return false;
+ buffer[length] = '\0';
+
+ buf_reverse (buffer, length);
+ return true;
+}
+
+/* Sets the SIZE bytes starting at BLOCK to C,
+ and returns the byte following BLOCK. */
+void *
+mempset (void *block, int c, size_t size)
+{
+ memset (block, c, size);
+ return (char *) block + size;
+}
+\f
+/* Substrings. */
+
+/* Returns a substring whose contents are the CNT bytes
+ starting at the (0-based) position START in SS. */
+struct substring
+ss_substr (struct substring ss, size_t start, size_t cnt)
+{
+ if (start < ss.length)
+ return ss_buffer (ss.string + start, MIN (cnt, ss.length - start));
+ else
+ return ss_buffer (ss.string + ss.length, 0);
+}
+
+/* Returns a substring whose contents are the first CNT
+ bytes in SS. */
+struct substring
+ss_head (struct substring ss, size_t cnt)
+{
+ return ss_buffer (ss.string, MIN (cnt, ss.length));
+}
+
+/* Returns a substring whose contents are the last CNT bytes
+ in SS. */
+struct substring
+ss_tail (struct substring ss, size_t cnt)
+{
+ if (cnt < ss.length)
+ return ss_buffer (ss.string + (ss.length - cnt), cnt);
+ else
+ return ss;
+}
+
+/* Makes a malloc()'d, null-terminated copy of the contents of OLD
+ and stores it in NEW. */
+void
+ss_alloc_substring (struct substring *new, struct substring old)
+{
+ new->string = xmemdup0 (old.string, old.length);
+ new->length = old.length;
+}
+
+/* Allocates room for a CNT-byte string in NEW. */
+void
+ss_alloc_uninit (struct substring *new, size_t cnt)
+{
+ new->string = xmalloc (cnt);
+ new->length = cnt;
+}
+
+void
+ss_realloc (struct substring *ss, size_t size)
+{
+ ss->string = xrealloc (ss->string, size);
+}
+
+/* Makes a pool_alloc_unaligned()'d copy of the contents of OLD
+ in POOL, and stores it in NEW. */
+void
+ss_alloc_substring_pool (struct substring *new, struct substring old,
+ struct pool *pool)
+{
+ new->string = pool_alloc_unaligned (pool, old.length);
+ new->length = old.length;
+ memcpy (new->string, old.string, old.length);
+}
+
+/* Allocates room for a CNT-byte string in NEW in POOL. */
+void
+ss_alloc_uninit_pool (struct substring *new, size_t cnt, struct pool *pool)
+{
+ new->string = pool_alloc_unaligned (pool, cnt);
+ new->length = cnt;
+}
+
+/* Frees the string that SS points to. */
+void
+ss_dealloc (struct substring *ss)
+{
+ free (ss->string);
+}
+
+/* Truncates SS to at most CNT bytes in length. */
+void
+ss_truncate (struct substring *ss, size_t cnt)
+{
+ if (ss->length > cnt)
+ ss->length = cnt;
+}
+
+/* Removes trailing bytes in TRIM_SET from SS.
+ Returns number of bytes removed. */
+size_t
+ss_rtrim (struct substring *ss, struct substring trim_set)
+{
+ size_t cnt = 0;
+ while (cnt < ss->length
+ && ss_find_byte (trim_set,
+ ss->string[ss->length - cnt - 1]) != SIZE_MAX)
+ cnt++;
+ ss->length -= cnt;
+ return cnt;
+}
+
+/* Removes leading bytes in TRIM_SET from SS.
+ Returns number of bytes removed. */
+size_t
+ss_ltrim (struct substring *ss, struct substring trim_set)
+{
+ size_t cnt = ss_span (*ss, trim_set);
+ ss_advance (ss, cnt);
+ return cnt;
+}
+
+/* Trims leading and trailing bytes in TRIM_SET from SS. */
+void
+ss_trim (struct substring *ss, struct substring trim_set)
+{
+ ss_ltrim (ss, trim_set);
+ ss_rtrim (ss, trim_set);
+}
+
+/* If the last byte in SS is C, removes it and returns true.
+ Otherwise, returns false without changing the string. */
+bool
+ss_chomp_byte (struct substring *ss, char c)
+{
+ if (ss_last (*ss) == c)
+ {
+ ss->length--;
+ return true;
+ }
+ else
+ return false;
+}
+
+/* If SS ends with SUFFIX, removes it and returns true.
+ Otherwise, returns false without changing the string. */
+bool
+ss_chomp (struct substring *ss, struct substring suffix)
+{
+ if (ss_ends_with (*ss, suffix))
+ {
+ ss->length -= suffix.length;
+ return true;
+ }
+ else
+ return false;
+}
+
+/* Divides SS into tokens separated by any of the DELIMITERS.
+ Each call replaces TOKEN by the next token in SS, or by an
+ empty string if no tokens remain. Returns true if a token was
+ obtained, false otherwise.
+
+ Before the first call, initialize *SAVE_IDX to 0. Do not
+ modify *SAVE_IDX between calls.
+
+ SS divides into exactly one more tokens than it contains
+ delimiters. That is, a delimiter at the start or end of SS or
+ a pair of adjacent delimiters yields an empty token, and the
+ empty string contains a single token. */
+bool
+ss_separate (struct substring ss, struct substring delimiters,
+ size_t *save_idx, struct substring *token)
+{
+ if (*save_idx <= ss_length (ss))
+ {
+ struct substring tmp = ss_substr (ss, *save_idx, SIZE_MAX);
+ size_t length = ss_cspan (tmp, delimiters);
+ *token = ss_head (tmp, length);
+ *save_idx += length + 1;
+ return true;
+ }
+ else
+ {
+ *token = ss_empty ();
+ return false;
+ }
+}
+
+/* Divides SS into tokens separated by any of the DELIMITERS,
+ merging adjacent delimiters so that the empty string is never
+ produced as a token. Each call replaces TOKEN by the next
+ token in SS, or by an empty string if no tokens remain, and
+ then skips past the first delimiter following the token.
+ Returns true if a token was obtained, false otherwise.
+
+ Before the first call, initialize *SAVE_IDX to 0. Do not
+ modify *SAVE_IDX between calls. */
+bool
+ss_tokenize (struct substring ss, struct substring delimiters,
+ size_t *save_idx, struct substring *token)
+{
+ ss_advance (&ss, *save_idx);
+ *save_idx += ss_ltrim (&ss, delimiters);
+ ss_get_bytes (&ss, ss_cspan (ss, delimiters), token);
+ *save_idx += ss_length (*token) + 1;
+ return ss_length (*token) > 0;
+}
+
+/* Removes the first CNT bytes from SS. */
+void
+ss_advance (struct substring *ss, size_t cnt)
+{
+ if (cnt > ss->length)
+ cnt = ss->length;
+ ss->string += cnt;
+ ss->length -= cnt;
+}
+
+/* If the first byte in SS is C, removes it and returns true.
+ Otherwise, returns false without changing the string. */
+bool
+ss_match_byte (struct substring *ss, char c)
+{
+ if (ss_first (*ss) == c)
+ {
+ ss->string++;
+ ss->length--;
+ return true;
+ }
+ else
+ return false;
+}
+
+/* If the first byte in SS is in MATCH, removes it and
+ returns the byte that was removed.
+ Otherwise, returns EOF without changing the string. */
+int
+ss_match_byte_in (struct substring *ss, struct substring match)
+{
+ int c = EOF;
+ if (ss->length > 0
+ && memchr (match.string, ss->string[0], match.length) != NULL)
+ {
+ c = ss->string[0];
+ ss->string++;
+ ss->length--;
+ }
+ return c;
+}
+
+/* If SS begins with TARGET, removes it and returns true.
+ Otherwise, returns false without changing SS. */
+bool
+ss_match_string (struct substring *ss, const struct substring target)
+{
+ size_t length = ss_length (target);
+ if (ss_equals (ss_head (*ss, length), target))
+ {
+ ss_advance (ss, length);
+ return true;
+ }
+ else
+ return false;
+}
+
+/* Removes the first byte from SS and returns it.
+ If SS is empty, returns EOF without modifying SS. */
+int
+ss_get_byte (struct substring *ss)
+{
+ int c = ss_first (*ss);
+ if (c != EOF)
+ {
+ ss->string++;
+ ss->length--;
+ }
+ return c;
+}
+
+/* Stores the prefix of SS up to the first DELIMITER in OUT (if
+ any). Trims those same bytes from SS. DELIMITER is
+ removed from SS but not made part of OUT. Returns true if
+ DELIMITER was found (and removed), false otherwise. */
+bool
+ss_get_until (struct substring *ss, char delimiter, struct substring *out)
+{
+ ss_get_bytes (ss, ss_cspan (*ss, ss_buffer (&delimiter, 1)), out);
+ return ss_match_byte (ss, delimiter);
+}
+
+/* Stores the first CNT bytes in SS in OUT (or fewer, if SS
+ is shorter than CNT bytes). Trims the same bytes
+ from the beginning of SS. Returns CNT. */
+size_t
+ss_get_bytes (struct substring *ss, size_t cnt, struct substring *out)
+{
+ *out = ss_head (*ss, cnt);
+ ss_advance (ss, cnt);
+ return cnt;
+}
+
+/* Parses and removes an optionally signed decimal integer from
+ the beginning of SS. Returns 0 if an error occurred,
+ otherwise the number of bytes removed from SS. Stores
+ the integer's value into *VALUE. */
+size_t
+ss_get_long (struct substring *ss, long *value)
+{
+ char tmp[64];
+ size_t length;
+
+ length = ss_span (*ss, ss_cstr ("+-"));
+ length += ss_span (ss_substr (*ss, length, SIZE_MAX), ss_cstr (CC_DIGITS));
+ if (length > 0 && length < sizeof tmp)
+ {
+ char *tail;
+
+ memcpy (tmp, ss_data (*ss), length);
+ tmp[length] = '\0';
+
+ *value = strtol (tmp, &tail, 10);
+ if (tail - tmp == length)
+ {
+ ss_advance (ss, length);
+ return length;
+ }
+ }
+ *value = 0;
+ return 0;
+}
+
+/* Returns true if SS is empty (has length 0 bytes),
+ false otherwise. */
+bool
+ss_is_empty (struct substring ss)
+{
+ return ss.length == 0;
+}
+
+/* Returns the number of bytes in SS. */
+size_t
+ss_length (struct substring ss)
+{
+ return ss.length;
+}
+
+/* Returns a pointer to the bytes in SS. */
+char *
+ss_data (struct substring ss)
+{
+ return ss.string;
+}
+
+/* Returns a pointer just past the last byte in SS. */
+char *
+ss_end (struct substring ss)
+{
+ return ss.string + ss.length;
+}
+
+/* Returns the byte in position IDX in SS, as a value in the
+ range of unsigned char. Returns EOF if IDX is out of the
+ range of indexes for SS. */
+int
+ss_at (struct substring ss, size_t idx)
+{
+ return idx < ss.length ? (unsigned char) ss.string[idx] : EOF;
+}
+
+/* Returns the first byte in SS as a value in the range of
+ unsigned char. Returns EOF if SS is the empty string. */
+int
+ss_first (struct substring ss)
+{
+ return ss_at (ss, 0);
+}
+
+/* Returns the last byte in SS as a value in the range of
+ unsigned char. Returns EOF if SS is the empty string. */
+int
+ss_last (struct substring ss)
+{
+ return ss.length > 0 ? (unsigned char) ss.string[ss.length - 1] : EOF;
+}
+
+/* Returns true if SS ends with SUFFIX, false otherwise. */
+bool
+ss_ends_with (struct substring ss, struct substring suffix)
+{
+ return (ss.length >= suffix.length
+ && !memcmp (&ss.string[ss.length - suffix.length], suffix.string,
+ suffix.length));
+}
+
+/* Returns the number of contiguous bytes at the beginning
+ of SS that are in SKIP_SET. */
+size_t
+ss_span (struct substring ss, struct substring skip_set)
+{
+ size_t i;
+ for (i = 0; i < ss.length; i++)
+ if (ss_find_byte (skip_set, ss.string[i]) == SIZE_MAX)
+ break;
+ return i;
+}
+
+/* Returns the number of contiguous bytes at the beginning
+ of SS that are not in SKIP_SET. */
+size_t
+ss_cspan (struct substring ss, struct substring stop_set)
+{
+ size_t i;
+ for (i = 0; i < ss.length; i++)
+ if (ss_find_byte (stop_set, ss.string[i]) != SIZE_MAX)
+ break;
+ return i;
+}
+
+/* Returns the offset in SS of the first instance of C,
+ or SIZE_MAX if C does not occur in SS. */
+size_t
+ss_find_byte (struct substring ss, char c)
+{
+ const char *p = memchr (ss.string, c, ss.length);
+ return p != NULL ? p - ss.string : SIZE_MAX;
+}
+
+/* Compares A and B and returns a strcmp()-type comparison
+ result. */
+int
+ss_compare (struct substring a, struct substring b)
+{
+ int retval = memcmp (a.string, b.string, MIN (a.length, b.length));
+ if (retval == 0)
+ retval = a.length < b.length ? -1 : a.length > b.length;
+ return retval;
+}
+
+/* Compares A and B case-insensitively and returns a
+ strcmp()-type comparison result. */
+int
+ss_compare_case (struct substring a, struct substring b)
+{
+ int retval = memcasecmp (a.string, b.string, MIN (a.length, b.length));
+ if (retval == 0)
+ retval = a.length < b.length ? -1 : a.length > b.length;
+ return retval;
+}
+
+/* Compares A and B and returns true if their contents are
+ identical, false otherwise. */
+int
+ss_equals (struct substring a, struct substring b)
+{
+ return a.length == b.length && !memcmp (a.string, b.string, a.length);
+}
+
+/* Compares A and B and returns true if their contents are
+ identical except possibly for case differences, false
+ otherwise. */
+int
+ss_equals_case (struct substring a, struct substring b)
+{
+ return a.length == b.length && !memcasecmp (a.string, b.string, a.length);
+}
+
+/* Returns the position in SS that the byte at P occupies.
+ P must point within SS or one past its end. */
+size_t
+ss_pointer_to_position (struct substring ss, const char *p)
+{
+ size_t pos = p - ss.string;
+ assert (pos <= ss.length);
+ return pos;
+}
+
+/* Allocates and returns a null-terminated string that contains
+ SS. */
+char *
+ss_xstrdup (struct substring ss)
+{
+ char *s = xmalloc (ss.length + 1);
+ memcpy (s, ss.string, ss.length);
+ s[ss.length] = '\0';
+ return s;
+}
+/* UTF-8. */
+
+/* Returns the character represented by the UTF-8 sequence at the start of S.
+ The return value is either a Unicode code point in the range 0 to 0x10ffff,
+ or UINT32_MAX if S is empty. */
+ucs4_t
+ss_first_mb (struct substring s)
+{
+ return ss_at_mb (s, 0);
+}
+
+/* Returns the number of bytes in the UTF-8 character at the beginning of S.
+
+ The return value is 0 if S is empty, otherwise between 1 and 4. */
+int
+ss_first_mblen (struct substring s)
+{
+ return ss_at_mblen (s, 0);
+}
+
+/* Advances S past the UTF-8 character at its beginning. Returns the Unicode
+ code point that was skipped (in the range 0 to 0x10ffff), or UINT32_MAX if S
+ was not modified because it was initially empty. */
+ucs4_t
+ss_get_mb (struct substring *s)
+{
+ if (s->length > 0)
+ {
+ ucs4_t uc;
+ int n;
+
+ n = u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, s->string), s->length);
+ s->string += n;
+ s->length -= n;
+ return uc;
+ }
+ else
+ return UINT32_MAX;
+}
+
+/* Returns the character represented by the UTF-8 sequence starting OFS bytes
+ into S. The return value is either a Unicode code point in the range 0 to
+ 0x10ffff, or UINT32_MAX if OFS is past the last byte in S.
+
+ (Returns 0xfffd if OFS points into the middle, not the beginning, of a UTF-8
+ sequence.) */
+ucs4_t
+ss_at_mb (struct substring s, size_t ofs)
+{
+ if (s.length > ofs)
+ {
+ ucs4_t uc;
+ u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, s.string + ofs),
+ s.length - ofs);
+ return uc;
+ }
+ else
+ return UINT32_MAX;
+}
+
+/* Returns the number of bytes represented by the UTF-8 sequence starting OFS
+ bytes into S. The return value is 0 if OFS is past the last byte in S,
+ otherwise between 1 and 4. */
+int
+ss_at_mblen (struct substring s, size_t ofs)
+{
+ if (s.length > ofs)
+ {
+ ucs4_t uc;
+ return u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, s.string + ofs),
+ s.length - ofs);
+ }
+ else
+ return 0;
+}