1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2009, 2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
26 #include <libpspp/cast.h>
27 #include <libpspp/message.h>
28 #include <libpspp/pool.h>
30 #include <relocatable.h>
/* Reverses the order of the NBYTES bytes at address P in place,
   thus converting between little- and big-endian byte orders.

   A buffer of zero or one byte is already its own reverse, so it
   is left alone.  Returning early also avoids computing a
   pointer to P[NBYTES - 1] when NBYTES is 0, which would lie
   before the start of the buffer. */
void
buf_reverse (char *p, size_t nbytes)
{
  char *h, *t;

  if (nbytes < 2)
    return;

  /* Swap the outermost pair and walk both ends toward the
     middle. */
  for (h = p, t = p + (nbytes - 1); h < t; h++, t--)
    {
      char temp = *h;
      *h = *t;
      *t = temp;
    }
}
/* Finds the last occurrence of NEEDLE, of length NEEDLE_LEN, in
   HAYSTACK, of length HAYSTACK_LEN.  Returns a pointer to the
   start of the occurrence found, or a null pointer if NEEDLE
   does not occur in HAYSTACK (in particular, whenever NEEDLE is
   longer than HAYSTACK).

   An empty NEEDLE matches at the very end of HAYSTACK. */
char *
buf_find_reverse (const char *haystack, size_t haystack_len,
                  const char *needle, size_t needle_len)
{
  size_t i;

  /* Checked up front so that the subtraction below cannot wrap
     around. */
  if (needle_len > haystack_len)
    return NULL;

  /* Count down with an unsigned index: I is one past the
     candidate offset when tested and is decremented before it
     is used. */
  for (i = haystack_len - needle_len + 1; i-- > 0; )
    if (!memcmp (needle, &haystack[i], needle_len))
      return (char *) &haystack[i];

  return NULL;
}
/* Compares the SIZE bytes in A_ to those in B_, disregarding
   case, and returns a strcmp()-type result: 0 if the buffers are
   equal, 1 if the first differing byte is greater in A_, and -1
   if it is greater in B_.

   Bytes are read as unsigned char so that they are valid
   arguments to toupper(). */
int
buf_compare_case (const char *a_, const char *b_, size_t size)
{
  const unsigned char *a = (const unsigned char *) a_;
  const unsigned char *b = (const unsigned char *) b_;

  while (size-- > 0)
    {
      unsigned char ac = toupper (*a++);
      unsigned char bc = toupper (*b++);

      if (ac != bc)
        return ac > bc ? 1 : -1;
    }

  return 0;
}
/* Compares A of length A_LEN to B of length B_LEN.  The shorter
   string is considered to be padded with spaces to the length of
   the longer.  Returns a strcmp()-type result.

   The common prefix is compared with memcmp(), which treats
   bytes as unsigned char.  The tail of the longer string is
   compared against the space padding in the same unsigned
   domain, so that bytes with the high bit set order the same
   way whether they fall in the prefix or in the tail. */
int
buf_compare_rpad (const char *a, size_t a_len, const char *b, size_t b_len)
{
  size_t min_len;
  size_t idx;
  int result;

  min_len = a_len < b_len ? a_len : b_len;
  result = memcmp (a, b, min_len);
  if (result != 0)
    return result;

  if (a_len < b_len)
    {
      /* A is the shorter string: its virtual padding is compared
         against the rest of B. */
      for (idx = min_len; idx < b_len; idx++)
        {
          unsigned char bc = (unsigned char) b[idx];
          if (bc != ' ')
            return ' ' > bc ? 1 : -1;
        }
    }
  else
    {
      /* B is the shorter string (or the lengths are equal): the
         rest of A is compared against B's virtual padding. */
      for (idx = min_len; idx < a_len; idx++)
        {
          unsigned char ac = (unsigned char) a[idx];
          if (ac != ' ')
            return ac > ' ' ? 1 : -1;
        }
    }

  return 0;
}
/* Compares string A to string B.  The shorter string is
119 considered to be padded with spaces to the length of the
122 str_compare_rpad (const char *a, const char *b)
124 return buf_compare_rpad (a, strlen (a), b, strlen (b));
127 /* Copies string SRC to buffer DST, of size DST_SIZE bytes.
128 DST is truncated to DST_SIZE bytes or padded on the right with
129 copies of PAD as needed. */
131 buf_copy_str_rpad (char *dst, size_t dst_size, const char *src, char pad)
133 size_t src_len = strlen (src);
134 if (src_len >= dst_size)
135 memcpy (dst, src, dst_size);
138 memcpy (dst, src, src_len);
139 memset (&dst[src_len], pad, dst_size - src_len);
143 /* Copies string SRC to buffer DST, of size DST_SIZE bytes.
144 DST is truncated to DST_SIZE bytes or padded on the left with
145 copies of PAD as needed. */
147 buf_copy_str_lpad (char *dst, size_t dst_size, const char *src, char pad)
149 size_t src_len = strlen (src);
150 if (src_len >= dst_size)
151 memcpy (dst, src, dst_size);
154 size_t pad_cnt = dst_size - src_len;
155 memset (&dst[0], pad, pad_cnt);
156 memcpy (dst + pad_cnt, src, src_len);
160 /* Copies buffer SRC, of SRC_SIZE bytes, to DST, of DST_SIZE bytes.
161 DST is truncated to DST_SIZE bytes or padded on the left with
162 copies of PAD as needed. */
164 buf_copy_lpad (char *dst, size_t dst_size,
165 const char *src, size_t src_size,
168 if (src_size >= dst_size)
169 memmove (dst, src, dst_size);
172 memset (dst, pad, dst_size - src_size);
173 memmove (&dst[dst_size - src_size], src, src_size);
177 /* Copies buffer SRC, of SRC_SIZE bytes, to DST, of DST_SIZE bytes.
178 DST is truncated to DST_SIZE bytes or padded on the right with
179 copies of PAD as needed. */
181 buf_copy_rpad (char *dst, size_t dst_size,
182 const char *src, size_t src_size,
185 if (src_size >= dst_size)
186 memmove (dst, src, dst_size);
189 memmove (dst, src, src_size);
190 memset (&dst[src_size], pad, dst_size - src_size);
194 /* Copies string SRC to string DST, which is in a buffer DST_SIZE
196 Truncates DST to DST_SIZE - 1 characters or right-pads with
197 spaces to DST_SIZE - 1 characters if necessary. */
199 str_copy_rpad (char *dst, size_t dst_size, const char *src)
203 size_t src_len = strlen (src);
204 if (src_len < dst_size - 1)
206 memcpy (dst, src, src_len);
207 memset (&dst[src_len], ' ', dst_size - 1 - src_len);
210 memcpy (dst, src, dst_size - 1);
211 dst[dst_size - 1] = 0;
215 /* Copies SRC to DST, which is in a buffer DST_SIZE bytes long.
216 Truncates DST to DST_SIZE - 1 characters, if necessary. */
218 str_copy_trunc (char *dst, size_t dst_size, const char *src)
220 size_t src_len = strlen (src);
221 assert (dst_size > 0);
222 if (src_len + 1 < dst_size)
223 memcpy (dst, src, src_len + 1);
226 memcpy (dst, src, dst_size - 1);
227 dst[dst_size - 1] = '\0';
231 /* Copies buffer SRC, of SRC_LEN bytes,
232 to DST, which is in a buffer DST_SIZE bytes long.
233 Truncates DST to DST_SIZE - 1 characters, if necessary. */
235 str_copy_buf_trunc (char *dst, size_t dst_size,
236 const char *src, size_t src_size)
239 assert (dst_size > 0);
241 dst_len = src_size < dst_size ? src_size : dst_size - 1;
242 memcpy (dst, src, dst_len);
246 /* Converts each character in S to uppercase. */
248 str_uppercase (char *s)
250 for (; *s != '\0'; s++)
251 *s = toupper ((unsigned char) *s);
254 /* Converts each character in S to lowercase. */
256 str_lowercase (char *s)
258 for (; *s != '\0'; s++)
259 *s = tolower ((unsigned char) *s);
262 /* Converts NUMBER into a string in 26-adic notation in BUFFER,
263 which has room for SIZE bytes. Returns true if successful,
264 false if NUMBER, plus a trailing null, is too large to fit in
267 26-adic notation is "spreadsheet column numbering": 1 = A, 2 =
268 B, 3 = C, ... 26 = Z, 27 = AA, 28 = AB, 29 = AC, ...
270 26-adic notation is the special case of a k-adic numeration
271 system (aka bijective base-k numeration) with k=26. In k-adic
272 numeration, the digits are {1, 2, 3, ..., k} (there is no
273 digit 0), and integer 0 is represented by the empty string.
274 For more information, see
275 http://en.wikipedia.org/wiki/Bijective_numeration. */
277 str_format_26adic (unsigned long int number, char buffer[], size_t size)
285 buffer[length++] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[number % 26];
291 buffer[length] = '\0';
293 buf_reverse (buffer, length);
297 /* Formats FORMAT into DST, as with sprintf(), and returns the
298 address of the terminating null written to DST. */
300 spprintf (char *dst, const char *format, ...)
305 va_start (args, format);
306 count = vsprintf (dst, format, args);
312 /* Sets the SIZE bytes starting at BLOCK to C,
313 and returns the byte following BLOCK. */
315 mempset (void *block, int c, size_t size)
317 memset (block, c, size);
318 return (char *) block + size;
323 /* Returns an empty substring. */
333 /* Returns a substring whose contents are the given C-style
336 ss_cstr (const char *cstr)
338 return ss_buffer (cstr, strlen (cstr));
341 /* Returns a substring whose contents are the CNT characters in
344 ss_buffer (const char *buffer, size_t cnt)
347 ss.string = (char *) buffer;
352 /* Returns a substring whose contents are the CNT characters
353 starting at the (0-based) position START in SS. */
355 ss_substr (struct substring ss, size_t start, size_t cnt)
357 if (start < ss.length)
358 return ss_buffer (ss.string + start, MIN (cnt, ss.length - start));
360 return ss_buffer (ss.string + ss.length, 0);
363 /* Returns a substring whose contents are the first CNT
366 ss_head (struct substring ss, size_t cnt)
368 return ss_buffer (ss.string, MIN (cnt, ss.length));
371 /* Returns a substring whose contents are the last CNT characters
374 ss_tail (struct substring ss, size_t cnt)
377 return ss_buffer (ss.string + (ss.length - cnt), cnt);
382 /* Makes a malloc()'d copy of the contents of OLD
383 and stores it in NEW. */
385 ss_alloc_substring (struct substring *new, struct substring old)
387 new->string = xmalloc (old.length);
388 new->length = old.length;
389 memcpy (new->string, old.string, old.length);
392 /* Allocates room for a CNT-character string in NEW. */
394 ss_alloc_uninit (struct substring *new, size_t cnt)
396 new->string = xmalloc (cnt);
400 /* Makes a pool_alloc_unaligned()'d copy of the contents of OLD
401 in POOL, and stores it in NEW. */
403 ss_alloc_substring_pool (struct substring *new, struct substring old,
406 new->string = pool_alloc_unaligned (pool, old.length);
407 new->length = old.length;
408 memcpy (new->string, old.string, old.length);
411 /* Allocates room for a CNT-character string in NEW in POOL. */
413 ss_alloc_uninit_pool (struct substring *new, size_t cnt, struct pool *pool)
415 new->string = pool_alloc_unaligned (pool, cnt);
419 /* Frees the string that SS points to. */
421 ss_dealloc (struct substring *ss)
426 /* Truncates SS to at most CNT characters in length. */
428 ss_truncate (struct substring *ss, size_t cnt)
430 if (ss->length > cnt)
434 /* Removes trailing characters in TRIM_SET from SS.
435 Returns number of characters removed. */
437 ss_rtrim (struct substring *ss, struct substring trim_set)
440 while (cnt < ss->length
441 && ss_find_char (trim_set,
442 ss->string[ss->length - cnt - 1]) != SIZE_MAX)
448 /* Removes leading characters in TRIM_SET from SS.
449 Returns number of characters removed. */
451 ss_ltrim (struct substring *ss, struct substring trim_set)
453 size_t cnt = ss_span (*ss, trim_set);
454 ss_advance (ss, cnt);
458 /* Trims leading and trailing characters in TRIM_SET from SS. */
460 ss_trim (struct substring *ss, struct substring trim_set)
462 ss_ltrim (ss, trim_set);
463 ss_rtrim (ss, trim_set);
466 /* If the last character in SS is C, removes it and returns true.
467 Otherwise, returns false without changing the string. */
469 ss_chomp (struct substring *ss, char c)
471 if (ss_last (*ss) == c)
480 /* Divides SS into tokens separated by any of the DELIMITERS.
481 Each call replaces TOKEN by the next token in SS, or by an
482 empty string if no tokens remain. Returns true if a token was
483 obtained, false otherwise.
485 Before the first call, initialize *SAVE_IDX to 0. Do not
486 modify *SAVE_IDX between calls.
   SS divides into exactly one more token than it contains
489 delimiters. That is, a delimiter at the start or end of SS or
490 a pair of adjacent delimiters yields an empty token, and the
491 empty string contains a single token. */
493 ss_separate (struct substring ss, struct substring delimiters,
494 size_t *save_idx, struct substring *token)
496 if (*save_idx <= ss_length (ss))
498 struct substring tmp = ss_substr (ss, *save_idx, SIZE_MAX);
499 size_t length = ss_cspan (tmp, delimiters);
500 *token = ss_head (tmp, length);
501 *save_idx += length + 1;
506 *token = ss_empty ();
511 /* Divides SS into tokens separated by any of the DELIMITERS,
512 merging adjacent delimiters so that the empty string is never
513 produced as a token. Each call replaces TOKEN by the next
514 token in SS, or by an empty string if no tokens remain, and
515 then skips past the first delimiter following the token.
516 Returns true if a token was obtained, false otherwise.
518 Before the first call, initialize *SAVE_IDX to 0. Do not
519 modify *SAVE_IDX between calls. */
521 ss_tokenize (struct substring ss, struct substring delimiters,
522 size_t *save_idx, struct substring *token)
524 ss_advance (&ss, *save_idx);
525 *save_idx += ss_ltrim (&ss, delimiters);
526 ss_get_chars (&ss, ss_cspan (ss, delimiters), token);
527 *save_idx += ss_length (*token) + 1;
528 return ss_length (*token) > 0;
531 /* Removes the first CNT characters from SS. */
533 ss_advance (struct substring *ss, size_t cnt)
535 if (cnt > ss->length)
541 /* If the first character in SS is C, removes it and returns true.
542 Otherwise, returns false without changing the string. */
544 ss_match_char (struct substring *ss, char c)
546 if (ss_first (*ss) == c)
556 /* If the first character in SS is in MATCH, removes it and
557 returns the character that was removed.
558 Otherwise, returns EOF without changing the string. */
560 ss_match_char_in (struct substring *ss, struct substring match)
564 && memchr (match.string, ss->string[0], match.length) != NULL)
573 /* If SS begins with TARGET, removes it and returns true.
574 Otherwise, returns false without changing SS. */
576 ss_match_string (struct substring *ss, const struct substring target)
578 size_t length = ss_length (target);
579 if (ss_equals (ss_head (*ss, length), target))
581 ss_advance (ss, length);
588 /* Removes the first character from SS and returns it.
589 If SS is empty, returns EOF without modifying SS. */
591 ss_get_char (struct substring *ss)
593 int c = ss_first (*ss);
602 /* Stores the prefix of SS up to the first DELIMITER in OUT (if
603 any). Trims those same characters from SS. DELIMITER is
604 removed from SS but not made part of OUT. Returns true if
605 DELIMITER was found (and removed), false otherwise. */
607 ss_get_until (struct substring *ss, char delimiter, struct substring *out)
609 ss_get_chars (ss, ss_cspan (*ss, ss_buffer (&delimiter, 1)), out);
610 return ss_match_char (ss, delimiter);
613 /* Stores the first CNT characters in SS in OUT (or fewer, if SS
614 is shorter than CNT characters). Trims the same characters
615 from the beginning of SS. Returns CNT. */
617 ss_get_chars (struct substring *ss, size_t cnt, struct substring *out)
619 *out = ss_head (*ss, cnt);
620 ss_advance (ss, cnt);
624 /* Parses and removes an optionally signed decimal integer from
625 the beginning of SS. Returns 0 if an error occurred,
626 otherwise the number of characters removed from SS. Stores
627 the integer's value into *VALUE. */
629 ss_get_long (struct substring *ss, long *value)
634 length = ss_span (*ss, ss_cstr ("+-"));
635 length += ss_span (ss_substr (*ss, length, SIZE_MAX), ss_cstr (CC_DIGITS));
636 if (length > 0 && length < sizeof tmp)
640 memcpy (tmp, ss_data (*ss), length);
643 *value = strtol (tmp, &tail, 10);
644 if (tail - tmp == length)
646 ss_advance (ss, length);
654 /* Returns true if SS is empty (contains no characters),
657 ss_is_empty (struct substring ss)
659 return ss.length == 0;
662 /* Returns the number of characters in SS. */
664 ss_length (struct substring ss)
669 /* Returns a pointer to the characters in SS. */
671 ss_data (struct substring ss)
676 /* Returns a pointer just past the last character in SS. */
678 ss_end (struct substring ss)
680 return ss.string + ss.length;
683 /* Returns the character in position IDX in SS, as a value in the
684 range of unsigned char. Returns EOF if IDX is out of the
685 range of indexes for SS. */
687 ss_at (struct substring ss, size_t idx)
689 return idx < ss.length ? (unsigned char) ss.string[idx] : EOF;
692 /* Returns the first character in SS as a value in the range of
693 unsigned char. Returns EOF if SS is the empty string. */
695 ss_first (struct substring ss)
697 return ss_at (ss, 0);
700 /* Returns the last character in SS as a value in the range of
701 unsigned char. Returns EOF if SS is the empty string. */
703 ss_last (struct substring ss)
705 return ss.length > 0 ? (unsigned char) ss.string[ss.length - 1] : EOF;
708 /* Returns the number of contiguous characters at the beginning
709 of SS that are in SKIP_SET. */
711 ss_span (struct substring ss, struct substring skip_set)
714 for (i = 0; i < ss.length; i++)
715 if (ss_find_char (skip_set, ss.string[i]) == SIZE_MAX)
720 /* Returns the number of contiguous characters at the beginning
   of SS that are not in STOP_SET. */
723 ss_cspan (struct substring ss, struct substring stop_set)
726 for (i = 0; i < ss.length; i++)
727 if (ss_find_char (stop_set, ss.string[i]) != SIZE_MAX)
732 /* Returns the offset in SS of the first instance of C,
733 or SIZE_MAX if C does not occur in SS. */
735 ss_find_char (struct substring ss, char c)
737 const char *p = memchr (ss.string, c, ss.length);
738 return p != NULL ? p - ss.string : SIZE_MAX;
741 /* Compares A and B and returns a strcmp()-type comparison
744 ss_compare (struct substring a, struct substring b)
746 int retval = memcmp (a.string, b.string, MIN (a.length, b.length));
748 retval = a.length < b.length ? -1 : a.length > b.length;
752 /* Compares A and B case-insensitively and returns a
753 strcmp()-type comparison result. */
755 ss_compare_case (struct substring a, struct substring b)
757 int retval = memcasecmp (a.string, b.string, MIN (a.length, b.length));
759 retval = a.length < b.length ? -1 : a.length > b.length;
763 /* Compares A and B and returns true if their contents are
764 identical, false otherwise. */
766 ss_equals (struct substring a, struct substring b)
768 return a.length == b.length && !memcmp (a.string, b.string, a.length);
771 /* Compares A and B and returns true if their contents are
772 identical except possibly for case differences, false
775 ss_equals_case (struct substring a, struct substring b)
777 return a.length == b.length && !memcasecmp (a.string, b.string, a.length);
780 /* Returns the position in SS that the character at P occupies.
781 P must point within SS or one past its end. */
783 ss_pointer_to_position (struct substring ss, const char *p)
785 size_t pos = p - ss.string;
786 assert (pos <= ss.length);
790 /* Allocates and returns a null-terminated string that contains
793 ss_xstrdup (struct substring ss)
795 char *s = xmalloc (ss.length + 1);
796 memcpy (s, ss.string, ss.length);
801 /* Initializes ST as an empty string. */
803 ds_init_empty (struct string *st)
805 st->ss = ss_empty ();
809 /* Initializes ST with initial contents S. */
811 ds_init_string (struct string *st, const struct string *s)
813 ds_init_substring (st, ds_ss (s));
816 /* Initializes ST with initial contents SS. */
818 ds_init_substring (struct string *st, struct substring ss)
820 st->capacity = MAX (8, ss.length * 2);
821 st->ss.string = xmalloc (st->capacity + 1);
822 memcpy (st->ss.string, ss.string, ss.length);
823 st->ss.length = ss.length;
826 /* Initializes ST with initial contents S. */
828 ds_init_cstr (struct string *st, const char *s)
830 ds_init_substring (st, ss_cstr (s));
835 ds_destroy (struct string *st)
839 ss_dealloc (&st->ss);
840 st->ss.string = NULL;
846 /* Swaps the contents of strings A and B. */
848 ds_swap (struct string *a, struct string *b)
850 struct string tmp = *a;
855 /* Helper function for ds_register_pool. */
857 free_string (void *st_)
859 struct string *st = st_;
863 /* Arranges for ST to be destroyed automatically as part of
866 ds_register_pool (struct string *st, struct pool *pool)
868 pool_register (pool, free_string, st);
871 /* Cancels the arrangement for ST to be destroyed automatically
874 ds_unregister_pool (struct string *st, struct pool *pool)
876 pool_unregister (pool, st);
879 /* Copies SRC into DST.
880 DST and SRC may be the same string. */
882 ds_assign_string (struct string *dst, const struct string *src)
884 ds_assign_substring (dst, ds_ss (src));
887 /* Replaces DST by SS.
888 SS may be a substring of DST. */
890 ds_assign_substring (struct string *dst, struct substring ss)
892 dst->ss.length = ss.length;
893 ds_extend (dst, ss.length);
894 memmove (dst->ss.string, ss.string, ss.length);
897 /* Replaces DST by null-terminated string SRC. SRC may overlap
900 ds_assign_cstr (struct string *dst, const char *src)
902 ds_assign_substring (dst, ss_cstr (src));
905 /* Truncates ST to zero length. */
907 ds_clear (struct string *st)
912 /* Returns a substring that contains ST. */
914 ds_ss (const struct string *st)
919 /* Returns a substring that contains CNT characters from ST
920 starting at position START.
922 If START is greater than or equal to the length of ST, then
923 the substring will be the empty string. If START + CNT
924 exceeds the length of ST, then the substring will only be
925 ds_length(ST) - START characters long. */
927 ds_substr (const struct string *st, size_t start, size_t cnt)
929 return ss_substr (ds_ss (st), start, cnt);
932 /* Returns a substring that contains the first CNT characters in
933 ST. If CNT exceeds the length of ST, then the substring will
934 contain all of ST. */
936 ds_head (const struct string *st, size_t cnt)
938 return ss_head (ds_ss (st), cnt);
941 /* Returns a substring that contains the last CNT characters in
942 ST. If CNT exceeds the length of ST, then the substring will
943 contain all of ST. */
945 ds_tail (const struct string *st, size_t cnt)
947 return ss_tail (ds_ss (st), cnt);
950 /* Ensures that ST can hold at least MIN_CAPACITY characters plus a null
953 ds_extend (struct string *st, size_t min_capacity)
955 if (min_capacity > st->capacity)
958 if (st->capacity < min_capacity)
959 st->capacity = 2 * min_capacity;
961 st->ss.string = xrealloc (st->ss.string, st->capacity + 1);
/* Shrinks ST to the minimum capacity needed to contain its content. */
967 ds_shrink (struct string *st)
969 if (st->capacity != st->ss.length)
971 st->capacity = st->ss.length;
972 st->ss.string = xrealloc (st->ss.string, st->capacity + 1);
976 /* Truncates ST to at most LENGTH characters long. */
978 ds_truncate (struct string *st, size_t length)
980 ss_truncate (&st->ss, length);
983 /* Removes trailing characters in TRIM_SET from ST.
984 Returns number of characters removed. */
986 ds_rtrim (struct string *st, struct substring trim_set)
988 return ss_rtrim (&st->ss, trim_set);
991 /* Removes leading characters in TRIM_SET from ST.
992 Returns number of characters removed. */
994 ds_ltrim (struct string *st, struct substring trim_set)
996 size_t cnt = ds_span (st, trim_set);
998 ds_assign_substring (st, ds_substr (st, cnt, SIZE_MAX));
1002 /* Trims leading and trailing characters in TRIM_SET from ST.
   Returns number of characters removed. */
1005 ds_trim (struct string *st, struct substring trim_set)
1007 size_t cnt = ds_rtrim (st, trim_set);
1008 return cnt + ds_ltrim (st, trim_set);
1011 /* If the last character in ST is C, removes it and returns true.
1012 Otherwise, returns false without modifying ST. */
1014 ds_chomp (struct string *st, char c)
1016 return ss_chomp (&st->ss, c);
1019 /* Divides ST into tokens separated by any of the DELIMITERS.
1020 Each call replaces TOKEN by the next token in ST, or by an
1021 empty string if no tokens remain. Returns true if a token was
1022 obtained, false otherwise.
1024 Before the first call, initialize *SAVE_IDX to 0. Do not
1025 modify *SAVE_IDX between calls.
   ST divides into exactly one more token than it contains
1028 delimiters. That is, a delimiter at the start or end of ST or
1029 a pair of adjacent delimiters yields an empty token, and the
1030 empty string contains a single token. */
1032 ds_separate (const struct string *st, struct substring delimiters,
1033 size_t *save_idx, struct substring *token)
1035 return ss_separate (ds_ss (st), delimiters, save_idx, token);
1038 /* Divides ST into tokens separated by any of the DELIMITERS,
1039 merging adjacent delimiters so that the empty string is never
1040 produced as a token. Each call replaces TOKEN by the next
1041 token in ST, or by an empty string if no tokens remain.
1042 Returns true if a token was obtained, false otherwise.
1044 Before the first call, initialize *SAVE_IDX to 0. Do not
1045 modify *SAVE_IDX between calls. */
1047 ds_tokenize (const struct string *st, struct substring delimiters,
1048 size_t *save_idx, struct substring *token)
1050 return ss_tokenize (ds_ss (st), delimiters, save_idx, token);
1053 /* Pad ST on the right with copies of PAD until ST is at least
1054 LENGTH characters in size. If ST is initially LENGTH
1055 characters or longer, this is a no-op. */
1057 ds_rpad (struct string *st, size_t length, char pad)
1059 if (length > st->ss.length)
1060 ds_put_char_multiple (st, pad, length - st->ss.length);
1063 /* Sets the length of ST to exactly NEW_LENGTH,
1064 either by truncating characters from the end,
1065 or by padding on the right with PAD. */
1067 ds_set_length (struct string *st, size_t new_length, char pad)
1069 if (st->ss.length < new_length)
1070 ds_rpad (st, new_length, pad);
1072 st->ss.length = new_length;
1075 /* Removes N characters from ST starting at offset START. */
1077 ds_remove (struct string *st, size_t start, size_t n)
1079 if (n > 0 && start < st->ss.length)
1081 if (st->ss.length - start <= n)
1083 /* All characters at or beyond START are deleted. */
1084 st->ss.length = start;
1088 /* Some characters remain and must be shifted into
1090 memmove (st->ss.string + st->ss.length,
1091 st->ss.string + st->ss.length + n,
1092 st->ss.length - start - n);
1098 /* There are no characters to delete or no characters at or
1099 beyond START, hence deletion is a no-op. */
1103 /* Returns true if ST is empty, false otherwise. */
1105 ds_is_empty (const struct string *st)
1107 return ss_is_empty (st->ss);
1110 /* Returns the length of ST. */
1112 ds_length (const struct string *st)
1114 return ss_length (ds_ss (st));
1117 /* Returns the string data inside ST. */
1119 ds_data (const struct string *st)
1121 return ss_data (ds_ss (st));
/* Returns a pointer to the null terminator of ST.
   This might not be an actual null character unless ds_cstr() has
   been called since the last modification to ST. */
1128 ds_end (const struct string *st)
1130 return ss_end (ds_ss (st));
1133 /* Returns the character in position IDX in ST, as a value in the
1134 range of unsigned char. Returns EOF if IDX is out of the
1135 range of indexes for ST. */
1137 ds_at (const struct string *st, size_t idx)
1139 return ss_at (ds_ss (st), idx);
1142 /* Returns the first character in ST as a value in the range of
1143 unsigned char. Returns EOF if ST is the empty string. */
1145 ds_first (const struct string *st)
1147 return ss_first (ds_ss (st));
1150 /* Returns the last character in ST as a value in the range of
1151 unsigned char. Returns EOF if ST is the empty string. */
1153 ds_last (const struct string *st)
1155 return ss_last (ds_ss (st));
1158 /* Returns the number of consecutive characters at the beginning
1159 of ST that are in SKIP_SET. */
1161 ds_span (const struct string *st, struct substring skip_set)
1163 return ss_span (ds_ss (st), skip_set);
1166 /* Returns the number of consecutive characters at the beginning
1167 of ST that are not in STOP_SET. */
1169 ds_cspan (const struct string *st, struct substring stop_set)
1171 return ss_cspan (ds_ss (st), stop_set);
/* Returns the position of the first occurrence of character C in
   ST, or SIZE_MAX if there is no such
1178 ds_find_char (const struct string *st, char c)
1180 return ss_find_char (ds_ss (st), c);
1183 /* Compares A and B and returns a strcmp()-type comparison
1186 ds_compare (const struct string *a, const struct string *b)
1188 return ss_compare (ds_ss (a), ds_ss (b));
1191 /* Returns the position in ST that the character at P occupies.
1192 P must point within ST or one past its end. */
1194 ds_pointer_to_position (const struct string *st, const char *p)
1196 return ss_pointer_to_position (ds_ss (st), p);
1199 /* Allocates and returns a null-terminated string that contains
1202 ds_xstrdup (const struct string *st)
1204 return ss_xstrdup (ds_ss (st));
1207 /* Returns the allocation size of ST. */
1209 ds_capacity (const struct string *st)
1211 return st->capacity;
1214 /* Returns the value of ST as a null-terminated string. */
1216 ds_cstr (const struct string *st_)
1218 struct string *st = CONST_CAST (struct string *, st_);
1219 if (st->ss.string == NULL)
1221 st->ss.string[st->ss.length] = '\0';
1222 return st->ss.string;
1225 /* Returns the value of ST as a null-terminated string and then
   reinitializes ST as an empty string.  The caller must free the
1227 returned string with free(). */
1229 ds_steal_cstr (struct string *st)
1231 char *s = ds_cstr (st);
1236 /* Reads characters from STREAM and appends them to ST, stopping
1237 after MAX_LENGTH characters, after appending a newline, or
1238 after an I/O error or end of file was encountered, whichever
1239 comes first. Returns true if at least one character was added
1240 to ST, false if no characters were read before an I/O error or
1241 end of file (or if MAX_LENGTH was 0).
1243 This function treats LF and CR LF sequences as new-line,
1244 translating each of them to a single '\n' new-line character
1247 ds_read_line (struct string *st, FILE *stream, size_t max_length)
1251 for (length = 0; length < max_length; length++)
1253 int c = getc (stream);
1260 ds_put_char (st, c);
1267 /* CR followed by LF is special: translate to \n. */
1268 ds_put_char (st, '\n');
1273 /* CR followed by anything else is just CR. */
1274 ds_put_char (st, '\r');
1282 ds_put_char (st, c);
1289 /* Removes a comment introduced by `#' from ST,
1290 ignoring occurrences inside quoted strings. */
1292 remove_comment (struct string *st)
1297 for (cp = ds_data (st); cp < ds_end (st); cp++)
1302 else if (*cp == '\\')
1305 else if (*cp == '\'' || *cp == '"')
1307 else if (*cp == '#')
1309 ds_truncate (st, cp - ds_cstr (st));
1314 /* Reads a line from STREAM into ST, then preprocesses as follows:
1316 - Splices lines terminated with `\'.
1318 - Deletes comments introduced by `#' outside of single or double
1321 - Deletes trailing white space.
1323 Returns true if a line was successfully read, false on
1324 failure. If LINE_NUMBER is non-null, then *LINE_NUMBER is
1325 incremented by the number of lines read. */
1327 ds_read_config_line (struct string *st, int *line_number, FILE *stream)
1332 if (!ds_read_line (st, stream, SIZE_MAX))
1335 ds_rtrim (st, ss_cstr (CC_SPACES));
1337 while (ds_chomp (st, '\\'));
1339 remove_comment (st);
1343 /* Attempts to read SIZE * CNT bytes from STREAM and append them
1345 Returns true if all the requested data was read, false otherwise. */
1347 ds_read_stream (struct string *st, size_t size, size_t cnt, FILE *stream)
1351 size_t try_bytes = xtimes (cnt, size);
1352 if (size_in_bounds_p (xsum (ds_length (st), try_bytes)))
1354 char *buffer = ds_put_uninit (st, try_bytes);
1355 size_t got_bytes = fread (buffer, 1, try_bytes, stream);
1356 ds_truncate (st, ds_length (st) - (try_bytes - got_bytes));
1357 return got_bytes == try_bytes;
1369 /* Concatenates S onto ST. */
1371 ds_put_cstr (struct string *st, const char *s)
1374 ds_put_substring (st, ss_cstr (s));
1377 /* Concatenates SS to ST. */
1379 ds_put_substring (struct string *st, struct substring ss)
1381 memcpy (ds_put_uninit (st, ss_length (ss)), ss_data (ss), ss_length (ss));
1384 /* Returns ds_end(ST) and THEN increases the length by INCR. */
1386 ds_put_uninit (struct string *st, size_t incr)
1389 ds_extend (st, ds_length (st) + incr);
1391 st->ss.length += incr;
1395 /* Formats FORMAT as a printf string and appends the result to ST. */
1397 ds_put_format (struct string *st, const char *format, ...)
1401 va_start (args, format);
1402 ds_put_vformat (st, format, args);
1406 /* Formats FORMAT as a printf string and appends the result to ST. */
1408 ds_put_vformat (struct string *st, const char *format, va_list args_)
1413 va_copy (args, args_);
1414 avail = st->ss.string != NULL ? st->capacity - st->ss.length + 1 : 0;
1415 needed = vsnprintf (st->ss.string + st->ss.length, avail, format, args);
1418 if (needed >= avail)
1420 va_copy (args, args_);
1421 vsprintf (ds_put_uninit (st, needed), format, args);
1426 /* Some old libc's returned -1 when the destination string
1428 while (needed == -1)
1430 ds_extend (st, (st->capacity + 1) * 2);
1431 avail = st->capacity - st->ss.length + 1;
1433 va_copy (args, args_);
1434 needed = vsnprintf (ds_end (st), avail, format, args);
1437 st->ss.length += needed;
1441 /* Appends character CH to ST. */
1443 ds_put_char (struct string *st, int ch)
1445 ds_put_uninit (st, 1)[0] = ch;
1448 /* Appends CNT copies of character CH to ST. */
1450 ds_put_char_multiple (struct string *st, int ch, size_t cnt)
1452 memset (ds_put_uninit (st, cnt), ch, cnt);
1456 /* If relocation has been enabled, replace ST,
1457 with its relocated version */
1459 ds_relocate (struct string *st)
1461 const char *orig = ds_cstr (st);
1462 const char *rel = relocate (orig);
1467 ds_put_cstr (st, rel);
1468 free ((char *) rel);
1475 /* Operations on uint8_t "strings" */
1477 /* Copies buffer SRC, of SRC_SIZE bytes, to DST, of DST_SIZE bytes.
1478 DST is truncated to DST_SIZE bytes or padded on the right with
1479 copies of PAD as needed. */
1481 u8_buf_copy_rpad (uint8_t *dst, size_t dst_size,
1482 const uint8_t *src, size_t src_size,
1485 if (src_size >= dst_size)
1486 memmove (dst, src, dst_size);
1489 memmove (dst, src, src_size);
1490 memset (&dst[src_size], pad, dst_size - src_size);