X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fstr.c;h=811674225c23c9f0f8f757bab266ccae662b7c38;hb=32538f9f35aee7145a49971f9dae1394a1a201b5;hp=d7c71b11f5509f15678693555676c54cc4662e3f;hpb=6f3865480503c571963d8a2d1af858a4d72d4e88;p=pspp diff --git a/src/libpspp/str.c b/src/libpspp/str.c index d7c71b11f5..811674225c 100644 --- a/src/libpspp/str.c +++ b/src/libpspp/str.c @@ -1,5 +1,6 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2014, + 2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,6 +26,7 @@ #include #include "libpspp/cast.h" +#include "libpspp/i18n.h" #include "libpspp/message.h" #include "libpspp/pool.h" @@ -142,9 +144,9 @@ buf_copy_str_lpad (char *dst, size_t dst_size, const char *src, char pad) memcpy (dst, src, dst_size); else { - size_t pad_cnt = dst_size - src_len; - memset (&dst[0], pad, pad_cnt); - memcpy (dst + pad_cnt, src, src_len); + size_t n_pad = dst_size - src_len; + memset (&dst[0], pad, n_pad); + memcpy (dst + n_pad, src, src_len); } } @@ -189,7 +191,7 @@ buf_copy_rpad (char *dst, size_t dst_size, void str_copy_rpad (char *dst, size_t dst_size, const char *src) { - if (dst_size > 0) + if (dst_size > 0) { size_t src_len = strlen (src); if (src_len < dst_size - 1) @@ -257,9 +259,10 @@ str_lowercase (char *s) } /* Converts NUMBER into a string in 26-adic notation in BUFFER, - which has room for SIZE bytes. Returns true if successful, - false if NUMBER, plus a trailing null, is too large to fit in - the available space. + which has room for SIZE bytes. Uses uppercase if UPPERCASE is + true, otherwise lowercase, Returns true if successful, false + if NUMBER, plus a trailing null, is too large to fit in the + available space. 26-adic notation is "spreadsheet column numbering": 1 = A, 2 = B, 3 = C, ... 26 = Z, 27 = AA, 28 = AB, 29 = AC, ... @@ -271,15 +274,18 @@ str_lowercase (char *s) For more information, see http://en.wikipedia.org/wiki/Bijective_numeration. */ bool -str_format_26adic (unsigned long int number, char buffer[], size_t size) +str_format_26adic (unsigned long int number, bool uppercase, + char buffer[], size_t size) { + const char *alphabet + = uppercase ? "ABCDEFGHIJKLMNOPQRSTUVWXYZ" : "abcdefghijklmnopqrstuvwxyz"; size_t length = 0; while (number-- > 0) { if (length >= size) goto overflow; - buffer[length++] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[number % 26]; + buffer[length++] = alphabet[number % 26]; number /= 26; } @@ -296,6 +302,39 @@ overflow: return false; } +/* Copies IN to buffer OUT with size OUT_SIZE, appending a null terminator. If + IN is too long for OUT, or if IN contains a new-line, replaces the tail with + "...". + + OUT_SIZE must be at least 16. */ +void +str_ellipsize (struct substring in, char *out, size_t out_size) +{ + assert (out_size >= 16); + + size_t out_maxlen = out_size - 1; + if (in.length > out_maxlen - 3) + out_maxlen -= 3; + + size_t out_len = 0; + while (out_len < in.length + && in.string[out_len] != '\n' + && in.string[out_len] != '\0' + && (in.string[out_len] != '\r' + || out_len + 1 >= in.length + || in.string[out_len + 1] != '\n')) + { + int mblen = u8_mblen (CHAR_CAST (const uint8_t *, in.string + out_len), + in.length - out_len); + if (mblen < 0 || out_len + mblen > out_maxlen) + break; + out_len += mblen; + } + + memcpy (out, in.string, out_len); + strcpy (&out[out_len], out_len < in.length ? "..." : ""); +} + /* Sets the SIZE bytes starting at BLOCK to C, and returns the byte following BLOCK. */ void * @@ -307,32 +346,32 @@ mempset (void *block, int c, size_t size) /* Substrings. */ -/* Returns a substring whose contents are the CNT bytes +/* Returns a substring whose contents are the N bytes starting at the (0-based) position START in SS. */ struct substring -ss_substr (struct substring ss, size_t start, size_t cnt) +ss_substr (struct substring ss, size_t start, size_t n) { if (start < ss.length) - return ss_buffer (ss.string + start, MIN (cnt, ss.length - start)); + return ss_buffer (ss.string + start, MIN (n, ss.length - start)); else return ss_buffer (ss.string + ss.length, 0); } -/* Returns a substring whose contents are the first CNT +/* Returns a substring whose contents are the first N bytes in SS. */ struct substring -ss_head (struct substring ss, size_t cnt) +ss_head (struct substring ss, size_t n) { - return ss_buffer (ss.string, MIN (cnt, ss.length)); + return ss_buffer (ss.string, MIN (n, ss.length)); } -/* Returns a substring whose contents are the last CNT bytes +/* Returns a substring whose contents are the last N bytes in SS. */ struct substring -ss_tail (struct substring ss, size_t cnt) +ss_tail (struct substring ss, size_t n) { - if (cnt < ss.length) - return ss_buffer (ss.string + (ss.length - cnt), cnt); + if (n < ss.length) + return ss_buffer (ss.string + (ss.length - n), n); else return ss; } @@ -346,12 +385,12 @@ ss_alloc_substring (struct substring *new, struct substring old) new->length = old.length; } -/* Allocates room for a CNT-byte string in NEW. */ +/* Allocates room for a N-byte string in NEW. */ void -ss_alloc_uninit (struct substring *new, size_t cnt) +ss_alloc_uninit (struct substring *new, size_t n) { - new->string = xmalloc (cnt); - new->length = cnt; + new->string = xmalloc (n); + new->length = n; } void @@ -372,12 +411,12 @@ ss_alloc_substring_pool (struct substring *new, struct substring old, new->string[old.length] = '\0'; } -/* Allocates room for a CNT-byte string in NEW in POOL. */ +/* Allocates room for a N-byte string in NEW in POOL. */ void -ss_alloc_uninit_pool (struct substring *new, size_t cnt, struct pool *pool) +ss_alloc_uninit_pool (struct substring *new, size_t n, struct pool *pool) { - new->string = pool_alloc_unaligned (pool, cnt); - new->length = cnt; + new->string = pool_alloc_unaligned (pool, n); + new->length = n; } /* Frees the string that SS points to. */ @@ -387,12 +426,21 @@ ss_dealloc (struct substring *ss) free (ss->string); } -/* Truncates SS to at most CNT bytes in length. */ +/* Exchanges the contents of A and B. */ +void +ss_swap (struct substring *a, struct substring *b) +{ + struct substring tmp = *a; + *a = *b; + *b = tmp; +} + +/* Truncates SS to at most N bytes in length. */ void -ss_truncate (struct substring *ss, size_t cnt) +ss_truncate (struct substring *ss, size_t n) { - if (ss->length > cnt) - ss->length = cnt; + if (ss->length > n) + ss->length = n; } /* Removes trailing bytes in TRIM_SET from SS. @@ -400,13 +448,13 @@ ss_truncate (struct substring *ss, size_t cnt) size_t ss_rtrim (struct substring *ss, struct substring trim_set) { - size_t cnt = 0; - while (cnt < ss->length + size_t n = 0; + while (n < ss->length && ss_find_byte (trim_set, - ss->string[ss->length - cnt - 1]) != SIZE_MAX) - cnt++; - ss->length -= cnt; - return cnt; + ss->string[ss->length - n - 1]) != SIZE_MAX) + n++; + ss->length -= n; + return n; } /* Removes leading bytes in TRIM_SET from SS. @@ -414,9 +462,9 @@ ss_rtrim (struct substring *ss, struct substring trim_set) size_t ss_ltrim (struct substring *ss, struct substring trim_set) { - size_t cnt = ss_span (*ss, trim_set); - ss_advance (ss, cnt); - return cnt; + size_t n = ss_span (*ss, trim_set); + ss_advance (ss, n); + return n; } /* Trims leading and trailing bytes in TRIM_SET from SS. */ @@ -506,18 +554,18 @@ ss_tokenize (struct substring ss, struct substring delimiters, ss_get_bytes (&ss, ss_cspan (ss, delimiters), token); found_token = ss_length (*token) > 0; - *save_idx += ss_length (*token) + found_token; + *save_idx += ss_length (*token) + (found_token?1:0); return found_token; } -/* Removes the first CNT bytes from SS. */ +/* Removes the first N bytes from SS. */ void -ss_advance (struct substring *ss, size_t cnt) +ss_advance (struct substring *ss, size_t n) { - if (cnt > ss->length) - cnt = ss->length; - ss->string += cnt; - ss->length -= cnt; + if (n > ss->length) + n = ss->length; + ss->string += n; + ss->length -= n; } /* If the first byte in SS is C, removes it and returns true. @@ -592,15 +640,15 @@ ss_get_until (struct substring *ss, char delimiter, struct substring *out) return ss_match_byte (ss, delimiter); } -/* Stores the first CNT bytes in SS in OUT (or fewer, if SS - is shorter than CNT bytes). Trims the same bytes - from the beginning of SS. Returns CNT. */ +/* Stores the first N bytes in SS in OUT (or fewer, if SS + is shorter than N bytes). Trims the same bytes + from the beginning of SS. Returns N. */ size_t -ss_get_bytes (struct substring *ss, size_t cnt, struct substring *out) +ss_get_bytes (struct substring *ss, size_t n, struct substring *out) { - *out = ss_head (*ss, cnt); - ss_advance (ss, cnt); - return cnt; + *out = ss_head (*ss, n); + ss_advance (ss, n); + return n; } /* Parses and removes an optionally signed decimal integer from @@ -687,6 +735,14 @@ ss_last (struct substring ss) return ss.length > 0 ? (unsigned char) ss.string[ss.length - 1] : EOF; } +/* Returns true if SS starts with PREFIX, false otherwise. */ +bool +ss_starts_with (struct substring ss, struct substring prefix) +{ + return (ss.length >= prefix.length + && !memcmp (ss.string, prefix.string, prefix.length)); +} + /* Returns true if SS ends with SUFFIX, false otherwise. */ bool ss_ends_with (struct substring ss, struct substring suffix) @@ -725,10 +781,20 @@ ss_cspan (struct substring ss, struct substring stop_set) size_t ss_find_byte (struct substring ss, char c) { - const char *p = memchr (ss.string, c, ss.length); + const char *p = memchr (ss.string, (int) c, ss.length); return p != NULL ? p - ss.string : SIZE_MAX; } +/* Returns the offset in HAYSTACK of the first instance of NEEDLE, + or SIZE_MAX if NEEDLE does not occur in HAYSTACK. */ +size_t +ss_find_substring (struct substring haystack, struct substring needle) +{ + const char *p = memmem (haystack.string, haystack.length, + needle.string, needle.length); + return p != NULL ? p - haystack.string : SIZE_MAX; +} + /* Compares A and B and returns a strcmp()-type comparison result. */ int @@ -863,6 +929,22 @@ ss_at_mblen (struct substring s, size_t ofs) else return 0; } + +size_t +ss_utf8_count_columns (struct substring s) +{ + return utf8_count_columns (s.string, s.length); +} + +/* Returns a substring of S starting at 0-based display column START and + running for N display columns. */ +struct substring +ss_utf8_columns (struct substring s, size_t start, size_t n) +{ + ss_advance (&s, utf8_columns_to_bytes (s.string, s.length, start)); + s.length = utf8_columns_to_bytes (s.string, s.length, n); + return s; +} /* Initializes ST as an empty string. */ void @@ -982,35 +1064,35 @@ ds_ss (const struct string *st) return st->ss; } -/* Returns a substring that contains CNT bytes from ST +/* Returns a substring that contains N bytes from ST starting at position START. If START is greater than or equal to the length of ST, then - the substring will be the empty string. If START + CNT + the substring will be the empty string. If START + N exceeds the length of ST, then the substring will only be ds_length(ST) - START bytes long. */ struct substring -ds_substr (const struct string *st, size_t start, size_t cnt) +ds_substr (const struct string *st, size_t start, size_t n) { - return ss_substr (ds_ss (st), start, cnt); + return ss_substr (ds_ss (st), start, n); } -/* Returns a substring that contains the first CNT bytes in - ST. If CNT exceeds the length of ST, then the substring will +/* Returns a substring that contains the first N bytes in + ST. If N exceeds the length of ST, then the substring will contain all of ST. */ struct substring -ds_head (const struct string *st, size_t cnt) +ds_head (const struct string *st, size_t n) { - return ss_head (ds_ss (st), cnt); + return ss_head (ds_ss (st), n); } -/* Returns a substring that contains the last CNT bytes in - ST. If CNT exceeds the length of ST, then the substring will +/* Returns a substring that contains the last N bytes in + ST. If N exceeds the length of ST, then the substring will contain all of ST. */ struct substring -ds_tail (const struct string *st, size_t cnt) +ds_tail (const struct string *st, size_t n) { - return ss_tail (ds_ss (st), cnt); + return ss_tail (ds_ss (st), n); } /* Ensures that ST can hold at least MIN_CAPACITY bytes plus a null @@ -1059,10 +1141,10 @@ ds_rtrim (struct string *st, struct substring trim_set) size_t ds_ltrim (struct string *st, struct substring trim_set) { - size_t cnt = ds_span (st, trim_set); - if (cnt > 0) - ds_assign_substring (st, ds_substr (st, cnt, SIZE_MAX)); - return cnt; + size_t n = ds_span (st, trim_set); + if (n > 0) + ds_assign_substring (st, ds_substr (st, n, SIZE_MAX)); + return n; } /* Trims leading and trailing bytes in TRIM_SET from ST. @@ -1070,8 +1152,8 @@ ds_ltrim (struct string *st, struct substring trim_set) size_t ds_trim (struct string *st, struct substring trim_set) { - size_t cnt = ds_rtrim (st, trim_set); - return cnt + ds_ltrim (st, trim_set); + size_t n = ds_rtrim (st, trim_set); + return n + ds_ltrim (st, trim_set); } /* If the last byte in ST is C, removes it and returns true. @@ -1420,15 +1502,15 @@ ds_read_config_line (struct string *st, int *line_number, FILE *stream) return true; } -/* Attempts to read SIZE * CNT bytes from STREAM and append them +/* Attempts to read SIZE * N bytes from STREAM and append them to ST. Returns true if all the requested data was read, false otherwise. */ bool -ds_read_stream (struct string *st, size_t size, size_t cnt, FILE *stream) +ds_read_stream (struct string *st, size_t size, size_t n, FILE *stream) { if (size != 0) { - size_t try_bytes = xtimes (cnt, size); + size_t try_bytes = xtimes (n, size); if (size_in_bounds_p (xsum (ds_length (st), try_bytes))) { char *buffer = ds_put_uninit (st, try_bytes); @@ -1458,7 +1540,8 @@ ds_put_cstr (struct string *st, const char *s) void ds_put_substring (struct string *st, struct substring ss) { - memcpy (ds_put_uninit (st, ss_length (ss)), ss_data (ss), ss_length (ss)); + if (ss.length) + memcpy (ds_put_uninit (st, ss_length (ss)), ss_data (ss), ss_length (ss)); } /* Returns ds_end(ST) and THEN increases the length by INCR. */ @@ -1488,6 +1571,9 @@ ds_splice_uninit (struct string *st, { if (new_len > old_len) ds_extend (st, ds_length (st) + (new_len - old_len)); + + assert (ds_length (st) >= ofs + old_len); + memmove (ds_data (st) + (ofs + new_len), ds_data (st) + (ofs + old_len), ds_length (st) - (ofs + old_len)); @@ -1518,25 +1604,22 @@ ds_put_c_format (struct string *st, const char *format, ...) va_end (args); } - -/* Formats FORMAT as a printf string, using fmt_func (a snprintf like function) - and appends the result to ST. */ -static void -ds_put_vformat_int (struct string *st, const char *format, va_list args_, - int (*fmt_func) (char *, size_t, const char *, va_list)) +/* Formats FORMAT as a printf string and appends the result to ST. */ +void +ds_put_vformat (struct string *st, const char *format, va_list args_) { int avail, needed; va_list args; va_copy (args, args_); avail = st->ss.string != NULL ? st->capacity - st->ss.length + 1 : 0; - needed = fmt_func (st->ss.string + st->ss.length, avail, format, args); + needed = vsnprintf (st->ss.string + st->ss.length, avail, format, args); va_end (args); if (needed >= avail) { va_copy (args, args_); - fmt_func (ds_put_uninit (st, needed), needed + 1, format, args); + vsnprintf (ds_put_uninit (st, needed), needed + 1, format, args); va_end (args); } else @@ -1549,34 +1632,27 @@ ds_put_vformat_int (struct string *st, const char *format, va_list args_, avail = st->capacity - st->ss.length + 1; va_copy (args, args_); - needed = fmt_func (ds_end (st), avail, format, args); + needed = vsnprintf (ds_end (st), avail, format, args); va_end (args); } st->ss.length += needed; } } - -static int -vasnwrapper (char *str, size_t size, const char *format, va_list ap) -{ - c_vasnprintf (str, &size, format, ap); - return size; -} - -/* Formats FORMAT as a printf string and appends the result to ST. */ -void -ds_put_vformat (struct string *st, const char *format, va_list args_) -{ - ds_put_vformat_int (st, format, args_, vsnprintf); -} - -/* Formats FORMAT as a printf string, as if in the C locale, +/* Formats FORMAT as a printf string, as if in the C locale, and appends the result to ST. */ void -ds_put_c_vformat (struct string *st, const char *format, va_list args_) +ds_put_c_vformat (struct string *st, const char *format, va_list args) { - ds_put_vformat_int (st, format, args_, vasnwrapper); + char buf[128]; + size_t len = sizeof buf; + char *output = c_vasnprintf (buf, &len, format, args); + if (output) + { + ds_put_cstr (st, output); + if (output != buf) + free (output); + } } /* Appends byte CH to ST. */ @@ -1586,11 +1662,11 @@ ds_put_byte (struct string *st, int ch) ds_put_uninit (st, 1)[0] = ch; } -/* Appends CNT copies of byte CH to ST. */ +/* Appends N copies of byte CH to ST. */ void -ds_put_byte_multiple (struct string *st, int ch, size_t cnt) +ds_put_byte_multiple (struct string *st, int ch, size_t n) { - memset (ds_put_uninit (st, cnt), ch, cnt); + memset (ds_put_uninit (st, n), ch, n); } /* Appends Unicode code point UC to ST in UTF-8 encoding. */ @@ -1601,6 +1677,18 @@ ds_put_unichar (struct string *st, ucs4_t uc) st->ss.length += u8_uctomb (CHAR_CAST (uint8_t *, ds_end (st)), uc, 6); } +/* Appends N copies of S to ST. */ +void +ds_put_substring_multiple (struct string *dst, struct substring src, size_t n) +{ + char *p = ds_put_uninit (dst, n * src.length); + for (size_t i = 0; i < n; i++) + { + memcpy (p, src.string, src.length); + p += src.length; + } +} + /* If relocation has been enabled, replace ST, with its relocated version */ void @@ -1609,7 +1697,7 @@ ds_relocate (struct string *st) const char *orig = ds_cstr (st); const char *rel = relocate (orig); - if ( orig != rel) + if (orig != rel) { ds_clear (st); ds_put_cstr (st, rel);