X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fstr.c;h=2a592b9064d88368547216404cc759cb9da21b8b;hb=981adc6169ffe7227de286f92f70edf684d37a2b;hp=57386cbfc4f89e554a8a4a0e7d5e4303a022ccbf;hpb=9ceda5b4082cf665b5279cc5c56ecbe4fd44fb15;p=pspp diff --git a/src/libpspp/str.c b/src/libpspp/str.c index 57386cbfc4..2a592b9064 100644 --- a/src/libpspp/str.c +++ b/src/libpspp/str.c @@ -1,5 +1,6 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2014 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2014, + 2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,6 +26,7 @@ #include #include "libpspp/cast.h" +#include "libpspp/i18n.h" #include "libpspp/message.h" #include "libpspp/pool.h" @@ -142,9 +144,9 @@ buf_copy_str_lpad (char *dst, size_t dst_size, const char *src, char pad) memcpy (dst, src, dst_size); else { - size_t pad_cnt = dst_size - src_len; - memset (&dst[0], pad, pad_cnt); - memcpy (dst + pad_cnt, src, src_len); + size_t n_pad = dst_size - src_len; + memset (&dst[0], pad, n_pad); + memcpy (dst + n_pad, src, src_len); } } @@ -300,6 +302,39 @@ overflow: return false; } +/* Copies IN to buffer OUT with size OUT_SIZE, appending a null terminator. If + IN is too long for OUT, or if IN contains a new-line, replaces the tail with + "...". + + OUT_SIZE must be at least 16. */ +void +str_ellipsize (struct substring in, char *out, size_t out_size) +{ + assert (out_size >= 16); + + size_t out_maxlen = out_size - 1; + if (in.length > out_maxlen - 3) + out_maxlen -= 3; + + size_t out_len = 0; + while (out_len < in.length + && in.string[out_len] != '\n' + && in.string[out_len] != '\0' + && (in.string[out_len] != '\r' + || out_len + 1 >= in.length + || in.string[out_len + 1] != '\n')) + { + int mblen = u8_mblen (CHAR_CAST (const uint8_t *, in.string + out_len), + in.length - out_len); + if (mblen < 0 || out_len + mblen > out_maxlen) + break; + out_len += mblen; + } + + memcpy (out, in.string, out_len); + strcpy (&out[out_len], out_len < in.length ? "..." : ""); +} + /* Sets the SIZE bytes starting at BLOCK to C, and returns the byte following BLOCK. */ void * @@ -311,32 +346,32 @@ mempset (void *block, int c, size_t size) /* Substrings. */ -/* Returns a substring whose contents are the CNT bytes +/* Returns a substring whose contents are the N bytes starting at the (0-based) position START in SS. */ struct substring -ss_substr (struct substring ss, size_t start, size_t cnt) +ss_substr (struct substring ss, size_t start, size_t n) { if (start < ss.length) - return ss_buffer (ss.string + start, MIN (cnt, ss.length - start)); + return ss_buffer (ss.string + start, MIN (n, ss.length - start)); else return ss_buffer (ss.string + ss.length, 0); } -/* Returns a substring whose contents are the first CNT +/* Returns a substring whose contents are the first N bytes in SS. */ struct substring -ss_head (struct substring ss, size_t cnt) +ss_head (struct substring ss, size_t n) { - return ss_buffer (ss.string, MIN (cnt, ss.length)); + return ss_buffer (ss.string, MIN (n, ss.length)); } -/* Returns a substring whose contents are the last CNT bytes +/* Returns a substring whose contents are the last N bytes in SS. */ struct substring -ss_tail (struct substring ss, size_t cnt) +ss_tail (struct substring ss, size_t n) { - if (cnt < ss.length) - return ss_buffer (ss.string + (ss.length - cnt), cnt); + if (n < ss.length) + return ss_buffer (ss.string + (ss.length - n), n); else return ss; } @@ -350,12 +385,12 @@ ss_alloc_substring (struct substring *new, struct substring old) new->length = old.length; } -/* Allocates room for a CNT-byte string in NEW. */ +/* Allocates room for a N-byte string in NEW. */ void -ss_alloc_uninit (struct substring *new, size_t cnt) +ss_alloc_uninit (struct substring *new, size_t n) { - new->string = xmalloc (cnt); - new->length = cnt; + new->string = xmalloc (n); + new->length = n; } void @@ -376,12 +411,12 @@ ss_alloc_substring_pool (struct substring *new, struct substring old, new->string[old.length] = '\0'; } -/* Allocates room for a CNT-byte string in NEW in POOL. */ +/* Allocates room for a N-byte string in NEW in POOL. */ void -ss_alloc_uninit_pool (struct substring *new, size_t cnt, struct pool *pool) +ss_alloc_uninit_pool (struct substring *new, size_t n, struct pool *pool) { - new->string = pool_alloc_unaligned (pool, cnt); - new->length = cnt; + new->string = pool_alloc_unaligned (pool, n); + new->length = n; } /* Frees the string that SS points to. */ @@ -391,12 +426,21 @@ ss_dealloc (struct substring *ss) free (ss->string); } -/* Truncates SS to at most CNT bytes in length. */ +/* Exchanges the contents of A and B. */ +void +ss_swap (struct substring *a, struct substring *b) +{ + struct substring tmp = *a; + *a = *b; + *b = tmp; +} + +/* Truncates SS to at most N bytes in length. */ void -ss_truncate (struct substring *ss, size_t cnt) +ss_truncate (struct substring *ss, size_t n) { - if (ss->length > cnt) - ss->length = cnt; + if (ss->length > n) + ss->length = n; } /* Removes trailing bytes in TRIM_SET from SS. @@ -404,13 +448,13 @@ ss_truncate (struct substring *ss, size_t cnt) size_t ss_rtrim (struct substring *ss, struct substring trim_set) { - size_t cnt = 0; - while (cnt < ss->length + size_t n = 0; + while (n < ss->length && ss_find_byte (trim_set, - ss->string[ss->length - cnt - 1]) != SIZE_MAX) - cnt++; - ss->length -= cnt; - return cnt; + ss->string[ss->length - n - 1]) != SIZE_MAX) + n++; + ss->length -= n; + return n; } /* Removes leading bytes in TRIM_SET from SS. @@ -418,9 +462,9 @@ ss_rtrim (struct substring *ss, struct substring trim_set) size_t ss_ltrim (struct substring *ss, struct substring trim_set) { - size_t cnt = ss_span (*ss, trim_set); - ss_advance (ss, cnt); - return cnt; + size_t n = ss_span (*ss, trim_set); + ss_advance (ss, n); + return n; } /* Trims leading and trailing bytes in TRIM_SET from SS. */ @@ -510,18 +554,18 @@ ss_tokenize (struct substring ss, struct substring delimiters, ss_get_bytes (&ss, ss_cspan (ss, delimiters), token); found_token = ss_length (*token) > 0; - *save_idx += ss_length (*token) + found_token; + *save_idx += ss_length (*token) + (found_token?1:0); return found_token; } -/* Removes the first CNT bytes from SS. */ +/* Removes the first N bytes from SS. */ void -ss_advance (struct substring *ss, size_t cnt) +ss_advance (struct substring *ss, size_t n) { - if (cnt > ss->length) - cnt = ss->length; - ss->string += cnt; - ss->length -= cnt; + if (n > ss->length) + n = ss->length; + ss->string += n; + ss->length -= n; } /* If the first byte in SS is C, removes it and returns true. @@ -571,6 +615,21 @@ ss_match_string (struct substring *ss, const struct substring target) return false; } +/* If SS begins with TARGET, except possibly for case differences, removes it + and returns true. Otherwise, returns false without changing SS. */ +bool +ss_match_string_case (struct substring *ss, const struct substring target) +{ + size_t length = ss_length (target); + if (ss_equals_case (ss_head (*ss, length), target)) + { + ss_advance (ss, length); + return true; + } + else + return false; +} + /* Removes the first byte from SS and returns it. If SS is empty, returns EOF without modifying SS. */ int @@ -596,15 +655,15 @@ ss_get_until (struct substring *ss, char delimiter, struct substring *out) return ss_match_byte (ss, delimiter); } -/* Stores the first CNT bytes in SS in OUT (or fewer, if SS - is shorter than CNT bytes). Trims the same bytes - from the beginning of SS. Returns CNT. */ +/* Stores the first N bytes in SS in OUT (or fewer, if SS + is shorter than N bytes). Trims the same bytes + from the beginning of SS. Returns N. */ size_t -ss_get_bytes (struct substring *ss, size_t cnt, struct substring *out) +ss_get_bytes (struct substring *ss, size_t n, struct substring *out) { - *out = ss_head (*ss, cnt); - ss_advance (ss, cnt); - return cnt; + *out = ss_head (*ss, n); + ss_advance (ss, n); + return n; } /* Parses and removes an optionally signed decimal integer from @@ -699,6 +758,14 @@ ss_starts_with (struct substring ss, struct substring prefix) && !memcmp (ss.string, prefix.string, prefix.length)); } +/* Returns true if SS starts with PREFIX in any case, false otherwise. */ +bool +ss_starts_with_case (struct substring ss, struct substring prefix) +{ + return (ss.length >= prefix.length + && !memcasecmp (ss.string, prefix.string, prefix.length)); +} + /* Returns true if SS ends with SUFFIX, false otherwise. */ bool ss_ends_with (struct substring ss, struct substring suffix) @@ -708,6 +775,15 @@ ss_ends_with (struct substring ss, struct substring suffix) suffix.length)); } +/* Returns true if SS ends with SUFFIX in any case, false otherwise. */ +bool +ss_ends_with_case (struct substring ss, struct substring suffix) +{ + return (ss.length >= suffix.length + && !memcasecmp (&ss.string[ss.length - suffix.length], suffix.string, + suffix.length)); +} + /* Returns the number of contiguous bytes at the beginning of SS that are in SKIP_SET. */ size_t @@ -737,7 +813,7 @@ ss_cspan (struct substring ss, struct substring stop_set) size_t ss_find_byte (struct substring ss, char c) { - const char *p = memchr (ss.string, c, ss.length); + const char *p = memchr (ss.string, (int) c, ss.length); return p != NULL ? p - ss.string : SIZE_MAX; } @@ -885,6 +961,22 @@ ss_at_mblen (struct substring s, size_t ofs) else return 0; } + +size_t +ss_utf8_count_columns (struct substring s) +{ + return utf8_count_columns (s.string, s.length); +} + +/* Returns a substring of S starting at 0-based display column START and + running for N display columns. */ +struct substring +ss_utf8_columns (struct substring s, size_t start, size_t n) +{ + ss_advance (&s, utf8_columns_to_bytes (s.string, s.length, start)); + s.length = utf8_columns_to_bytes (s.string, s.length, n); + return s; +} /* Initializes ST as an empty string. */ void @@ -1004,35 +1096,35 @@ ds_ss (const struct string *st) return st->ss; } -/* Returns a substring that contains CNT bytes from ST +/* Returns a substring that contains N bytes from ST starting at position START. If START is greater than or equal to the length of ST, then - the substring will be the empty string. If START + CNT + the substring will be the empty string. If START + N exceeds the length of ST, then the substring will only be ds_length(ST) - START bytes long. */ struct substring -ds_substr (const struct string *st, size_t start, size_t cnt) +ds_substr (const struct string *st, size_t start, size_t n) { - return ss_substr (ds_ss (st), start, cnt); + return ss_substr (ds_ss (st), start, n); } -/* Returns a substring that contains the first CNT bytes in - ST. If CNT exceeds the length of ST, then the substring will +/* Returns a substring that contains the first N bytes in + ST. If N exceeds the length of ST, then the substring will contain all of ST. */ struct substring -ds_head (const struct string *st, size_t cnt) +ds_head (const struct string *st, size_t n) { - return ss_head (ds_ss (st), cnt); + return ss_head (ds_ss (st), n); } -/* Returns a substring that contains the last CNT bytes in - ST. If CNT exceeds the length of ST, then the substring will +/* Returns a substring that contains the last N bytes in + ST. If N exceeds the length of ST, then the substring will contain all of ST. */ struct substring -ds_tail (const struct string *st, size_t cnt) +ds_tail (const struct string *st, size_t n) { - return ss_tail (ds_ss (st), cnt); + return ss_tail (ds_ss (st), n); } /* Ensures that ST can hold at least MIN_CAPACITY bytes plus a null @@ -1081,10 +1173,10 @@ ds_rtrim (struct string *st, struct substring trim_set) size_t ds_ltrim (struct string *st, struct substring trim_set) { - size_t cnt = ds_span (st, trim_set); - if (cnt > 0) - ds_assign_substring (st, ds_substr (st, cnt, SIZE_MAX)); - return cnt; + size_t n = ds_span (st, trim_set); + if (n > 0) + ds_assign_substring (st, ds_substr (st, n, SIZE_MAX)); + return n; } /* Trims leading and trailing bytes in TRIM_SET from ST. @@ -1092,8 +1184,8 @@ ds_ltrim (struct string *st, struct substring trim_set) size_t ds_trim (struct string *st, struct substring trim_set) { - size_t cnt = ds_rtrim (st, trim_set); - return cnt + ds_ltrim (st, trim_set); + size_t n = ds_rtrim (st, trim_set); + return n + ds_ltrim (st, trim_set); } /* If the last byte in ST is C, removes it and returns true. @@ -1442,15 +1534,15 @@ ds_read_config_line (struct string *st, int *line_number, FILE *stream) return true; } -/* Attempts to read SIZE * CNT bytes from STREAM and append them +/* Attempts to read SIZE * N bytes from STREAM and append them to ST. Returns true if all the requested data was read, false otherwise. */ bool -ds_read_stream (struct string *st, size_t size, size_t cnt, FILE *stream) +ds_read_stream (struct string *st, size_t size, size_t n, FILE *stream) { if (size != 0) { - size_t try_bytes = xtimes (cnt, size); + size_t try_bytes = xtimes (n, size); if (size_in_bounds_p (xsum (ds_length (st), try_bytes))) { char *buffer = ds_put_uninit (st, try_bytes); @@ -1480,7 +1572,8 @@ ds_put_cstr (struct string *st, const char *s) void ds_put_substring (struct string *st, struct substring ss) { - memcpy (ds_put_uninit (st, ss_length (ss)), ss_data (ss), ss_length (ss)); + if (ss.length) + memcpy (ds_put_uninit (st, ss_length (ss)), ss_data (ss), ss_length (ss)); } /* Returns ds_end(ST) and THEN increases the length by INCR. */ @@ -1510,6 +1603,9 @@ ds_splice_uninit (struct string *st, { if (new_len > old_len) ds_extend (st, ds_length (st) + (new_len - old_len)); + + assert (ds_length (st) >= ofs + old_len); + memmove (ds_data (st) + (ofs + new_len), ds_data (st) + (ofs + old_len), ds_length (st) - (ofs + old_len)); @@ -1598,11 +1694,11 @@ ds_put_byte (struct string *st, int ch) ds_put_uninit (st, 1)[0] = ch; } -/* Appends CNT copies of byte CH to ST. */ +/* Appends N copies of byte CH to ST. */ void -ds_put_byte_multiple (struct string *st, int ch, size_t cnt) +ds_put_byte_multiple (struct string *st, int ch, size_t n) { - memset (ds_put_uninit (st, cnt), ch, cnt); + memset (ds_put_uninit (st, n), ch, n); } /* Appends Unicode code point UC to ST in UTF-8 encoding. */ @@ -1613,6 +1709,18 @@ ds_put_unichar (struct string *st, ucs4_t uc) st->ss.length += u8_uctomb (CHAR_CAST (uint8_t *, ds_end (st)), uc, 6); } +/* Appends N copies of S to ST. */ +void +ds_put_substring_multiple (struct string *dst, struct substring src, size_t n) +{ + char *p = ds_put_uninit (dst, n * src.length); + for (size_t i = 0; i < n; i++) + { + memcpy (p, src.string, src.length); + p += src.length; + } +} + /* If relocation has been enabled, replace ST, with its relocated version */ void @@ -1621,7 +1729,7 @@ ds_relocate (struct string *st) const char *orig = ds_cstr (st); const char *rel = relocate (orig); - if ( orig != rel) + if (orig != rel) { ds_clear (st); ds_put_cstr (st, rel);