X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fstr.c;h=86e7fd9199a5b9e22fba9fa2ac2eec4e9e7ff1c7;hb=f49d8549666763efac0d3cbda14e29de29976542;hp=ba4a26f1426ac51552dd31885d5cdfff5fea321d;hpb=530906aaa19f6c209ca008c8187f7f750a0b1283;p=pspp diff --git a/src/libpspp/str.c b/src/libpspp/str.c index ba4a26f142..86e7fd9199 100644 --- a/src/libpspp/str.c +++ b/src/libpspp/str.c @@ -1,5 +1,6 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2014, + 2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,6 +29,8 @@ #include "libpspp/message.h" #include "libpspp/pool.h" +#include "gl/c-ctype.h" +#include "gl/c-vasnprintf.h" #include "gl/relocatable.h" #include "gl/minmax.h" #include "gl/xalloc.h" @@ -187,7 +190,7 @@ buf_copy_rpad (char *dst, size_t dst_size, void str_copy_rpad (char *dst, size_t dst_size, const char *src) { - if (dst_size > 0) + if (dst_size > 0) { size_t src_len = strlen (src); if (src_len < dst_size - 1) @@ -232,26 +235,33 @@ str_copy_buf_trunc (char *dst, size_t dst_size, dst[dst_len] = '\0'; } -/* Converts each byte in S to uppercase. */ +/* Converts each byte in S to uppercase. + + This is suitable only for ASCII strings. Use utf8_to_upper() for UTF-8 + strings.*/ void str_uppercase (char *s) { for (; *s != '\0'; s++) - *s = toupper ((unsigned char) *s); + *s = c_toupper ((unsigned char) *s); } -/* Converts each byte in S to lowercase. */ +/* Converts each byte in S to lowercase. + + This is suitable only for ASCII strings. Use utf8_to_lower() for UTF-8 + strings.*/ void str_lowercase (char *s) { for (; *s != '\0'; s++) - *s = tolower ((unsigned char) *s); + *s = c_tolower ((unsigned char) *s); } /* Converts NUMBER into a string in 26-adic notation in BUFFER, - which has room for SIZE bytes. Returns true if successful, - false if NUMBER, plus a trailing null, is too large to fit in - the available space. + which has room for SIZE bytes. Uses uppercase if UPPERCASE is + true, otherwise lowercase, Returns true if successful, false + if NUMBER, plus a trailing null, is too large to fit in the + available space. 26-adic notation is "spreadsheet column numbering": 1 = A, 2 = B, 3 = C, ... 26 = Z, 27 = AA, 28 = AB, 29 = AC, ... @@ -263,24 +273,65 @@ str_lowercase (char *s) For more information, see http://en.wikipedia.org/wiki/Bijective_numeration. */ bool -str_format_26adic (unsigned long int number, char buffer[], size_t size) +str_format_26adic (unsigned long int number, bool uppercase, + char buffer[], size_t size) { + const char *alphabet + = uppercase ? "ABCDEFGHIJKLMNOPQRSTUVWXYZ" : "abcdefghijklmnopqrstuvwxyz"; size_t length = 0; while (number-- > 0) { if (length >= size) - return false; - buffer[length++] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[number % 26]; + goto overflow; + buffer[length++] = alphabet[number % 26]; number /= 26; } if (length >= size) - return false; + goto overflow; buffer[length] = '\0'; buf_reverse (buffer, length); return true; + +overflow: + if (length > 0) + buffer[0] = '\0'; + return false; +} + +/* Copies IN to buffer OUT with size OUT_SIZE, appending a null terminator. If + IN is too long for OUT, or if IN contains a new-line, replaces the tail with + "...". + + OUT_SIZE must be at least 16. */ +void +str_ellipsize (struct substring in, char *out, size_t out_size) +{ + assert (out_size >= 16); + + size_t out_maxlen = out_size - 1; + if (in.length > out_maxlen - 3) + out_maxlen -= 3; + + size_t out_len = 0; + while (out_len < in.length + && in.string[out_len] != '\n' + && in.string[out_len] != '\0' + && (in.string[out_len] != '\r' + || out_len + 1 >= in.length + || in.string[out_len + 1] != '\n')) + { + int mblen = u8_mblen (CHAR_CAST (const uint8_t *, in.string + out_len), + in.length - out_len); + if (mblen < 0 || out_len + mblen > out_maxlen) + break; + out_len += mblen; + } + + memcpy (out, in.string, out_len); + strcpy (&out[out_len], out_len < in.length ? "..." : ""); } /* Sets the SIZE bytes starting at BLOCK to C, @@ -347,15 +398,16 @@ ss_realloc (struct substring *ss, size_t size) ss->string = xrealloc (ss->string, size); } -/* Makes a pool_alloc_unaligned()'d copy of the contents of OLD - in POOL, and stores it in NEW. */ +/* Makes a pool_alloc_unaligned()'d, null-terminated copy of the contents of + OLD in POOL, and stores it in NEW. */ void ss_alloc_substring_pool (struct substring *new, struct substring old, struct pool *pool) { - new->string = pool_alloc_unaligned (pool, old.length); + new->string = pool_alloc_unaligned (pool, old.length + 1); new->length = old.length; memcpy (new->string, old.string, old.length); + new->string[old.length] = '\0'; } /* Allocates room for a CNT-byte string in NEW in POOL. */ @@ -373,6 +425,15 @@ ss_dealloc (struct substring *ss) free (ss->string); } +/* Exchanges the contents of A and B. */ +void +ss_swap (struct substring *a, struct substring *b) +{ + struct substring tmp = *a; + *a = *b; + *b = tmp; +} + /* Truncates SS to at most CNT bytes in length. */ void ss_truncate (struct substring *ss, size_t cnt) @@ -485,11 +546,15 @@ bool ss_tokenize (struct substring ss, struct substring delimiters, size_t *save_idx, struct substring *token) { + bool found_token; + ss_advance (&ss, *save_idx); *save_idx += ss_ltrim (&ss, delimiters); ss_get_bytes (&ss, ss_cspan (ss, delimiters), token); - *save_idx += ss_length (*token) + 1; - return ss_length (*token) > 0; + + found_token = ss_length (*token) > 0; + *save_idx += ss_length (*token) + (found_token?1:0); + return found_token; } /* Removes the first CNT bytes from SS. */ @@ -669,6 +734,14 @@ ss_last (struct substring ss) return ss.length > 0 ? (unsigned char) ss.string[ss.length - 1] : EOF; } +/* Returns true if SS starts with PREFIX, false otherwise. */ +bool +ss_starts_with (struct substring ss, struct substring prefix) +{ + return (ss.length >= prefix.length + && !memcmp (ss.string, prefix.string, prefix.length)); +} + /* Returns true if SS ends with SUFFIX, false otherwise. */ bool ss_ends_with (struct substring ss, struct substring suffix) @@ -707,10 +780,20 @@ ss_cspan (struct substring ss, struct substring stop_set) size_t ss_find_byte (struct substring ss, char c) { - const char *p = memchr (ss.string, c, ss.length); + const char *p = memchr (ss.string, (int) c, ss.length); return p != NULL ? p - ss.string : SIZE_MAX; } +/* Returns the offset in HAYSTACK of the first instance of NEEDLE, + or SIZE_MAX if NEEDLE does not occur in HAYSTACK. */ +size_t +ss_find_substring (struct substring haystack, struct substring needle) +{ + const char *p = memmem (haystack.string, haystack.length, + needle.string, needle.length); + return p != NULL ? p - haystack.string : SIZE_MAX; +} + /* Compares A and B and returns a strcmp()-type comparison result. */ int @@ -1440,7 +1523,8 @@ ds_put_cstr (struct string *st, const char *s) void ds_put_substring (struct string *st, struct substring ss) { - memcpy (ds_put_uninit (st, ss_length (ss)), ss_data (ss), ss_length (ss)); + if (ss.length) + memcpy (ds_put_uninit (st, ss_length (ss)), ss_data (ss), ss_length (ss)); } /* Returns ds_end(ST) and THEN increases the length by INCR. */ @@ -1454,6 +1538,33 @@ ds_put_uninit (struct string *st, size_t incr) return end; } +/* Moves the bytes in ST following offset OFS + OLD_LEN in ST to offset OFS + + NEW_LEN and returns the byte at offset OFS. The first min(OLD_LEN, NEW_LEN) + bytes at the returned position are unchanged; if NEW_LEN > OLD_LEN then the + following NEW_LEN - OLD_LEN bytes are initially indeterminate. + + The intention is that the caller should write NEW_LEN bytes at the returned + position, to effectively replace the OLD_LEN bytes previously at that + position. */ +char * +ds_splice_uninit (struct string *st, + size_t ofs, size_t old_len, size_t new_len) +{ + if (new_len != old_len) + { + if (new_len > old_len) + ds_extend (st, ds_length (st) + (new_len - old_len)); + + assert (ds_length (st) >= ofs + old_len); + + memmove (ds_data (st) + (ofs + new_len), + ds_data (st) + (ofs + old_len), + ds_length (st) - (ofs + old_len)); + st->ss.length += new_len - old_len; + } + return ds_data (st) + ofs; +} + /* Formats FORMAT as a printf string and appends the result to ST. */ void ds_put_format (struct string *st, const char *format, ...) @@ -1465,6 +1576,17 @@ ds_put_format (struct string *st, const char *format, ...) va_end (args); } +/* Formats FORMAT as a printf string as if in the C locale and appends the result to ST. */ +void +ds_put_c_format (struct string *st, const char *format, ...) +{ + va_list args; + + va_start (args, format); + ds_put_c_vformat (st, format, args); + va_end (args); +} + /* Formats FORMAT as a printf string and appends the result to ST. */ void ds_put_vformat (struct string *st, const char *format, va_list args_) @@ -1480,7 +1602,7 @@ ds_put_vformat (struct string *st, const char *format, va_list args_) if (needed >= avail) { va_copy (args, args_); - vsprintf (ds_put_uninit (st, needed), format, args); + vsnprintf (ds_put_uninit (st, needed), needed + 1, format, args); va_end (args); } else @@ -1500,6 +1622,22 @@ ds_put_vformat (struct string *st, const char *format, va_list args_) } } +/* Formats FORMAT as a printf string, as if in the C locale, + and appends the result to ST. */ +void +ds_put_c_vformat (struct string *st, const char *format, va_list args) +{ + char buf[128]; + size_t len = sizeof buf; + char *output = c_vasnprintf (buf, &len, format, args); + if (output) + { + ds_put_cstr (st, output); + if (output != buf) + free (output); + } +} + /* Appends byte CH to ST. */ void ds_put_byte (struct string *st, int ch) @@ -1514,6 +1652,13 @@ ds_put_byte_multiple (struct string *st, int ch, size_t cnt) memset (ds_put_uninit (st, cnt), ch, cnt); } +/* Appends Unicode code point UC to ST in UTF-8 encoding. */ +void +ds_put_unichar (struct string *st, ucs4_t uc) +{ + ds_extend (st, ds_length (st) + 6); + st->ss.length += u8_uctomb (CHAR_CAST (uint8_t *, ds_end (st)), uc, 6); +} /* If relocation has been enabled, replace ST, with its relocated version */ @@ -1523,7 +1668,7 @@ ds_relocate (struct string *st) const char *orig = ds_cstr (st); const char *rel = relocate (orig); - if ( orig != rel) + if (orig != rel) { ds_clear (st); ds_put_cstr (st, rel);