X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fstr.c;h=b40cb4b4ab8f07ac662965e70ae88adebfea90d0;hb=8539b9672ca634e0bedf7a531709e845a6b451d6;hp=44e4a1da28ee7183a98eb1e819a755bc55759c0f;hpb=14c065be90af77f2661d4bf4d35f3ec943fd99bd;p=pspp diff --git a/src/libpspp/str.c b/src/libpspp/str.c index 44e4a1da28..b40cb4b4ab 100644 --- a/src/libpspp/str.c +++ b/src/libpspp/str.c @@ -1,5 +1,6 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2014, + 2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,9 +26,12 @@ #include #include "libpspp/cast.h" +#include "libpspp/i18n.h" #include "libpspp/message.h" #include "libpspp/pool.h" +#include "gl/c-ctype.h" +#include "gl/c-vasnprintf.h" #include "gl/relocatable.h" #include "gl/minmax.h" #include "gl/xalloc.h" @@ -187,7 +191,7 @@ buf_copy_rpad (char *dst, size_t dst_size, void str_copy_rpad (char *dst, size_t dst_size, const char *src) { - if (dst_size > 0) + if (dst_size > 0) { size_t src_len = strlen (src); if (src_len < dst_size - 1) @@ -232,26 +236,33 @@ str_copy_buf_trunc (char *dst, size_t dst_size, dst[dst_len] = '\0'; } -/* Converts each byte in S to uppercase. */ +/* Converts each byte in S to uppercase. + + This is suitable only for ASCII strings. Use utf8_to_upper() for UTF-8 + strings.*/ void str_uppercase (char *s) { for (; *s != '\0'; s++) - *s = toupper ((unsigned char) *s); + *s = c_toupper ((unsigned char) *s); } -/* Converts each byte in S to lowercase. */ +/* Converts each byte in S to lowercase. + + This is suitable only for ASCII strings. Use utf8_to_lower() for UTF-8 + strings.*/ void str_lowercase (char *s) { for (; *s != '\0'; s++) - *s = tolower ((unsigned char) *s); + *s = c_tolower ((unsigned char) *s); } /* Converts NUMBER into a string in 26-adic notation in BUFFER, - which has room for SIZE bytes. Returns true if successful, - false if NUMBER, plus a trailing null, is too large to fit in - the available space. + which has room for SIZE bytes. Uses uppercase if UPPERCASE is + true, otherwise lowercase, Returns true if successful, false + if NUMBER, plus a trailing null, is too large to fit in the + available space. 26-adic notation is "spreadsheet column numbering": 1 = A, 2 = B, 3 = C, ... 26 = Z, 27 = AA, 28 = AB, 29 = AC, ... @@ -263,15 +274,18 @@ str_lowercase (char *s) For more information, see http://en.wikipedia.org/wiki/Bijective_numeration. */ bool -str_format_26adic (unsigned long int number, char buffer[], size_t size) +str_format_26adic (unsigned long int number, bool uppercase, + char buffer[], size_t size) { + const char *alphabet + = uppercase ? "ABCDEFGHIJKLMNOPQRSTUVWXYZ" : "abcdefghijklmnopqrstuvwxyz"; size_t length = 0; while (number-- > 0) { if (length >= size) goto overflow; - buffer[length++] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[number % 26]; + buffer[length++] = alphabet[number % 26]; number /= 26; } @@ -288,6 +302,39 @@ overflow: return false; } +/* Copies IN to buffer OUT with size OUT_SIZE, appending a null terminator. If + IN is too long for OUT, or if IN contains a new-line, replaces the tail with + "...". + + OUT_SIZE must be at least 16. */ +void +str_ellipsize (struct substring in, char *out, size_t out_size) +{ + assert (out_size >= 16); + + size_t out_maxlen = out_size - 1; + if (in.length > out_maxlen - 3) + out_maxlen -= 3; + + size_t out_len = 0; + while (out_len < in.length + && in.string[out_len] != '\n' + && in.string[out_len] != '\0' + && (in.string[out_len] != '\r' + || out_len + 1 >= in.length + || in.string[out_len + 1] != '\n')) + { + int mblen = u8_mblen (CHAR_CAST (const uint8_t *, in.string + out_len), + in.length - out_len); + if (mblen < 0 || out_len + mblen > out_maxlen) + break; + out_len += mblen; + } + + memcpy (out, in.string, out_len); + strcpy (&out[out_len], out_len < in.length ? "..." : ""); +} + /* Sets the SIZE bytes starting at BLOCK to C, and returns the byte following BLOCK. */ void * @@ -379,6 +426,15 @@ ss_dealloc (struct substring *ss) free (ss->string); } +/* Exchanges the contents of A and B. */ +void +ss_swap (struct substring *a, struct substring *b) +{ + struct substring tmp = *a; + *a = *b; + *b = tmp; +} + /* Truncates SS to at most CNT bytes in length. */ void ss_truncate (struct substring *ss, size_t cnt) @@ -498,7 +554,7 @@ ss_tokenize (struct substring ss, struct substring delimiters, ss_get_bytes (&ss, ss_cspan (ss, delimiters), token); found_token = ss_length (*token) > 0; - *save_idx += ss_length (*token) + found_token; + *save_idx += ss_length (*token) + (found_token?1:0); return found_token; } @@ -679,6 +735,14 @@ ss_last (struct substring ss) return ss.length > 0 ? (unsigned char) ss.string[ss.length - 1] : EOF; } +/* Returns true if SS starts with PREFIX, false otherwise. */ +bool +ss_starts_with (struct substring ss, struct substring prefix) +{ + return (ss.length >= prefix.length + && !memcmp (ss.string, prefix.string, prefix.length)); +} + /* Returns true if SS ends with SUFFIX, false otherwise. */ bool ss_ends_with (struct substring ss, struct substring suffix) @@ -717,10 +781,20 @@ ss_cspan (struct substring ss, struct substring stop_set) size_t ss_find_byte (struct substring ss, char c) { - const char *p = memchr (ss.string, c, ss.length); + const char *p = memchr (ss.string, (int) c, ss.length); return p != NULL ? p - ss.string : SIZE_MAX; } +/* Returns the offset in HAYSTACK of the first instance of NEEDLE, + or SIZE_MAX if NEEDLE does not occur in HAYSTACK. */ +size_t +ss_find_substring (struct substring haystack, struct substring needle) +{ + const char *p = memmem (haystack.string, haystack.length, + needle.string, needle.length); + return p != NULL ? p - haystack.string : SIZE_MAX; +} + /* Compares A and B and returns a strcmp()-type comparison result. */ int @@ -855,6 +929,22 @@ ss_at_mblen (struct substring s, size_t ofs) else return 0; } + +size_t +ss_utf8_count_columns (struct substring s) +{ + return utf8_count_columns (s.string, s.length); +} + +/* Returns a substring of S starting at 0-based display column START and + running for N display columns. */ +struct substring +ss_utf8_columns (struct substring s, size_t start, size_t n) +{ + ss_advance (&s, utf8_columns_to_bytes (s.string, s.length, start)); + s.length = utf8_columns_to_bytes (s.string, s.length, n); + return s; +} /* Initializes ST as an empty string. */ void @@ -1450,7 +1540,8 @@ ds_put_cstr (struct string *st, const char *s) void ds_put_substring (struct string *st, struct substring ss) { - memcpy (ds_put_uninit (st, ss_length (ss)), ss_data (ss), ss_length (ss)); + if (ss.length) + memcpy (ds_put_uninit (st, ss_length (ss)), ss_data (ss), ss_length (ss)); } /* Returns ds_end(ST) and THEN increases the length by INCR. */ @@ -1480,6 +1571,9 @@ ds_splice_uninit (struct string *st, { if (new_len > old_len) ds_extend (st, ds_length (st) + (new_len - old_len)); + + assert (ds_length (st) >= ofs + old_len); + memmove (ds_data (st) + (ofs + new_len), ds_data (st) + (ofs + old_len), ds_length (st) - (ofs + old_len)); @@ -1499,6 +1593,17 @@ ds_put_format (struct string *st, const char *format, ...) va_end (args); } +/* Formats FORMAT as a printf string as if in the C locale and appends the result to ST. */ +void +ds_put_c_format (struct string *st, const char *format, ...) +{ + va_list args; + + va_start (args, format); + ds_put_c_vformat (st, format, args); + va_end (args); +} + /* Formats FORMAT as a printf string and appends the result to ST. */ void ds_put_vformat (struct string *st, const char *format, va_list args_) @@ -1514,7 +1619,7 @@ ds_put_vformat (struct string *st, const char *format, va_list args_) if (needed >= avail) { va_copy (args, args_); - vsprintf (ds_put_uninit (st, needed), format, args); + vsnprintf (ds_put_uninit (st, needed), needed + 1, format, args); va_end (args); } else @@ -1534,6 +1639,22 @@ ds_put_vformat (struct string *st, const char *format, va_list args_) } } +/* Formats FORMAT as a printf string, as if in the C locale, + and appends the result to ST. */ +void +ds_put_c_vformat (struct string *st, const char *format, va_list args) +{ + char buf[128]; + size_t len = sizeof buf; + char *output = c_vasnprintf (buf, &len, format, args); + if (output) + { + ds_put_cstr (st, output); + if (output != buf) + free (output); + } +} + /* Appends byte CH to ST. */ void ds_put_byte (struct string *st, int ch) @@ -1564,7 +1685,7 @@ ds_relocate (struct string *st) const char *orig = ds_cstr (st); const char *rel = relocate (orig); - if ( orig != rel) + if (orig != rel) { ds_clear (st); ds_put_cstr (st, rel);