X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fstr.c;h=3fa2fbe9a75fbe48d0ed75dc48a1b32916b0ef88;hb=6ee48467ac46bb8553f6fb3782623559affb1b9c;hp=7e722c17e6b92602798725e3bdd618327a46691f;hpb=fe8dc2171009e90d2335f159d05f7e6660e24780;p=pspp diff --git a/src/libpspp/str.c b/src/libpspp/str.c index 7e722c17e6..3fa2fbe9a7 100644 --- a/src/libpspp/str.c +++ b/src/libpspp/str.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,6 +28,8 @@ #include "libpspp/message.h" #include "libpspp/pool.h" +#include "gl/c-ctype.h" +#include "gl/c-vasnprintf.h" #include "gl/relocatable.h" #include "gl/minmax.h" #include "gl/xalloc.h" @@ -232,26 +234,33 @@ str_copy_buf_trunc (char *dst, size_t dst_size, dst[dst_len] = '\0'; } -/* Converts each byte in S to uppercase. */ +/* Converts each byte in S to uppercase. + + This is suitable only for ASCII strings. Use utf8_to_upper() for UTF-8 + strings.*/ void str_uppercase (char *s) { for (; *s != '\0'; s++) - *s = toupper ((unsigned char) *s); + *s = c_toupper ((unsigned char) *s); } -/* Converts each byte in S to lowercase. */ +/* Converts each byte in S to lowercase. + + This is suitable only for ASCII strings. Use utf8_to_lower() for UTF-8 + strings.*/ void str_lowercase (char *s) { for (; *s != '\0'; s++) - *s = tolower ((unsigned char) *s); + *s = c_tolower ((unsigned char) *s); } /* Converts NUMBER into a string in 26-adic notation in BUFFER, - which has room for SIZE bytes. Returns true if successful, - false if NUMBER, plus a trailing null, is too large to fit in - the available space. + which has room for SIZE bytes. 
Uses uppercase if UPPERCASE is + true, otherwise lowercase. Returns true if successful, false + if NUMBER, plus a trailing null, is too large to fit in the + available space. 26-adic notation is "spreadsheet column numbering": 1 = A, 2 = B, 3 = C, ... 26 = Z, 27 = AA, 28 = AB, 29 = AC, ... @@ -263,15 +272,18 @@ str_lowercase (char *s) For more information, see http://en.wikipedia.org/wiki/Bijective_numeration. */ bool -str_format_26adic (unsigned long int number, char buffer[], size_t size) +str_format_26adic (unsigned long int number, bool uppercase, + char buffer[], size_t size) { + const char *alphabet + = uppercase ? "ABCDEFGHIJKLMNOPQRSTUVWXYZ" : "abcdefghijklmnopqrstuvwxyz"; size_t length = 0; while (number-- > 0) { if (length >= size) goto overflow; - buffer[length++] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[number % 26]; + buffer[length++] = alphabet[number % 26]; number /= 26; } @@ -491,11 +503,15 @@ bool ss_tokenize (struct substring ss, struct substring delimiters, size_t *save_idx, struct substring *token) { + bool found_token; + ss_advance (&ss, *save_idx); *save_idx += ss_ltrim (&ss, delimiters); ss_get_bytes (&ss, ss_cspan (ss, delimiters), token); - *save_idx += ss_length (*token) + 1; - return ss_length (*token) > 0; + + found_token = ss_length (*token) > 0; + *save_idx += ss_length (*token) + found_token; + return found_token; } /* Removes the first CNT bytes from SS. */ @@ -1495,22 +1511,36 @@ ds_put_format (struct string *st, const char *format, ...) va_end (args); } -/* Formats FORMAT as a printf string and appends the result to ST. */ +/* Formats FORMAT as a printf string as if in the C locale and appends the result to ST. */ void -ds_put_vformat (struct string *st, const char *format, va_list args_) +ds_put_c_format (struct string *st, const char *format, ...) 
+{ + va_list args; + + va_start (args, format); + ds_put_c_vformat (st, format, args); + va_end (args); +} + + +/* Formats FORMAT as a printf string, using FMT_FUNC (an snprintf-like function), + and appends the result to ST. */ +static void +ds_put_vformat_int (struct string *st, const char *format, va_list args_, + int (*fmt_func) (char *, size_t, const char *, va_list)) { int avail, needed; va_list args; va_copy (args, args_); avail = st->ss.string != NULL ? st->capacity - st->ss.length + 1 : 0; - needed = vsnprintf (st->ss.string + st->ss.length, avail, format, args); + needed = fmt_func (st->ss.string + st->ss.length, avail, format, args); va_end (args); if (needed >= avail) { va_copy (args, args_); - vsprintf (ds_put_uninit (st, needed), format, args); + fmt_func (ds_put_uninit (st, needed), needed + 1, format, args); va_end (args); } else @@ -1523,13 +1553,36 @@ ds_put_vformat (struct string *st, const char *format, va_list args_) avail = st->capacity - st->ss.length + 1; va_copy (args, args_); - needed = vsnprintf (ds_end (st), avail, format, args); + needed = fmt_func (ds_end (st), avail, format, args); va_end (args); } st->ss.length += needed; } } + +static int +vasnwrapper (char *str, size_t size, const char *format, va_list ap) +{ + c_vasnprintf (str, &size, format, ap); + return size; +} + +/* Formats FORMAT as a printf string and appends the result to ST. */ +void +ds_put_vformat (struct string *st, const char *format, va_list args_) +{ + ds_put_vformat_int (st, format, args_, vsnprintf); +} + +/* Formats FORMAT as a printf string, as if in the C locale, + and appends the result to ST. */ +void +ds_put_c_vformat (struct string *st, const char *format, va_list args_) +{ + ds_put_vformat_int (st, format, args_, vasnwrapper); +} + /* Appends byte CH to ST. 
*/ void ds_put_byte (struct string *st, int ch) @@ -1544,6 +1597,13 @@ ds_put_byte_multiple (struct string *st, int ch, size_t cnt) memset (ds_put_uninit (st, cnt), ch, cnt); } +/* Appends Unicode code point UC to ST in UTF-8 encoding. */ +void +ds_put_unichar (struct string *st, ucs4_t uc) +{ + ds_extend (st, ds_length (st) + 6); + st->ss.length += u8_uctomb (CHAR_CAST (uint8_t *, ds_end (st)), uc, 6); +} /* If relocation has been enabled, replace ST, with its relocated version */