X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flibpspp%2Fstr.c;h=e34c150df1c32506522155482c3285433c021c99;hb=086322fd8c85a303ba6f552950d6f057f2867add;hp=cd2363a0026c88454a26fbca0145603692bb7329;hpb=d8493b3b0617cc447446a70b031a69079bc19002;p=pspp-builds.git diff --git a/src/libpspp/str.c b/src/libpspp/str.c index cd2363a0..e34c150d 100644 --- a/src/libpspp/str.c +++ b/src/libpspp/str.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,6 +22,7 @@ #include #include #include +#include #include "libpspp/cast.h" #include "libpspp/message.h" @@ -340,6 +341,12 @@ ss_alloc_uninit (struct substring *new, size_t cnt) new->length = cnt; } +void +ss_realloc (struct substring *ss, size_t size) +{ + ss->string = xrealloc (ss->string, size); +} + /* Makes a pool_alloc_unaligned()'d copy of the contents of OLD in POOL, and stores it in NEW. */ void @@ -409,7 +416,7 @@ ss_trim (struct substring *ss, struct substring trim_set) /* If the last byte in SS is C, removes it and returns true. Otherwise, returns false without changing the string. */ bool -ss_chomp (struct substring *ss, char c) +ss_chomp_byte (struct substring *ss, char c) { if (ss_last (*ss) == c) { @@ -740,6 +747,81 @@ ss_xstrdup (struct substring ss) s[ss.length] = '\0'; return s; } +/* UTF-8. */ + +/* Returns the character represented by the UTF-8 sequence at the start of S. + The return value is either a Unicode code point in the range 0 to 0x10ffff, + or UINT32_MAX if S is empty. */ +ucs4_t +ss_first_mb (struct substring s) +{ + return ss_at_mb (s, 0); +} + +/* Returns the number of bytes in the UTF-8 character at the beginning of S. + + The return value is 0 if S is empty, otherwise between 1 and 4. */ +int +ss_first_mblen (struct substring s) +{ + return ss_at_mblen (s, 0); +} + +/* Advances S past the UTF-8 character at its beginning. Returns the Unicode + code point that was skipped (in the range 0 to 0x10ffff), or UINT32_MAX if S + was not modified because it was initially empty. */ +ucs4_t +ss_get_mb (struct substring *s) +{ + if (s->length > 0) + { + ucs4_t uc; + int n; + + n = u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, s->string), s->length); + s->string += n; + s->length -= n; + return uc; + } + else + return UINT32_MAX; +} + +/* Returns the character represented by the UTF-8 sequence starting OFS bytes + into S. The return value is either a Unicode code point in the range 0 to + 0x10ffff, or UINT32_MAX if OFS is past the last byte in S. + + (Returns 0xfffd if OFS points into the middle, not the beginning, of a UTF-8 + sequence.) */ +ucs4_t +ss_at_mb (struct substring s, size_t ofs) +{ + if (s.length > ofs) + { + ucs4_t uc; + u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, s.string + ofs), + s.length - ofs); + return uc; + } + else + return UINT32_MAX; +} + +/* Returns the number of bytes represented by the UTF-8 sequence starting OFS + bytes into S. The return value is 0 if OFS is past the last byte in S, + otherwise between 1 and 4. */ +int +ss_at_mblen (struct substring s, size_t ofs) +{ + if (s.length > ofs) + { + ucs4_t uc; + return u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, s.string + ofs), + s.length - ofs); + } + else + return 0; +} /* Initializes ST as an empty string. */ void @@ -954,9 +1036,9 @@ ds_trim (struct string *st, struct substring trim_set) /* If the last byte in ST is C, removes it and returns true. Otherwise, returns false without modifying ST. */ bool -ds_chomp (struct string *st, char c) +ds_chomp_byte (struct string *st, char c) { - return ss_chomp (&st->ss, c); + return ss_chomp_byte (&st->ss, c); } /* Divides ST into tokens separated by any of the DELIMITERS. @@ -1276,7 +1358,7 @@ ds_read_config_line (struct string *st, int *line_number, FILE *stream) (*line_number)++; ds_rtrim (st, ss_cstr (CC_SPACES)); } - while (ds_chomp (st, '\\')); + while (ds_chomp_byte (st, '\\')); remove_comment (st); return true;