trunc \
unicase/u8-casecmp \
unicase/u8-casefold \
+ unicase/u8-tolower \
+ unicase/u8-toupper \
unictype/ctype-print \
unictype/property-id-continue \
unictype/property-id-start \
if (record == NULL)
{
- /* Convert variable names to lowercase. */
+ /* There are no long variable names. Use the short variable names,
+ converted to lowercase, as the long variable names. */
size_t i;
for (i = 0; i < dict_get_var_cnt (dict); i++)
struct variable *var = dict_get_var (dict, i);
char *new_name;
- new_name = xstrdup (var_get_name (var));
- str_lowercase (new_name);
-
+ new_name = utf8_to_lower (var_get_name (var));
rename_var_and_save_short_names (dict, var, new_name);
-
free (new_name);
}
for (j = 0; j < mrset->n_vars; j++)
{
const char *short_name_utf8 = var_get_short_name (mrset->vars[j], 0);
+ char *lower_name_utf8 = utf8_to_lower (short_name_utf8);
char *short_name = recode_string (encoding, "UTF-8",
- short_name_utf8, -1);
- str_lowercase (short_name);
+ lower_name_utf8, -1);
ds_put_format (&s, " %s", short_name);
free (short_name);
+ free (lower_name_utf8);
}
ds_put_byte (&s, '\n');
}
for (i = old_cnt; i < var->short_name_cnt; i++)
var->short_names[i] = NULL;
}
- var->short_names[idx] = xstrdup (short_name);
- str_uppercase (var->short_names[idx]);
+ var->short_names[idx] = utf8_to_upper (short_name);
}
dict_var_changed (var);
return result;
}
+
+/* Returns a copy of null-terminated string S after passing it through
+ case-mapping function F (the callers visible here pass u8_toupper or
+ u8_tolower).
+
+ If F returns NULL with errno set to ENOMEM, calls xalloc_die(). If F
+ returns NULL for any other reason, falls back to an unconverted copy of S
+ made with xstrdup(). The visible callers release the result with free(). */
+static char *
+utf8_casemap (const char *s,
+ uint8_t *(*f) (const uint8_t *, size_t, const char *, uninorm_t,
+ uint8_t *, size_t *))
+{
+ char *result;
+ size_t size;
+
+ /* The length handed to F is strlen (s) + 1, so it covers S's null
+ terminator; presumably that makes F's output null-terminated as well
+ (confirm against the libunistring documentation). A null result buffer
+ is passed, so F is expected to allocate the output itself. */
+ result = CHAR_CAST (char *,
+ f (CHAR_CAST (const uint8_t *, s), strlen (s) + 1,
+ NULL, NULL, NULL, &size));
+ if (result == NULL)
+ {
+ if (errno == ENOMEM)
+ xalloc_die ();
+
+ /* Any other failure: hand back S unchanged rather than failing. */
+ result = xstrdup (s);
+ }
+ return result;
+}
+
+/* Returns a copy of S mapped through u8_toupper by utf8_casemap().
+ The returned string is a separate allocation; presumably the caller is
+ responsible for freeing it with free(). */
+char *
+utf8_to_upper (const char *s)
+{
+ return utf8_casemap (s, u8_toupper);
+}
+
+/* Returns a copy of S mapped through u8_tolower by utf8_casemap().
+ The returned string is a separate allocation; the callers visible in this
+ change release it with free(). */
+char *
+utf8_to_lower (const char *s)
+{
+ return utf8_casemap (s, u8_tolower);
+}
\f
bool
get_encoding_info (struct encoding_info *e, const char *name)
unsigned int utf8_hash_case_string (const char *, unsigned int basis);
int utf8_strcasecmp (const char *, const char *);
int utf8_strncasecmp (const char *, size_t, const char *, size_t);
+char *utf8_to_upper (const char *);
+char *utf8_to_lower (const char *);
\f
/* Information about character encodings. */
#include "libpspp/message.h"
#include "libpspp/pool.h"
+#include "gl/c-ctype.h"
#include "gl/c-vasnprintf.h"
#include "gl/relocatable.h"
#include "gl/minmax.h"
dst[dst_len] = '\0';
}
-/* Converts each byte in S to uppercase. */
+/* Converts each byte in S to uppercase, in place.
+
+ This is suitable only for ASCII strings. Use utf8_to_upper() for UTF-8
+ strings. */
void
str_uppercase (char *s)
{
for (; *s != '\0'; s++)
- *s = toupper ((unsigned char) *s);
+ *s = c_toupper ((unsigned char) *s);
}
-/* Converts each byte in S to lowercase. */
+/* Converts each byte in S to lowercase, in place.
+
+ This is suitable only for ASCII strings. Use utf8_to_lower() for UTF-8
+ strings. */
void
str_lowercase (char *s)
{
for (; *s != '\0'; s++)
- *s = tolower ((unsigned char) *s);
+ *s = c_tolower ((unsigned char) *s);
}
/* Converts NUMBER into a string in 26-adic notation in BUFFER,
,Missing Values: 1 THRU 3; 5,,
num9,Format: F8.0,,9
,Missing Values: 1 THRU HIGHEST; -5,,
-numÃ\80Ã\88Ã\8cÃ\91Ã\92,Format: F8.0,,10
+numà èìñò,Format: F8.0,,10
,Missing Values: LOWEST THRU 1; 5,,
str1,Format: A4,,11
str2,String variable 2's label,,12
,Format: A25,,
Table: Data List
-num1,num2,num3,num4,num5,num6,num7,num8,num9,numÃ\80Ã\88Ã\8cÃ\91Ã\92,str1,str2,str3,str4,str5,str6,str7,str8
+num1,num2,num3,num4,num5,num6,num7,num8,num9,numà èìñò,str1,str2,str3,str4,str5,str6,str7,str8
1,2,3,4,5,6,7,8,9,10,abcd,efgh,ijkl,mnop,qrst,uvwx,yzABCDEFGHI,JKLMNOPQRSTUVWXYZ01234567
])
done
AT_CHECK([pspp -o pspp.csv sys-file.sps])
AT_CHECK([grep -v Measure pspp.csv | grep -v Display], [0], [dnl
Variable,Description,,Position
-sÃ\89q256,Format: A256,,1
+séq256,Format: A256,,1
str600,Format: A600,,2
Table: Data List
-sÃ\89q256,str600
+séq256,str600
abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@a,abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyz
])
done