From 655fa71fb616c6016dff390b7760fb9719c74f43 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 5 Dec 2021 21:20:42 -0800 Subject: [PATCH] string-array: New functions for comparing string arrays. --- src/libpspp/i18n.c | 18 ++++++++++++++++-- src/libpspp/i18n.h | 4 ++++ src/libpspp/string-array.c | 31 +++++++++++++++++++++++++++++++ src/libpspp/string-array.h | 5 +++++ 4 files changed, 56 insertions(+), 2 deletions(-) diff --git a/src/libpspp/i18n.c b/src/libpspp/i18n.c index 4e04d32c25..3faadcbb87 100644 --- a/src/libpspp/i18n.c +++ b/src/libpspp/i18n.c @@ -880,7 +880,15 @@ utf8_hash_case_bytes (const char *s, size_t n, unsigned int basis) unsigned int utf8_hash_case_string (const char *s, unsigned int basis) { - return utf8_hash_case_bytes (s, strlen (s), basis); + return utf8_hash_case_substring (ss_cstr (s), basis); +} + +/* Returns a hash value for UTF-8 string S, with lowercase and uppercase + letters treated as equal, starting from BASIS. */ +unsigned int +utf8_hash_case_substring (struct substring s, unsigned int basis) +{ + return utf8_hash_case_bytes (s.string, s.length, basis); } /* Compares UTF-8 strings A and B case-insensitively. 
@@ -888,7 +896,13 @@ utf8_hash_case_string (const char *s, unsigned int basis) int utf8_strcasecmp (const char *a, const char *b) { - return utf8_strncasecmp (a, strlen (a), b, strlen (b)); + return utf8_sscasecmp (ss_cstr (a), ss_cstr (b)); +} + +int +utf8_sscasecmp (struct substring a, struct substring b) +{ + return utf8_strncasecmp (a.string, a.length, b.string, b.length); } /* Compares UTF-8 strings A (with length AN) and B (with length BN) diff --git a/src/libpspp/i18n.h b/src/libpspp/i18n.h index 3ae1e9e0b1..d41ef1ef2c 100644 --- a/src/libpspp/i18n.h +++ b/src/libpspp/i18n.h @@ -18,6 +18,7 @@ #define I18N_H #include "libpspp/compiler.h" +#include "libpspp/str.h" #include #include @@ -77,7 +78,10 @@ const char *uc_name (ucs4_t uc, char buffer[16]); unsigned int utf8_hash_case_bytes (const char *, size_t n, unsigned int basis) WARN_UNUSED_RESULT; unsigned int utf8_hash_case_string (const char *, unsigned int basis) WARN_UNUSED_RESULT; +unsigned int utf8_hash_case_substring (struct substring, unsigned int basis) + WARN_UNUSED_RESULT; int utf8_strcasecmp (const char *, const char *); +int utf8_sscasecmp (struct substring, struct substring); int utf8_strncasecmp (const char *, size_t, const char *, size_t); int utf8_strverscasecmp (const char *, const char *); char *utf8_to_upper (const char *); diff --git a/src/libpspp/string-array.c b/src/libpspp/string-array.c index d4badf4638..d162593b2c 100644 --- a/src/libpspp/string-array.c +++ b/src/libpspp/string-array.c @@ -23,6 +23,7 @@ #include #include "libpspp/array.h" +#include "libpspp/i18n.h" #include "libpspp/str.h" #include "gl/xalloc.h" @@ -253,6 +254,36 @@ string_array_uniq (struct string_array *sa) sa->n = n; } +/* Returns true if A and B contain the same strings in the same order, + false otherwise. 
*/ +bool +string_array_equal (const struct string_array *a, + const struct string_array *b) +{ + if (a->n != b->n) + return false; + + for (size_t i = 0; i < a->n; i++) + if (strcmp (a->strings[i], b->strings[i])) + return false; + return true; +} + +/* Returns true if A and B contain the same strings in the same order, + comparing UTF-8 case-insensitively, false otherwise. */ +bool +string_array_equal_case (const struct string_array *a, + const struct string_array *b) +{ + if (a->n != b->n) + return false; + + for (size_t i = 0; i < a->n; i++) + if (utf8_strcasecmp (a->strings[i], b->strings[i])) + return false; + return true; +} + /* Divides STRING into tokens at DELIMITERS and adds each token to SA. */ void string_array_parse (struct string_array *sa, struct substring string, diff --git a/src/libpspp/string-array.h b/src/libpspp/string-array.h index 353ddf5018..b4a6989d6d 100644 --- a/src/libpspp/string-array.h +++ b/src/libpspp/string-array.h @@ -63,6 +63,11 @@ void string_array_shrink (struct string_array *); void string_array_sort (struct string_array *); void string_array_uniq (struct string_array *); +bool string_array_equal (const struct string_array *, + const struct string_array *); +bool string_array_equal_case (const struct string_array *, + const struct string_array *); + void string_array_parse (struct string_array *, struct substring string, struct substring delimiters); char *string_array_join (const struct string_array *, const char *separator); -- 2.30.2