/* PSPP - a program for statistical analysis.
- Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+ Copyright (C) 2009, 2010, 2012 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <config.h>
-#include <libpspp/stringi-set.h>
+#include "libpspp/stringi-set.h"
#include <stdlib.h>
#include <string.h>
-#include <libpspp/hash-functions.h>
+#include "libpspp/cast.h"
+#include "libpspp/hash-functions.h"
+#include "libpspp/i18n.h"
#include "gl/xalloc.h"
static struct stringi_set_node *stringi_set_find_node__ (
const struct stringi_set *, const char *, unsigned int hash);
+static struct stringi_set_node *stringi_set_find_node_len__ (
+ const struct stringi_set *, const char *, size_t length, unsigned int hash);
static void stringi_set_insert__ (struct stringi_set *, char *,
unsigned int hash);
static bool stringi_set_delete__ (struct stringi_set *, const char *,
bool
stringi_set_contains (const struct stringi_set *set, const char *s)
{
- return stringi_set_find_node (set, s) != NULL;
+ return stringi_set_contains_len (set, s, strlen (s)); /* S must be null-terminated. */
+}
+
+/* Returns true if SET contains S with the given LENGTH (or a similar string
+ with different case), false otherwise.
+
+ The lookup is delegated to stringi_set_find_node_len(), which receives S
+ together with LENGTH; the result is true exactly when that function
+ returns a non-null node. */
+bool
+stringi_set_contains_len (const struct stringi_set *set, const char *s,
+ size_t length)
+{
+ return stringi_set_find_node_len (set, s, length) != NULL;
}
/* Returns the node in SET that contains S, or a null pointer if SET does not
struct stringi_set_node *
stringi_set_find_node (const struct stringi_set *set, const char *s)
{
- return stringi_set_find_node__ (set, s, hash_case_string (s, 0));
+ return stringi_set_find_node_len (set, s, strlen (s)); /* S must be null-terminated. */
+}
+
+/* Returns the node in SET that contains S with the given LENGTH, or a null
+ pointer if SET does not contain S.
+
+ S and LENGTH are hashed with utf8_hash_case_bytes() (basis 0) and the
+ resulting hash is passed, along with S and LENGTH, to the internal
+ lookup function stringi_set_find_node_len__(). */
+struct stringi_set_node *
+stringi_set_find_node_len (const struct stringi_set *set, const char *s,
+ size_t length)
+{
+ return stringi_set_find_node_len__ (set, s, length,
+ utf8_hash_case_bytes (s, length, 0));
}
/* Inserts a copy of S into SET. Returns true if successful, false if SET
bool
stringi_set_insert (struct stringi_set *set, const char *s)
{
- unsigned int hash = hash_case_string (s, 0);
+ unsigned int hash = utf8_hash_case_string (s, 0);
if (!stringi_set_find_node__ (set, s, hash))
{
stringi_set_insert__ (set, xstrdup (s), hash);
bool
stringi_set_insert_nocopy (struct stringi_set *set, char *s)
{
- unsigned int hash = hash_case_string (s, 0);
+ unsigned int hash = utf8_hash_case_string (s, 0);
if (!stringi_set_find_node__ (set, s, hash))
{
stringi_set_insert__ (set, s, hash);
bool
stringi_set_delete (struct stringi_set *set, const char *s)
{
- return stringi_set_delete__ (set, s, hash_case_string (s, 0));
+ return stringi_set_delete__ (set, s, utf8_hash_case_string (s, 0)); /* Same hash function as the insert functions. */
}
/* Deletes NODE from SET, and frees NODE and its string. */
stringi_set_delete__ (a, node->string, node->hmap_node.hash);
}
}
+
+/* Allocates and returns an array that points to each of the strings in SET.
+ The caller must not free or modify any of the strings. Removing a string
+ from SET invalidates the corresponding element of the returned array. The
+ caller is responsible for freeing the returned array itself (with
+ free()).
+
+ The array is allocated with room for exactly stringi_set_count (SET)
+ elements; no terminating null pointer is appended, so the caller must
+ obtain the element count separately.
+
+ The returned array is in the same order as observed by stringi_set_first()
+ and stringi_set_next(), that is, no particular order. */
+char **
+stringi_set_get_array (const struct stringi_set *set)
+{
+ const struct stringi_set_node *node;
+ const char *s;
+ char **array;
+ size_t i;
+
+ array = xnmalloc (stringi_set_count (set), sizeof *array);
+
+ i = 0;
+ STRINGI_SET_FOR_EACH (s, node, set)
+ array[i++] = CONST_CAST (char *, s); /* Stores the pointer only; the string is not copied. */
+
+ return array;
+}
+
+static int
+compare_strings (const void *a_, const void *b_)
+{
+ const char *const *a = a_; /* Each argument points to an array element, */
+ const char *const *b = b_; /* which is itself a pointer to a string. */
+ return utf8_strcasecmp (*a, *b); /* Result is used as the qsort() ordering. */
+}
+
+/* Allocates and returns an array that points to each of the strings in SET.
+ The caller must not free or modify any of the strings. Removing a string
+ from SET invalidates the corresponding element of the returned array. The
+ caller is responsible for freeing the returned array itself (with
+ free()).
+
+ The array is obtained from stringi_set_get_array() and then sorted in place
+ with qsort() over stringi_set_count (SET) elements, using
+ compare_strings() as the comparison function.
+
+ The returned array is ordered according to utf8_strcasecmp(). */
+char **
+stringi_set_get_sorted_array (const struct stringi_set *set)
+{
+ char **array = stringi_set_get_array (set);
+ qsort (array, stringi_set_count (set), sizeof *array, compare_strings);
+ return array;
+}
\f
/* Internal functions. */
static struct stringi_set_node *
stringi_set_find_node__ (const struct stringi_set *set, const char *s,
- unsigned int hash)
+ unsigned int hash)
+{
+ return stringi_set_find_node_len__ (set, s, strlen (s), hash); /* S must be null-terminated; HASH is supplied by the caller. */
+}
+
+static struct stringi_set_node *
+stringi_set_find_node_len__ (const struct stringi_set *set, const char *s,
+ size_t length, unsigned int hash)
{
struct stringi_set_node *node;
HMAP_FOR_EACH_WITH_HASH (node, struct stringi_set_node, hmap_node,
hash, &set->hmap)
- if (!strcasecmp (s, node->string))
+ if (!utf8_strncasecmp (s, length, node->string, strlen (node->string)))
return node;
return NULL;