/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "data/dictionary.h"
+#include <stdint.h>
#include <stdlib.h>
#include <ctype.h>
+#include <unistr.h>
#include "data/attributes.h"
#include "data/case.h"
-#include "data/category.h"
#include "data/identifier.h"
+#include "data/mrset.h"
#include "data/settings.h"
#include "data/value-labels.h"
#include "data/vardict.h"
#include "libpspp/compiler.h"
#include "libpspp/hash-functions.h"
#include "libpspp/hmap.h"
+#include "libpspp/i18n.h"
#include "libpspp/message.h"
#include "libpspp/misc.h"
#include "libpspp/pool.h"
#include "libpspp/str.h"
+#include "libpspp/string-array.h"
#include "gl/intprops.h"
#include "gl/minmax.h"
#include "gl/xalloc.h"
+#include "gl/xmemdup0.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
struct variable *filter; /* FILTER variable. */
casenumber case_limit; /* Current case limit (N command). */
char *label; /* File label. */
- struct string documents; /* Documents, as a string. */
+ struct string_array documents; /* Documents. */
struct vector **vector; /* Vectors of variables. */
size_t vector_cnt; /* Number of vectors. */
struct attrset attributes; /* Custom attributes. */
+ struct mrset **mrsets; /* Multiple response sets. */
+ size_t n_mrsets; /* Number of multiple response sets. */
char *encoding; /* Character encoding of string data */
void *changed_data;
};
+static void dict_unset_split_var (struct dictionary *, struct variable *);
+static void dict_unset_mrset_var (struct dictionary *, struct variable *);
void
dict_set_encoding (struct dictionary *d, const char *enc)
return d->encoding ;
}
+/* Returns true if UTF-8 string ID is an acceptable identifier in DICT's
+ encoding, false otherwise. If ISSUE_ERROR is true, issues an explanatory
+ error message on failure. */
+bool
+dict_id_is_valid (const struct dictionary *dict, const char *id,
+ bool issue_error)
+{
+ return id_is_valid (id, dict->encoding, issue_error);
+}
void
dict_set_change_callback (struct dictionary *d,
dict_set_attributes (d, dict_get_attributes (s));
+ for (i = 0; i < s->n_mrsets; i++)
+ {
+ const struct mrset *old = s->mrsets[i];
+ struct mrset *new;
+ size_t j;
+
+ /* Clone old mrset, then replace vars from S by vars from D. */
+ new = mrset_clone (old);
+ for (j = 0; j < new->n_vars; j++)
+ new->vars[j] = dict_lookup_var_assert (d, var_get_name (new->vars[j]));
+
+ dict_add_mrset (d, new);
+ }
+
return d;
}
d->case_limit = 0;
free (d->label);
d->label = NULL;
- ds_destroy (&d->documents);
+ string_array_clear (&d->documents);
dict_clear_vectors (d);
attrset_clear (&d->attributes);
}
dict_clear (d);
hmap_destroy (&d->name_map);
attrset_destroy (&d->attributes);
+ dict_clear_mrsets (d);
+ free (d->encoding);
free (d);
}
}
var_clear_aux (v);
dict_unset_split_var (d, v);
+ dict_unset_mrset_var (d, v);
if (d->weight == v)
dict_set_weight (d, NULL);
&& lex_id_to_token (ss_cstr (name)) == T_ID);
}
-static bool
-make_hinted_name (const struct dictionary *dict, const char *hint,
- char name[VAR_NAME_LEN + 1])
+static char *
+make_hinted_name (const struct dictionary *dict, const char *hint)
{
+ size_t hint_len = strlen (hint);
bool dropped = false;
- char *cp;
-
- for (cp = name; *hint && cp < name + VAR_NAME_LEN; hint++)
+ char *root, *rp;
+ size_t ofs;
+ int mblen;
+
+ /* The allocation size here is OK: characters that are copied directly fit
+ OK, and characters that are not copied directly are replaced by a single
+ '_' byte. If u8_mbtouc() replaces bad input by 0xfffd, then that will get
+ replaced by '_' too. */
+ root = rp = xmalloc (hint_len + 1);
+ for (ofs = 0; ofs < hint_len; ofs += mblen)
{
- if (cp == name
- ? lex_is_id1 (*hint) && *hint != '$'
- : lex_is_idn (*hint))
+ ucs4_t uc;
+
+ mblen = u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, hint + ofs),
+ hint_len - ofs);
+ if (rp == root
+ ? lex_uc_is_id1 (uc) && uc != '$'
+ : lex_uc_is_idn (uc))
{
if (dropped)
{
- *cp++ = '_';
+ *rp++ = '_';
dropped = false;
}
- if (cp < name + VAR_NAME_LEN)
- *cp++ = *hint;
+ rp += u8_uctomb (CHAR_CAST (uint8_t *, rp), uc, 6);
}
- else if (cp > name)
+ else if (rp != root)
dropped = true;
}
- *cp = '\0';
+ *rp = '\0';
- if (name[0] != '\0')
+ if (root[0] != '\0')
{
- size_t len = strlen (name);
unsigned long int i;
- if (var_name_is_insertable (dict, name))
- return true;
+ if (var_name_is_insertable (dict, root))
+ return root;
for (i = 0; i < ULONG_MAX; i++)
{
char suffix[INT_BUFSIZE_BOUND (i) + 1];
- int ofs;
+ char *name;
suffix[0] = '_';
if (!str_format_26adic (i + 1, &suffix[1], sizeof suffix - 1))
NOT_REACHED ();
- ofs = MIN (VAR_NAME_LEN - strlen (suffix), len);
- strcpy (&name[ofs], suffix);
-
+ name = utf8_encoding_concat (root, suffix, dict->encoding, 64);
if (var_name_is_insertable (dict, name))
- return true;
+ {
+ free (root);
+ return name;
+ }
+ free (name);
}
}
- return false;
+ free (root);
+
+ return NULL;
}
-static bool
-make_numeric_name (const struct dictionary *dict, unsigned long int *num_start,
- char name[VAR_NAME_LEN + 1])
+static char *
+make_numeric_name (const struct dictionary *dict, unsigned long int *num_start)
{
unsigned long int number;
number < ULONG_MAX;
number++)
{
+ char name[3 + INT_STRLEN_BOUND (number) + 1];
+
sprintf (name, "VAR%03lu", number);
if (dict_lookup_var (dict, name) == NULL)
{
if (num_start != NULL)
*num_start = number + 1;
- return true;
+ return xstrdup (name);
}
}
- if (num_start != NULL)
- *num_start = ULONG_MAX;
- return false;
+ NOT_REACHED ();
}
-/* Attempts to devise a variable name unique within DICT.
- Returns true if successful, in which case the new variable
- name is stored into NAME. Returns false if all names that can
- be generated have already been taken. (Returning false is
- quite unlikely: at least ULONG_MAX unique names can be
- generated.)
+/* Devises and returns a variable name unique within DICT. The variable name
+ is owned by the caller, which must free it with free() when it is no longer
+ needed.
HINT, if it is non-null, is used as a suggestion that will be
modified for suitability as a variable name and for
value is used. If NUM_START is non-null, then its value is
used as the minimum numeric value to check, and it is updated
to the next value to be checked.
- */
-bool
+*/
+char *
dict_make_unique_var_name (const struct dictionary *dict, const char *hint,
- unsigned long int *num_start,
- char name[VAR_NAME_LEN + 1])
+ unsigned long int *num_start)
{
- return ((hint != NULL && make_hinted_name (dict, hint, name))
- || make_numeric_name (dict, num_start, name));
+ if (hint != NULL)
+ {
+ char *hinted_name = make_hinted_name (dict, hint);
+ if (hinted_name != NULL)
+ return hinted_name;
+ }
+ return make_numeric_name (dict, num_start);
}
/* Returns the weighting variable in dictionary D, or a null
/* Removes variable V, which must be in D, from D's set of split
variables. */
-void
+static void
dict_unset_split_var (struct dictionary *d, struct variable *v)
{
int orig_count;
}
/* Sets D's file label to LABEL, truncating it to a maximum of 60
- characters. */
+ characters.
+
+ Removes D's label if LABEL is null or the empty string. */
void
dict_set_label (struct dictionary *d, const char *label)
{
free (d->label);
- d->label = label != NULL ? xstrndup (label, 60) : NULL;
+ d->label = label != NULL && label[0] != '\0' ? xstrndup (label, 60) : NULL;
}
-/* Returns the documents for D, or a null pointer if D has no
- documents. If the return value is nonnull, then the string
- will be an exact multiple of DOC_LINE_LENGTH bytes in length,
- with each segment corresponding to one line. */
-const char *
+/* Returns the documents for D, as a UTF-8 encoded string_array. The
+ return value is always nonnull; if there are no documents then the
+ string_array is empty. */
+const struct string_array *
dict_get_documents (const struct dictionary *d)
{
- return ds_is_empty (&d->documents) ? NULL : ds_cstr (&d->documents);
+ return &d->documents;
}
-/* Sets the documents for D to DOCUMENTS, or removes D's
- documents if DOCUMENT is a null pointer. If DOCUMENTS is
- nonnull, then it should be an exact multiple of
- DOC_LINE_LENGTH bytes in length, with each segment
- corresponding to one line. */
+/* Replaces the documents for D by NEW_DOCS, a UTF-8 encoded string_array. */
void
-dict_set_documents (struct dictionary *d, const char *documents)
+dict_set_documents (struct dictionary *d, const struct string_array *new_docs)
{
- size_t remainder;
+ size_t i;
- ds_assign_cstr (&d->documents, documents != NULL ? documents : "");
+ dict_clear_documents (d);
- /* In case the caller didn't get it quite right, pad out the
- final line with spaces. */
- remainder = ds_length (&d->documents) % DOC_LINE_LENGTH;
- if (remainder != 0)
- ds_put_char_multiple (&d->documents, ' ', DOC_LINE_LENGTH - remainder);
+ for (i = 0; i < new_docs->n; i++)
+ dict_add_document_line (d, new_docs->strings[i], false);
+}
+
+/* Replaces the documents for D by UTF-8 encoded string NEW_DOCS, dividing it
+ into individual lines at new-line characters. Each line is truncated to at
+ most DOC_LINE_LENGTH bytes in D's encoding. */
+void
+dict_set_documents_string (struct dictionary *d, const char *new_docs)
+{
+ const char *s;
+
+ dict_clear_documents (d);
+ for (s = new_docs; *s != '\0'; )
+ {
+ size_t len = strcspn (s, "\n");
+ char *line = xmemdup0 (s, len);
+ dict_add_document_line (d, line, false);
+ free (line);
+
+ s += len;
+ if (*s == '\n')
+ s++;
+ }
}
/* Drops the documents from dictionary D. */
void
dict_clear_documents (struct dictionary *d)
{
- ds_clear (&d->documents);
+ string_array_clear (&d->documents);
}
-/* Appends LINE to the documents in D. LINE will be truncated or
- padded on the right with spaces to make it exactly
- DOC_LINE_LENGTH bytes long. */
-void
-dict_add_document_line (struct dictionary *d, const char *line)
+/* Appends the UTF-8 encoded LINE to the documents in D, truncated to at most
+ DOC_LINE_LENGTH bytes in D's encoding. If this causes some text to be lost,
+ and ISSUE_WARNING is true, then a warning will be issued. Returns true if
+ LINE was stored in full, false if it had to be truncated. */
+bool
+dict_add_document_line (struct dictionary *d, const char *line,
+ bool issue_warning)
{
- if (strlen (line) > DOC_LINE_LENGTH)
+ size_t trunc_len;
+ bool truncated;
+
+ trunc_len = utf8_encoding_trunc_len (line, d->encoding, DOC_LINE_LENGTH);
+ truncated = line[trunc_len] != '\0';
+ if (truncated && issue_warning)
{
/* Note to translators: "bytes" is correct, not characters */
msg (SW, _("Truncating document line to %d bytes."), DOC_LINE_LENGTH);
}
- buf_copy_str_rpad (ds_put_uninit (&d->documents, DOC_LINE_LENGTH),
- DOC_LINE_LENGTH, line, ' ');
+
+ string_array_append_nocopy (&d->documents, xmemdup0 (line, trunc_len));
+
+ return !truncated;
}
/* Returns the number of document lines in dictionary D. */
size_t
dict_get_document_line_cnt (const struct dictionary *d)
{
- return ds_length (&d->documents) / DOC_LINE_LENGTH;
+ return d->documents.n;
}
-/* Copies document line number IDX from dictionary D into
- LINE, trimming off any trailing white space. */
-void
-dict_get_document_line (const struct dictionary *d,
- size_t idx, struct string *line)
+/* Returns document line number IDX in dictionary D. The caller must not
+ modify or free the returned string. */
+const char *
+dict_get_document_line (const struct dictionary *d, size_t idx)
{
- assert (idx < dict_get_document_line_cnt (d));
- ds_assign_substring (line, ds_substr (&d->documents, idx * DOC_LINE_LENGTH,
- DOC_LINE_LENGTH));
- ds_rtrim (line, ss_cstr (CC_SPACES));
+ assert (idx < d->documents.n);
+ return d->documents.strings[idx];
}
/* Creates in D a vector named NAME that contains the CNT
d->vector = NULL;
d->vector_cnt = 0;
}
+\f
+/* Multiple response sets. */
+
+/* Returns the multiple response set in DICT with index IDX, which must be
+ between 0 and the count returned by dict_get_n_mrsets(), exclusive. */
+const struct mrset *
+dict_get_mrset (const struct dictionary *dict, size_t idx)
+{
+ assert (idx < dict->n_mrsets);
+ return dict->mrsets[idx];
+}
+
+/* Returns the number of multiple response sets in DICT. */
+size_t
+dict_get_n_mrsets (const struct dictionary *dict)
+{
+ return dict->n_mrsets;
+}
+/* Looks for a multiple response set named NAME in DICT. If it finds one,
+ returns its index; otherwise, returns SIZE_MAX. */
+static size_t
+dict_lookup_mrset_idx (const struct dictionary *dict, const char *name)
+{
+ size_t i;
+
+ for (i = 0; i < dict->n_mrsets; i++)
+ if (!strcasecmp (name, dict->mrsets[i]->name))
+ return i;
+
+ return SIZE_MAX;
+}
+
+/* Looks for a multiple response set named NAME in DICT. If it finds one,
+ returns it; otherwise, returns NULL. */
+const struct mrset *
+dict_lookup_mrset (const struct dictionary *dict, const char *name)
+{
+ size_t idx = dict_lookup_mrset_idx (dict, name);
+ return idx != SIZE_MAX ? dict->mrsets[idx] : NULL;
+}
+
+/* Adds MRSET to DICT, replacing any existing set with the same name. Returns
+ true if MRSET was added as a new set, false if it replaced an existing set.
+
+ Ownership of MRSET is transferred to DICT. */
+bool
+dict_add_mrset (struct dictionary *dict, struct mrset *mrset)
+{
+ size_t idx;
+
+ assert (mrset_ok (mrset, dict));
+
+ idx = dict_lookup_mrset_idx (dict, mrset->name);
+ if (idx == SIZE_MAX)
+ {
+ dict->mrsets = xrealloc (dict->mrsets,
+ (dict->n_mrsets + 1) * sizeof *dict->mrsets);
+ dict->mrsets[dict->n_mrsets++] = mrset;
+ return true;
+ }
+ else
+ {
+ mrset_destroy (dict->mrsets[idx]);
+ dict->mrsets[idx] = mrset;
+ return false;
+ }
+}
+
+/* Looks for a multiple response set in DICT named NAME. If found, removes it
+ from DICT and returns true. If none is found, returns false without
+ modifying DICT.
+
+ Deleting one multiple response set causes the indexes of other sets within
+ DICT to change. */
+bool
+dict_delete_mrset (struct dictionary *dict, const char *name)
+{
+ size_t idx = dict_lookup_mrset_idx (dict, name);
+ if (idx != SIZE_MAX)
+ {
+ mrset_destroy (dict->mrsets[idx]);
+ dict->mrsets[idx] = dict->mrsets[--dict->n_mrsets];
+ return true;
+ }
+ else
+ return false;
+}
+
+/* Deletes all multiple response sets from DICT. */
+void
+dict_clear_mrsets (struct dictionary *dict)
+{
+ size_t i;
+
+ for (i = 0; i < dict->n_mrsets; i++)
+ mrset_destroy (dict->mrsets[i]);
+ free (dict->mrsets);
+ dict->mrsets = NULL;
+ dict->n_mrsets = 0;
+}
+
+/* Removes VAR, which must be in DICT, from DICT's multiple response sets.
+ Any set left with fewer than two variables is destroyed and removed. */
+static void
+dict_unset_mrset_var (struct dictionary *dict, struct variable *var)
+{
+ size_t i;
+
+ assert (dict_contains_var (dict, var));
+
+ for (i = 0; i < dict->n_mrsets; )
+ {
+ struct mrset *mrset = dict->mrsets[i];
+ size_t j;
+
+ for (j = 0; j < mrset->n_vars; )
+ if (mrset->vars[j] == var)
+ remove_element (mrset->vars, mrset->n_vars--,
+ sizeof *mrset->vars, j);
+ else
+ j++;
+
+ if (mrset->n_vars < 2)
+ {
+ mrset_destroy (mrset);
+ dict->mrsets[i] = dict->mrsets[--dict->n_mrsets];
+ }
+ else
+ i++;
+ }
+}
+\f
/* Returns D's attribute set. The caller may examine or modify
the attribute set, but must not destroy it. Destroying D or
calling dict_set_attributes for D will also destroy D's