X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fdictionary.c;h=79d36374fc42767660234911ed533c5732270b92;hb=9ade26c8349b4434008c46cf09bc7473ec743972;hp=03548c44eb628aef66341b1c3bd53da05ae9aca2;hpb=432761249a7e96e81407a123d0e25c3de3066202;p=pspp-builds.git diff --git a/src/data/dictionary.c b/src/data/dictionary.c index 03548c44..79d36374 100644 --- a/src/data/dictionary.c +++ b/src/data/dictionary.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,13 +18,15 @@ #include "data/dictionary.h" +#include #include #include +#include #include "data/attributes.h" #include "data/case.h" -#include "data/category.h" #include "data/identifier.h" +#include "data/mrset.h" #include "data/settings.h" #include "data/value-labels.h" #include "data/vardict.h" @@ -35,14 +37,17 @@ #include "libpspp/compiler.h" #include "libpspp/hash-functions.h" #include "libpspp/hmap.h" +#include "libpspp/i18n.h" #include "libpspp/message.h" #include "libpspp/misc.h" #include "libpspp/pool.h" #include "libpspp/str.h" +#include "libpspp/string-array.h" #include "gl/intprops.h" #include "gl/minmax.h" #include "gl/xalloc.h" +#include "gl/xmemdup0.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -62,10 +67,12 @@ struct dictionary struct variable *filter; /* FILTER variable. */ casenumber case_limit; /* Current case limit (N command). */ char *label; /* File label. */ - struct string documents; /* Documents, as a string. */ + struct string_array documents; /* Documents. */ struct vector **vector; /* Vectors of variables. */ size_t vector_cnt; /* Number of vectors. */ struct attrset attributes; /* Custom attributes. 
*/ + struct mrset **mrsets; /* Multiple response sets. */ + size_t n_mrsets; /* Number of multiple response sets. */ char *encoding; /* Character encoding of string data */ @@ -77,6 +84,8 @@ struct dictionary void *changed_data; }; +static void dict_unset_split_var (struct dictionary *, struct variable *); +static void dict_unset_mrset_var (struct dictionary *, struct variable *); void dict_set_encoding (struct dictionary *d, const char *enc) @@ -94,6 +103,15 @@ dict_get_encoding (const struct dictionary *d) return d->encoding ; } +/* Returns true if UTF-8 string ID is an acceptable identifier in DICT's + encoding, false otherwise. If ISSUE_ERROR is true, issues an explanatory + error message on failure. */ +bool +dict_id_is_valid (const struct dictionary *dict, const char *id, + bool issue_error) +{ + return id_is_valid (id, dict->encoding, issue_error); +} void dict_set_change_callback (struct dictionary *d, @@ -222,6 +240,20 @@ dict_clone (const struct dictionary *s) dict_set_attributes (d, dict_get_attributes (s)); + for (i = 0; i < s->n_mrsets; i++) + { + const struct mrset *old = s->mrsets[i]; + struct mrset *new; + size_t j; + + /* Clone old mrset, then replace vars from D by vars from S. 
*/ + new = mrset_clone (old); + for (j = 0; j < new->n_vars; j++) + new->vars[j] = dict_lookup_var_assert (d, var_get_name (new->vars[j])); + + dict_add_mrset (d, new); + } + return d; } @@ -249,7 +281,7 @@ dict_clear (struct dictionary *d) d->case_limit = 0; free (d->label); d->label = NULL; - ds_destroy (&d->documents); + string_array_clear (&d->documents); dict_clear_vectors (d); attrset_clear (&d->attributes); } @@ -278,6 +310,8 @@ dict_destroy (struct dictionary *d) dict_clear (d); hmap_destroy (&d->name_map); attrset_destroy (&d->attributes); + dict_clear_mrsets (d); + free (d->encoding); free (d); } } @@ -577,6 +611,7 @@ dict_delete_var (struct dictionary *d, struct variable *v) var_clear_aux (v); dict_unset_split_var (d, v); + dict_unset_mrset_var (d, v); if (d->weight == v) dict_set_weight (d, NULL); @@ -820,63 +855,75 @@ var_name_is_insertable (const struct dictionary *dict, const char *name) && lex_id_to_token (ss_cstr (name)) == T_ID); } -static bool -make_hinted_name (const struct dictionary *dict, const char *hint, - char name[VAR_NAME_LEN + 1]) +static char * +make_hinted_name (const struct dictionary *dict, const char *hint) { + size_t hint_len = strlen (hint); bool dropped = false; - char *cp; - - for (cp = name; *hint && cp < name + VAR_NAME_LEN; hint++) + char *root, *rp; + size_t ofs; + int mblen; + + /* The allocation size here is OK: characters that are copied directly fit + OK, and characters that are not copied directly are replaced by a single + '_' byte. If u8_mbtouc() replaces bad input by 0xfffd, then that will get + replaced by '_' too. */ + root = rp = xmalloc (hint_len + 1); + for (ofs = 0; ofs < hint_len; ofs += mblen) { - if (cp == name - ? lex_is_id1 (*hint) && *hint != '$' - : lex_is_idn (*hint)) + ucs4_t uc; + + mblen = u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, hint + ofs), + hint_len - ofs); + if (rp == root + ? 
lex_uc_is_id1 (uc) && uc != '$' + : lex_uc_is_idn (uc)) { if (dropped) { - *cp++ = '_'; + *rp++ = '_'; dropped = false; } - if (cp < name + VAR_NAME_LEN) - *cp++ = *hint; + rp += u8_uctomb (CHAR_CAST (uint8_t *, rp), uc, 6); } - else if (cp > name) + else if (rp != root) dropped = true; } - *cp = '\0'; + *rp = '\0'; - if (name[0] != '\0') + if (root[0] != '\0') { - size_t len = strlen (name); unsigned long int i; - if (var_name_is_insertable (dict, name)) - return true; + if (var_name_is_insertable (dict, root)) + return root; for (i = 0; i < ULONG_MAX; i++) { char suffix[INT_BUFSIZE_BOUND (i) + 1]; - int ofs; + char *name; suffix[0] = '_'; if (!str_format_26adic (i + 1, &suffix[1], sizeof suffix - 1)) NOT_REACHED (); - ofs = MIN (VAR_NAME_LEN - strlen (suffix), len); - strcpy (&name[ofs], suffix); - + name = utf8_encoding_concat (root, suffix, dict->encoding, 64); if (var_name_is_insertable (dict, name)) - return true; + { + free (root); + return name; + } + free (name); } } - return false; + free (root); + + return NULL; } -static bool -make_numeric_name (const struct dictionary *dict, unsigned long int *num_start, - char name[VAR_NAME_LEN + 1]) +static char * +make_numeric_name (const struct dictionary *dict, unsigned long int *num_start) { unsigned long int number; @@ -884,27 +931,24 @@ make_numeric_name (const struct dictionary *dict, unsigned long int *num_start, number < ULONG_MAX; number++) { + char name[3 + INT_STRLEN_BOUND (number) + 1]; + sprintf (name, "VAR%03lu", number); if (dict_lookup_var (dict, name) == NULL) { if (num_start != NULL) *num_start = number + 1; - return true; + return xstrdup (name); } } - if (num_start != NULL) - *num_start = ULONG_MAX; - return false; + NOT_REACHED (); } -/* Attempts to devise a variable name unique within DICT. - Returns true if successful, in which case the new variable - name is stored into NAME. Returns false if all names that can - be generated have already been taken. 
(Returning false is - quite unlikely: at least ULONG_MAX unique names can be - generated.) +/* Devises and returns a variable name unique within DICT. The variable name + is owned by the caller, which must free it with free() when it is no longer + needed. HINT, if it is non-null, is used as a suggestion that will be modified for suitability as a variable name and for @@ -915,14 +959,18 @@ make_numeric_name (const struct dictionary *dict, unsigned long int *num_start, value is used. If NUM_START is non-null, then its value is used as the minimum numeric value to check, and it is updated to the next value to be checked. - */ -bool +*/ +char * dict_make_unique_var_name (const struct dictionary *dict, const char *hint, - unsigned long int *num_start, - char name[VAR_NAME_LEN + 1]) + unsigned long int *num_start) { - return ((hint != NULL && make_hinted_name (dict, hint, name)) - || make_numeric_name (dict, num_start, name)); + if (hint != NULL) + { + char *hinted_name = make_hinted_name (dict, hint); + if (hinted_name != NULL) + return hinted_name; + } + return make_numeric_name (dict, num_start); } /* Returns the weighting variable in dictionary D, or a null @@ -1153,7 +1201,7 @@ dict_get_split_cnt (const struct dictionary *d) /* Removes variable V, which must be in D, from D's set of split variables. */ -void +static void dict_unset_split_var (struct dictionary *d, struct variable *v) { int orig_count; @@ -1206,82 +1254,104 @@ dict_get_label (const struct dictionary *d) } /* Sets D's file label to LABEL, truncating it to a maximum of 60 - characters. */ + characters. + + Removes D's label if LABEL is null or the empty string. */ void dict_set_label (struct dictionary *d, const char *label) { free (d->label); - d->label = label != NULL ? xstrndup (label, 60) : NULL; + d->label = label != NULL && label[0] != '\0' ? xstrndup (label, 60) : NULL; } -/* Returns the documents for D, or a null pointer if D has no - documents. 
If the return value is nonnull, then the string - will be an exact multiple of DOC_LINE_LENGTH bytes in length, - with each segment corresponding to one line. */ -const char * +/* Returns the documents for D, as a UTF-8 encoded string_array. The + return value is always nonnull; if there are no documents then the + string_array is empty. */ +const struct string_array * dict_get_documents (const struct dictionary *d) { - return ds_is_empty (&d->documents) ? NULL : ds_cstr (&d->documents); + return &d->documents; } -/* Sets the documents for D to DOCUMENTS, or removes D's - documents if DOCUMENT is a null pointer. If DOCUMENTS is - nonnull, then it should be an exact multiple of - DOC_LINE_LENGTH bytes in length, with each segment - corresponding to one line. */ +/* Replaces the documents for D by NEW_DOCS, a UTF-8 encoded string_array. */ void -dict_set_documents (struct dictionary *d, const char *documents) +dict_set_documents (struct dictionary *d, const struct string_array *new_docs) { - size_t remainder; + size_t i; - ds_assign_cstr (&d->documents, documents != NULL ? documents : ""); + dict_clear_documents (d); - /* In case the caller didn't get it quite right, pad out the - final line with spaces. */ - remainder = ds_length (&d->documents) % DOC_LINE_LENGTH; - if (remainder != 0) - ds_put_char_multiple (&d->documents, ' ', DOC_LINE_LENGTH - remainder); + for (i = 0; i < new_docs->n; i++) + dict_add_document_line (d, new_docs->strings[i], false); +} + +/* Replaces the documents for D by UTF-8 encoded string NEW_DOCS, dividing it + into individual lines at new-line characters. Each line is truncated to at + most DOC_LINE_LENGTH bytes in D's encoding. 
*/ +void +dict_set_documents_string (struct dictionary *d, const char *new_docs) +{ + const char *s; + + dict_clear_documents (d); + for (s = new_docs; *s != '\0'; ) + { + size_t len = strcspn (s, "\n"); + char *line = xmemdup0 (s, len); + dict_add_document_line (d, line, false); + free (line); + + s += len; + if (*s == '\n') + s++; + } } /* Drops the documents from dictionary D. */ void dict_clear_documents (struct dictionary *d) { - ds_clear (&d->documents); + string_array_clear (&d->documents); } -/* Appends LINE to the documents in D. LINE will be truncated or - padded on the right with spaces to make it exactly - DOC_LINE_LENGTH bytes long. */ -void -dict_add_document_line (struct dictionary *d, const char *line) +/* Appends the UTF-8 encoded LINE to the documents in D. LINE will be + truncated so that it is no more than 80 bytes in the dictionary's + encoding. If this causes some text to be lost, and ISSUE_WARNING is true, + then a warning will be issued. */ +bool +dict_add_document_line (struct dictionary *d, const char *line, + bool issue_warning) { - if (strlen (line) > DOC_LINE_LENGTH) + size_t trunc_len; + bool truncated; + + trunc_len = utf8_encoding_trunc_len (line, d->encoding, DOC_LINE_LENGTH); + truncated = line[trunc_len] != '\0'; + if (truncated && issue_warning) { /* Note to translators: "bytes" is correct, not characters */ msg (SW, _("Truncating document line to %d bytes."), DOC_LINE_LENGTH); } - buf_copy_str_rpad (ds_put_uninit (&d->documents, DOC_LINE_LENGTH), - DOC_LINE_LENGTH, line, ' '); + + string_array_append_nocopy (&d->documents, xmemdup0 (line, trunc_len)); + + return !truncated; } /* Returns the number of document lines in dictionary D. */ size_t dict_get_document_line_cnt (const struct dictionary *d) { - return ds_length (&d->documents) / DOC_LINE_LENGTH; + return d->documents.n; } -/* Copies document line number IDX from dictionary D into - LINE, trimming off any trailing white space. 
*/ -void -dict_get_document_line (const struct dictionary *d, - size_t idx, struct string *line) +/* Returns document line number IDX in dictionary D. The caller must not + modify or free the returned string. */ +const char * +dict_get_document_line (const struct dictionary *d, size_t idx) { - assert (idx < dict_get_document_line_cnt (d)); - ds_assign_substring (line, ds_substr (&d->documents, idx * DOC_LINE_LENGTH, - DOC_LINE_LENGTH)); - ds_rtrim (line, ss_cstr (CC_SPACES)); + assert (idx < d->documents.n); + return d->documents.strings[idx]; } /* Creates in D a vector named NAME that contains the CNT @@ -1362,7 +1432,138 @@ dict_clear_vectors (struct dictionary *d) d->vector = NULL; d->vector_cnt = 0; } + +/* Multiple response sets. */ + +/* Returns the multiple response set in DICT with index IDX, which must be + between 0 and the count returned by dict_get_n_mrsets(), exclusive. */ +const struct mrset * +dict_get_mrset (const struct dictionary *dict, size_t idx) +{ + assert (idx < dict->n_mrsets); + return dict->mrsets[idx]; +} + +/* Returns the number of multiple response sets in DICT. */ +size_t +dict_get_n_mrsets (const struct dictionary *dict) +{ + return dict->n_mrsets; +} +/* Looks for a multiple response set named NAME in DICT. If it finds one, + returns its index; otherwise, returns SIZE_MAX. */ +static size_t +dict_lookup_mrset_idx (const struct dictionary *dict, const char *name) +{ + size_t i; + + for (i = 0; i < dict->n_mrsets; i++) + if (!strcasecmp (name, dict->mrsets[i]->name)) + return i; + + return SIZE_MAX; +} + +/* Looks for a multiple response set named NAME in DICT. If it finds one, + returns it; otherwise, returns NULL. */ +const struct mrset * +dict_lookup_mrset (const struct dictionary *dict, const char *name) +{ + size_t idx = dict_lookup_mrset_idx (dict, name); + return idx != SIZE_MAX ? dict->mrsets[idx] : NULL; +} + +/* Adds MRSET to DICT, replacing any existing set with the same name. 
Returns + true if MRSET was added as a new set, false if it replaced an existing one. + + Ownership of MRSET is transferred to DICT. */ +bool +dict_add_mrset (struct dictionary *dict, struct mrset *mrset) +{ + size_t idx; + + assert (mrset_ok (mrset, dict)); + + idx = dict_lookup_mrset_idx (dict, mrset->name); + if (idx == SIZE_MAX) + { + dict->mrsets = xrealloc (dict->mrsets, + (dict->n_mrsets + 1) * sizeof *dict->mrsets); + dict->mrsets[dict->n_mrsets++] = mrset; + return true; + } + else + { + mrset_destroy (dict->mrsets[idx]); + dict->mrsets[idx] = mrset; + return false; + } +} + +/* Looks for a multiple response set in DICT named NAME. If found, removes it + from DICT and returns true. If none is found, returns false without + modifying DICT. + + Deleting one multiple response set causes the indexes of other sets within + DICT to change. */ +bool +dict_delete_mrset (struct dictionary *dict, const char *name) +{ + size_t idx = dict_lookup_mrset_idx (dict, name); + if (idx != SIZE_MAX) + { + mrset_destroy (dict->mrsets[idx]); + dict->mrsets[idx] = dict->mrsets[--dict->n_mrsets]; + return true; + } + else + return false; +} + +/* Deletes all multiple response sets from DICT. */ +void +dict_clear_mrsets (struct dictionary *dict) +{ + size_t i; + + for (i = 0; i < dict->n_mrsets; i++) + mrset_destroy (dict->mrsets[i]); + free (dict->mrsets); + dict->mrsets = NULL; + dict->n_mrsets = 0; +} + +/* Removes VAR, which must be in DICT, from DICT's multiple response sets. 
*/ +static void +dict_unset_mrset_var (struct dictionary *dict, struct variable *var) +{ + size_t i; + + assert (dict_contains_var (dict, var)); + + for (i = 0; i < dict->n_mrsets; ) + { + struct mrset *mrset = dict->mrsets[i]; + size_t j; + + for (j = 0; j < mrset->n_vars; ) + if (mrset->vars[j] == var) + remove_element (mrset->vars, mrset->n_vars--, + sizeof *mrset->vars, j); + else + j++; + + if (mrset->n_vars < 2) + { + mrset_destroy (mrset); + dict->mrsets[i] = dict->mrsets[--dict->n_mrsets]; + } + else + i++; + } +} + /* Returns D's attribute set. The caller may examine or modify the attribute set, but must not destroy it. Destroying D or calling dict_set_attributes for D will also destroy D's