X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fdictionary.c;h=79d36374fc42767660234911ed533c5732270b92;hb=9ade26c8349b4434008c46cf09bc7473ec743972;hp=467f347efd9f5a5e290435265cbd1077032bfb0b;hpb=afdf3096926b561f4e6511c10fcf73fc6796b9d2;p=pspp-builds.git diff --git a/src/data/dictionary.c b/src/data/dictionary.c index 467f347e..79d36374 100644 --- a/src/data/dictionary.c +++ b/src/data/dictionary.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "data/attributes.h" #include "data/case.h" @@ -36,14 +37,17 @@ #include "libpspp/compiler.h" #include "libpspp/hash-functions.h" #include "libpspp/hmap.h" +#include "libpspp/i18n.h" #include "libpspp/message.h" #include "libpspp/misc.h" #include "libpspp/pool.h" #include "libpspp/str.h" +#include "libpspp/string-array.h" #include "gl/intprops.h" #include "gl/minmax.h" #include "gl/xalloc.h" +#include "gl/xmemdup0.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -63,7 +67,7 @@ struct dictionary struct variable *filter; /* FILTER variable. */ casenumber case_limit; /* Current case limit (N command). */ char *label; /* File label. */ - struct string documents; /* Documents, as a string. */ + struct string_array documents; /* Documents. */ struct vector **vector; /* Vectors of variables. */ size_t vector_cnt; /* Number of vectors. */ struct attrset attributes; /* Custom attributes. */ @@ -99,6 +103,15 @@ dict_get_encoding (const struct dictionary *d) return d->encoding ; } +/* Returns true if UTF-8 string ID is an acceptable identifier in DICT's + encoding, false otherwise. If ISSUE_ERROR is true, issues an explanatory + error message on failure. 
*/ +bool +dict_id_is_valid (const struct dictionary *dict, const char *id, + bool issue_error) +{ + return id_is_valid (id, dict->encoding, issue_error); +} void dict_set_change_callback (struct dictionary *d, @@ -268,7 +281,7 @@ dict_clear (struct dictionary *d) d->case_limit = 0; free (d->label); d->label = NULL; - ds_destroy (&d->documents); + string_array_clear (&d->documents); dict_clear_vectors (d); attrset_clear (&d->attributes); } @@ -845,54 +858,67 @@ var_name_is_insertable (const struct dictionary *dict, const char *name) static char * make_hinted_name (const struct dictionary *dict, const char *hint) { - char name[VAR_NAME_LEN + 1]; + size_t hint_len = strlen (hint); bool dropped = false; - char *cp; - - for (cp = name; *hint && cp < name + VAR_NAME_LEN; hint++) + char *root, *rp; + size_t ofs; + int mblen; + + /* The allocation size here is OK: characters that are copied directly fit + OK, and characters that are not copied directly are replaced by a single + '_' byte. If u8_mbtouc() replaces bad input by 0xfffd, then that will get + replaced by '_' too. */ + root = rp = xmalloc (hint_len + 1); + for (ofs = 0; ofs < hint_len; ofs += mblen) { - if (cp == name - ? lex_is_id1 (*hint) && *hint != '$' - : lex_is_idn (*hint)) + ucs4_t uc; + + mblen = u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, hint + ofs), + hint_len - ofs); + if (rp == root + ? 
lex_uc_is_id1 (uc) && uc != '$' + : lex_uc_is_idn (uc)) { if (dropped) { - *cp++ = '_'; + *rp++ = '_'; dropped = false; } - if (cp < name + VAR_NAME_LEN) - *cp++ = *hint; + rp += u8_uctomb (CHAR_CAST (uint8_t *, rp), uc, 6); } - else if (cp > name) + else if (rp != root) dropped = true; } - *cp = '\0'; + *rp = '\0'; - if (name[0] != '\0') + if (root[0] != '\0') { - size_t len = strlen (name); unsigned long int i; - if (var_name_is_insertable (dict, name)) - return xstrdup (name); + if (var_name_is_insertable (dict, root)) + return root; for (i = 0; i < ULONG_MAX; i++) { char suffix[INT_BUFSIZE_BOUND (i) + 1]; - int ofs; + char *name; suffix[0] = '_'; if (!str_format_26adic (i + 1, &suffix[1], sizeof suffix - 1)) NOT_REACHED (); - ofs = MIN (VAR_NAME_LEN - strlen (suffix), len); - strcpy (&name[ofs], suffix); - + name = utf8_encoding_concat (root, suffix, dict->encoding, 64); if (var_name_is_insertable (dict, name)) - return xstrdup (name); + { + free (root); + return name; + } + free (name); } } + free (root); + return NULL; } @@ -1238,74 +1264,94 @@ dict_set_label (struct dictionary *d, const char *label) d->label = label != NULL && label[0] != '\0' ? xstrndup (label, 60) : NULL; } -/* Returns the documents for D, or a null pointer if D has no - documents. If the return value is nonnull, then the string - will be an exact multiple of DOC_LINE_LENGTH bytes in length, - with each segment corresponding to one line. */ -const char * +/* Returns the documents for D, as a UTF-8 encoded string_array. The + return value is always nonnull; if there are no documents then the + string_array is empty. */ +const struct string_array * dict_get_documents (const struct dictionary *d) { - return ds_is_empty (&d->documents) ? NULL : ds_cstr (&d->documents); + return &d->documents; } -/* Sets the documents for D to DOCUMENTS, or removes D's - documents if DOCUMENT is a null pointer. 
If DOCUMENTS is - nonnull, then it should be an exact multiple of - DOC_LINE_LENGTH bytes in length, with each segment - corresponding to one line. */ +/* Replaces the documents for D by NEW_DOCS, a UTF-8 encoded string_array. */ void -dict_set_documents (struct dictionary *d, const char *documents) +dict_set_documents (struct dictionary *d, const struct string_array *new_docs) { - size_t remainder; + size_t i; - ds_assign_cstr (&d->documents, documents != NULL ? documents : ""); + dict_clear_documents (d); - /* In case the caller didn't get it quite right, pad out the - final line with spaces. */ - remainder = ds_length (&d->documents) % DOC_LINE_LENGTH; - if (remainder != 0) - ds_put_byte_multiple (&d->documents, ' ', DOC_LINE_LENGTH - remainder); + for (i = 0; i < new_docs->n; i++) + dict_add_document_line (d, new_docs->strings[i], false); +} + +/* Replaces the documents for D by UTF-8 encoded string NEW_DOCS, dividing it + into individual lines at new-line characters. Each line is truncated to at + most DOC_LINE_LENGTH bytes in D's encoding. */ +void +dict_set_documents_string (struct dictionary *d, const char *new_docs) +{ + const char *s; + + dict_clear_documents (d); + for (s = new_docs; *s != '\0'; ) + { + size_t len = strcspn (s, "\n"); + char *line = xmemdup0 (s, len); + dict_add_document_line (d, line, false); + free (line); + + s += len; + if (*s == '\n') + s++; + } } /* Drops the documents from dictionary D. */ void dict_clear_documents (struct dictionary *d) { - ds_clear (&d->documents); + string_array_clear (&d->documents); } -/* Appends LINE to the documents in D. LINE will be truncated or - padded on the right with spaces to make it exactly - DOC_LINE_LENGTH bytes long. */ -void -dict_add_document_line (struct dictionary *d, const char *line) +/* Appends the UTF-8 encoded LINE to the documents in D. LINE will be + truncated so that it is no more than DOC_LINE_LENGTH bytes in the dictionary's + encoding. 
If this causes some text to be lost, and ISSUE_WARNING is true, + then a warning will be issued. */ +bool +dict_add_document_line (struct dictionary *d, const char *line, + bool issue_warning) { - if (strlen (line) > DOC_LINE_LENGTH) + size_t trunc_len; + bool truncated; + + trunc_len = utf8_encoding_trunc_len (line, d->encoding, DOC_LINE_LENGTH); + truncated = line[trunc_len] != '\0'; + if (truncated && issue_warning) { /* Note to translators: "bytes" is correct, not characters */ msg (SW, _("Truncating document line to %d bytes."), DOC_LINE_LENGTH); } - buf_copy_str_rpad (ds_put_uninit (&d->documents, DOC_LINE_LENGTH), - DOC_LINE_LENGTH, line, ' '); + + string_array_append_nocopy (&d->documents, xmemdup0 (line, trunc_len)); + + return !truncated; } /* Returns the number of document lines in dictionary D. */ size_t dict_get_document_line_cnt (const struct dictionary *d) { - return ds_length (&d->documents) / DOC_LINE_LENGTH; + return d->documents.n; } -/* Copies document line number IDX from dictionary D into - LINE, trimming off any trailing white space. */ -void -dict_get_document_line (const struct dictionary *d, - size_t idx, struct string *line) +/* Returns document line number IDX in dictionary D. The caller must not + modify or free the returned string. */ +const char * +dict_get_document_line (const struct dictionary *d, size_t idx) { - assert (idx < dict_get_document_line_cnt (d)); - ds_assign_substring (line, ds_substr (&d->documents, idx * DOC_LINE_LENGTH, - DOC_LINE_LENGTH)); - ds_rtrim (line, ss_cstr (CC_SPACES)); + assert (idx < d->documents.n); + return d->documents.strings[idx]; } /* Creates in D a vector named NAME that contains the CNT