From 8021cf8974a46fe82af7b8952e448c0ea6858a48 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 6 May 2007 22:05:21 +0000 Subject: [PATCH] Abstract the documents within a dictionary a little better. Thanks to John Darrington for suggestion, initial version, and review. Patch #5917. * command/file-label.sh: Update to match new DOCUMENT behavior. * title.c (add_document_line): Removed. (add_document_trailer): New function. (cmd_document): Rewrite to yield SPSS-like output and to use the updated document API. (cmd_add_documents): Ditto. * lexer.c (lex_entire_line): Add const to parameter. (lex_entire_line_ds): Ditto. (lex_rest_of_line): Drop end_dot parameter. Update all callers to use lex_end_dot instead. (lex_end_dot): New function. * get.c (mtf_merge_dictionary): Simplify creating merged document. * dictionary.c (struct dictionary): Change `documents' member from char * to struct string. (dict_clear): Destroy struct string. (dict_get_documents): Convert struct string to char *. (dict_set_documents): Set struct string. Pad to 80-character multiple. (dict_clear_documents): New function. (dict_add_document_line): New function. (dict_get_document_line_cnt): New function. (dict_get_document_line): New function. * dictionary.h (macro DOC_LINE_LENGTH): New macro. * sys-file-reader.c (read_documents): Use new document functions. 
--- src/data/ChangeLog | 21 +++++ src/data/dictionary.c | 73 ++++++++++++---- src/data/dictionary.h | 9 ++ src/data/sys-file-reader.c | 9 +- src/language/command.c | 2 +- src/language/data-io/ChangeLog | 11 +++ src/language/data-io/get.c | 8 +- src/language/dictionary/sys-file-info.c | 21 ++--- src/language/expressions/evaluate.c | 2 +- src/language/lexer/ChangeLog | 12 +++ src/language/lexer/lexer.c | 20 +++-- src/language/lexer/lexer.h | 7 +- src/language/stats/aggregate.c | 2 +- src/language/tests/moments-test.c | 2 +- src/language/utilities/ChangeLog | 12 +++ src/language/utilities/title.c | 107 +++++------------------- tests/ChangeLog | 8 ++ tests/command/file-label.sh | 32 +++---- 18 files changed, 205 insertions(+), 153 deletions(-) diff --git a/src/data/ChangeLog b/src/data/ChangeLog index 99793d35..c4a4e011 100644 --- a/src/data/ChangeLog +++ b/src/data/ChangeLog @@ -1,3 +1,24 @@ +2007-05-06 Ben Pfaff + + Abstract the documents within a dictionary a little better. + Thanks to John Darrington for suggestion, initial version, and + review. Patch #5917. + + * dictionary.c (struct dictionary): Change `documents' member from + char * to struct string. + (dict_clear): Destroy struct string. + (dict_get_documents): Convert struct string to char *. + (dict_set_documents): Set struct string. Pad to 80-character + multiple. + (dict_clear_documents): New function. + (dict_add_document_line): New function. + (dict_get_document_line_cnt): New function. + (dict_get_document_line): New function. + + * dictionary.h (macro DOC_LINE_LENGTH): New macro. + + * sys-file-reader.c (read_documents): Use new document functions. + 2007-04-19 John Darrington * sys-file-reader.c: When reading a system file which has no diff --git a/src/data/dictionary.c b/src/data/dictionary.c index 6857f62f..d77d9fdd 100644 --- a/src/data/dictionary.c +++ b/src/data/dictionary.c @@ -57,7 +57,7 @@ struct dictionary struct variable *filter; /* FILTER variable. 
*/ size_t case_limit; /* Current case limit (N command). */ char *label; /* File label. */ - char *documents; /* Documents, as a string. */ + struct string documents; /* Documents, as a string. */ struct vector **vector; /* Vectors of variables. */ size_t vector_cnt; /* Number of vectors. */ const struct dict_callbacks *callbacks; /* Callbacks on dictionary @@ -177,8 +177,7 @@ dict_clear (struct dictionary *d) d->case_limit = 0; free (d->label); d->label = NULL; - free (d->documents); - d->documents = NULL; + ds_destroy (&d->documents); dict_clear_vectors (d); } @@ -1096,27 +1095,73 @@ dict_set_label (struct dictionary *d, const char *label) } /* Returns the documents for D, or a null pointer if D has no - documents (see cmd_document()).. */ + documents. If the return value is nonnull, then the string + will be an exact multiple of DOC_LINE_LENGTH bytes in length, + with each segment corresponding to one line. */ const char * dict_get_documents (const struct dictionary *d) { - assert (d != NULL); - - return d->documents; + return ds_is_empty (&d->documents) ? NULL : ds_cstr (&d->documents); } /* Sets the documents for D to DOCUMENTS, or removes D's - documents if DOCUMENT is a null pointer. */ + documents if DOCUMENTS is a null pointer. If DOCUMENTS is + nonnull, then it should be an exact multiple of + DOC_LINE_LENGTH bytes in length, with each segment + corresponding to one line. */ void dict_set_documents (struct dictionary *d, const char *documents) { - assert (d != NULL); + size_t remainder; + + ds_assign_cstr (&d->documents, documents != NULL ? documents : ""); - free (d->documents); - if (documents == NULL) - d->documents = NULL; - else - d->documents = xstrdup (documents); + /* In case the caller didn't get it quite right, pad out the + final line with spaces. 
*/ + remainder = ds_length (&d->documents) % DOC_LINE_LENGTH; + if (remainder != 0) + ds_put_char_multiple (&d->documents, ' ', DOC_LINE_LENGTH - remainder); +} + +/* Drops the documents from dictionary D. */ +void +dict_clear_documents (struct dictionary *d) +{ + ds_clear (&d->documents); +} + +/* Appends LINE to the documents in D. LINE will be truncated or + padded on the right with spaces to make it exactly + DOC_LINE_LENGTH bytes long. */ +void +dict_add_document_line (struct dictionary *d, const char *line) +{ + if (strlen (line) > DOC_LINE_LENGTH) + { + /* Note to translators: "bytes" is correct, not characters */ + msg (SW, _("Truncating document line to %d bytes."), DOC_LINE_LENGTH); + } + buf_copy_str_rpad (ds_put_uninit (&d->documents, DOC_LINE_LENGTH), + DOC_LINE_LENGTH, line); +} + +/* Returns the number of document lines in dictionary D. */ +size_t +dict_get_document_line_cnt (const struct dictionary *d) +{ + return ds_length (&d->documents) / DOC_LINE_LENGTH; +} + +/* Copies document line number IDX from dictionary D into + LINE, trimming off any trailing white space. */ +void +dict_get_document_line (const struct dictionary *d, + size_t idx, struct string *line) +{ + assert (idx < dict_get_document_line_cnt (d)); + ds_assign_substring (line, ds_substr (&d->documents, idx * DOC_LINE_LENGTH, + DOC_LINE_LENGTH)); + ds_rtrim (line, ss_cstr (CC_SPACES)); } /* Creates in D a vector named NAME that contains the CNT diff --git a/src/data/dictionary.h b/src/data/dictionary.h index 29f8d7ca..570ee462 100644 --- a/src/data/dictionary.h +++ b/src/data/dictionary.h @@ -26,6 +26,7 @@ struct variable; struct dictionary; +struct string; struct dict_callbacks { @@ -123,8 +124,16 @@ void dict_unset_split_var (struct dictionary *d, const char *dict_get_label (const struct dictionary *); void dict_set_label (struct dictionary *, const char *); +/* Fixed length of lines in dictionary documents. 
*/ +#define DOC_LINE_LENGTH 80 + const char *dict_get_documents (const struct dictionary *); void dict_set_documents (struct dictionary *, const char *); +void dict_clear_documents (struct dictionary *); +void dict_add_document_line (struct dictionary *, const char *); +size_t dict_get_document_line_cnt (const struct dictionary *); +void dict_get_document_line (const struct dictionary *, + size_t, struct string *); bool dict_create_vector (struct dictionary *, const char *name, diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 87ba172d..401e3e27 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -671,9 +671,12 @@ read_documents (struct sfm_reader *r, struct dictionary *dict) sys_error (r, _("Number of document lines (%d) " "must be greater than 0."), line_cnt); - documents = pool_nmalloc (r->pool, line_cnt + 1, 80); - read_string (r, documents, 80 * line_cnt + 1); - dict_set_documents (dict, documents); + documents = pool_nmalloc (r->pool, line_cnt + 1, DOC_LINE_LENGTH); + read_string (r, documents, DOC_LINE_LENGTH * line_cnt + 1); + if (strlen (documents) == DOC_LINE_LENGTH * line_cnt) + dict_set_documents (dict, documents); + else + sys_error (r, _("Document line contains null byte.")); pool_free (r->pool, documents); } diff --git a/src/language/command.c b/src/language/command.c index 70d5b02e..a4821db4 100644 --- a/src/language/command.c +++ b/src/language/command.c @@ -830,7 +830,7 @@ cmd_host (struct lexer *lexer, struct dataset *ds UNUSED) } else { - bool ok = run_command (lex_rest_of_line (lexer, NULL)); + bool ok = run_command (lex_rest_of_line (lexer)); lex_discard_line (lexer); return ok ? CMD_SUCCESS : CMD_FAILURE; } diff --git a/src/language/data-io/ChangeLog b/src/language/data-io/ChangeLog index 7685118d..7d329741 100644 --- a/src/language/data-io/ChangeLog +++ b/src/language/data-io/ChangeLog @@ -1,3 +1,14 @@ +2007-05-06 Ben Pfaff + + Abstract the documents within a dictionary a little better. 
+ Thanks to John Darrington for suggestion, initial version, and + review. Patch #5917. + + * get.c (mtf_merge_dictionary): Simplify creating merged document. + + * sys-file-info.c (display_documents): Use new + dict_get_document_line_cnt and dict_get_document_line functions. + Thu Feb 1 16:56:02 2007 Ben Pfaff * file-handle.q (fh_parse): Update to new fh_create_file diff --git a/src/language/data-io/get.c b/src/language/data-io/get.c index c3b0b176..32b3764e 100644 --- a/src/language/data-io/get.c +++ b/src/language/data-io/get.c @@ -1455,13 +1455,7 @@ mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f) dict_set_documents (m, d_docs); else { - char *new_docs; - size_t new_len; - - new_len = strlen (m_docs) + strlen (d_docs); - new_docs = xmalloc (new_len + 1); - strcpy (new_docs, m_docs); - strcat (new_docs, d_docs); + char *new_docs = xasprintf ("%s%s", m_docs, d_docs); dict_set_documents (m, new_docs); free (new_docs); } diff --git a/src/language/dictionary/sys-file-info.c b/src/language/dictionary/sys-file-info.c index 21909a96..ec93ea20 100644 --- a/src/language/dictionary/sys-file-info.c +++ b/src/language/dictionary/sys-file-info.c @@ -316,25 +316,18 @@ display_documents (const struct dictionary *dict) "contain any documents.")); else { - size_t n_lines = strlen (documents) / 80; - char buf[81]; + struct string line = DS_EMPTY_INITIALIZER; size_t i; tab_output_text (TAB_LEFT | TAT_TITLE, _("Documents in the active file:")); som_blank_line (); - buf[80] = 0; - for (i = 0; i < n_lines; i++) - { - int len = 79; - - memcpy (buf, &documents[i * 80], 80); - while ((isspace ((unsigned char) buf[len]) || buf[len] == 0) - && len > 0) - len--; - buf[len + 1] = 0; - tab_output_text (TAB_LEFT | TAB_FIX | TAT_NOWRAP, buf); - } + for (i = 0; i < dict_get_document_line_cnt (dict); i++) + { + dict_get_document_line (dict, i, &line); + tab_output_text (TAB_LEFT | TAB_FIX | TAT_NOWRAP, ds_cstr (&line)); + } + ds_destroy (&line); } } diff --git 
a/src/language/expressions/evaluate.c b/src/language/expressions/evaluate.c index 9318f94b..92f0a0e3 100644 --- a/src/language/expressions/evaluate.c +++ b/src/language/expressions/evaluate.c @@ -199,7 +199,7 @@ cmd_debug_evaluate (struct lexer *lexer, struct dataset *dsother UNUSED) if ( ds != NULL ) fprintf(stderr, "; "); - fprintf (stderr, "%s => ", lex_rest_of_line (lexer, NULL)); + fprintf (stderr, "%s => ", lex_rest_of_line (lexer)); lex_get (lexer); expr = expr_parse_any (lexer, ds, optimize); diff --git a/src/language/lexer/ChangeLog b/src/language/lexer/ChangeLog index c10e681b..792bf087 100644 --- a/src/language/lexer/ChangeLog +++ b/src/language/lexer/ChangeLog @@ -1,3 +1,15 @@ +2007-05-06 Ben Pfaff + + Abstract the documents within a dictionary a little better. + Thanks to John Darrington for suggestion, initial version, and + review. Patch #5917. + + * lexer.c (lex_entire_line): Add const to parameter. + (lex_entire_line_ds): Ditto. + (lex_rest_of_line): Drop end_dot parameter. Update all callers to + use lex_end_dot instead. + (lex_end_dot): New function. + 2007-05-03 John Darrington * lexer.c lexer.h: Added lex_is_string function. diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c index 6396ecea..149e35c7 100644 --- a/src/language/lexer/lexer.c +++ b/src/language/lexer/lexer.c @@ -719,29 +719,33 @@ lex_put_back_id (struct lexer *lexer, const char *id) /* Returns the entire contents of the current line. */ const char * -lex_entire_line (struct lexer *lexer) +lex_entire_line (const struct lexer *lexer) { return ds_cstr (&lexer->line_buffer); } const struct string * -lex_entire_line_ds (struct lexer *lexer) +lex_entire_line_ds (const struct lexer *lexer) { return &lexer->line_buffer; } /* As lex_entire_line(), but only returns the part of the current line - that hasn't already been tokenized. - If END_DOT is non-null, stores nonzero into *END_DOT if the line - ends with a terminal dot, or zero if it doesn't. 
*/ + that hasn't already been tokenized. */ const char * -lex_rest_of_line (struct lexer *lexer, int *end_dot) +lex_rest_of_line (const struct lexer *lexer) { - if (end_dot) - *end_dot = lexer->dot; return lexer->prog; } +/* Returns true if the current line ends in a terminal dot, + false otherwise. */ +bool +lex_end_dot (const struct lexer *lexer) +{ + return lexer->dot; +} + /* Causes the rest of the current input line to be ignored for tokenization purposes. */ void diff --git a/src/language/lexer/lexer.h b/src/language/lexer/lexer.h index 47fbdd71..b924d9de 100644 --- a/src/language/lexer/lexer.h +++ b/src/language/lexer/lexer.h @@ -69,9 +69,10 @@ void lex_put_back (struct lexer *, int); void lex_put_back_id (struct lexer *, const char *tokid); /* Weird line processing functions. */ -const char *lex_entire_line (struct lexer *); -const struct string *lex_entire_line_ds (struct lexer *); -const char *lex_rest_of_line (struct lexer *, int *end_dot); +const char *lex_entire_line (const struct lexer *); +const struct string *lex_entire_line_ds (const struct lexer *); +const char *lex_rest_of_line (const struct lexer *); +bool lex_end_dot (const struct lexer *); void lex_preprocess_line (struct string *, enum getl_syntax, bool *line_starts_command, bool *line_ends_command); diff --git a/src/language/stats/aggregate.c b/src/language/stats/aggregate.c index 04429746..297d2abe 100644 --- a/src/language/stats/aggregate.c +++ b/src/language/stats/aggregate.c @@ -254,7 +254,7 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds) /* Delete documents. */ if (!copy_documents) - dict_set_documents (agr.dict, NULL); + dict_clear_documents (agr.dict); /* Cancel SPLIT FILE. 
*/ dict_set_split_vars (agr.dict, NULL, 0); diff --git a/src/language/tests/moments-test.c b/src/language/tests/moments-test.c index e62a3432..a89b50d1 100644 --- a/src/language/tests/moments-test.c +++ b/src/language/tests/moments-test.c @@ -86,7 +86,7 @@ cmd_debug_moments (struct lexer *lexer, struct dataset *ds UNUSED) lex_force_match (lexer, '/'); goto done; } - fprintf (stderr, "%s => ", lex_rest_of_line (lexer, NULL)); + fprintf (stderr, "%s => ", lex_rest_of_line (lexer)); lex_get (lexer); if (two_pass) diff --git a/src/language/utilities/ChangeLog b/src/language/utilities/ChangeLog index de2d9dda..8af47c08 100644 --- a/src/language/utilities/ChangeLog +++ b/src/language/utilities/ChangeLog @@ -1,3 +1,15 @@ +2007-05-06 Ben Pfaff + + Abstract the documents within a dictionary a little better. + Thanks to John Darrington for suggestion, initial version, and + review. Patch #5917. + + * title.c (add_document_line): Removed. + (add_document_trailer): New function. + (cmd_document): Rewrite to yield SPSS-like output and to use the + updated document API. + (cmd_add_documents): Ditto. + 2007-05-03 John Darrington * title.c: Implemented ADD DOCUMENT command. 
diff --git a/src/language/utilities/title.c b/src/language/utilities/title.c index e7a463d1..f33c8a99 100644 --- a/src/language/utilities/title.c +++ b/src/language/utilities/title.c @@ -76,7 +76,7 @@ get_title (struct lexer *lexer, const char *cmd, char **title) if (*title) free (*title); - *title = xstrdup (lex_rest_of_line (lexer, NULL)); + *title = xstrdup (lex_rest_of_line (lexer)); lex_discard_line (lexer); for (cp = *title; *cp; cp++) *cp = toupper ((unsigned char) (*cp)); @@ -90,7 +90,7 @@ cmd_file_label (struct lexer *lexer, struct dataset *ds) { const char *label; - label = lex_rest_of_line (lexer, NULL); + label = lex_rest_of_line (lexer); lex_discard_line (lexer); while (isspace ((unsigned char) *label)) label++; @@ -100,27 +100,14 @@ cmd_file_label (struct lexer *lexer, struct dataset *ds) return CMD_SUCCESS; } -/* Add LINE as a line of document information to dictionary - indented by INDENT spaces. */ +/* Add entry date line to DICT's documents. */ static void -add_document_line (struct dictionary *dict, const char *line, int indent) +add_document_trailer (struct dictionary *dict) { - const char *old_documents; - size_t old_len; - char *new_documents; - - old_documents = dict_get_documents (dict); - old_len = old_documents != NULL ? strlen (old_documents) : 0; - new_documents = xmalloc (old_len + 81); - - memcpy (new_documents, old_documents, old_len); - memset (new_documents + old_len, ' ', indent); - buf_copy_str_rpad (new_documents + old_len + indent, 80 - indent, line); - new_documents[old_len + 80] = '\0'; - - dict_set_documents (dict, new_documents); - - free (new_documents); + char buf[64]; + + sprintf (buf, _(" (Entered %s)"), get_start_date ()); + dict_add_document_line (dict, buf); } /* Performs the DOCUMENT command. */ @@ -128,40 +115,23 @@ int cmd_document (struct lexer *lexer, struct dataset *ds) { struct dictionary *dict = dataset_dict (ds); - /* Add a few header lines for reference. 
*/ - { - char buf[256]; - - if (dict && dict_get_documents (dict)) - add_document_line (dict, "", 0); - - sprintf (buf, _("Document entered %s by %s:"), get_start_date (), version); - add_document_line (dict, buf, 1); - } + struct string line = DS_EMPTY_INITIALIZER; + bool end_dot; - for (;;) + do { - int had_dot; - const char *orig_line; - char *copy_line; + end_dot = lex_end_dot (lexer); + ds_assign_string (&line, lex_entire_line_ds (lexer)); + if (end_dot) + ds_put_char (&line, '.'); + dict_add_document_line (dict, ds_cstr (&line)); - orig_line = lex_rest_of_line (lexer, &had_dot); lex_discard_line (lexer); - while (isspace ((unsigned char) *orig_line)) - orig_line++; - - copy_line = xmalloc (strlen (orig_line) + 2); - strcpy (copy_line, orig_line); - if (had_dot) - strcat (copy_line, "."); - - add_document_line (dict, copy_line, 3); - free (copy_line); - lex_get_line (lexer); - if (had_dot) - break; } + while (!end_dot); + + add_document_trailer (dict); return CMD_SUCCESS; } @@ -170,7 +140,7 @@ cmd_document (struct lexer *lexer, struct dataset *ds) int cmd_drop_documents (struct lexer *lexer, struct dataset *ds) { - dict_set_documents (dataset_dict (ds), NULL); + dict_clear_documents (dataset_dict (ds)); return lex_end_of_command (lexer); } @@ -180,49 +150,18 @@ cmd_drop_documents (struct lexer *lexer, struct dataset *ds) int cmd_add_documents (struct lexer *lexer, struct dataset *ds) { - int i; - int n_lines = 0; - char buf[256]; - struct string *lines = NULL; - - sprintf (buf, _("(Entered %s)"), get_start_date ()); + struct dictionary *dict = dataset_dict (ds); if ( ! 
lex_force_string (lexer) ) return CMD_FAILURE; while ( lex_is_string (lexer)) { - const struct string *s = lex_tokstr (lexer); - if ( ds_length (s) > 80) - { - /* Note to translators: "bytes" is correct, not characters */ - msg (SE, _("Document lines may not be more than 80 bytes long.")); - goto failure; - - } - lines = xrealloc (lines, (n_lines + 1) * sizeof (*lines)); - ds_init_string (&lines[n_lines++], s); - + dict_add_document_line (dict, ds_cstr (lex_tokstr (lexer))); lex_get (lexer); } - for ( i = 0 ; i < n_lines ; ++i) - { - add_document_line (dataset_dict (ds), ds_cstr (&lines[i]), 0); - ds_destroy (&lines[i]); - } - - free (lines); - - add_document_line (dataset_dict (ds), buf, 3); + add_document_trailer (dict); return lex_end_of_command (lexer) ; - - failure: - for ( i = 0 ; i < n_lines ; ++i) - ds_destroy (&lines[i]); - - free (lines); - - return CMD_FAILURE; } diff --git a/tests/ChangeLog b/tests/ChangeLog index 4a549761..2801b7b2 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,11 @@ +2007-05-06 Ben Pfaff + + Abstract the documents within a dictionary a little better. + Thanks to John Darrington for suggestion, initial version, and + review. Patch #5917. + + * command/file-label.sh: Update to match new DOCUMENT behavior. + 2007-04-19 John Darrington * tests/command/no_case_size.sh tests/command/sysfiles-old.sh : diff --git a/tests/command/file-label.sh b/tests/command/file-label.sh index 70a07150..48d8739b 100755 --- a/tests/command/file-label.sh +++ b/tests/command/file-label.sh @@ -74,8 +74,8 @@ add value labels x 1 'first label mark two'. /* Add a file label and a few documents. file label This is a test file label. document First line of a document -This is the second very long line of a document in an attempt to overflow the input buffer with a really long line -Note that the last line should end with a period: . +Second line of a document +The last line should end with a period: . /* Display the documents. 
@@ -135,33 +135,33 @@ diff -b $TEMPDIR/pspp.filtered - <