X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fui%2Fgui%2Fpsppire-delimited-text.c;h=674bd86eac47228e4f3be0cb11543ba52a44371c;hb=d1a98ef3d49f66f6622e2eee6215f78d9dda83bb;hp=b232ef309226ca052846f449d92f9fcfebc9ff2d;hpb=952bce971f10482c5f2fdc64582f48aea1238f27;p=pspp diff --git a/src/ui/gui/psppire-delimited-text.c b/src/ui/gui/psppire-delimited-text.c index b232ef3092..674bd86eac 100644 --- a/src/ui/gui/psppire-delimited-text.c +++ b/src/ui/gui/psppire-delimited-text.c @@ -21,7 +21,9 @@ #include "psppire-delimited-text.h" #include "psppire-text-file.h" +#include "language/data-io/data-parser.h" #include "libpspp/str.h" +#include "libpspp/string-array.h" #include "libpspp/i18n.h" #include @@ -32,21 +34,44 @@ enum PROP_0, PROP_CHILD, PROP_DELIMITERS, + PROP_QUOTE, PROP_FIRST_LINE }; -struct enclosure +static struct data_parser * +make_data_parser (PsppireDelimitedText *tf) { - gunichar opening; - gunichar closing; -}; + struct data_parser *parser = data_parser_create (); + data_parser_set_type (parser, DP_DELIMITED); + data_parser_set_span (parser, false); + data_parser_set_quotes (parser, ss_empty ()); + data_parser_set_quote_escape (parser, true); + data_parser_set_empty_line_has_field (parser, true); + + bool space = false; + struct string hard_delimiters = DS_EMPTY_INITIALIZER; + GSList *del; + for (del = tf->delimiters; del; del = g_slist_next (del)) + { + gunichar c = GPOINTER_TO_INT (del->data); + if (c == ' ') + space = true; + else + ds_put_unichar (&hard_delimiters, c); + } + data_parser_set_soft_delimiters (parser, ss_cstr (space ? 
" " : "")); + data_parser_set_hard_delimiters (parser, ds_ss (&hard_delimiters)); + ds_destroy (&hard_delimiters); -static const struct enclosure enclosures[3] = - { - {'(', ')'}, - {'"', '"'}, - {'\'', '\''} - }; + if (tf->quote) + { + struct string quote = DS_EMPTY_INITIALIZER; + ds_put_unichar (&quote, tf->quote); + data_parser_set_quotes (parser, ds_ss (&quote)); + ds_destroy (&quote); + } + return parser; +} static void count_delims (PsppireDelimitedText *tf) @@ -54,67 +79,32 @@ count_delims (PsppireDelimitedText *tf) if (tf->child == NULL) return; - tf->max_delimiters = 0; + struct data_parser *parser = make_data_parser (tf); + + tf->max_fields = 0; GtkTreeIter iter; gboolean valid; for (valid = gtk_tree_model_get_iter_first (tf->child, &iter); valid; valid = gtk_tree_model_iter_next (tf->child, &iter)) { - gint enc = -1; - // FIXME: Box these lines to avoid constant allocation/deallocation gchar *line = NULL; gtk_tree_model_get (tf->child, &iter, 1, &line, -1); - { - char *p; - gint count = 0; - for (p = line; ; p = g_utf8_find_next_char (p, NULL)) - { - const gunichar c = g_utf8_get_char (p); - if (c == 0) - break; - if (enc == -1) - { - gint i; - for (i = 0; i < 3; ++i) - { - if (c == enclosures[i].opening) - { - enc = i; - break; - } - } - } - else if (c == enclosures[enc].closing) - { - enc = -1; - } - if (enc == -1) - { - GSList *del; - for (del = tf->delimiters; del; del = g_slist_next (del)) - { - if (c == GPOINTER_TO_INT (del->data)) - count++; - } - } - } - tf->max_delimiters = MAX (tf->max_delimiters, count); - } + size_t n_fields = data_parser_split (parser, ss_cstr (line), NULL); + if (n_fields > tf->max_fields) + tf->max_fields = n_fields; g_free (line); } + + data_parser_destroy (parser); } static void cache_invalidate (PsppireDelimitedText *tf) { - memset (tf->cache_starts, 0, sizeof tf->cache_starts); - if (tf->const_cache.string) - { - ss_dealloc (&tf->const_cache); - tf->const_cache.string = NULL; - tf->cache_row = -1; - } + tf->cache_row = -1; + 
data_parser_destroy (tf->parser); + tf->parser = make_data_parser (tf); } static void @@ -138,6 +128,9 @@ psppire_delimited_text_set_property (GObject *object, g_slist_free (tf->delimiters); tf->delimiters = g_slist_copy (g_value_get_pointer (value)); break; + case PROP_QUOTE: + tf->quote = g_value_get_uint (value); + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; @@ -163,6 +156,9 @@ psppire_delimited_text_get_property (GObject *object, case PROP_DELIMITERS: g_value_set_pointer (value, text_file->delimiters); break; + case PROP_QUOTE: + g_value_set_uint (value, text_file->quote); + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; @@ -311,8 +307,8 @@ __tree_model_get_n_columns (GtkTreeModel *tree_model) { PsppireDelimitedText *tf = PSPPIRE_DELIMITED_TEXT (tree_model); - /* + 1 for the trailing field and +1 for the leading line number column */ - return tf->max_delimiters + 1 + 1; + /* +1 for the leading line number column */ + return tf->max_fields + 1; } @@ -343,85 +339,19 @@ __iter_nth_child (GtkTreeModel *tree_model, return TRUE; } - -static void -nullify_char (struct substring cs) -{ - int char_len = ss_first_mblen (cs); - while (char_len > 0) - { - cs.string[char_len - 1] = '\0'; - char_len--; - } -} - - /* Split row N into it's delimited fields (if it is not already cached) and set this row as the current cache. 
*/ static void split_row_into_fields (PsppireDelimitedText *file, gint n) { if (n == file->cache_row) /* Cache hit */ - { - return; - } - - memset (file->cache_starts, 0, sizeof file->cache_starts); - /* Cache miss */ - if (file->const_cache.string) - { - ss_dealloc (&file->const_cache); - } - ss_alloc_substring_pool (&file->const_cache, - PSPPIRE_TEXT_FILE (file->child)->lines[n], NULL); - struct substring cs = file->const_cache; - int field = 0; - file->cache_starts[0] = cs.string; - gint enc = -1; - for (; - UINT32_MAX != ss_first_mb (cs); - ss_get_mb (&cs)) - { - ucs4_t character = ss_first_mb (cs); - gboolean char_is_quote = FALSE; - if (enc == -1) - { - gint i; - for (i = 0; i < 3; ++i) - { - if (character == enclosures[i].opening) - { - enc = i; - char_is_quote = TRUE; - file->cache_starts[field] += ss_first_mblen (cs); - break; - } - } - } - else if (character == enclosures[enc].closing) - { - char_is_quote = TRUE; - nullify_char (cs); - enc = -1; - } - - if (enc == -1 && char_is_quote == FALSE) - { - GSList *del; - for (del = file->delimiters; del; del = g_slist_next (del)) - { - if (character == GPOINTER_TO_INT (del->data)) - { - field++; - int char_len = ss_first_mblen (cs); - file->cache_starts[field] = cs.string + char_len; - nullify_char (cs); - break; - } - } - } - } + return; + if (!file->parser) + file->parser = make_data_parser (file); + string_array_clear (&file->cache); + data_parser_split (file->parser, PSPPIRE_TEXT_FILE (file->child)->lines[n], + &file->cache); file->cache_row = n; } @@ -433,7 +363,7 @@ psppire_delimited_text_get_header_title (PsppireDelimitedText *file, gint column split_row_into_fields (file, file->first_line - 1); - return file->cache_starts [column]; + return column < file->cache.n ? 
file->cache.strings[column] : ""; } static void @@ -460,7 +390,9 @@ __get_value (GtkTreeModel *tree_model, split_row_into_fields (file, n); - g_value_set_string (value, file->cache_starts [column - 1]); + size_t idx = column - 1; + const char *s = idx < file->cache.n ? file->cache.strings[idx] : ""; + g_value_set_string (value, s); } @@ -507,6 +439,13 @@ psppire_delimited_text_class_init (PsppireDelimitedTextClass *class) P_("A GSList of gunichars which delimit the fields."), G_PARAM_READWRITE); + GParamSpec *quote_spec = + g_param_spec_unichar ("quote", + "Quote Character", + P_("A character that quotes the field, or 0 to disable quoting."), + 0, + G_PARAM_READWRITE); + GParamSpec *child_spec = g_param_spec_object ("child", "Child Model", @@ -525,6 +464,10 @@ psppire_delimited_text_class_init (PsppireDelimitedTextClass *class) PROP_DELIMITERS, delimiters_spec); + g_object_class_install_property (object_class, + PROP_QUOTE, + quote_spec); + g_object_class_install_property (object_class, PROP_FIRST_LINE, first_line_spec); @@ -541,12 +484,13 @@ psppire_delimited_text_init (PsppireDelimitedText *text_file) text_file->first_line = 0; text_file->delimiters = g_slist_prepend (NULL, GINT_TO_POINTER (':')); - text_file->const_cache.string = NULL; - text_file->const_cache.length = 0; text_file->cache_row = -1; - memset (text_file->cache_starts, 0, sizeof text_file->cache_starts); + string_array_init (&text_file->cache); + text_file->parser = NULL; - text_file->max_delimiters = 0; + text_file->max_fields = 0; + + text_file->quote = 0; text_file->dispose_has_run = FALSE; text_file->stamp = g_random_int (); @@ -568,8 +512,8 @@ psppire_delimited_text_finalize (GObject *object) PsppireDelimitedText *tf = PSPPIRE_DELIMITED_TEXT (object); g_slist_free (tf->delimiters); - - ss_dealloc (&tf->const_cache); + string_array_destroy (&tf->cache); + data_parser_destroy (tf->parser); /* must chain up */ (* parent_class->finalize) (object);