gui: For text data import, use the same parser for preview as for import.
[pspp] / src / ui / gui / psppire-delimited-text.c
index b02a0ac9eabcf867d4575bdfed661b8d2f6faa7a..674bd86eac47228e4f3be0cb11543ba52a44371c 100644 (file)
@@ -21,7 +21,9 @@
 
 #include "psppire-delimited-text.h"
 #include "psppire-text-file.h"
+#include "language/data-io/data-parser.h"
 #include "libpspp/str.h"
+#include "libpspp/string-array.h"
 #include "libpspp/i18n.h"
 
 #include <gtk/gtk.h>
@@ -32,21 +34,44 @@ enum
     PROP_0,
     PROP_CHILD,
     PROP_DELIMITERS,
+    PROP_QUOTE,
     PROP_FIRST_LINE
   };
 
-struct enclosure
+static struct data_parser *
+make_data_parser (PsppireDelimitedText *tf)
 {
-  gunichar opening;
-  gunichar closing;
-};
+  struct data_parser *parser = data_parser_create ();
+  data_parser_set_type (parser, DP_DELIMITED);
+  data_parser_set_span (parser, false);
+  data_parser_set_quotes (parser, ss_empty ());
+  data_parser_set_quote_escape (parser, true);
+  data_parser_set_empty_line_has_field (parser, true);
+
+  bool space = false;
+  struct string hard_delimiters = DS_EMPTY_INITIALIZER;
+  GSList *del;
+  for (del = tf->delimiters; del; del = g_slist_next (del))
+    {
+      gunichar c = GPOINTER_TO_INT (del->data);
+      if (c == ' ')
+        space = true;
+      else
+        ds_put_unichar (&hard_delimiters, c);
+    }
+  data_parser_set_soft_delimiters (parser, ss_cstr (space ? " " : ""));
+  data_parser_set_hard_delimiters (parser, ds_ss (&hard_delimiters));
+  ds_destroy (&hard_delimiters);
 
-static const struct enclosure enclosures[3] =
-  {
-    {'(',   ')'},
-    {'"',   '"'},
-    {'\'',  '\''}
-  };
+  if (tf->quote)
+    {
+      struct string quote = DS_EMPTY_INITIALIZER;
+      ds_put_unichar (&quote, tf->quote);
+      data_parser_set_quotes (parser, ds_ss (&quote));
+      ds_destroy (&quote);
+    }
+  return parser;
+}
 
 static void
 count_delims (PsppireDelimitedText *tf)
@@ -54,66 +79,32 @@ count_delims (PsppireDelimitedText *tf)
   if (tf->child == NULL)
     return;
 
-  tf->max_delimiters = 0;
+  struct data_parser *parser = make_data_parser (tf);
+
+  tf->max_fields = 0;
   GtkTreeIter iter;
   gboolean valid;
   for (valid = gtk_tree_model_get_iter_first (tf->child, &iter);
        valid;
        valid = gtk_tree_model_iter_next (tf->child, &iter))
     {
-      gint enc = -1;
-      // FIXME: Box these lines to avoid constant allocation/deallocation
-      gchar *foo = 0;
-      gtk_tree_model_get (tf->child, &iter, 1, &foo, -1);
-      {
-       char *line = foo;
-       gint count = 0;
-       while (*line)
-         {
-           const gunichar c = *line; //FIXME: Not multibyte safe!
-           if (enc == -1)
-             {
-               gint i;
-               for (i = 0; i < 3; ++i)
-                 {
-                   if (c == enclosures[i].opening)
-                     {
-                       enc = i;
-                       break;
-                     }
-                 }
-             }
-           else if (c == enclosures[enc].closing)
-             {
-               enc = -1;
-             }
-           if (enc == -1)
-             {
-               GSList *del;
-               for (del = tf->delimiters; del; del = g_slist_next (del))
-                 {
-                   if (c == GPOINTER_TO_INT (del->data))
-                     count++;
-                 }
-             }
-           line++;
-         }
-       tf->max_delimiters = MAX (tf->max_delimiters, count);
-      }
-      g_free (foo);
+      gchar *line = NULL;
+      gtk_tree_model_get (tf->child, &iter, 1, &line, -1);
+      size_t n_fields = data_parser_split (parser, ss_cstr (line), NULL);
+      if (n_fields > tf->max_fields)
+        tf->max_fields = n_fields;
+      g_free (line);
     }
+
+  data_parser_destroy (parser);
 }
 
 static void
 cache_invalidate (PsppireDelimitedText *tf)
 {
-  memset (tf->cache_starts, 0, 512);
-  if (tf->const_cache.string)
-    {
-      ss_dealloc (&tf->const_cache);
-      tf->const_cache.string = NULL;
-      tf->cache_row = -1;
-    }
+  tf->cache_row = -1;
+  data_parser_destroy (tf->parser);
+  tf->parser = make_data_parser (tf);
 }
 
 static void
@@ -137,6 +128,9 @@ psppire_delimited_text_set_property (GObject         *object,
       g_slist_free (tf->delimiters);
       tf->delimiters =  g_slist_copy (g_value_get_pointer (value));
       break;
+    case PROP_QUOTE:
+      tf->quote = g_value_get_uint (value);
+      break;
     default:
       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
       break;
@@ -162,16 +156,15 @@ psppire_delimited_text_get_property (GObject         *object,
     case PROP_DELIMITERS:
       g_value_set_pointer (value, text_file->delimiters);
       break;
+    case PROP_QUOTE:
+      g_value_set_uint (value, text_file->quote);
+      break;
     default:
       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
       break;
     };
 }
 
-
-static void psppire_delimited_text_init            (PsppireDelimitedText      *text_file);
-static void psppire_delimited_text_class_init      (PsppireDelimitedTextClass *class);
-
 static void psppire_delimited_text_finalize        (GObject           *object);
 static void psppire_delimited_text_dispose        (GObject           *object);
 
@@ -314,8 +307,8 @@ __tree_model_get_n_columns (GtkTreeModel *tree_model)
 {
   PsppireDelimitedText *tf  = PSPPIRE_DELIMITED_TEXT (tree_model);
 
-  /* + 1 for the trailing field and +1 for the leading line number column */
-  return tf->max_delimiters + 1 + 1;
+  /* +1 for the leading line number column */
+  return tf->max_fields + 1;
 }
 
 
@@ -346,85 +339,19 @@ __iter_nth_child (GtkTreeModel *tree_model,
   return TRUE;
 }
 
-
-static void
-nullify_char (struct substring cs)
-{
-  int char_len = ss_first_mblen (cs);
-  while (char_len > 0)
-    {
-      cs.string[char_len - 1] = '\0';
-      char_len--;
-    }
-}
-
-
 /* Split row N into its delimited fields (if it is not already cached)
    and set this row as the current cache. */
 static void
 split_row_into_fields (PsppireDelimitedText *file, gint n)
 {
   if (n == file->cache_row)  /* Cache hit */
-    {
-      return;
-    }
-
-  memset (file->cache_starts, 0, 512);
-  /* Cache miss */
-  if (file->const_cache.string)
-    {
-      ss_dealloc (&file->const_cache);
-    }
-  ss_alloc_substring_pool (&file->const_cache,
-                          PSPPIRE_TEXT_FILE (file->child)->lines[n], NULL);
-  struct substring cs = file->const_cache;
-  int field = 0;
-  file->cache_starts[0] = cs.string;
-  gint enc = -1;
-  for (;
-       UINT32_MAX != ss_first_mb (cs);
-       ss_get_mb (&cs))
-    {
-      ucs4_t character = ss_first_mb (cs);
-      gboolean char_is_quote = FALSE;
-      if (enc == -1)
-       {
-         gint i;
-         for (i = 0; i < 3; ++i)
-           {
-             if (character == enclosures[i].opening)
-               {
-                 enc = i;
-                 char_is_quote = TRUE;
-                 file->cache_starts[field] += ss_first_mblen (cs);
-                 break;
-               }
-           }
-       }
-      else if (character == enclosures[enc].closing)
-       {
-         char_is_quote = TRUE;
-         nullify_char (cs);
-         enc = -1;
-       }
-
-      if (enc == -1 && char_is_quote == FALSE)
-       {
-         GSList *del;
-         for (del = file->delimiters; del; del = g_slist_next (del))
-           {
-             if (character == GPOINTER_TO_INT (del->data))
-               {
-                 field++;
-                 int char_len = ss_first_mblen (cs);
-                 file->cache_starts[field] = cs.string + char_len;
-                 nullify_char (cs);
-                 break;
-               }
-           }
-       }
-    }
+    return;
+  if (!file->parser)
+    file->parser = make_data_parser (file);
 
+  string_array_clear (&file->cache);
+  data_parser_split (file->parser, PSPPIRE_TEXT_FILE (file->child)->lines[n],
+                     &file->cache);
   file->cache_row = n;
 }
 
@@ -436,7 +363,7 @@ psppire_delimited_text_get_header_title (PsppireDelimitedText *file, gint column
 
   split_row_into_fields (file, file->first_line - 1);
 
-  return file->cache_starts [column];
+  return column < file->cache.n ? file->cache.strings[column] : "";
 }
 
 static void
@@ -463,7 +390,9 @@ __get_value (GtkTreeModel *tree_model,
 
   split_row_into_fields (file, n);
 
-  g_value_set_string (value, file->cache_starts [column - 1]);
+  size_t idx = column - 1;
+  const char *s = idx < file->cache.n ? file->cache.strings[idx] : "";
+  g_value_set_string (value, s);
 }
 
 
@@ -485,44 +414,9 @@ __tree_model_init (GtkTreeModelIface *iface)
   iface->iter_parent     = __iter_parent;
 }
 
-
-GType
-psppire_delimited_text_get_type (void)
-{
-  static GType text_file_type = 0;
-
-  if (!text_file_type)
-    {
-      static const GTypeInfo text_file_info =
-       {
-         sizeof (PsppireDelimitedTextClass),
-         NULL,         /* base_init */
-         NULL,         /* base_finalize */
-         (GClassInitFunc) psppire_delimited_text_class_init,
-         NULL,         /* class_finalize */
-         NULL,         /* class_data */
-         sizeof (PsppireDelimitedText),
-         0,
-         (GInstanceInitFunc) psppire_delimited_text_init,
-       };
-
-      static const GInterfaceInfo tree_model_info = {
-       (GInterfaceInitFunc) __tree_model_init,
-       NULL,
-       NULL
-      };
-
-      text_file_type = g_type_register_static (G_TYPE_OBJECT,
-                                              "PsppireDelimitedText",
-                                              &text_file_info, 0);
-
-      g_type_add_interface_static (text_file_type, GTK_TYPE_TREE_MODEL,
-                                  &tree_model_info);
-    }
-
-  return text_file_type;
-}
-
+G_DEFINE_TYPE_WITH_CODE (PsppireDelimitedText, psppire_delimited_text, G_TYPE_OBJECT,
+                        G_IMPLEMENT_INTERFACE (GTK_TYPE_TREE_MODEL,
+                                               __tree_model_init))
 
 static void
 psppire_delimited_text_class_init (PsppireDelimitedTextClass *class)
@@ -530,7 +424,7 @@ psppire_delimited_text_class_init (PsppireDelimitedTextClass *class)
   GObjectClass *object_class;
 
   parent_class = g_type_class_peek_parent (class);
-  object_class = (GObjectClass*) class;
+  object_class = G_OBJECT_CLASS (class);
 
   GParamSpec *first_line_spec =
     g_param_spec_int ("first-line",
@@ -545,6 +439,13 @@ psppire_delimited_text_class_init (PsppireDelimitedTextClass *class)
                          P_("A GSList of gunichars which delimit the fields."),
                          G_PARAM_READWRITE);
 
+  GParamSpec *quote_spec =
+    g_param_spec_unichar ("quote",
+                         "Quote Character",
+                         P_("A character that quotes the field, or 0 to disable quoting."),
+                         0,
+                         G_PARAM_READWRITE);
+
   GParamSpec *child_spec =
     g_param_spec_object ("child",
                         "Child Model",
@@ -563,6 +464,10 @@ psppire_delimited_text_class_init (PsppireDelimitedTextClass *class)
                                    PROP_DELIMITERS,
                                    delimiters_spec);
 
+  g_object_class_install_property (object_class,
+                                   PROP_QUOTE,
+                                   quote_spec);
+
   g_object_class_install_property (object_class,
                                    PROP_FIRST_LINE,
                                    first_line_spec);
@@ -579,27 +484,26 @@ psppire_delimited_text_init (PsppireDelimitedText *text_file)
   text_file->first_line = 0;
   text_file->delimiters = g_slist_prepend (NULL, GINT_TO_POINTER (':'));
 
-  text_file->const_cache.string = NULL;
-  text_file->const_cache.length = 0;
   text_file->cache_row = -1;
-  memset (text_file->cache_starts, 0, 512);
+  string_array_init (&text_file->cache);
+  text_file->parser = NULL;
+
+  text_file->max_fields = 0;
 
-  text_file->max_delimiters = 0;
+  text_file->quote = 0;
 
   text_file->dispose_has_run = FALSE;
   text_file->stamp = g_random_int ();
 }
 
 
-GtkTreeModel *
+PsppireDelimitedText *
 psppire_delimited_text_new (GtkTreeModel *child)
 {
-  PsppireDelimitedText *retval =
+  return
     g_object_new (PSPPIRE_TYPE_DELIMITED_TEXT,
                  "child", child,
                  NULL);
-
-  return GTK_TREE_MODEL (retval);
 }
 
 static void
@@ -608,8 +512,8 @@ psppire_delimited_text_finalize (GObject *object)
   PsppireDelimitedText *tf = PSPPIRE_DELIMITED_TEXT (object);
 
   g_slist_free (tf->delimiters);
-
-  ss_dealloc (&tf->const_cache);
+  string_array_destroy (&tf->cache);
+  data_parser_destroy (tf->parser);
 
   /* must chain up */
   (* parent_class->finalize) (object);