gui: Make text import assistant accept only one quote character.
[pspp] / src / ui / gui / psppire-delimited-text.c
index 306fb69bfd01ab77b519d5c49293c9dad30d3864..59b423010ec10a118da91823d2705d4e6fc05383 100644 (file)
@@ -32,43 +32,67 @@ enum
     PROP_0,
     PROP_CHILD,
     PROP_DELIMITERS,
+    PROP_QUOTE,
     PROP_FIRST_LINE
   };
 
 static void
 count_delims (PsppireDelimitedText *tf)
 {
-  if (tf->child)
+  if (tf->child == NULL)
+    return;
+
+  tf->max_delimiters = 0;
+  GtkTreeIter iter;
+  gboolean valid;
+  for (valid = gtk_tree_model_get_iter_first (tf->child, &iter);
+       valid;
+       valid = gtk_tree_model_iter_next (tf->child, &iter))
     {
-      tf->max_delimiters = 0;
-      GtkTreeIter iter;
-      gboolean valid;
-      for (valid = gtk_tree_model_get_iter_first (tf->child, &iter);
-          valid;
-          valid = gtk_tree_model_iter_next (tf->child, &iter))
-       {
-         // FIXME: Box these lines to avoid constant allocation/deallocation
-         gchar *foo = 0;
-         gtk_tree_model_get (tf->child, &iter, 1, &foo, -1);
+      gunichar quote = -1;
+      // FIXME: Box these lines to avoid constant allocation/deallocation
+      gchar *line = NULL;
+      gtk_tree_model_get (tf->child, &iter, 1, &line, -1);
+      {
+       char *p;
+       gint count = 0;
+       for (p = line; ; p = g_utf8_find_next_char (p, NULL))
          {
-           char *line = foo;
-           gint count = 0;
-           while (*line)
+           const gunichar c = g_utf8_get_char (p);
+           if (c == 0)
+             break;
+
+            if (c == quote)
+              quote = -1;
+            else if (tf->quote && c == tf->quote)
+              quote = c;
+
+           if (quote == -1)
              {
                GSList *del;
                for (del = tf->delimiters; del; del = g_slist_next (del))
                  {
-                   if (*line == GPOINTER_TO_INT (del->data))
+                   if (c == GPOINTER_TO_INT (del->data))
                      count++;
                  }
-               line++;
              }
-           tf->max_delimiters = MAX (tf->max_delimiters, count);
          }
-         g_free (foo);
-       }
+       tf->max_delimiters = MAX (tf->max_delimiters, count);
+      }
+      g_free (line);
+    }
+}
+
+static void
+cache_invalidate (PsppireDelimitedText *tf)
+{
+  memset (tf->cache_starts, 0, sizeof tf->cache_starts);
+  if (tf->const_cache.string)
+    {
+      ss_dealloc (&tf->const_cache);
+      tf->const_cache.string = NULL;
+      tf->cache_row = -1;
     }
-  //  g_print ("Max Number of delimiters per row: %d\n", tf->max_delimiters);
 }
 
 static void
@@ -83,26 +107,25 @@ psppire_delimited_text_set_property (GObject         *object,
     {
     case PROP_FIRST_LINE:
       tf->first_line = g_value_get_int (value);
-      if (tf->const_cache.string)
-       {
-         ss_dealloc (&tf->const_cache);
-         tf->cache_row = -1;
-       }
       break;
     case PROP_CHILD:
       tf->child = g_value_get_object (value);
+      g_return_if_fail (PSPPIRE_IS_TEXT_FILE (tf->child));
       break;
     case PROP_DELIMITERS:
       g_slist_free (tf->delimiters);
       tf->delimiters =  g_slist_copy (g_value_get_pointer (value));
       break;
+    case PROP_QUOTE:
+      tf->quote = g_value_get_uint (value);
+      break;
     default:
       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
       break;
     };
 
-  if (tf->child)
-    count_delims (tf);
+  cache_invalidate (tf);
+  count_delims (tf);
 }
 
 static void
@@ -121,21 +144,27 @@ psppire_delimited_text_get_property (GObject         *object,
     case PROP_DELIMITERS:
       g_value_set_pointer (value, text_file->delimiters);
       break;
+    case PROP_QUOTE:
+      g_value_set_uint (value, text_file->quote);
+      break;
     default:
       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
       break;
     };
 }
 
-
-static void psppire_delimited_text_init            (PsppireDelimitedText      *text_file);
-static void psppire_delimited_text_class_init      (PsppireDelimitedTextClass *class);
-
 static void psppire_delimited_text_finalize        (GObject           *object);
 static void psppire_delimited_text_dispose        (GObject           *object);
 
 static GObjectClass *parent_class = NULL;
 
+static gint
+n_lines (PsppireDelimitedText *file)
+{
+  PsppireTextFile *child = PSPPIRE_TEXT_FILE (file->child);
+
+  return child->maximum_lines;
+}
 
 static gboolean
 __tree_get_iter (GtkTreeModel *tree_model,
@@ -146,8 +175,7 @@ __tree_get_iter (GtkTreeModel *tree_model,
   if (path == NULL)
     return FALSE;
 
-  //  g_print ("%s:%d %s %s\n", __FILE__, __LINE__, __FUNCTION__, gtk_tree_path_to_string (path));
-  
+
   gint *indices = gtk_tree_path_get_indices (path);
 
   if (!indices)
@@ -155,13 +183,12 @@ __tree_get_iter (GtkTreeModel *tree_model,
 
   gint n = *indices;
 
-  gint children = gtk_tree_model_iter_n_children (file->child, NULL);
+  gint children = n_lines (file);
 
   if (n >= children - file->first_line)
     return FALSE;
-  
-  //  g_print ("%s:%d %s  %d Children: %d\n", __FILE__, __LINE__, __FUNCTION__, n, children);
-  
+
+
   iter->user_data = GINT_TO_POINTER (n);
   iter->stamp = file->stamp;
 
@@ -177,10 +204,9 @@ __tree_iter_next (GtkTreeModel *tree_model,
   g_return_val_if_fail (file->stamp == iter->stamp, FALSE);
 
   gint n = GPOINTER_TO_INT (iter->user_data);
-  
-  //  g_print ("%s:%d %s %d\n", __FILE__, __LINE__, __FUNCTION__, n);
 
-  gint children = gtk_tree_model_iter_n_children (file->child, NULL);
+
+  gint children = n_lines (file);
 
   if (n + 1 >= children - file->first_line)
     return FALSE;
@@ -195,7 +221,6 @@ static GType
 __tree_get_column_type (GtkTreeModel *tree_model,
                        gint          index)
 {
-  //  g_print ("%s:%d %s\n", __FILE__, __LINE__, __FUNCTION__);
   if (index == 0)
     return G_TYPE_INT;
 
@@ -206,7 +231,6 @@ static gboolean
 __iter_has_child (GtkTreeModel *tree_model,
                  GtkTreeIter  *iter)
 {
-  g_print ("%s:%d %s\n", __FILE__, __LINE__, __FUNCTION__);
   return 0;
 }
 
@@ -216,7 +240,6 @@ __iter_parent     (GtkTreeModel *tree_model,
                   GtkTreeIter  *iter,
                   GtkTreeIter  *child)
 {
-  g_print ("%s:%d %s\n", __FILE__, __LINE__, __FUNCTION__);
   return 0;
 }
 
@@ -224,17 +247,16 @@ static GtkTreePath *
 __tree_get_path (GtkTreeModel *tree_model,
                 GtkTreeIter  *iter)
 {
-  //  g_print ("%s:%d %s\n", __FILE__, __LINE__, __FUNCTION__);
   PsppireDelimitedText *file  = PSPPIRE_DELIMITED_TEXT (tree_model);
   g_return_val_if_fail (file->stamp == iter->stamp, FALSE);
 
   gint n = GPOINTER_TO_INT (iter->user_data);
 
-  gint children = gtk_tree_model_iter_n_children (file->child, NULL);
+  gint children = n_lines (file);
 
   if (n >= children - file->first_line)
     return NULL;
-  
+
   return gtk_tree_path_new_from_indices (n, -1);
 }
 
@@ -244,7 +266,6 @@ __iter_children (GtkTreeModel *tree_model,
                               GtkTreeIter *iter,
                               GtkTreeIter *parent)
 {
-  g_print ("%s:%d %s\n", __FILE__, __LINE__, __FUNCTION__);
   return 0;
 }
 
@@ -253,16 +274,17 @@ static gint
 __tree_model_iter_n_children (GtkTreeModel *tree_model,
                              GtkTreeIter *iter)
 {
-  g_print ("%s:%d %s\n", __FILE__, __LINE__, __FUNCTION__);
   PsppireDelimitedText *file  = PSPPIRE_DELIMITED_TEXT (tree_model);
   g_assert (iter == NULL);
-  return 0;
+
+  gint children = n_lines (file);
+
+  return children - file->first_line;
 }
 
 static GtkTreeModelFlags
 __tree_model_get_flags (GtkTreeModel *model)
 {
-  //  g_print ("%s:%d %s\n", __FILE__, __LINE__, __FUNCTION__);
   g_return_val_if_fail (PSPPIRE_IS_DELIMITED_TEXT (model), (GtkTreeModelFlags) 0);
 
   return GTK_TREE_MODEL_LIST_ONLY;
@@ -271,7 +293,6 @@ __tree_model_get_flags (GtkTreeModel *model)
 static gint
 __tree_model_get_n_columns (GtkTreeModel *tree_model)
 {
-  //  g_print ("%s:%d %s\n", __FILE__, __LINE__, __FUNCTION__);
   PsppireDelimitedText *tf  = PSPPIRE_DELIMITED_TEXT (tree_model);
 
   /* + 1 for the trailing field and +1 for the leading line number column */
@@ -285,7 +306,6 @@ __iter_nth_child (GtkTreeModel *tree_model,
                  GtkTreeIter *parent,
                  gint n)
 {
-  //  g_print ("%s:%d %s %d\n", __FILE__, __LINE__, __FUNCTION__, n);
   PsppireDelimitedText *file  = PSPPIRE_DELIMITED_TEXT (tree_model);
 
   g_assert (parent == NULL);
@@ -309,65 +329,116 @@ __iter_nth_child (GtkTreeModel *tree_model,
 
 
 static void
-__get_value (GtkTreeModel *tree_model,
-            GtkTreeIter *iter,
-            gint column,
-            GValue *value)
+nullify_char (struct substring cs)
 {
-  //  g_print ("%s:%d %s Col: %d\n", __FILE__, __LINE__, __FUNCTION__, column);
-  PsppireDelimitedText *file  = PSPPIRE_DELIMITED_TEXT (tree_model);
-
-  g_return_if_fail (iter->stamp == file->stamp);
+  int char_len = ss_first_mblen (cs);
+  while (char_len > 0)
+    {
+      cs.string[char_len - 1] = '\0';
+      char_len--;
+    }
+}
 
-  gint n = GPOINTER_TO_INT (iter->user_data) + file->first_line;
 
-  //  g_print ("%s:%d Row: %d\n", __FILE__, __LINE__, n);
-  
-  if (column == 0)
+/* Split row N into its delimited fields (if it is not already cached)
+   and set this row as the current cache. */
+static void
+split_row_into_fields (PsppireDelimitedText *file, gint n)
+{
+  if (n == file->cache_row)  /* Cache hit */
     {
-      g_value_init (value, G_TYPE_INT);
-      g_value_set_int (value, n + 1);
       return;
     }
 
-  g_value_init (value, G_TYPE_STRING);
-
-  if (n != file->cache_row)
+  memset (file->cache_starts, 0, sizeof file->cache_starts);
+  /* Cache miss */
+  if (file->const_cache.string)
     {
-      if (file->const_cache.string)
+      ss_dealloc (&file->const_cache);
+    }
+  ss_alloc_substring_pool (&file->const_cache,
+                          PSPPIRE_TEXT_FILE (file->child)->lines[n], NULL);
+  struct substring cs = file->const_cache;
+  int field = 0;
+  file->cache_starts[0] = cs.string;
+  gunichar quote = -1;
+  for (;
+       UINT32_MAX != ss_first_mb (cs);
+       ss_get_mb (&cs))
+    {
+      ucs4_t character = ss_first_mb (cs);
+      gboolean char_is_quote = FALSE;
+      if (quote == -1)
+        {
+          if (file->quote && character == file->quote)
+            {
+              quote = character;
+              char_is_quote = TRUE;
+              file->cache_starts[field] += ss_first_mblen (cs);
+            }
+        }
+      else if (character == quote)
        {
-         ss_dealloc (&file->const_cache);
+         char_is_quote = TRUE;
+         nullify_char (cs);
+         quote = -1;
        }
-      ss_alloc_substring (&file->const_cache, PSPPIRE_TEXT_FILE (file->child)->lines[n]);
-      file->cache = file->const_cache;
-      int field = 0;
-      file->cache_starts[0] = file->cache.string;
-      for (;
-          UINT32_MAX != ss_first_mb (file->cache);
-          ss_get_mb (&file->cache))
+
+      if (quote == -1 && char_is_quote == FALSE)
        {
-         ucs4_t xx = ss_first_mb (file->cache);
          GSList *del;
          for (del = file->delimiters; del; del = g_slist_next (del))
            {
-             if (xx == GPOINTER_TO_INT (del->data))
+             if (character == GPOINTER_TO_INT (del->data))
                {
                  field++;
-                 int char_len = ss_first_mblen (file->cache);
-                 file->cache_starts[field] = file->cache.string + char_len;
-                 while (char_len > 0)
-                   {
-                     file->cache.string[char_len - 1] = '\0';
-                     char_len--;
-                   }
+                 int char_len = ss_first_mblen (cs);
+                 file->cache_starts[field] = cs.string + char_len;
+                 nullify_char (cs);
                  break;
                }
            }
        }
+    }
+
+  file->cache_row = n;
+}
+
+const gchar *
+psppire_delimited_text_get_header_title (PsppireDelimitedText *file, gint column)
+{
+  if (file->first_line <= 0)
+    return NULL;
+
+  split_row_into_fields (file, file->first_line - 1);
+
+  return file->cache_starts [column];
+}
+
+static void
+__get_value (GtkTreeModel *tree_model,
+            GtkTreeIter *iter,
+            gint column,
+            GValue *value)
+{
+  PsppireDelimitedText *file  = PSPPIRE_DELIMITED_TEXT (tree_model);
 
-      file->cache_row = n;
+  g_return_if_fail (iter->stamp == file->stamp);
+
+  gint n = GPOINTER_TO_INT (iter->user_data) + file->first_line;
+
+
+  if (column == 0)
+    {
+      g_value_init (value, G_TYPE_INT);
+      g_value_set_int (value, n + 1);
+      return;
     }
-  
+
+  g_value_init (value, G_TYPE_STRING);
+
+  split_row_into_fields (file, n);
+
   g_value_set_string (value, file->cache_starts [column - 1]);
 }
 
@@ -390,44 +461,9 @@ __tree_model_init (GtkTreeModelIface *iface)
   iface->iter_parent     = __iter_parent;
 }
 
-
-GType
-psppire_delimited_text_get_type (void)
-{
-  static GType text_file_type = 0;
-
-  if (!text_file_type)
-    {
-      static const GTypeInfo text_file_info =
-       {
-         sizeof (PsppireDelimitedTextClass),
-         NULL,         /* base_init */
-         NULL,         /* base_finalize */
-         (GClassInitFunc) psppire_delimited_text_class_init,
-         NULL,         /* class_finalize */
-         NULL,         /* class_data */
-         sizeof (PsppireDelimitedText),
-         0,
-         (GInstanceInitFunc) psppire_delimited_text_init,
-       };
-
-      static const GInterfaceInfo tree_model_info = {
-       (GInterfaceInitFunc) __tree_model_init,
-       NULL,
-       NULL
-      };
-
-      text_file_type = g_type_register_static (G_TYPE_OBJECT,
-                                              "PsppireDelimitedText",
-                                              &text_file_info, 0);
-
-      g_type_add_interface_static (text_file_type, GTK_TYPE_TREE_MODEL,
-                                  &tree_model_info);
-    }
-
-  return text_file_type;
-}
-
+G_DEFINE_TYPE_WITH_CODE (PsppireDelimitedText, psppire_delimited_text, G_TYPE_OBJECT,
+                        G_IMPLEMENT_INTERFACE (GTK_TYPE_TREE_MODEL,
+                                               __tree_model_init))
 
 static void
 psppire_delimited_text_class_init (PsppireDelimitedTextClass *class)
@@ -435,7 +471,7 @@ psppire_delimited_text_class_init (PsppireDelimitedTextClass *class)
   GObjectClass *object_class;
 
   parent_class = g_type_class_peek_parent (class);
-  object_class = (GObjectClass*) class;
+  object_class = G_OBJECT_CLASS (class);
 
   GParamSpec *first_line_spec =
     g_param_spec_int ("first-line",
@@ -443,20 +479,27 @@ psppire_delimited_text_class_init (PsppireDelimitedTextClass *class)
                      P_("The first line to be considered."),
                      0, 1000, 0,
                      G_PARAM_READWRITE);
-  
+
   GParamSpec *delimiters_spec =
     g_param_spec_pointer ("delimiters",
                          "Field Delimiters",
                          P_("A GSList of gunichars which delimit the fields."),
                          G_PARAM_READWRITE);
 
-  GParamSpec *child_spec = 
+  GParamSpec *quote_spec =
+    g_param_spec_unichar ("quote",
+                         "Quote Character",
+                         P_("A character that quotes the field, or 0 to disable quoting."),
+                         0,
+                         G_PARAM_READWRITE);
+
+  GParamSpec *child_spec =
     g_param_spec_object ("child",
                         "Child Model",
                         P_("The GtkTextModel which this object wraps."),
                         GTK_TYPE_TREE_MODEL,
                         G_PARAM_CONSTRUCT_ONLY |G_PARAM_READWRITE);
-  
+
   object_class->set_property = psppire_delimited_text_set_property;
   object_class->get_property = psppire_delimited_text_get_property;
 
@@ -468,10 +511,14 @@ psppire_delimited_text_class_init (PsppireDelimitedTextClass *class)
                                    PROP_DELIMITERS,
                                    delimiters_spec);
 
+  g_object_class_install_property (object_class,
+                                   PROP_QUOTE,
+                                   quote_spec);
+
   g_object_class_install_property (object_class,
                                    PROP_FIRST_LINE,
                                    first_line_spec);
-  
+
   object_class->finalize = psppire_delimited_text_finalize;
   object_class->dispose = psppire_delimited_text_dispose;
 }
@@ -487,9 +534,12 @@ psppire_delimited_text_init (PsppireDelimitedText *text_file)
   text_file->const_cache.string = NULL;
   text_file->const_cache.length = 0;
   text_file->cache_row = -1;
+  memset (text_file->cache_starts, 0, sizeof text_file->cache_starts);
 
   text_file->max_delimiters = 0;
 
+  text_file->quote = 0;
+
   text_file->dispose_has_run = FALSE;
   text_file->stamp = g_random_int ();
 }
@@ -498,12 +548,10 @@ psppire_delimited_text_init (PsppireDelimitedText *text_file)
 PsppireDelimitedText *
 psppire_delimited_text_new (GtkTreeModel *child)
 {
-  PsppireDelimitedText *retval =
+  return
     g_object_new (PSPPIRE_TYPE_DELIMITED_TEXT,
                  "child", child,
                  NULL);
-
-  return retval;
 }
 
 static void