gui: Honor quotes in the text data import dialog.
author Ben Pfaff <blp@cs.stanford.edu>
Fri, 27 May 2022 19:55:47 +0000 (12:55 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sat, 28 May 2022 01:30:35 +0000 (18:30 -0700)
The text data import dialog's preview of the data to be read
ignored the user's quote settings and instead always treated " and '
and (weirdly) parentheses as quotes.  This makes it honor those settings.

Bug #61809.
Thanks to Friedrich Beckmann for reporting this bug.

src/ui/gui/psppire-delimited-text.c
src/ui/gui/psppire-delimited-text.h
src/ui/gui/psppire-import-textfile.c

index b232ef309226ca052846f449d92f9fcfebc9ff2d..6c8f0ae35b168984fe5a521466c4d3bc5d1b6c9d 100644 (file)
@@ -32,22 +32,10 @@ enum
     PROP_0,
     PROP_CHILD,
     PROP_DELIMITERS,
+    PROP_QUOTES,
     PROP_FIRST_LINE
   };
 
-struct enclosure
-{
-  gunichar opening;
-  gunichar closing;
-};
-
-static const struct enclosure enclosures[3] =
-  {
-    {'(',   ')'},
-    {'"',   '"'},
-    {'\'',  '\''}
-  };
-
 static void
 count_delims (PsppireDelimitedText *tf)
 {
@@ -61,7 +49,7 @@ count_delims (PsppireDelimitedText *tf)
        valid;
        valid = gtk_tree_model_iter_next (tf->child, &iter))
     {
-      gint enc = -1;
+      gunichar quote = -1;
       // FIXME: Box these lines to avoid constant allocation/deallocation
       gchar *line = NULL;
       gtk_tree_model_get (tf->child, &iter, 1, &line, -1);
@@ -73,23 +61,13 @@ count_delims (PsppireDelimitedText *tf)
            const gunichar c = g_utf8_get_char (p);
            if (c == 0)
              break;
-           if (enc == -1)
-             {
-               gint i;
-               for (i = 0; i < 3; ++i)
-                 {
-                   if (c == enclosures[i].opening)
-                     {
-                       enc = i;
-                       break;
-                     }
-                 }
-             }
-           else if (c == enclosures[enc].closing)
-             {
-               enc = -1;
-             }
-           if (enc == -1)
+
+            if (c == quote)
+              quote = -1;
+            else if (c == tf->quotes[0] || c == tf->quotes[1])
+              quote = c;
+
+           if (quote == -1)
              {
                GSList *del;
                for (del = tf->delimiters; del; del = g_slist_next (del))
@@ -138,6 +116,18 @@ psppire_delimited_text_set_property (GObject         *object,
       g_slist_free (tf->delimiters);
       tf->delimiters =  g_slist_copy (g_value_get_pointer (value));
       break;
+    case PROP_QUOTES:
+      {
+        tf->quotes[0] = tf->quotes[1] = -1;
+
+        const gchar *s = g_value_get_string (value);
+        for (size_t i = 0; i < 2 && s && s[0]; i++)
+          {
+            tf->quotes[i] = g_utf8_get_char (s);
+            s = g_utf8_find_next_char (s, NULL);
+          }
+      }
+      break;
     default:
       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
       break;
@@ -163,6 +153,17 @@ psppire_delimited_text_get_property (GObject         *object,
     case PROP_DELIMITERS:
       g_value_set_pointer (value, text_file->delimiters);
       break;
+    case PROP_QUOTES:
+      {
+        GString *s = g_string_new (NULL);
+        for (size_t i = 0; i < 2; i++)
+          {
+            gunichar quote = text_file->quotes[i];
+            if (quote && quote != -1)
+              g_string_append_unichar (s, quote);
+          }
+      }
+      break;
     default:
       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
       break;
@@ -377,35 +378,30 @@ split_row_into_fields (PsppireDelimitedText *file, gint n)
   struct substring cs = file->const_cache;
   int field = 0;
   file->cache_starts[0] = cs.string;
-  gint enc = -1;
+  gunichar quote = -1;
   for (;
        UINT32_MAX != ss_first_mb (cs);
        ss_get_mb (&cs))
     {
       ucs4_t character = ss_first_mb (cs);
       gboolean char_is_quote = FALSE;
-      if (enc == -1)
-       {
-         gint i;
-         for (i = 0; i < 3; ++i)
-           {
-             if (character == enclosures[i].opening)
-               {
-                 enc = i;
-                 char_is_quote = TRUE;
-                 file->cache_starts[field] += ss_first_mblen (cs);
-                 break;
-               }
-           }
-       }
-      else if (character == enclosures[enc].closing)
+      if (quote == -1)
+        {
+          if (character == file->quotes[0] || character == file->quotes[1])
+            {
+              quote = character;
+              char_is_quote = TRUE;
+              file->cache_starts[field] += ss_first_mblen (cs);
+            }
+        }
+      else if (character == quote)
        {
          char_is_quote = TRUE;
          nullify_char (cs);
-         enc = -1;
+         quote = -1;
        }
 
-      if (enc == -1 && char_is_quote == FALSE)
+      if (quote == -1 && char_is_quote == FALSE)
        {
          GSList *del;
          for (del = file->delimiters; del; del = g_slist_next (del))
@@ -507,6 +503,13 @@ psppire_delimited_text_class_init (PsppireDelimitedTextClass *class)
                          P_("A GSList of gunichars which delimit the fields."),
                          G_PARAM_READWRITE);
 
+  GParamSpec *quotes_spec =
+    g_param_spec_string ("quotes",
+                         "Field Quotes",
+                         P_("A string of characters that quote the fields."),
+                         P_(""),
+                         G_PARAM_READWRITE);
+
   GParamSpec *child_spec =
     g_param_spec_object ("child",
                         "Child Model",
@@ -525,6 +528,10 @@ psppire_delimited_text_class_init (PsppireDelimitedTextClass *class)
                                    PROP_DELIMITERS,
                                    delimiters_spec);
 
+  g_object_class_install_property (object_class,
+                                   PROP_QUOTES,
+                                   quotes_spec);
+
   g_object_class_install_property (object_class,
                                    PROP_FIRST_LINE,
                                    first_line_spec);
@@ -548,6 +555,8 @@ psppire_delimited_text_init (PsppireDelimitedText *text_file)
 
   text_file->max_delimiters = 0;
 
+  text_file->quotes[0] = text_file->quotes[1] = -1;
+
   text_file->dispose_has_run = FALSE;
   text_file->stamp = g_random_int ();
 }
index b0ef9499fb0a26c9fc173050c94123050ae27e76..3e1094b777c9470995d266dceff98db5d66b45fa 100644 (file)
@@ -61,6 +61,8 @@ struct _PsppireDelimitedText
   GSList *delimiters;
   gint max_delimiters;
 
+  gunichar quotes[2];
+
   /*< private >*/
   gboolean dispose_has_run ;
   gint stamp;
index f73280b66b940d4bd148f016133d1a46b9b6c87a..c3192f555bacc408e88def19be1a38958a82b713 100644 (file)
 static void choose_column_names (PsppireImportAssistant *ia);
 
 /* Revises the contents of the fields tree view based on the
-   currently chosen set of separators. */
+   currently chosen set of separators and quotes. */
 static void
 revise_fields_preview (PsppireImportAssistant *ia)
 {
+  GSList *delimiters = NULL;
+  for (int i = 0; i < N_SEPARATORS; i++)
+    {
+      const struct separator *s = &separators[i];
+      GtkWidget *button = get_widget_assert (ia->text_builder, s->name);
+      if (gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (button)))
+       {
+         delimiters = g_slist_prepend (delimiters,  GINT_TO_POINTER (s->c));
+       }
+    }
+
+  GtkComboBoxText *cbt = GTK_COMBO_BOX_TEXT (ia->quote_combo);
+  GtkToggleButton *quote_cb = GTK_TOGGLE_BUTTON (ia->quote_cb);
+  const gchar *quotes = (gtk_toggle_button_get_active (quote_cb)
+                         ? gtk_combo_box_text_get_active_text (cbt)
+                         : "");
+
+  g_object_set (ia->delimiters_model,
+                "delimiters", delimiters,
+                "quotes", quotes,
+                NULL);
+
   choose_column_names (ia);
 }
 
@@ -533,20 +555,6 @@ static void
 on_separator_toggle (GtkToggleButton *toggle UNUSED,
                      PsppireImportAssistant *ia)
 {
-  int i;
-  GSList *delimiters = NULL;
-  for (i = 0; i < N_SEPARATORS; i++)
-    {
-      const struct separator *s = &separators[i];
-      GtkWidget *button = get_widget_assert (ia->text_builder, s->name);
-      if (gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (button)))
-       {
-         delimiters = g_slist_prepend (delimiters,  GINT_TO_POINTER (s->c));
-       }
-    }
-
-  g_object_set (ia->delimiters_model, "delimiters", delimiters, NULL);
-
   revise_fields_preview (ia);
 }
 
@@ -596,7 +604,7 @@ static void
 reset_separators_page (PsppireImportAssistant *ia)
 {
   gtk_toggle_button_set_active (GTK_TOGGLE_BUTTON (ia->custom_cb), FALSE);
-  gtk_toggle_button_set_active (GTK_TOGGLE_BUTTON (ia->quote_cb), FALSE);
+  gtk_toggle_button_set_active (GTK_TOGGLE_BUTTON (ia->quote_cb), TRUE);
   gtk_entry_set_text (GTK_ENTRY (ia->custom_entry), "");
 
   for (gint i = 0; i < N_SEPARATORS; i++)
@@ -606,10 +614,13 @@ reset_separators_page (PsppireImportAssistant *ia)
       gtk_toggle_button_set_active (GTK_TOGGLE_BUTTON (button), FALSE);
     }
 
-  repopulate_delimiter_columns (ia);
+  if (ia->delimiters_model)
+    {
+      repopulate_delimiter_columns (ia);
 
-  revise_fields_preview (ia);
-  choose_likely_separators (ia);
+      revise_fields_preview (ia);
+      choose_likely_separators (ia);
+    }
 }
 
 /* Called just before the separators page becomes visible in the
@@ -655,6 +666,7 @@ separators_page_create (PsppireImportAssistant *ia)
                            gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (ia->custom_cb)));
 
   gtk_combo_box_set_active (GTK_COMBO_BOX (ia->quote_combo), 0);
+  gtk_entry_set_max_length (GTK_ENTRY (gtk_bin_get_child (GTK_BIN (ia->quote_combo))), 2);
 
   if (ia->fields_tree_view == NULL)
     {