gui: Honor quotes in the text data import dialog.
[pspp] / src / ui / gui / psppire-delimited-text.c
1 /* PSPPIRE - a graphical user interface for PSPP.
2    Copyright (C) 2017 Free Software Foundation
3
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
16
17 #include <config.h>
18 #include <gettext.h>
19 #define _(msgid) gettext (msgid)
20 #define P_(msgid) msgid
21
22 #include "psppire-delimited-text.h"
23 #include "psppire-text-file.h"
24 #include "libpspp/str.h"
25 #include "libpspp/i18n.h"
26
27 #include <gtk/gtk.h>
28
/* Identifiers of the GObject properties installed on
   PsppireDelimitedText.  PROP_0 is only a placeholder so that the
   real property ids start at 1. */
enum
  {
    PROP_0 = 0,
    PROP_CHILD,         /* "child" */
    PROP_DELIMITERS,    /* "delimiters" */
    PROP_QUOTES,        /* "quotes" */
    PROP_FIRST_LINE     /* "first-line" */
  };
38
39 static void
40 count_delims (PsppireDelimitedText *tf)
41 {
42   if (tf->child == NULL)
43     return;
44
45   tf->max_delimiters = 0;
46   GtkTreeIter iter;
47   gboolean valid;
48   for (valid = gtk_tree_model_get_iter_first (tf->child, &iter);
49        valid;
50        valid = gtk_tree_model_iter_next (tf->child, &iter))
51     {
52       gunichar quote = -1;
53       // FIXME: Box these lines to avoid constant allocation/deallocation
54       gchar *line = NULL;
55       gtk_tree_model_get (tf->child, &iter, 1, &line, -1);
56       {
57         char *p;
58         gint count = 0;
59         for (p = line; ; p = g_utf8_find_next_char (p, NULL))
60           {
61             const gunichar c = g_utf8_get_char (p);
62             if (c == 0)
63               break;
64
65             if (c == quote)
66               quote = -1;
67             else if (c == tf->quotes[0] || c == tf->quotes[1])
68               quote = c;
69
70             if (quote == -1)
71               {
72                 GSList *del;
73                 for (del = tf->delimiters; del; del = g_slist_next (del))
74                   {
75                     if (c == GPOINTER_TO_INT (del->data))
76                       count++;
77                   }
78               }
79           }
80         tf->max_delimiters = MAX (tf->max_delimiters, count);
81       }
82       g_free (line);
83     }
84 }
85
86 static void
87 cache_invalidate (PsppireDelimitedText *tf)
88 {
89   memset (tf->cache_starts, 0, sizeof tf->cache_starts);
90   if (tf->const_cache.string)
91     {
92       ss_dealloc (&tf->const_cache);
93       tf->const_cache.string = NULL;
94       tf->cache_row = -1;
95     }
96 }
97
98 static void
99 psppire_delimited_text_set_property (GObject         *object,
100                                 guint            prop_id,
101                                 const GValue    *value,
102                                 GParamSpec      *pspec)
103 {
104   PsppireDelimitedText *tf = PSPPIRE_DELIMITED_TEXT (object);
105
106   switch (prop_id)
107     {
108     case PROP_FIRST_LINE:
109       tf->first_line = g_value_get_int (value);
110       break;
111     case PROP_CHILD:
112       tf->child = g_value_get_object (value);
113       g_return_if_fail (PSPPIRE_IS_TEXT_FILE (tf->child));
114       break;
115     case PROP_DELIMITERS:
116       g_slist_free (tf->delimiters);
117       tf->delimiters =  g_slist_copy (g_value_get_pointer (value));
118       break;
119     case PROP_QUOTES:
120       {
121         tf->quotes[0] = tf->quotes[1] = -1;
122
123         const gchar *s = g_value_get_string (value);
124         for (size_t i = 0; i < 2 && s && s[0]; i++)
125           {
126             tf->quotes[i] = g_utf8_get_char (s);
127             s = g_utf8_find_next_char (s, NULL);
128           }
129       }
130       break;
131     default:
132       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
133       break;
134     };
135
136   cache_invalidate (tf);
137   count_delims (tf);
138 }
139
140 static void
141 psppire_delimited_text_get_property (GObject         *object,
142                                 guint            prop_id,
143                                 GValue          *value,
144                                 GParamSpec      *pspec)
145 {
146   PsppireDelimitedText *text_file = PSPPIRE_DELIMITED_TEXT (object);
147
148   switch (prop_id)
149     {
150     case PROP_FIRST_LINE:
151       g_value_set_int (value, text_file->first_line);
152       break;
153     case PROP_DELIMITERS:
154       g_value_set_pointer (value, text_file->delimiters);
155       break;
156     case PROP_QUOTES:
157       {
158         GString *s = g_string_new (NULL);
159         for (size_t i = 0; i < 2; i++)
160           {
161             gunichar quote = text_file->quotes[i];
162             if (quote && quote != -1)
163               g_string_append_unichar (s, quote);
164           }
165       }
166       break;
167     default:
168       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
169       break;
170     };
171 }
172
173 static void psppire_delimited_text_finalize        (GObject           *object);
174 static void psppire_delimited_text_dispose        (GObject           *object);
175
176 static GObjectClass *parent_class = NULL;
177
178 static gint
179 n_lines (PsppireDelimitedText *file)
180 {
181   PsppireTextFile *child = PSPPIRE_TEXT_FILE (file->child);
182
183   return child->maximum_lines;
184 }
185
186 static gboolean
187 __tree_get_iter (GtkTreeModel *tree_model,
188                  GtkTreeIter *iter,
189                  GtkTreePath *path)
190 {
191   PsppireDelimitedText *file = PSPPIRE_DELIMITED_TEXT (tree_model);
192   if (path == NULL)
193     return FALSE;
194
195
196   gint *indices = gtk_tree_path_get_indices (path);
197
198   if (!indices)
199     return FALSE;
200
201   gint n = *indices;
202
203   gint children = n_lines (file);
204
205   if (n >= children - file->first_line)
206     return FALSE;
207
208
209   iter->user_data = GINT_TO_POINTER (n);
210   iter->stamp = file->stamp;
211
212   return TRUE;
213 }
214
215
216 static gboolean
217 __tree_iter_next (GtkTreeModel *tree_model,
218                   GtkTreeIter *iter)
219 {
220   PsppireDelimitedText *file  = PSPPIRE_DELIMITED_TEXT (tree_model);
221   g_return_val_if_fail (file->stamp == iter->stamp, FALSE);
222
223   gint n = GPOINTER_TO_INT (iter->user_data);
224
225
226   gint children = n_lines (file);
227
228   if (n + 1 >= children - file->first_line)
229     return FALSE;
230
231   iter->user_data = GINT_TO_POINTER (n + 1);
232
233   return TRUE;
234 }
235
236
237 static GType
238 __tree_get_column_type (GtkTreeModel *tree_model,
239                         gint          index)
240 {
241   if (index == 0)
242     return G_TYPE_INT;
243
244   return G_TYPE_STRING;
245 }
246
247 static gboolean
248 __iter_has_child (GtkTreeModel *tree_model,
249                   GtkTreeIter  *iter)
250 {
251   return 0;
252 }
253
254
255 static gboolean
256 __iter_parent     (GtkTreeModel *tree_model,
257                    GtkTreeIter  *iter,
258                    GtkTreeIter  *child)
259 {
260   return 0;
261 }
262
263 static GtkTreePath *
264 __tree_get_path (GtkTreeModel *tree_model,
265                  GtkTreeIter  *iter)
266 {
267   PsppireDelimitedText *file  = PSPPIRE_DELIMITED_TEXT (tree_model);
268   g_return_val_if_fail (file->stamp == iter->stamp, FALSE);
269
270   gint n = GPOINTER_TO_INT (iter->user_data);
271
272   gint children = n_lines (file);
273
274   if (n >= children - file->first_line)
275     return NULL;
276
277   return gtk_tree_path_new_from_indices (n, -1);
278 }
279
280
281 static gboolean
282 __iter_children (GtkTreeModel *tree_model,
283                               GtkTreeIter *iter,
284                               GtkTreeIter *parent)
285 {
286   return 0;
287 }
288
289
290 static gint
291 __tree_model_iter_n_children (GtkTreeModel *tree_model,
292                               GtkTreeIter *iter)
293 {
294   PsppireDelimitedText *file  = PSPPIRE_DELIMITED_TEXT (tree_model);
295   g_assert (iter == NULL);
296
297   gint children = n_lines (file);
298
299   return children - file->first_line;
300 }
301
302 static GtkTreeModelFlags
303 __tree_model_get_flags (GtkTreeModel *model)
304 {
305   g_return_val_if_fail (PSPPIRE_IS_DELIMITED_TEXT (model), (GtkTreeModelFlags) 0);
306
307   return GTK_TREE_MODEL_LIST_ONLY;
308 }
309
310 static gint
311 __tree_model_get_n_columns (GtkTreeModel *tree_model)
312 {
313   PsppireDelimitedText *tf  = PSPPIRE_DELIMITED_TEXT (tree_model);
314
315   /* + 1 for the trailing field and +1 for the leading line number column */
316   return tf->max_delimiters + 1 + 1;
317 }
318
319
320 static gboolean
321 __iter_nth_child (GtkTreeModel *tree_model,
322                   GtkTreeIter *iter,
323                   GtkTreeIter *parent,
324                   gint n)
325 {
326   PsppireDelimitedText *file  = PSPPIRE_DELIMITED_TEXT (tree_model);
327
328   g_assert (parent == NULL);
329
330   g_return_val_if_fail (file, FALSE);
331
332   gint children = gtk_tree_model_iter_n_children (file->child, NULL);
333
334   if (n >= children - file->first_line)
335     {
336       iter->stamp = -1;
337       iter->user_data = NULL;
338       return FALSE;
339     }
340
341   iter->user_data = GINT_TO_POINTER (n);
342   iter->stamp = file->stamp;
343
344   return TRUE;
345 }
346
347
348 static void
349 nullify_char (struct substring cs)
350 {
351   int char_len = ss_first_mblen (cs);
352   while (char_len > 0)
353     {
354       cs.string[char_len - 1] = '\0';
355       char_len--;
356     }
357 }
358
359
360 /* Split row N into it's delimited fields (if it is not already cached)
361    and set this row as the current cache. */
362 static void
363 split_row_into_fields (PsppireDelimitedText *file, gint n)
364 {
365   if (n == file->cache_row)  /* Cache hit */
366     {
367       return;
368     }
369
370   memset (file->cache_starts, 0, sizeof file->cache_starts);
371   /* Cache miss */
372   if (file->const_cache.string)
373     {
374       ss_dealloc (&file->const_cache);
375     }
376   ss_alloc_substring_pool (&file->const_cache,
377                            PSPPIRE_TEXT_FILE (file->child)->lines[n], NULL);
378   struct substring cs = file->const_cache;
379   int field = 0;
380   file->cache_starts[0] = cs.string;
381   gunichar quote = -1;
382   for (;
383        UINT32_MAX != ss_first_mb (cs);
384        ss_get_mb (&cs))
385     {
386       ucs4_t character = ss_first_mb (cs);
387       gboolean char_is_quote = FALSE;
388       if (quote == -1)
389         {
390           if (character == file->quotes[0] || character == file->quotes[1])
391             {
392               quote = character;
393               char_is_quote = TRUE;
394               file->cache_starts[field] += ss_first_mblen (cs);
395             }
396         }
397       else if (character == quote)
398         {
399           char_is_quote = TRUE;
400           nullify_char (cs);
401           quote = -1;
402         }
403
404       if (quote == -1 && char_is_quote == FALSE)
405         {
406           GSList *del;
407           for (del = file->delimiters; del; del = g_slist_next (del))
408             {
409               if (character == GPOINTER_TO_INT (del->data))
410                 {
411                   field++;
412                   int char_len = ss_first_mblen (cs);
413                   file->cache_starts[field] = cs.string + char_len;
414                   nullify_char (cs);
415                   break;
416                 }
417             }
418         }
419     }
420
421   file->cache_row = n;
422 }
423
424 const gchar *
425 psppire_delimited_text_get_header_title (PsppireDelimitedText *file, gint column)
426 {
427   if (file->first_line <= 0)
428     return NULL;
429
430   split_row_into_fields (file, file->first_line - 1);
431
432   return file->cache_starts [column];
433 }
434
435 static void
436 __get_value (GtkTreeModel *tree_model,
437              GtkTreeIter *iter,
438              gint column,
439              GValue *value)
440 {
441   PsppireDelimitedText *file  = PSPPIRE_DELIMITED_TEXT (tree_model);
442
443   g_return_if_fail (iter->stamp == file->stamp);
444
445   gint n = GPOINTER_TO_INT (iter->user_data) + file->first_line;
446
447
448   if (column == 0)
449     {
450       g_value_init (value, G_TYPE_INT);
451       g_value_set_int (value, n + 1);
452       return;
453     }
454
455   g_value_init (value, G_TYPE_STRING);
456
457   split_row_into_fields (file, n);
458
459   g_value_set_string (value, file->cache_starts [column - 1]);
460 }
461
462
463 static void
464 __tree_model_init (GtkTreeModelIface *iface)
465 {
466   iface->get_flags       = __tree_model_get_flags;
467   iface->get_n_columns   = __tree_model_get_n_columns ;
468   iface->get_column_type = __tree_get_column_type;
469   iface->get_iter        = __tree_get_iter;
470   iface->iter_next       = __tree_iter_next;
471   iface->get_path        = __tree_get_path;
472   iface->get_value       = __get_value;
473
474   iface->iter_children   = __iter_children;
475   iface->iter_has_child  = __iter_has_child;
476   iface->iter_n_children = __tree_model_iter_n_children;
477   iface->iter_nth_child  = __iter_nth_child;
478   iface->iter_parent     = __iter_parent;
479 }
480
481 G_DEFINE_TYPE_WITH_CODE (PsppireDelimitedText, psppire_delimited_text, G_TYPE_OBJECT,
482                          G_IMPLEMENT_INTERFACE (GTK_TYPE_TREE_MODEL,
483                                                 __tree_model_init))
484
485 static void
486 psppire_delimited_text_class_init (PsppireDelimitedTextClass *class)
487 {
488   GObjectClass *object_class;
489
490   parent_class = g_type_class_peek_parent (class);
491   object_class = G_OBJECT_CLASS (class);
492
493   GParamSpec *first_line_spec =
494     g_param_spec_int ("first-line",
495                       "First Line",
496                       P_("The first line to be considered."),
497                       0, 1000, 0,
498                       G_PARAM_READWRITE);
499
500   GParamSpec *delimiters_spec =
501     g_param_spec_pointer ("delimiters",
502                           "Field Delimiters",
503                           P_("A GSList of gunichars which delimit the fields."),
504                           G_PARAM_READWRITE);
505
506   GParamSpec *quotes_spec =
507     g_param_spec_string ("quotes",
508                          "Field Quotes",
509                          P_("A string of characters that quote the fields."),
510                          P_(""),
511                          G_PARAM_READWRITE);
512
513   GParamSpec *child_spec =
514     g_param_spec_object ("child",
515                          "Child Model",
516                          P_("The GtkTextModel which this object wraps."),
517                          GTK_TYPE_TREE_MODEL,
518                          G_PARAM_CONSTRUCT_ONLY |G_PARAM_READWRITE);
519
520   object_class->set_property = psppire_delimited_text_set_property;
521   object_class->get_property = psppire_delimited_text_get_property;
522
523   g_object_class_install_property (object_class,
524                                    PROP_CHILD,
525                                    child_spec);
526
527   g_object_class_install_property (object_class,
528                                    PROP_DELIMITERS,
529                                    delimiters_spec);
530
531   g_object_class_install_property (object_class,
532                                    PROP_QUOTES,
533                                    quotes_spec);
534
535   g_object_class_install_property (object_class,
536                                    PROP_FIRST_LINE,
537                                    first_line_spec);
538
539   object_class->finalize = psppire_delimited_text_finalize;
540   object_class->dispose = psppire_delimited_text_dispose;
541 }
542
543
544 static void
545 psppire_delimited_text_init (PsppireDelimitedText *text_file)
546 {
547   text_file->child = NULL;
548   text_file->first_line = 0;
549   text_file->delimiters = g_slist_prepend (NULL, GINT_TO_POINTER (':'));
550
551   text_file->const_cache.string = NULL;
552   text_file->const_cache.length = 0;
553   text_file->cache_row = -1;
554   memset (text_file->cache_starts, 0, sizeof text_file->cache_starts);
555
556   text_file->max_delimiters = 0;
557
558   text_file->quotes[0] = text_file->quotes[1] = -1;
559
560   text_file->dispose_has_run = FALSE;
561   text_file->stamp = g_random_int ();
562 }
563
564
565 PsppireDelimitedText *
566 psppire_delimited_text_new (GtkTreeModel *child)
567 {
568   return
569     g_object_new (PSPPIRE_TYPE_DELIMITED_TEXT,
570                   "child", child,
571                   NULL);
572 }
573
574 static void
575 psppire_delimited_text_finalize (GObject *object)
576 {
577   PsppireDelimitedText *tf = PSPPIRE_DELIMITED_TEXT (object);
578
579   g_slist_free (tf->delimiters);
580
581   ss_dealloc (&tf->const_cache);
582
583   /* must chain up */
584   (* parent_class->finalize) (object);
585 }
586
587
588 static void
589 psppire_delimited_text_dispose (GObject *object)
590 {
591   PsppireDelimitedText *ds = PSPPIRE_DELIMITED_TEXT (object);
592
593   if (ds->dispose_has_run)
594     return;
595
596   /* must chain up */
597   (* parent_class->dispose) (object);
598
599   ds->dispose_has_run = TRUE;
600 }