gui: Make text import assistant accept only one quote character.
[pspp] / src / ui / gui / psppire-delimited-text.c
1 /* PSPPIRE - a graphical user interface for PSPP.
2    Copyright (C) 2017 Free Software Foundation
3
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
16
17 #include <config.h>
18 #include <gettext.h>
19 #define _(msgid) gettext (msgid)
20 #define P_(msgid) msgid
21
22 #include "psppire-delimited-text.h"
23 #include "psppire-text-file.h"
24 #include "libpspp/str.h"
25 #include "libpspp/i18n.h"
26
27 #include <gtk/gtk.h>
28
/* Identifiers of the GObject properties handled by this class.  Each
   value other than PROP_0 is installed in the class initializer and
   dispatched on in the property setter and getter. */
enum
  {
    PROP_0 = 0,          /* Never installed as a property. */
    PROP_CHILD = 1,      /* "child": the wrapped tree model. */
    PROP_DELIMITERS = 2, /* "delimiters": GSList of delimiter characters. */
    PROP_QUOTE = 3,      /* "quote": the quoting character, 0 for none. */
    PROP_FIRST_LINE = 4  /* "first-line": index of the first data line. */
  };
38
39 static void
40 count_delims (PsppireDelimitedText *tf)
41 {
42   if (tf->child == NULL)
43     return;
44
45   tf->max_delimiters = 0;
46   GtkTreeIter iter;
47   gboolean valid;
48   for (valid = gtk_tree_model_get_iter_first (tf->child, &iter);
49        valid;
50        valid = gtk_tree_model_iter_next (tf->child, &iter))
51     {
52       gunichar quote = -1;
53       // FIXME: Box these lines to avoid constant allocation/deallocation
54       gchar *line = NULL;
55       gtk_tree_model_get (tf->child, &iter, 1, &line, -1);
56       {
57         char *p;
58         gint count = 0;
59         for (p = line; ; p = g_utf8_find_next_char (p, NULL))
60           {
61             const gunichar c = g_utf8_get_char (p);
62             if (c == 0)
63               break;
64
65             if (c == quote)
66               quote = -1;
67             else if (tf->quote && c == tf->quote)
68               quote = c;
69
70             if (quote == -1)
71               {
72                 GSList *del;
73                 for (del = tf->delimiters; del; del = g_slist_next (del))
74                   {
75                     if (c == GPOINTER_TO_INT (del->data))
76                       count++;
77                   }
78               }
79           }
80         tf->max_delimiters = MAX (tf->max_delimiters, count);
81       }
82       g_free (line);
83     }
84 }
85
86 static void
87 cache_invalidate (PsppireDelimitedText *tf)
88 {
89   memset (tf->cache_starts, 0, sizeof tf->cache_starts);
90   if (tf->const_cache.string)
91     {
92       ss_dealloc (&tf->const_cache);
93       tf->const_cache.string = NULL;
94       tf->cache_row = -1;
95     }
96 }
97
98 static void
99 psppire_delimited_text_set_property (GObject         *object,
100                                 guint            prop_id,
101                                 const GValue    *value,
102                                 GParamSpec      *pspec)
103 {
104   PsppireDelimitedText *tf = PSPPIRE_DELIMITED_TEXT (object);
105
106   switch (prop_id)
107     {
108     case PROP_FIRST_LINE:
109       tf->first_line = g_value_get_int (value);
110       break;
111     case PROP_CHILD:
112       tf->child = g_value_get_object (value);
113       g_return_if_fail (PSPPIRE_IS_TEXT_FILE (tf->child));
114       break;
115     case PROP_DELIMITERS:
116       g_slist_free (tf->delimiters);
117       tf->delimiters =  g_slist_copy (g_value_get_pointer (value));
118       break;
119     case PROP_QUOTE:
120       tf->quote = g_value_get_uint (value);
121       break;
122     default:
123       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
124       break;
125     };
126
127   cache_invalidate (tf);
128   count_delims (tf);
129 }
130
131 static void
132 psppire_delimited_text_get_property (GObject         *object,
133                                 guint            prop_id,
134                                 GValue          *value,
135                                 GParamSpec      *pspec)
136 {
137   PsppireDelimitedText *text_file = PSPPIRE_DELIMITED_TEXT (object);
138
139   switch (prop_id)
140     {
141     case PROP_FIRST_LINE:
142       g_value_set_int (value, text_file->first_line);
143       break;
144     case PROP_DELIMITERS:
145       g_value_set_pointer (value, text_file->delimiters);
146       break;
147     case PROP_QUOTE:
148       g_value_set_uint (value, text_file->quote);
149       break;
150     default:
151       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
152       break;
153     };
154 }
155
156 static void psppire_delimited_text_finalize        (GObject           *object);
157 static void psppire_delimited_text_dispose        (GObject           *object);
158
159 static GObjectClass *parent_class = NULL;
160
161 static gint
162 n_lines (PsppireDelimitedText *file)
163 {
164   PsppireTextFile *child = PSPPIRE_TEXT_FILE (file->child);
165
166   return child->maximum_lines;
167 }
168
169 static gboolean
170 __tree_get_iter (GtkTreeModel *tree_model,
171                  GtkTreeIter *iter,
172                  GtkTreePath *path)
173 {
174   PsppireDelimitedText *file = PSPPIRE_DELIMITED_TEXT (tree_model);
175   if (path == NULL)
176     return FALSE;
177
178
179   gint *indices = gtk_tree_path_get_indices (path);
180
181   if (!indices)
182     return FALSE;
183
184   gint n = *indices;
185
186   gint children = n_lines (file);
187
188   if (n >= children - file->first_line)
189     return FALSE;
190
191
192   iter->user_data = GINT_TO_POINTER (n);
193   iter->stamp = file->stamp;
194
195   return TRUE;
196 }
197
198
199 static gboolean
200 __tree_iter_next (GtkTreeModel *tree_model,
201                   GtkTreeIter *iter)
202 {
203   PsppireDelimitedText *file  = PSPPIRE_DELIMITED_TEXT (tree_model);
204   g_return_val_if_fail (file->stamp == iter->stamp, FALSE);
205
206   gint n = GPOINTER_TO_INT (iter->user_data);
207
208
209   gint children = n_lines (file);
210
211   if (n + 1 >= children - file->first_line)
212     return FALSE;
213
214   iter->user_data = GINT_TO_POINTER (n + 1);
215
216   return TRUE;
217 }
218
219
220 static GType
221 __tree_get_column_type (GtkTreeModel *tree_model,
222                         gint          index)
223 {
224   if (index == 0)
225     return G_TYPE_INT;
226
227   return G_TYPE_STRING;
228 }
229
230 static gboolean
231 __iter_has_child (GtkTreeModel *tree_model,
232                   GtkTreeIter  *iter)
233 {
234   return 0;
235 }
236
237
238 static gboolean
239 __iter_parent     (GtkTreeModel *tree_model,
240                    GtkTreeIter  *iter,
241                    GtkTreeIter  *child)
242 {
243   return 0;
244 }
245
246 static GtkTreePath *
247 __tree_get_path (GtkTreeModel *tree_model,
248                  GtkTreeIter  *iter)
249 {
250   PsppireDelimitedText *file  = PSPPIRE_DELIMITED_TEXT (tree_model);
251   g_return_val_if_fail (file->stamp == iter->stamp, FALSE);
252
253   gint n = GPOINTER_TO_INT (iter->user_data);
254
255   gint children = n_lines (file);
256
257   if (n >= children - file->first_line)
258     return NULL;
259
260   return gtk_tree_path_new_from_indices (n, -1);
261 }
262
263
264 static gboolean
265 __iter_children (GtkTreeModel *tree_model,
266                               GtkTreeIter *iter,
267                               GtkTreeIter *parent)
268 {
269   return 0;
270 }
271
272
273 static gint
274 __tree_model_iter_n_children (GtkTreeModel *tree_model,
275                               GtkTreeIter *iter)
276 {
277   PsppireDelimitedText *file  = PSPPIRE_DELIMITED_TEXT (tree_model);
278   g_assert (iter == NULL);
279
280   gint children = n_lines (file);
281
282   return children - file->first_line;
283 }
284
285 static GtkTreeModelFlags
286 __tree_model_get_flags (GtkTreeModel *model)
287 {
288   g_return_val_if_fail (PSPPIRE_IS_DELIMITED_TEXT (model), (GtkTreeModelFlags) 0);
289
290   return GTK_TREE_MODEL_LIST_ONLY;
291 }
292
293 static gint
294 __tree_model_get_n_columns (GtkTreeModel *tree_model)
295 {
296   PsppireDelimitedText *tf  = PSPPIRE_DELIMITED_TEXT (tree_model);
297
298   /* + 1 for the trailing field and +1 for the leading line number column */
299   return tf->max_delimiters + 1 + 1;
300 }
301
302
303 static gboolean
304 __iter_nth_child (GtkTreeModel *tree_model,
305                   GtkTreeIter *iter,
306                   GtkTreeIter *parent,
307                   gint n)
308 {
309   PsppireDelimitedText *file  = PSPPIRE_DELIMITED_TEXT (tree_model);
310
311   g_assert (parent == NULL);
312
313   g_return_val_if_fail (file, FALSE);
314
315   gint children = gtk_tree_model_iter_n_children (file->child, NULL);
316
317   if (n >= children - file->first_line)
318     {
319       iter->stamp = -1;
320       iter->user_data = NULL;
321       return FALSE;
322     }
323
324   iter->user_data = GINT_TO_POINTER (n);
325   iter->stamp = file->stamp;
326
327   return TRUE;
328 }
329
330
331 static void
332 nullify_char (struct substring cs)
333 {
334   int char_len = ss_first_mblen (cs);
335   while (char_len > 0)
336     {
337       cs.string[char_len - 1] = '\0';
338       char_len--;
339     }
340 }
341
342
343 /* Split row N into it's delimited fields (if it is not already cached)
344    and set this row as the current cache. */
345 static void
346 split_row_into_fields (PsppireDelimitedText *file, gint n)
347 {
348   if (n == file->cache_row)  /* Cache hit */
349     {
350       return;
351     }
352
353   memset (file->cache_starts, 0, sizeof file->cache_starts);
354   /* Cache miss */
355   if (file->const_cache.string)
356     {
357       ss_dealloc (&file->const_cache);
358     }
359   ss_alloc_substring_pool (&file->const_cache,
360                            PSPPIRE_TEXT_FILE (file->child)->lines[n], NULL);
361   struct substring cs = file->const_cache;
362   int field = 0;
363   file->cache_starts[0] = cs.string;
364   gunichar quote = -1;
365   for (;
366        UINT32_MAX != ss_first_mb (cs);
367        ss_get_mb (&cs))
368     {
369       ucs4_t character = ss_first_mb (cs);
370       gboolean char_is_quote = FALSE;
371       if (quote == -1)
372         {
373           if (file->quote && character == file->quote)
374             {
375               quote = character;
376               char_is_quote = TRUE;
377               file->cache_starts[field] += ss_first_mblen (cs);
378             }
379         }
380       else if (character == quote)
381         {
382           char_is_quote = TRUE;
383           nullify_char (cs);
384           quote = -1;
385         }
386
387       if (quote == -1 && char_is_quote == FALSE)
388         {
389           GSList *del;
390           for (del = file->delimiters; del; del = g_slist_next (del))
391             {
392               if (character == GPOINTER_TO_INT (del->data))
393                 {
394                   field++;
395                   int char_len = ss_first_mblen (cs);
396                   file->cache_starts[field] = cs.string + char_len;
397                   nullify_char (cs);
398                   break;
399                 }
400             }
401         }
402     }
403
404   file->cache_row = n;
405 }
406
407 const gchar *
408 psppire_delimited_text_get_header_title (PsppireDelimitedText *file, gint column)
409 {
410   if (file->first_line <= 0)
411     return NULL;
412
413   split_row_into_fields (file, file->first_line - 1);
414
415   return file->cache_starts [column];
416 }
417
418 static void
419 __get_value (GtkTreeModel *tree_model,
420              GtkTreeIter *iter,
421              gint column,
422              GValue *value)
423 {
424   PsppireDelimitedText *file  = PSPPIRE_DELIMITED_TEXT (tree_model);
425
426   g_return_if_fail (iter->stamp == file->stamp);
427
428   gint n = GPOINTER_TO_INT (iter->user_data) + file->first_line;
429
430
431   if (column == 0)
432     {
433       g_value_init (value, G_TYPE_INT);
434       g_value_set_int (value, n + 1);
435       return;
436     }
437
438   g_value_init (value, G_TYPE_STRING);
439
440   split_row_into_fields (file, n);
441
442   g_value_set_string (value, file->cache_starts [column - 1]);
443 }
444
445
446 static void
447 __tree_model_init (GtkTreeModelIface *iface)
448 {
449   iface->get_flags       = __tree_model_get_flags;
450   iface->get_n_columns   = __tree_model_get_n_columns ;
451   iface->get_column_type = __tree_get_column_type;
452   iface->get_iter        = __tree_get_iter;
453   iface->iter_next       = __tree_iter_next;
454   iface->get_path        = __tree_get_path;
455   iface->get_value       = __get_value;
456
457   iface->iter_children   = __iter_children;
458   iface->iter_has_child  = __iter_has_child;
459   iface->iter_n_children = __tree_model_iter_n_children;
460   iface->iter_nth_child  = __iter_nth_child;
461   iface->iter_parent     = __iter_parent;
462 }
463
464 G_DEFINE_TYPE_WITH_CODE (PsppireDelimitedText, psppire_delimited_text, G_TYPE_OBJECT,
465                          G_IMPLEMENT_INTERFACE (GTK_TYPE_TREE_MODEL,
466                                                 __tree_model_init))
467
468 static void
469 psppire_delimited_text_class_init (PsppireDelimitedTextClass *class)
470 {
471   GObjectClass *object_class;
472
473   parent_class = g_type_class_peek_parent (class);
474   object_class = G_OBJECT_CLASS (class);
475
476   GParamSpec *first_line_spec =
477     g_param_spec_int ("first-line",
478                       "First Line",
479                       P_("The first line to be considered."),
480                       0, 1000, 0,
481                       G_PARAM_READWRITE);
482
483   GParamSpec *delimiters_spec =
484     g_param_spec_pointer ("delimiters",
485                           "Field Delimiters",
486                           P_("A GSList of gunichars which delimit the fields."),
487                           G_PARAM_READWRITE);
488
489   GParamSpec *quote_spec =
490     g_param_spec_unichar ("quote",
491                          "Quote Character",
492                          P_("A character that quotes the field, or 0 to disable quoting."),
493                          0,
494                          G_PARAM_READWRITE);
495
496   GParamSpec *child_spec =
497     g_param_spec_object ("child",
498                          "Child Model",
499                          P_("The GtkTextModel which this object wraps."),
500                          GTK_TYPE_TREE_MODEL,
501                          G_PARAM_CONSTRUCT_ONLY |G_PARAM_READWRITE);
502
503   object_class->set_property = psppire_delimited_text_set_property;
504   object_class->get_property = psppire_delimited_text_get_property;
505
506   g_object_class_install_property (object_class,
507                                    PROP_CHILD,
508                                    child_spec);
509
510   g_object_class_install_property (object_class,
511                                    PROP_DELIMITERS,
512                                    delimiters_spec);
513
514   g_object_class_install_property (object_class,
515                                    PROP_QUOTE,
516                                    quote_spec);
517
518   g_object_class_install_property (object_class,
519                                    PROP_FIRST_LINE,
520                                    first_line_spec);
521
522   object_class->finalize = psppire_delimited_text_finalize;
523   object_class->dispose = psppire_delimited_text_dispose;
524 }
525
526
527 static void
528 psppire_delimited_text_init (PsppireDelimitedText *text_file)
529 {
530   text_file->child = NULL;
531   text_file->first_line = 0;
532   text_file->delimiters = g_slist_prepend (NULL, GINT_TO_POINTER (':'));
533
534   text_file->const_cache.string = NULL;
535   text_file->const_cache.length = 0;
536   text_file->cache_row = -1;
537   memset (text_file->cache_starts, 0, sizeof text_file->cache_starts);
538
539   text_file->max_delimiters = 0;
540
541   text_file->quote = 0;
542
543   text_file->dispose_has_run = FALSE;
544   text_file->stamp = g_random_int ();
545 }
546
547
548 PsppireDelimitedText *
549 psppire_delimited_text_new (GtkTreeModel *child)
550 {
551   return
552     g_object_new (PSPPIRE_TYPE_DELIMITED_TEXT,
553                   "child", child,
554                   NULL);
555 }
556
557 static void
558 psppire_delimited_text_finalize (GObject *object)
559 {
560   PsppireDelimitedText *tf = PSPPIRE_DELIMITED_TEXT (object);
561
562   g_slist_free (tf->delimiters);
563
564   ss_dealloc (&tf->const_cache);
565
566   /* must chain up */
567   (* parent_class->finalize) (object);
568 }
569
570
571 static void
572 psppire_delimited_text_dispose (GObject *object)
573 {
574   PsppireDelimitedText *ds = PSPPIRE_DELIMITED_TEXT (object);
575
576   if (ds->dispose_has_run)
577     return;
578
579   /* must chain up */
580   (* parent_class->dispose) (object);
581
582   ds->dispose_has_run = TRUE;
583 }