1 /* PSPPIRE - a graphical user interface for PSPP.
2 Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "page-separators.h"
21 #include "ui/gui/text-data-import-dialog.h"
25 #include <gtk-contrib/psppire-sheet.h>
31 #include "data/data-in.h"
32 #include "data/data-out.h"
33 #include "data/format-guesser.h"
34 #include "data/value-labels.h"
35 #include "language/data-io/data-parser.h"
36 #include "language/lexer/lexer.h"
37 #include "libpspp/assertion.h"
38 #include "libpspp/i18n.h"
39 #include "libpspp/line-reader.h"
40 #include "libpspp/message.h"
41 #include "ui/gui/checkbox-treeview.h"
42 #include "ui/gui/dialog-common.h"
43 #include "ui/gui/executor.h"
44 #include "ui/gui/helper.h"
45 #include "ui/gui/builder-wrapper.h"
46 #include "ui/gui/psppire-data-window.h"
47 #include "ui/gui/psppire-dialog.h"
48 #include "ui/gui/psppire-encoding-selector.h"
49 #include "ui/gui/psppire-empty-list-store.h"
50 #include "ui/gui/psppire-var-sheet.h"
51 #include "ui/gui/psppire-var-store.h"
52 #include "ui/gui/psppire-scanf.h"
53 #include "ui/syntax-gen.h"
56 #include "gl/intprops.h"
57 #include "gl/xalloc.h"
60 #define _(msgid) gettext (msgid)
61 #define N_(msgid) msgid
63 /* Page where the user chooses field separators. */
64 struct separators_page
66 /* How to break lines into columns. */
67 struct string separators; /* Field separators; each byte is one separator character. */
68 struct string quotes; /* Quote characters; empty means fields are never treated as quoted. */
69 bool escape; /* Doubled quotes yield a quote mark? */
/* NOTE(review): other members used in this file (page, custom_cb, quote_cb,
   escape_cb) are not visible in this view of the struct. */
73 GtkWidget *custom_entry; /* Entry whose text is appended to the separators when custom_cb is active. */
75 GtkWidget *quote_combo; /* Combo box entry offering the quote-character choices. */
76 GtkEntry *quote_entry; /* Text entry embedded in quote_combo (its GtkBin child). */
78 GtkTreeView *fields_tree_view; /* Preview of the split fields; destroyed and recreated on each revision. */
81 /* The "separators" page of the assistant. */
/* Forward declarations of the file-local helpers and GTK signal handlers
   defined later in this file. */
83 static void revise_fields_preview (struct import_assistant *ia);
84 static void choose_likely_separators (struct import_assistant *ia);
85 static void find_commonest_chars (unsigned long int histogram[UCHAR_MAX + 1],
86 const char *targets, const char *def,
87 struct string *result);
88 static void clear_fields (struct import_assistant *ia);
89 static void revise_fields_preview (struct import_assistant *); /* NOTE(review): repeats the prototype at the top of this list; redundant but harmless. */
90 static void set_separators (struct import_assistant *);
91 static void get_separators (struct import_assistant *);
/* Signal handlers connected in separators_page_create. */
92 static void on_separators_custom_entry_notify (GObject *UNUSED,
94 struct import_assistant *);
95 static void on_separators_custom_cb_toggle (GtkToggleButton *custom_cb,
96 struct import_assistant *);
97 static void on_quote_combo_change (GtkComboBox *combo,
98 struct import_assistant *);
99 static void on_quote_cb_toggle (GtkToggleButton *quote_cb,
100 struct import_assistant *);
101 static void on_separator_toggle (GtkToggleButton *, struct import_assistant *);
103 /* A common field separator and its identifying name. */
106 const char *name; /* Widget name of the matching toggle button; looked up with get_widget_assert. */
107 int c; /* Separator character; the byte stored in the separators string when the button is active. */
110 /* All the separators in the dialog box. */
111 static const struct separator separators[] =
/* Number of entries in separators[].  set_separators tests the entries
   against a bit mask using 1u << index, so the table must stay smaller than
   the bit width of unsigned int. */
123 #define SEPARATOR_CNT (sizeof separators / sizeof *separators)
126 set_quote_list (GtkComboBoxEntry *cb)
/* Gives CB a one-column string model holding the predefined quote choices
   and makes that column the one shown in the entry. */
128 GtkListStore *list = gtk_list_store_new (1, G_TYPE_STRING);
/* The three choices: both quote kinds together, the single quote alone, and
   the double quote alone. */
131 const gchar *seperator[3] = {"'\"", "\'", "\""};
133 for (i = 0; i < 3; i++)
135 const gchar *s = seperator[i];
137 /* Add a new row to the model */
138 gtk_list_store_append (list, &iter);
139 gtk_list_store_set (list, &iter,
/* Hand the model to the combo box, then drop the reference obtained from
   gtk_list_store_new. */
145 gtk_combo_box_set_model (GTK_COMBO_BOX (cb), GTK_TREE_MODEL (list));
146 g_object_unref (list);
/* The entry part takes its text from column 0 of the model. */
148 gtk_combo_box_entry_set_text_column (cb, 0);
151 /* Initializes IA's separators substructure. */
153 struct separators_page *
154 separators_page_create (struct import_assistant *ia)
/* Allocates the page state, registers the "Separators" widget as a content
   page of the assistant, caches the widgets the page works with, and connects
   the signal handlers that keep the fields preview up to date. */
156 GtkBuilder *builder = ia->asst.builder;
160 struct separators_page *p = xzalloc (sizeof *p);
162 p->page = add_page_to_assistant (ia, get_widget_assert (builder, "Separators"),
163 GTK_ASSISTANT_PAGE_CONTENT);
/* Look up the widgets by their builder names. */
165 p->custom_cb = get_widget_assert (builder, "custom-cb");
166 p->custom_entry = get_widget_assert (builder, "custom-entry");
167 p->quote_combo = get_widget_assert (builder, "quote-combo");
/* The editable text of the quote combo lives in its child entry. */
168 p->quote_entry = GTK_ENTRY (gtk_bin_get_child (GTK_BIN (p->quote_combo)));
169 p->quote_cb = get_widget_assert (builder, "quote-cb");
170 p->escape_cb = get_widget_assert (builder, "escape");
172 set_quote_list (GTK_COMBO_BOX_ENTRY (p->quote_combo));
173 p->fields_tree_view = GTK_TREE_VIEW (get_widget_assert (builder, "fields"));
/* Each handler below ends by calling revise_fields_preview, so any change to
   these widgets rebuilds the preview. */
174 g_signal_connect (p->quote_combo, "changed",
175 G_CALLBACK (on_quote_combo_change), ia);
176 g_signal_connect (p->quote_cb, "toggled",
177 G_CALLBACK (on_quote_cb_toggle), ia);
178 g_signal_connect (p->custom_entry, "notify::text",
179 G_CALLBACK (on_separators_custom_entry_notify), ia);
180 g_signal_connect (p->custom_cb, "toggled",
181 G_CALLBACK (on_separators_custom_cb_toggle), ia);
/* One toggle button per entry in the separators[] table. */
182 for (i = 0; i < SEPARATOR_CNT; i++)
183 g_signal_connect (get_widget_assert (builder, separators[i].name),
184 "toggled", G_CALLBACK (on_separator_toggle), ia);
/* The escape checkbox shares the separator-toggle handler. */
185 g_signal_connect (p->escape_cb, "toggled",
186 G_CALLBACK (on_separator_toggle), ia);
191 /* Frees IA's separators substructure. */
193 destroy_separators_page (struct import_assistant *ia)
195 struct separators_page *s = ia->separators;
/* Release the storage held by the two dynamic strings of the page. */
197 ds_destroy (&s->separators);
198 ds_destroy (&s->quotes);
202 /* Called just before the separators page becomes visible in the assistant. */
205 prepare_separators_page (struct import_assistant *ia)
/* Rebuild the fields preview so the page shows current data when it appears. */
207 revise_fields_preview (ia);
210 /* Called when the Reset button is clicked on the separators
211 page, resets the separators to the defaults. */
213 reset_separators_page (struct import_assistant *ia)
/* Re-derive the separator and quote guesses from the characters in the file. */
215 choose_likely_separators (ia);
219 /* Frees and clears the column data in IA's separators substructure. */
222 clear_fields (struct import_assistant *ia)
/* Nothing to release unless at least one column exists. */
224 if (ia->column_cnt > 0)
229 for (row = 0; row < ia->file.line_cnt; row++)
231 const struct string *line = &ia->file.lines[row];
232 const char *line_start = ds_data (line);
233 const char *line_end = ds_end (line);
235 for (col = ia->columns; col < &ia->columns[ia->column_cnt]; col++)
237 char *s = ss_data (col->contents[row]);
/* A field whose data lies inside the line's own buffer merely borrows that
   storage; only fields pointing elsewhere are deallocated. */
238 if (!(s >= line_start && s <= line_end))
239 ss_dealloc (&col->contents[row]);
/* Then free each column's per-row contents array. */
243 for (col = ia->columns; col < &ia->columns[ia->column_cnt]; col++)
246 free (col->contents);
255 /* Breaks the file data in IA into columns based on the
256 separators set in IA's separators substructure. */
258 split_fields (struct import_assistant *ia)
/* Walks every line in ia->file.lines and cuts it into fields.  A field that
   starts with one of the quote characters is read up to the matching quote;
   any other field runs up to the next separator byte.  Columns are added to
   ia->columns on demand (the array grows through x2nrealloc), each new column
   gets one contents slot per file line, and column->width tracks the longest
   field stored in that column.
   NOTE(review): the ds_put_byte call further down takes the address of a
   variable named s; presumably an inner declaration, not visible in this
   view, shadows the page pointer declared just below -- confirm. */
260 struct separators_page *s = ia->separators;
261 size_t columns_allocated;
267 /* Is space in the set of separators? */
268 space_sep = ss_find_byte (ds_ss (&s->separators), ' ') != SIZE_MAX;
270 /* Split all the lines, not just those from
271 ia->first_line.skip_lines on, so that we split the line that
272 contains variables names if ia->first_line.variable_names is
274 columns_allocated = 0;
275 for (row = 0; row < ia->file.line_cnt; row++)
277 struct string *line = &ia->file.lines[row];
278 struct substring text = ds_ss (line);
281 for (column_idx = 0; ; column_idx++)
283 struct substring field;
284 struct column *column;
287 ss_ltrim (&text, ss_cstr (" "));
288 if (ss_is_empty (text))
294 else if (!ds_is_empty (&s->quotes)
295 && ds_find_byte (&s->quotes, text.string[0]) != SIZE_MAX)
297 int quote = ss_get_byte (&text);
299 ss_get_until (&text, quote, &field);
306 while ((c = ss_get_byte (&text)) != EOF)
309 else if (ss_match_byte (&text, quote))
310 ds_put_byte (&s, quote);
317 ss_get_bytes (&text, ss_cspan (text, ds_ss (&s->separators)),
320 if (column_idx >= ia->column_cnt)
322 struct column *column;
324 if (ia->column_cnt >= columns_allocated)
325 ia->columns = x2nrealloc (ia->columns, &columns_allocated,
326 sizeof *ia->columns);
327 column = &ia->columns[ia->column_cnt++];
330 column->contents = xcalloc (ia->file.line_cnt,
331 sizeof *column->contents);
333 column = &ia->columns[column_idx];
334 column->contents[row] = field;
335 if (ss_length (field) > column->width)
336 column->width = ss_length (field);
339 ss_ltrim (&text, ss_cstr (" "));
340 if (ss_is_empty (text))
342 if (ss_find_byte (ds_ss (&s->separators), ss_first (text))
344 ss_advance (&text, 1);
349 /* Chooses a name for each column on the separators page */
351 choose_column_names (struct import_assistant *ia)
353 struct dictionary *dict;
/* Counter handed to dict_make_unique_var_name on every call. */
354 unsigned long int generated_name_count = 0;
/* Dictionary used while picking the names. */
358 dict = dict_create (get_default_encoding ());
/* name_row is the 1-based number of the line holding variable names (the
   last skipped line), or 0 when names are not taken from the file. */
359 name_row = ia->variable_names && ia->skip_lines ? ia->skip_lines : 0;
360 for (col = ia->columns; col < &ia->columns[ia->column_cnt]; col++)
/* The hint is a heap copy of this column's field on the name row, or NULL
   when there is no name row. */
364 hint = name_row ? ss_xstrdup (col->contents[name_row - 1]) : NULL;
365 name = dict_make_unique_var_name (dict, hint, &generated_name_count);
/* Presumably this records the name in DICT so later columns cannot reuse
   it -- confirm against dict_create_var_assert. */
369 dict_create_var_assert (dict, name, 0);
374 /* Picks the most likely separator and quote characters based on IA's file data. */
377 choose_likely_separators (struct import_assistant *ia)
/* Counts how often each byte value occurs across all lines of the file, then
   picks the commonest quote candidate (double or single quote, defaulting to
   none) and the commonest separator candidate (comma, semicolon, colon,
   slash, vertical bar, exclamation mark, tab or hyphen, defaulting to comma).
   Escaping of doubled quotes is switched on. */
/* One counter per possible byte value, all starting at zero. */
379 unsigned long int histogram[UCHAR_MAX + 1] = { 0 };
382 /* Construct a histogram of all the characters used in the
384 for (row = 0; row < ia->file.line_cnt; row++)
386 struct substring line = ds_ss (&ia->file.lines[row]);
387 size_t length = ss_length (line);
389 for (i = 0; i < length; i++)
390 histogram[(unsigned char) line.string[i]]++;
393 find_commonest_chars (histogram, "\"'", "", &ia->separators->quotes);
394 find_commonest_chars (histogram, ",;:/|!\t-", ",", &ia->separators->separators);
395 ia->separators->escape = true;
398 /* Chooses the most common character among those in TARGETS,
399 based on the frequency data in HISTOGRAM, and stores it in
400 RESULT. If there is a tie for the most common character among
401 those in TARGETS, the earliest character is chosen. If none
402 of the TARGETS appear at all, then DEF is used as a fallback. */
405 find_commonest_chars (unsigned long int histogram[UCHAR_MAX + 1],
406 const char *targets, const char *def,
407 struct string *result)
/* Best candidate seen so far and its count. */
409 unsigned char max = 0;
410 unsigned long int max_count = 0;
412 for (; *targets != '\0'; targets++)
/* Index the histogram with the byte as an unsigned value. */
414 unsigned char c = *targets;
415 unsigned long int count = histogram[c];
/* The strict comparison keeps the earlier target on a tie and skips targets
   that never occur. */
416 if (count > max_count)
/* NOTE(review): the lines choosing between the two stores below are not
   visible in this view; per the function's header comment, DEF is the
   fallback used when no target occurs. */
425 ds_put_byte (result, max);
428 ds_assign_cstr (result, def);
431 /* Revises the contents of the fields tree view based on the
432 currently chosen set of separators. */
434 revise_fields_preview (struct import_assistant *ia)
/* Bracket the rebuild with push_watch_cursor / pop_watch_cursor. */
438 push_watch_cursor (ia);
/* Destroy the old preview widget; a replacement is created below. */
440 w = GTK_WIDGET (ia->separators->fields_tree_view);
441 gtk_widget_destroy (w);
444 choose_column_names (ia);
/* The new tree view is created for the "fields-scroller" container and
   becomes the page's current preview. */
445 ia->separators->fields_tree_view = create_data_tree_view (
447 GTK_CONTAINER (get_widget_assert (ia->asst.builder, "fields-scroller")),
450 pop_watch_cursor (ia);
453 /* Sets the widgets to match IA's separators substructure. */
455 set_separators (struct import_assistant *ia)
457 struct separators_page *s = ia->separators;
/* Collects separator bytes for the custom entry. */
459 struct string custom;
464 ds_init_empty (&custom);
/* Examine each configured separator byte against the separators[] table. */
466 for (i = 0; i < ds_length (&s->separators); i++)
468 unsigned char c = ds_at (&s->separators, i);
471 for (j = 0; j < SEPARATOR_CNT; j++)
/* NOTE(review): this inner s shadows the page pointer s for the rest of its
   scope. */
473 const struct separator *s = &separators[j];
481 ds_put_byte (&custom, c);
/* Activate the toggle button of every table entry whose bit is set in seps. */
485 for (i = 0; i < SEPARATOR_CNT; i++)
487 const struct separator *s = &separators[i];
488 GtkWidget *button = get_widget_assert (ia->asst.builder, s->name);
489 gtk_toggle_button_set_active (GTK_TOGGLE_BUTTON (button),
490 (seps & (1u << i)) != 0);
/* The custom entry shows the collected bytes and is sensitive only when
   there are any. */
492 any_custom = !ds_is_empty (&custom);
493 gtk_entry_set_text (GTK_ENTRY (s->custom_entry), ds_cstr (&custom));
494 gtk_toggle_button_set_active (GTK_TOGGLE_BUTTON (s->custom_cb),
496 gtk_widget_set_sensitive (s->custom_entry, any_custom);
497 ds_destroy (&custom);
499 any_quotes = !ds_is_empty (&s->quotes);
/* With no quote characters configured, the entry shows a lone double quote. */
501 gtk_entry_set_text (s->quote_entry,
502 any_quotes ? ds_cstr (&s->quotes) : "\"");
503 gtk_toggle_button_set_active (GTK_TOGGLE_BUTTON (s->quote_cb),
505 gtk_toggle_button_set_active (GTK_TOGGLE_BUTTON (s->escape_cb),
/* The quote combo and the escape checkbox are usable only when quoting is on. */
507 gtk_widget_set_sensitive (s->quote_combo, any_quotes);
508 gtk_widget_set_sensitive (s->escape_cb, any_quotes);
511 /* Sets IA's separators substructure to match the widgets. */
513 get_separators (struct import_assistant *ia)
515 struct separators_page *s = ia->separators;
/* Rebuild the separator set from scratch: first the predefined separators
   whose toggle buttons are active... */
518 ds_clear (&s->separators);
519 for (i = 0; i < SEPARATOR_CNT; i++)
521 const struct separator *sep = &separators[i];
522 GtkWidget *button = get_widget_assert (ia->asst.builder, sep->name);
523 if (gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (button)))
524 ds_put_byte (&s->separators, sep->c);
/* ...then the text of the custom entry, if the custom checkbox is active. */
527 if (gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (s->custom_cb)))
528 ds_put_cstr (&s->separators,
529 gtk_entry_get_text (GTK_ENTRY (s->custom_entry)));
/* With the quote checkbox active, the quote characters are the text
   currently shown by the quote combo.
   NOTE(review): the lines between the assignment and the ds_clear call below
   are not visible in this view; confirm that TEXT is freed there. */
531 if (gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (s->quote_cb)))
533 gchar *text = gtk_combo_box_get_active_text (
534 GTK_COMBO_BOX (s->quote_combo));
535 ds_assign_cstr (&s->quotes, text);
539 ds_clear (&s->quotes);
540 s->escape = gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (s->escape_cb));
543 /* Called when the user changes the entry field for custom separators. */
546 on_separators_custom_entry_notify (GObject *gobject UNUSED,
547 GParamSpec *arg1 UNUSED,
548 struct import_assistant *ia)
/* Connected to "notify::text" on the custom entry; rebuilds the preview. */
550 revise_fields_preview (ia);
553 /* Called when the user toggles the checkbox that enables custom separators. */
556 on_separators_custom_cb_toggle (GtkToggleButton *custom_cb,
557 struct import_assistant *ia)
/* The custom entry is sensitive only while its checkbox is active. */
559 bool is_active = gtk_toggle_button_get_active (custom_cb);
560 gtk_widget_set_sensitive (ia->separators->custom_entry, is_active);
561 revise_fields_preview (ia);
564 /* Called when the user changes the selection in the combo box
565 that selects a quote character. */
567 on_quote_combo_change (GtkComboBox *combo, struct import_assistant *ia)
/* Connected to "changed" on the quote combo; COMBO itself is not used. */
569 revise_fields_preview (ia);
572 /* Called when the user toggles the checkbox that enables quoting. */
575 on_quote_cb_toggle (GtkToggleButton *quote_cb, struct import_assistant *ia)
/* The quote combo and the escape checkbox are sensitive only while quoting
   is enabled. */
577 bool is_active = gtk_toggle_button_get_active (quote_cb);
578 gtk_widget_set_sensitive (ia->separators->quote_combo, is_active);
579 gtk_widget_set_sensitive (ia->separators->escape_cb, is_active);
580 revise_fields_preview (ia);
583 /* Called when the user toggles one of the separators checkboxes. */
586 on_separator_toggle (GtkToggleButton *toggle UNUSED,
587 struct import_assistant *ia)
/* Shared by every predefined separator button and by the escape checkbox. */
589 revise_fields_preview (ia);
595 separators_append_syntax (const struct import_assistant *ia, struct string *s)
/* Appends to S the DELIMITERS subcommand for the chosen separators and, when
   quote characters are set, the QUALIFIER subcommand plus ESCAPE if escaping
   is enabled. */
598 ds_put_cstr (s, " /DELIMITERS=\"");
/* Tab and backslash are written first, as the two-character sequences
   backslash-t and backslash-backslash. */
599 if (ds_find_byte (&ia->separators->separators, '\t') != SIZE_MAX)
600 ds_put_cstr (s, "\\t");
601 if (ds_find_byte (&ia->separators->separators, '\\') != SIZE_MAX)
602 ds_put_cstr (s, "\\\\");
603 for (i = 0; i < ds_length (&ia->separators->separators); i++)
605 char c = ds_at (&ia->separators->separators, i);
/* NOTE(review): the condition guarding the doubled double quote is not
   visible in this view; presumably it tests C against the double quote. */
607 ds_put_cstr (s, "\"\"");
/* Tab and backslash were already written above, so they are skipped here. */
608 else if (c != '\t' && c != '\\')
611 ds_put_cstr (s, "\"\n");
612 if (!ds_is_empty (&ia->separators->quotes))
613 syntax_gen_pspp (s, " /QUALIFIER=%sq\n", ds_cstr (&ia->separators->quotes));
614 if (!ds_is_empty (&ia->separators->quotes) && ia->separators->escape)
615 ds_put_cstr (s, " /ESCAPE\n");