1 /* PSPPIRE - a graphical user interface for PSPP.
2 Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "ui/gui/text-data-import-dialog.h"
23 #include <gtk-contrib/psppire-sheet.h>
29 #include "data/data-in.h"
30 #include "data/data-out.h"
31 #include "data/format-guesser.h"
32 #include "data/casereader.h"
33 #include "data/gnumeric-reader.h"
34 #include "data/ods-reader.h"
35 #include "data/spreadsheet-reader.h"
36 #include "data/value-labels.h"
37 #include "language/data-io/data-parser.h"
38 #include "language/lexer/lexer.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/i18n.h"
41 #include "libpspp/line-reader.h"
42 #include "libpspp/message.h"
43 #include "ui/gui/checkbox-treeview.h"
44 #include "ui/gui/dialog-common.h"
45 #include "ui/gui/executor.h"
46 #include "ui/gui/helper.h"
47 #include "ui/gui/builder-wrapper.h"
48 #include "ui/gui/psppire-data-window.h"
49 #include "ui/gui/psppire-dialog.h"
50 #include "ui/gui/psppire-encoding-selector.h"
51 #include "ui/gui/psppire-empty-list-store.h"
52 #include "ui/gui/psppire-var-sheet.h"
53 #include "ui/gui/psppire-var-store.h"
54 #include "ui/gui/psppire-scanf.h"
55 #include "ui/syntax-gen.h"
58 #include "gl/intprops.h"
59 #include "gl/xalloc.h"
62 #define _(msgid) gettext (msgid)
63 #define N_(msgid) msgid
/* Forward declaration only; the structure's members are not defined in this
   part of the file. */
65 struct import_assistant;
/* Prototype for the file-chooser helper whose definition appears further
   down; init_file() calls it before that definition is reached. */
68 static char *choose_file (GtkWindow *parent_window, gchar **encodingp);
/* Shared preview limit: update_assistant() compares its row count against it
   and init_file() uses it to size and bound the array of text lines. */
69 enum { MAX_PREVIEW_LINES = 1000 }; /* Max number of lines to read. */
73 Update IA according to the contents of IA->sheet_spec.dict and
74 IA->sheet_spec.reader. The reader will be destroyed by this function.
/* Rebuilds the preview data held in IA from IA->sheet_spec: creates one
   preview column per dictionary variable in IA->separators, appends to each
   column the data_out() rendering of every case read from
   IA->sheet_spec.reader, stores ROWS in IA->file.line_cnt, and finally
   destroys the reader.

   NOTE(review): this listing appears to have lost some short lines (braces,
   local declarations such as ROWS/COL/C/SS, and the statement that advances
   ROWS), so the comments below describe only the statements that are
   visible -- confirm against the complete file. */
77 update_assistant (struct import_assistant *ia)
79 struct sheet_spec_page *ssp = &ia->sheet_spec;
81 struct file *file = &ia->file;
82 struct separators_page *sepp = &ia->separators;
/* One preview column per variable in the dictionary.  Each column starts out
   with a private copy of the variable's name and no contents. */
87 sepp->column_cnt = dict_get_var_cnt (ssp->dict);
88 sepp->columns = xcalloc (sepp->column_cnt, sizeof (*sepp->columns));
89 for (col = 0; col < sepp->column_cnt ; ++col)
91 const struct variable *var = dict_get_var (ssp->dict, col);
92 sepp->columns[col].name = xstrdup (var_get_name (var));
93 sepp->columns[col].contents = NULL;
/* Pull cases from the reader one at a time; each case is unreferenced by the
   loop's increment expression once it has been processed. */
96 for (; (c = casereader_read (ssp->reader)) != NULL; case_unref (c))
99 for (col = 0; col < sepp->column_cnt ; ++col)
102 const struct variable *var = dict_get_var (ssp->dict, col);
/* Resize this column's contents array to hold ROWS entries. */
104 sepp->columns[col].contents = xrealloc (sepp->columns[col].contents,
105 sizeof (struct substring) * rows);
/* Render the case's value for this variable using the variable's print
   format and the dictionary's encoding. */
107 ss = data_out (case_data (c, var), dict_get_encoding (ssp->dict),
108 var_get_print_format (var));
/* NOTE(review): ss_cstr() presumably wraps SS without copying it, so the
   column would take over the string returned by data_out() -- confirm. */
110 sepp->columns[col].contents[rows - 1] = ss_cstr (ss);
/* NOTE(review): the body of this check is not visible here; presumably it
   stops reading once more than MAX_PREVIEW_LINES rows have been stored.
   Because the test uses `>' and follows the store above, up to
   MAX_PREVIEW_LINES + 1 rows may be kept -- confirm this is intended. */
113 if (rows > MAX_PREVIEW_LINES)
/* Publish the number of preview rows and release the reader. */
120 file->line_cnt = rows;
121 casereader_destroy (ssp->reader);
126 /* Obtains the file to import from the user and initializes IA's
127 file substructure. PARENT_WINDOW must be the window to use
128 as the file chooser window's parent.
130 Returns true if successful, false if the file name could not
131 be obtained or the file could not be read. */
/* Asks the user for a file via choose_file(), then fills in IA->file.

   The visible code tries the Gnumeric reader and the ODS reader; when a
   spreadsheet reader is available it is stored in IA->sheet_spec and
   update_assistant() builds the preview.  The remaining visible code opens
   the file with a line reader, keeps up to MAX_PREVIEW_LINES lines recoded to
   UTF-8 in IA->file.lines, and estimates the total number of lines.

   NOTE(review): this listing appears to have lost some short lines (braces,
   `else', `return', the conditions guarding several branches, and small
   declarations such as INPUT and S), so the comments below describe only the
   statements that are visible -- confirm against the complete file. */
133 init_file (struct import_assistant *ia, GtkWindow *parent_window)
135 enum { MAX_LINE_LEN = 16384 }; /* Max length of an acceptable line. */
136 struct file *file = &ia->file;
137 struct casereader *creader = NULL;
138 struct dictionary *dict = NULL;
139 struct spreadsheet_read_info sri;
140 struct spreadsheet_read_options opts;
/* choose_file() yields the file name and stores the chosen encoding in
   FILE->encoding.  The action taken for a null file name is not visible
   here; presumably the function returns false at that point -- confirm. */
143 file->file_name = choose_file (parent_window, &file->encoding);
144 if (file->file_name == NULL)
/* Spreadsheet options: no sheet name, no cell range, sheet index 1, and
   READ_NAMES enabled. */
147 opts.sheet_name = NULL;
148 opts.cell_range = NULL;
149 opts.sheet_index = 1;
151 sri.file_name = file->file_name;
152 sri.read_names = true;
/* Try the Gnumeric reader, then the ODS reader, recording the matching file
   type.  NOTE(review): the conditions guarding these two attempts are not
   visible; presumably the second is tried only if the first produced no
   reader -- confirm. */
157 creader = gnumeric_open_reader (&sri, &opts, &dict);
158 ia->file.type = FTYPE_GNUMERIC;
163 creader = ods_open_reader (&sri, &opts, &dict);
164 ia->file.type = FTYPE_ODS;
/* Spreadsheet path: hand the reader to the sheet-spec page and build the
   preview from it. */
169 struct sheet_spec_page *ssp = &ia->sheet_spec;
171 ssp->reader = creader;
173 update_assistant (ia);
/* Text path: open the file read-only with the encoding chosen by the
   user. */
178 struct line_reader *reader = line_reader_for_file (file->encoding, file->file_name, O_RDONLY);
/* Error reported for a failed open; the test that guards it is not visible
   here. */
181 msg (ME, _("Could not open `%s': %s"),
182 file->file_name, strerror (errno));
/* Read up to MAX_PREVIEW_LINES lines into FILE->lines.
   NOTE(review): the loop starts counting from the current FILE->line_cnt, and
   no initialization of that field is visible in this function, so it
   presumably relies on line_cnt being zero on entry -- confirm. */
186 ds_init_empty (&input);
187 file->lines = xnmalloc (MAX_PREVIEW_LINES, sizeof *file->lines);
188 for (; file->line_cnt < MAX_PREVIEW_LINES; file->line_cnt++)
/* A failed read, or a line longer than MAX_LINE_LEN bytes, ends the loop.
   The visible code distinguishes end of file, an I/O error, and an over-long
   line; messages are issued for the latter two. */
191 if (!line_reader_read (reader, &input, MAX_LINE_LEN + 1)
192 || ds_length (&input) > MAX_LINE_LEN)
194 if (line_reader_eof (reader))
196 else if (line_reader_error (reader))
197 msg (ME, _("Error reading `%s': %s"),
198 file->file_name, strerror (line_reader_error (reader)));
200 msg (ME, _("Failed to read `%s', because it contains a line "
201 "over %d bytes long and therefore appears not to be "
203 file->file_name, MAX_LINE_LEN);
204 line_reader_close (reader);
/* Store the line just read, converted from the reader's encoding to UTF-8.
   NOTE(review): recode_string() presumably returns a newly allocated string
   and ds_init_cstr() presumably copies its argument; if both hold, the
   temporary is never freed and leaks once per line -- confirm. */
210 ds_init_cstr (&file->lines[file->line_cnt],
211 recode_string ("UTF-8", line_reader_get_encoding (reader),
212 ds_cstr (&input), ds_length (&input)));
/* A file that produced no lines at all is reported as empty. */
216 if (file->line_cnt == 0)
218 msg (ME, _("`%s' is empty."), file->file_name);
219 line_reader_close (reader);
224 /* Estimate the number of lines in the file. */
/* Fewer than MAX_PREVIEW_LINES lines read: the count is used as the total.
   Otherwise the visible code extrapolates from lines read per byte consumed
   (the reader's position) times the file size reported by fstat(), and
   stores 0 in the other visible assignment. */
225 if (file->line_cnt < MAX_PREVIEW_LINES)
226 file->total_lines = file->line_cnt;
230 off_t position = line_reader_tell (reader);
231 if (fstat (line_reader_fileno (reader), &s) == 0 && position > 0)
232 file->total_lines = (double) file->line_cnt / position * s.st_size;
234 file->total_lines = 0;
/* Finished with the text reader; record that the file is plain text. */
237 line_reader_close (reader);
238 ia->file.type = FTYPE_TEXT;
244 /* Frees IA's file substructure. */
/* Releases the resources visible in IA->file: each of the first LINE_CNT
   entries of LINES is destroyed with ds_destroy(), and FILE_NAME and ENCODING
   are released with g_free().

   NOTE(review): this listing appears to have lost some short lines between
   the loop and the g_free() calls, so whether the LINES array itself is freed
   cannot be seen here -- confirm against the complete file. */
246 destroy_file (struct import_assistant *ia)
248 struct file *f = &ia->file;
/* Destroy every preview line that was stored. */
253 for (i = 0; i < f->line_cnt; i++)
254 ds_destroy (&f->lines[i]);
/* Both strings are released with g_free(), as the comment above
   choose_file() requires of its caller. */
258 g_free (f->file_name);
259 g_free (f->encoding);
262 /* Obtains the file to read from the user. If successful, returns the name of
263 the file and stores the user's chosen encoding for the file into *ENCODINGP.
264 The caller must free each of these strings with g_free().
266 On failure, returns a null pointer and stores NULL in *ENCODINGP.
268 PARENT_WINDOW must be the window to use as the file chooser window's
271 choose_file (GtkWindow *parent_window, gchar **encodingp)
274 GtkFileFilter *filter = NULL;
276 GtkWidget *dialog = gtk_file_chooser_dialog_new (_("Import Delimited Text Data"),
278 GTK_FILE_CHOOSER_ACTION_OPEN,
279 GTK_STOCK_CANCEL, GTK_RESPONSE_CANCEL,
280 GTK_STOCK_OPEN, GTK_RESPONSE_ACCEPT,
283 g_object_set (dialog, "local-only", FALSE, NULL);
285 filter = gtk_file_filter_new ();
286 gtk_file_filter_set_name (filter, _("Text files"));
287 gtk_file_filter_add_mime_type (filter, "text/*");
288 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
290 filter = gtk_file_filter_new ();
291 gtk_file_filter_set_name (filter, _("Text (*.txt) Files"));
292 gtk_file_filter_add_pattern (filter, "*.txt");
293 gtk_file_filter_add_pattern (filter, "*.TXT");
294 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
296 filter = gtk_file_filter_new ();
297 gtk_file_filter_set_name (filter, _("Plain Text (ASCII) Files"));
298 gtk_file_filter_add_mime_type (filter, "text/plain");
299 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
301 filter = gtk_file_filter_new ();
302 gtk_file_filter_set_name (filter, _("Comma Separated Value Files"));
303 gtk_file_filter_add_mime_type (filter, "text/csv");
304 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
306 /* I've never encountered one of these, but it's listed here:
307 http://www.iana.org/assignments/media-types/text/tab-separated-values */
308 filter = gtk_file_filter_new ();
309 gtk_file_filter_set_name (filter, _("Tab Separated Value Files"));
310 gtk_file_filter_add_mime_type (filter, "text/tab-separated-values");
311 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
313 filter = gtk_file_filter_new ();
314 gtk_file_filter_set_name (filter, _("Gnumeric Spreadsheet Files"));
315 gtk_file_filter_add_mime_type (filter, "application/x-gnumeric");
316 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
318 filter = gtk_file_filter_new ();
319 gtk_file_filter_set_name (filter, _("OpenOffice.Org Spreadsheet Files"));
320 gtk_file_filter_add_mime_type (filter, "application/vnd.oasis.opendocument.spreadsheet");
321 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
323 filter = gtk_file_filter_new ();
324 gtk_file_filter_set_name (filter, _("All Spreadsheet Files"));
325 gtk_file_filter_add_mime_type (filter, "application/x-gnumeric");
326 gtk_file_filter_add_mime_type (filter, "application/vnd.oasis.opendocument.spreadsheet");
327 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
330 gtk_file_chooser_set_extra_widget (
331 GTK_FILE_CHOOSER (dialog), psppire_encoding_selector_new ("Auto", true));
333 filter = gtk_file_filter_new ();
334 gtk_file_filter_set_name (filter, _("All Files"));
335 gtk_file_filter_add_pattern (filter, "*");
336 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
338 switch (gtk_dialog_run (GTK_DIALOG (dialog)))
340 case GTK_RESPONSE_ACCEPT:
341 file_name = gtk_file_chooser_get_filename (GTK_FILE_CHOOSER (dialog));
342 *encodingp = psppire_encoding_selector_get_encoding (
343 gtk_file_chooser_get_extra_widget (GTK_FILE_CHOOSER (dialog)));
350 gtk_widget_destroy (dialog);