1 /* PSPPIRE - a graphical user interface for PSPP.
2 Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "ui/gui/text-data-import-dialog.h"
23 #include <gtk-contrib/psppire-sheet.h>
29 #include "data/data-in.h"
30 #include "data/data-out.h"
31 #include "data/format-guesser.h"
32 #include "data/casereader.h"
33 #include "data/gnumeric-reader.h"
34 #include "data/ods-reader.h"
35 #include "data/spreadsheet-reader.h"
36 #include "data/value-labels.h"
37 #include "language/data-io/data-parser.h"
38 #include "language/lexer/lexer.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/i18n.h"
41 #include "libpspp/line-reader.h"
42 #include "libpspp/message.h"
43 #include "ui/gui/checkbox-treeview.h"
44 #include "ui/gui/dialog-common.h"
45 #include "ui/gui/executor.h"
46 #include "ui/gui/helper.h"
47 #include "ui/gui/builder-wrapper.h"
48 #include "ui/gui/psppire-data-window.h"
49 #include "ui/gui/psppire-dialog.h"
50 #include "ui/gui/psppire-encoding-selector.h"
51 #include "ui/gui/psppire-empty-list-store.h"
52 #include "ui/gui/psppire-var-sheet.h"
53 #include "ui/gui/psppire-var-store.h"
54 #include "ui/gui/psppire-scanf.h"
55 #include "ui/syntax-gen.h"
58 #include "gl/intprops.h"
59 #include "gl/xalloc.h"
62 #define _(msgid) gettext (msgid)
63 #define N_(msgid) msgid
65 struct import_assistant;
68 static char *choose_file (GtkWindow *parent_window, gchar **encodingp);
69 enum { MAX_PREVIEW_LINES = 1000 }; /* Max number of lines to read. */
73 Update IA according to the contents of the dictionary and case reader
74 held in IA's sheet_spec page. That case reader will be destroyed by this function.
/* Rebuilds the separators page's column array from the sheet-spec page:
   one column per variable in ssp->dict, each holding the formatted text
   of the cases read from ssp->reader.  Afterwards stores the row count
   in file->line_cnt and destroys ssp->reader. */
77 update_assistant (struct import_assistant *ia)
79 struct sheet_spec_page *ssp = &ia->sheet_spec;
81 struct file *file = &ia->file;
82 struct separators_page *sepp = &ia->separators;
/* Allocate one column record per dictionary variable; each starts with
   a copy of the variable's name and no contents. */
89 sepp->column_cnt = dict_get_var_cnt (ssp->dict);
90 sepp->columns = xcalloc (sepp->column_cnt, sizeof (*sepp->columns));
91 for (col = 0; col < sepp->column_cnt ; ++col)
93 const struct variable *var = dict_get_var (ssp->dict, col);
94 sepp->columns[col].name = xstrdup (var_get_name (var));
95 sepp->columns[col].contents = NULL;
/* Read cases until the reader returns NULL; each case is unreferenced
   by the loop's step expression. */
98 for (; (c = casereader_read (ssp->reader)) != NULL; case_unref (c))
101 for (col = 0; col < sepp->column_cnt ; ++col)
104 const struct variable *var = dict_get_var (ssp->dict, col);
/* Resize this column's contents to `rows' substrings so that the newest
   cell can be stored at index rows - 1. */
106 sepp->columns[col].contents = xrealloc (sepp->columns[col].contents,
107 sizeof (struct substring) * rows);
/* Format the case's value for this variable using the variable's print
   format and the dictionary's encoding. */
109 ss = data_out (case_data (c, var), dict_get_encoding (ssp->dict),
110 var_get_print_format (var));
/* NOTE(review): the stored substring wraps the string returned by
   data_out (); presumably the column now owns it -- confirm where it is
   freed. */
112 sepp->columns[col].contents[rows - 1] = ss_cstr (ss);
/* Checks whether more than MAX_PREVIEW_LINES rows have been read. */
115 if (rows > MAX_PREVIEW_LINES)
/* Publish the row count and release the case reader. */
123 file->line_cnt = rows;
124 casereader_destroy (ssp->reader);
129 /* Obtains the file to import from the user and initializes IA's
130 file substructure. PARENT_WINDOW must be the window to use
131 as the file chooser window's parent.
133 Returns true if successful, false if the file name could not
134 be obtained or the file could not be read. */
136 init_file (struct import_assistant *ia, GtkWindow *parent_window)
138 enum { MAX_LINE_LEN = 16384 }; /* Max length of an acceptable line. */
139 struct file *file = &ia->file;
140 struct casereader *creader = NULL;
141 struct dictionary *dict = NULL;
142 struct spreadsheet_read_info sri;
143 struct spreadsheet_read_options opts;
/* Ask the user for a file; the chosen name and encoding are stored
   directly in IA's file substructure.  A null name is checked for
   immediately afterwards. */
146 file->file_name = choose_file (parent_window, &file->encoding);
147 if (file->file_name == NULL)
/* Spreadsheet read options: no sheet name or cell range, sheet index 1. */
150 opts.sheet_name = NULL;
151 opts.cell_range = NULL;
152 opts.sheet_index = 1;
/* Spreadsheet read info: the chosen file, with read_names enabled. */
154 sri.file_name = file->file_name;
155 sri.read_names = true;
/* Attempt to open the file with the Gnumeric reader and with the ODS
   reader; file.type is set to match the reader attempted. */
160 creader = gnumeric_open_reader (&sri, &opts, &dict);
161 ia->file.type = FTYPE_GNUMERIC;
166 creader = ods_open_reader (&sri, &opts, &dict);
167 ia->file.type = FTYPE_ODS;
/* Spreadsheet case: hand the case reader to the sheet-spec page and let
   update_assistant () populate the assistant from it. */
172 struct sheet_spec_page *ssp = &ia->sheet_spec;
174 ssp->reader = creader;
176 update_assistant (ia);
/* Text case: open the file read-only as a line reader, using the
   encoding the user selected. */
181 struct line_reader *reader = line_reader_for_file (file->encoding, file->file_name, O_RDONLY);
184 msg (ME, _("Could not open `%s': %s"),
185 file->file_name, strerror (errno));
/* Read at most MAX_PREVIEW_LINES lines into file->lines, counting them
   in file->line_cnt. */
189 ds_init_empty (&input);
190 file->lines = xnmalloc (MAX_PREVIEW_LINES, sizeof *file->lines);
191 for (; file->line_cnt < MAX_PREVIEW_LINES; file->line_cnt++)
/* A failed read or a line longer than MAX_LINE_LEN is handled below;
   the read is allowed MAX_LINE_LEN + 1 bytes so that an over-long line
   can be detected. */
194 if (!line_reader_read (reader, &input, MAX_LINE_LEN + 1)
195 || ds_length (&input) > MAX_LINE_LEN)
197 if (line_reader_eof (reader))
199 else if (line_reader_error (reader))
200 msg (ME, _("Error reading `%s': %s"),
201 file->file_name, strerror (line_reader_error (reader)));
203 msg (ME, _("Failed to read `%s', because it contains a line "
204 "over %d bytes long and therefore appears not to be "
206 file->file_name, MAX_LINE_LEN);
207 line_reader_close (reader);
/* Store the line recoded from the reader's encoding to UTF-8.
   NOTE(review): if recode_string () returns newly allocated memory and
   ds_init_cstr () copies its argument, the recoded buffer is leaked
   here -- confirm. */
213 ds_init_cstr (&file->lines[file->line_cnt],
214 recode_string ("UTF-8", line_reader_get_encoding (reader),
215 ds_cstr (&input), ds_length (&input)));
/* A file that yielded no lines is reported as empty. */
219 if (file->line_cnt == 0)
221 msg (ME, _("`%s' is empty."), file->file_name);
222 line_reader_close (reader);
227 /* Estimate the number of lines in the file. */
/* Fewer than MAX_PREVIEW_LINES lines read: the count read is used as
   the total. */
228 if (file->line_cnt < MAX_PREVIEW_LINES)
229 file->total_lines = file->line_cnt;
/* Otherwise extrapolate: lines read per byte consumed so far, scaled by
   the file size from fstat ().  The total is set to 0 when no estimate
   can be made. */
233 off_t position = line_reader_tell (reader);
234 if (fstat (line_reader_fileno (reader), &s) == 0 && position > 0)
235 file->total_lines = (double) file->line_cnt / position * s.st_size;
237 file->total_lines = 0;
240 line_reader_close (reader);
241 ia->file.type = FTYPE_TEXT;
247 /* Frees IA's file substructure. */
249 destroy_file (struct import_assistant *ia)
251 struct file *f = &ia->file;
/* Destroy each of the f->line_cnt preview line strings. */
256 for (i = 0; i < f->line_cnt; i++)
257 ds_destroy (&f->lines[i]);
/* The file name and encoding strings are released with g_free (). */
261 g_free (f->file_name);
262 g_free (f->encoding);
265 /* Obtains the file to read from the user. If successful, returns the name of
266 the file and stores the user's chosen encoding for the file into *ENCODINGP.
267 The caller must free each of these strings with g_free().
269 On failure, returns a null pointer and stores NULL in *ENCODINGP.
271 PARENT_WINDOW must be the window to use as the file chooser window's
274 choose_file (GtkWindow *parent_window, gchar **encodingp)
277 GtkFileFilter *filter = NULL;
/* Build an "open file" chooser dialog with Cancel and Open buttons. */
279 GtkWidget *dialog = gtk_file_chooser_dialog_new (_("Import Delimited Text Data"),
281 GTK_FILE_CHOOSER_ACTION_OPEN,
282 GTK_STOCK_CANCEL, GTK_RESPONSE_CANCEL,
283 GTK_STOCK_OPEN, GTK_RESPONSE_ACCEPT,
/* Turn off the chooser's "local-only" property. */
286 g_object_set (dialog, "local-only", FALSE, NULL);
/* Each filter below is created, named, given its MIME types or glob
   patterns, and then added to the chooser. */
/* Filter: any text/ MIME type. */
288 filter = gtk_file_filter_new ();
289 gtk_file_filter_set_name (filter, _("Text files"));
290 gtk_file_filter_add_mime_type (filter, "text/*");
291 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
/* Filter: file names ending in .txt or .TXT. */
293 filter = gtk_file_filter_new ();
294 gtk_file_filter_set_name (filter, _("Text (*.txt) Files"));
295 gtk_file_filter_add_pattern (filter, "*.txt");
296 gtk_file_filter_add_pattern (filter, "*.TXT");
297 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
/* Filter: text/plain. */
299 filter = gtk_file_filter_new ();
300 gtk_file_filter_set_name (filter, _("Plain Text (ASCII) Files"));
301 gtk_file_filter_add_mime_type (filter, "text/plain");
302 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
/* Filter: text/csv. */
304 filter = gtk_file_filter_new ();
305 gtk_file_filter_set_name (filter, _("Comma Separated Value Files"));
306 gtk_file_filter_add_mime_type (filter, "text/csv");
307 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
309 /* I've never encountered one of these, but it's listed here:
310 http://www.iana.org/assignments/media-types/text/tab-separated-values */
311 filter = gtk_file_filter_new ();
312 gtk_file_filter_set_name (filter, _("Tab Separated Value Files"));
313 gtk_file_filter_add_mime_type (filter, "text/tab-separated-values");
314 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
/* Filter: Gnumeric spreadsheets. */
316 filter = gtk_file_filter_new ();
317 gtk_file_filter_set_name (filter, _("Gnumeric Spreadsheet Files"));
318 gtk_file_filter_add_mime_type (filter, "application/x-gnumeric");
319 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
/* Filter: OpenDocument spreadsheets. */
321 filter = gtk_file_filter_new ();
322 gtk_file_filter_set_name (filter, _("OpenOffice.Org Spreadsheet Files"));
323 gtk_file_filter_add_mime_type (filter, "application/vnd.oasis.opendocument.spreadsheet");
324 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
/* Filter: both spreadsheet MIME types above combined. */
326 filter = gtk_file_filter_new ();
327 gtk_file_filter_set_name (filter, _("All Spreadsheet Files"));
328 gtk_file_filter_add_mime_type (filter, "application/x-gnumeric");
329 gtk_file_filter_add_mime_type (filter, "application/vnd.oasis.opendocument.spreadsheet");
330 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
/* Attach an encoding selector as the chooser's extra widget; its
   selection is read back when the dialog is accepted. */
333 gtk_file_chooser_set_extra_widget (
334 GTK_FILE_CHOOSER (dialog), psppire_encoding_selector_new ("Auto", true));
/* Filter: every file. */
336 filter = gtk_file_filter_new ();
337 gtk_file_filter_set_name (filter, _("All Files"));
338 gtk_file_filter_add_pattern (filter, "*");
339 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
/* Run the dialog; on acceptance fetch the chosen file name and the
   encoding picked in the extra widget. */
341 switch (gtk_dialog_run (GTK_DIALOG (dialog)))
343 case GTK_RESPONSE_ACCEPT:
344 file_name = gtk_file_chooser_get_filename (GTK_FILE_CHOOSER (dialog));
345 *encodingp = psppire_encoding_selector_get_encoding (
346 gtk_file_chooser_get_extra_widget (GTK_FILE_CHOOSER (dialog)));
/* The dialog widget is destroyed once a response has been handled. */
353 gtk_widget_destroy (dialog);