1 /* PSPPIRE - a graphical user interface for PSPP.
2 Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "ui/gui/text-data-import-dialog.h"
23 #include <gtk-contrib/psppire-sheet.h>
29 #include "data/data-in.h"
30 #include "data/data-out.h"
31 #include "data/format-guesser.h"
32 #include "data/casereader.h"
33 #include "data/gnumeric-reader.h"
34 #include "data/ods-reader.h"
35 #include "data/spreadsheet-reader.h"
36 #include "data/value-labels.h"
37 #include "language/data-io/data-parser.h"
38 #include "language/lexer/lexer.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/i18n.h"
41 #include "libpspp/line-reader.h"
42 #include "libpspp/message.h"
43 #include "ui/gui/checkbox-treeview.h"
44 #include "ui/gui/dialog-common.h"
45 #include "ui/gui/executor.h"
46 #include "ui/gui/helper.h"
47 #include "ui/gui/builder-wrapper.h"
48 #include "ui/gui/psppire-data-window.h"
49 #include "ui/gui/psppire-dialog.h"
50 #include "ui/gui/psppire-encoding-selector.h"
51 #include "ui/gui/psppire-empty-list-store.h"
52 #include "ui/gui/psppire-var-sheet.h"
53 #include "ui/gui/psppire-var-store.h"
54 #include "ui/gui/psppire-scanf.h"
55 #include "ui/syntax-gen.h"
58 #include "gl/intprops.h"
59 #include "gl/xalloc.h"
/* Gettext shorthands: _() passes MSGID to gettext(); N_() expands to MSGID
   unchanged. */
62 #define _(msgid) gettext (msgid)
63 #define N_(msgid) msgid
/* Forward declaration only; the structure's members are defined elsewhere. */
65 struct import_assistant;
/* Prototype so that init_file() can call choose_file() ahead of its
   definition later in this file. */
68 static char *choose_file (GtkWindow *parent_window, gchar **encodingp);
/* Shared preview cap: it sizes and bounds the text-line array in init_file()
   and is the row threshold tested in update_assistant(). */
69 enum { MAX_PREVIEW_LINES = 1000 }; /* Max number of lines to read. */
73 /* Update IA according to the contents of the dictionary and case reader held
74 in IA's sheet_spec. The case reader will be destroyed by this function. */
77 update_assistant (struct import_assistant *ia)
/* Builds the preview columns in ia->separators from the dictionary and case
   reader stored in ia->sheet_spec, records the number of rows read in
   ia->file.line_cnt, and destroys the case reader. */
79 struct sheet_spec_page *ssp = &ia->sheet_spec;
81 struct file *file = &ia->file;
82 struct separators_page *sepp = &ia->separators;
/* Allocate one column record per variable reported by dict_get_var_cnt(),
   then give each record its own copy of the variable's name and an empty
   contents array. */
89 sepp->column_cnt = dict_get_var_cnt (ssp->dict);
90 sepp->columns = xcalloc (sepp->column_cnt, sizeof (*sepp->columns));
91 for (col = 0; col < sepp->column_cnt ; ++col)
93 const struct variable *var = dict_get_var (ssp->dict, col);
94 sepp->columns[col].name = xstrdup (var_get_name (var));
95 sepp->columns[col].contents = NULL;
/* Consume cases until casereader_read() returns NULL.  The loop's step
   expression releases each case once it has been processed. */
98 for (; (c = casereader_read (ssp->reader)) != NULL; case_unref (c))
101 for (col = 0; col < sepp->column_cnt ; ++col)
104 const struct variable *var = dict_get_var (ssp->dict, col);
/* Grow this column's array to hold ROWS substrings.
   NOTE(review): slot rows - 1 is written below, so ROWS must already count
   the current case at this point -- confirm it is incremented before this
   inner loop. */
106 sepp->columns[col].contents = xrealloc (sepp->columns[col].contents,
107 sizeof (struct substring) * rows);
/* Render the value as text using the dictionary's encoding and the
   variable's print format. */
109 ss = data_out (case_data (c, var), dict_get_encoding (ssp->dict),
110 var_get_print_format (var));
/* NOTE(review): ss_cstr() presumably wraps the string returned by data_out()
   without copying, in which case the column takes over that allocation --
   confirm who frees it. */
112 sepp->columns[col].contents[rows - 1] = ss_cstr (ss);
/* NOTE(review): the comparison is >, so it first becomes true when ROWS is
   MAX_PREVIEW_LINES + 1 -- confirm that this is the intended cap. */
115 if (rows > MAX_PREVIEW_LINES)
/* Publish the number of rows read as the file's line count, then release the
   case reader. */
123 file->line_cnt = rows;
124 casereader_destroy (ssp->reader);
129 /* Obtains the file to import from the user and initializes IA's
130 file substructure. PARENT_WINDOW must be the window to use
131 as the file chooser window's parent.
133 Returns true if successful, false if the file name could not
134 be obtained or the file could not be read. */
136 init_file (struct import_assistant *ia, GtkWindow *parent_window)
/* Asks the user for a file, probes it as a spreadsheet, and otherwise reads
   it as text: up to MAX_PREVIEW_LINES lines are stored, recoded to UTF-8, in
   ia->file.lines and the file's total line count is estimated. */
138 enum { MAX_LINE_LEN = 16384 }; /* Max length of an acceptable line. */
139 struct file *file = &ia->file;
140 struct sheet_spec_page *ssp = &ia->sheet_spec;
141 struct spreadsheet_read_info sri;
142 struct spreadsheet_read_options opts;
/* choose_file() supplies both the file name and, through its second
   argument, the encoding.  A null file name is checked for immediately. */
145 file->file_name = choose_file (parent_window, &file->encoding);
146 if (file->file_name == NULL)
/* Spreadsheet read settings: no sheet name or cell range, sheet index 1, and
   read_names set to true.
   NOTE(review): in the code visible here only sri.file_name is consumed --
   confirm where opts and sri.read_names are used. */
149 opts.sheet_name = NULL;
150 opts.cell_range = NULL;
151 opts.sheet_index = 1;
153 sri.file_name = file->file_name;
154 sri.read_names = true;
/* Probe the file as a spreadsheet.  A non-null result selects the
   spreadsheet path below. */
157 ssp->spreadsheet = gnumeric_probe (sri.file_name);
159 if (ssp->spreadsheet)
161 struct casereader *creader = NULL;
162 struct dictionary *dict = NULL;
/* Map the detected spreadsheet type onto the assistant's file type. */
164 if (ssp->spreadsheet->type == SPREADSHEET_GNUMERIC)
166 ia->file.type = FTYPE_GNUMERIC;
168 else if (ssp->spreadsheet->type == SPREADSHEET_ODS)
170 ia->file.type = FTYPE_ODS;
/* NOTE(review): this declaration repeats the SSP declared at the top of the
   function with the same initializer; presumably it sits in a nested block
   and shadows it -- confirm, and consider removing it. */
178 struct sheet_spec_page *ssp = &ia->sheet_spec;
/* NOTE(review): CREADER is initialized to NULL above and no other assignment
   to it is visible before this point -- confirm what the reader is expected
   to be when update_assistant() runs. */
180 ssp->reader = creader;
182 update_assistant (ia);
/* Text path: open the file read-only through a line reader, using the
   encoding obtained from choose_file(). */
189 struct line_reader *reader = line_reader_for_file (file->encoding, file->file_name, O_RDONLY);
192 msg (ME, _("Could not open `%s': %s"),
193 file->file_name, strerror (errno));
/* Read at most MAX_PREVIEW_LINES lines, using file->line_cnt as the index of
   the next line to fill.
   NOTE(review): assumes file->line_cnt is zero on entry -- TODO confirm. */
197 ds_init_empty (&input);
198 file->lines = xnmalloc (MAX_PREVIEW_LINES, sizeof *file->lines);
199 for (; file->line_cnt < MAX_PREVIEW_LINES; file->line_cnt++)
/* The read limit is one more than MAX_LINE_LEN so that an over-long line
   shows up as a length greater than MAX_LINE_LEN. */
202 if (!line_reader_read (reader, &input, MAX_LINE_LEN + 1)
203 || ds_length (&input) > MAX_LINE_LEN)
/* Tell the three outcomes apart: end of file, a read error reported by the
   reader, or a line that is too long. */
205 if (line_reader_eof (reader))
207 else if (line_reader_error (reader))
208 msg (ME, _("Error reading `%s': %s"),
209 file->file_name, strerror (line_reader_error (reader)));
211 msg (ME, _("Failed to read `%s', because it contains a line "
212 "over %d bytes long and therefore appears not to be "
214 file->file_name, MAX_LINE_LEN);
215 line_reader_close (reader);
/* Store the line converted from the reader's encoding to UTF-8.
   NOTE(review): if recode_string() returns a newly allocated string and
   ds_init_cstr() copies its argument, the recoded string is never freed --
   confirm ownership. */
221 ds_init_cstr (&file->lines[file->line_cnt],
222 recode_string ("UTF-8", line_reader_get_encoding (reader),
223 ds_cstr (&input), ds_length (&input)));
/* A file that yielded no lines at all is reported as empty. */
227 if (file->line_cnt == 0)
229 msg (ME, _("`%s' is empty."), file->file_name);
230 line_reader_close (reader);
/* If the loop stopped before the cap, LINE_CNT is the total.  Otherwise, when
   fstat() succeeds and the read position is positive, the total is
   extrapolated as lines read per byte consumed times the file size; the
   fallback is 0. */
235 /* Estimate the number of lines in the file. */
236 if (file->line_cnt < MAX_PREVIEW_LINES)
237 file->total_lines = file->line_cnt;
241 off_t position = line_reader_tell (reader);
242 if (fstat (line_reader_fileno (reader), &s) == 0 && position > 0)
243 file->total_lines = (double) file->line_cnt / position * s.st_size;
245 file->total_lines = 0;
/* Done with the reader; mark the file as plain text. */
248 line_reader_close (reader);
249 ia->file.type = FTYPE_TEXT;
255 /* Frees IA's file substructure. */
257 destroy_file (struct import_assistant *ia)
/* Releases what IA's file substructure holds: each of the LINE_CNT stored
   line strings, then the file name and encoding strings. */
259 struct file *f = &ia->file;
/* Destroy every stored preview line. */
264 for (i = 0; i < f->line_cnt; i++)
265 ds_destroy (&f->lines[i]);
/* The file name and encoding are released with g_free(). */
269 g_free (f->file_name);
270 g_free (f->encoding);
273 /* Obtains the file to read from the user. If successful, returns the name of
274 the file and stores the user's chosen encoding for the file into *ENCODINGP.
275 The caller must free each of these strings with g_free().
277 On failure, returns a null pointer and stores NULL in *ENCODINGP.
279 PARENT_WINDOW must be the window to use as the file chooser window's parent. */
282 choose_file (GtkWindow *parent_window, gchar **encodingp)
/* Shows a file chooser with a set of text and spreadsheet filters plus an
   encoding selector.  When the user accepts, the chosen file name is
   captured and the selected encoding is stored in *ENCODINGP. */
285 GtkFileFilter *filter = NULL;
/* An "open" file chooser dialog with Cancel and Open buttons. */
287 GtkWidget *dialog = gtk_file_chooser_dialog_new (_("Import Delimited Text Data"),
289 GTK_FILE_CHOOSER_ACTION_OPEN,
290 GTK_STOCK_CANCEL, GTK_RESPONSE_CANCEL,
291 GTK_STOCK_OPEN, GTK_RESPONSE_ACCEPT,
/* Turn off the chooser's "local-only" property so it is not restricted to
   local files. */
294 g_object_set (dialog, "local-only", FALSE, NULL);
/* Filter: any MIME type in the text family. */
296 filter = gtk_file_filter_new ();
297 gtk_file_filter_set_name (filter, _("Text files"));
298 gtk_file_filter_add_mime_type (filter, "text/*");
299 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
/* Filter: names ending in .txt, in lower or upper case. */
301 filter = gtk_file_filter_new ();
302 gtk_file_filter_set_name (filter, _("Text (*.txt) Files"));
303 gtk_file_filter_add_pattern (filter, "*.txt");
304 gtk_file_filter_add_pattern (filter, "*.TXT");
305 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
/* Filter: the plain-text MIME type. */
307 filter = gtk_file_filter_new ();
308 gtk_file_filter_set_name (filter, _("Plain Text (ASCII) Files"));
309 gtk_file_filter_add_mime_type (filter, "text/plain");
310 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
/* Filter: the CSV MIME type. */
312 filter = gtk_file_filter_new ();
313 gtk_file_filter_set_name (filter, _("Comma Separated Value Files"));
314 gtk_file_filter_add_mime_type (filter, "text/csv");
315 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
317 /* I've never encountered one of these, but it's listed here:
318 http://www.iana.org/assignments/media-types/text/tab-separated-values */
319 filter = gtk_file_filter_new ();
320 gtk_file_filter_set_name (filter, _("Tab Separated Value Files"));
321 gtk_file_filter_add_mime_type (filter, "text/tab-separated-values");
322 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
/* Filter: the Gnumeric MIME type. */
324 filter = gtk_file_filter_new ();
325 gtk_file_filter_set_name (filter, _("Gnumeric Spreadsheet Files"));
326 gtk_file_filter_add_mime_type (filter, "application/x-gnumeric");
327 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
/* Filter: the OpenDocument spreadsheet MIME type. */
329 filter = gtk_file_filter_new ();
330 gtk_file_filter_set_name (filter, _("OpenOffice.Org Spreadsheet Files"));
331 gtk_file_filter_add_mime_type (filter, "application/vnd.oasis.opendocument.spreadsheet");
332 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
/* Filter: both spreadsheet MIME types listed above, combined. */
334 filter = gtk_file_filter_new ();
335 gtk_file_filter_set_name (filter, _("All Spreadsheet Files"));
336 gtk_file_filter_add_mime_type (filter, "application/x-gnumeric");
337 gtk_file_filter_add_mime_type (filter, "application/vnd.oasis.opendocument.spreadsheet");
338 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
/* Install an encoding selector as the chooser's extra widget.  The encoding
   is read back from this widget after the dialog is accepted. */
341 gtk_file_chooser_set_extra_widget (
342 GTK_FILE_CHOOSER (dialog), psppire_encoding_selector_new ("Auto", true));
/* Filter: every file. */
344 filter = gtk_file_filter_new ();
345 gtk_file_filter_set_name (filter, _("All Files"));
346 gtk_file_filter_add_pattern (filter, "*");
347 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
/* Run the dialog.  On GTK_RESPONSE_ACCEPT, take the chosen file name from
   the chooser and the encoding from the extra widget.
   NOTE(review): presumably FILE_NAME and *ENCODINGP are left null for every
   other response -- confirm against the remaining switch cases. */
349 switch (gtk_dialog_run (GTK_DIALOG (dialog)))
351 case GTK_RESPONSE_ACCEPT:
352 file_name = gtk_file_chooser_get_filename (GTK_FILE_CHOOSER (dialog));
353 *encodingp = psppire_encoding_selector_get_encoding (
354 gtk_file_chooser_get_extra_widget (GTK_FILE_CHOOSER (dialog)));
/* The dialog is destroyed whichever response was given. */
361 gtk_widget_destroy (dialog);