1 /* PSPPIRE - a graphical user interface for PSPP.
2 Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "ui/gui/text-data-import-dialog.h"
23 #include <gtk-contrib/psppire-sheet.h>
29 #include "data/data-in.h"
30 #include "data/data-out.h"
31 #include "data/format-guesser.h"
32 #include "data/casereader.h"
33 #include "data/gnumeric-reader.h"
34 #include "data/ods-reader.h"
35 #include "data/spreadsheet-reader.h"
36 #include "data/value-labels.h"
37 #include "language/data-io/data-parser.h"
38 #include "language/lexer/lexer.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/i18n.h"
41 #include "libpspp/line-reader.h"
42 #include "libpspp/message.h"
43 #include "ui/gui/checkbox-treeview.h"
44 #include "ui/gui/dialog-common.h"
45 #include "ui/gui/executor.h"
46 #include "ui/gui/helper.h"
47 #include "ui/gui/builder-wrapper.h"
48 #include "ui/gui/psppire-data-window.h"
49 #include "ui/gui/psppire-dialog.h"
50 #include "ui/gui/psppire-encoding-selector.h"
51 #include "ui/gui/psppire-empty-list-store.h"
52 #include "ui/gui/psppire-var-sheet.h"
53 #include "ui/gui/psppire-var-store.h"
54 #include "ui/gui/psppire-scanf.h"
55 #include "ui/syntax-gen.h"
58 #include "gl/intprops.h"
59 #include "gl/xalloc.h"
62 #define _(msgid) gettext (msgid)
63 #define N_(msgid) msgid
65 struct import_assistant;
68 static char *choose_file (GtkWindow *parent_window, gchar **encodingp);
69 enum { MAX_PREVIEW_LINES = 1000 }; /* Max number of lines to read. */
73 Update IA according to the contents of DICT and CREADER.
74 CREADER will be destroyed by this function.
77 update_assistant (struct import_assistant *ia)
79 struct sheet_spec_page *ssp = ia->sheet_spec;
81 struct file *file = &ia->file;
82 struct separators_page *sepp = ia->separators;
90 sepp->column_cnt = dict_get_var_cnt (ssp->dict);
91 sepp->columns = xcalloc (sepp->column_cnt, sizeof (*sepp->columns));
92 for (col = 0; col < sepp->column_cnt ; ++col)
94 const struct variable *var = dict_get_var (ssp->dict, col);
95 sepp->columns[col].name = xstrdup (var_get_name (var));
96 sepp->columns[col].contents = NULL;
99 for (; (c = casereader_read (ssp->reader)) != NULL; case_unref (c))
102 for (col = 0; col < sepp->column_cnt ; ++col)
105 const struct variable *var = dict_get_var (ssp->dict, col);
107 sepp->columns[col].contents = xrealloc (sepp->columns[col].contents,
108 sizeof (struct substring) * rows);
110 ss = data_out (case_data (c, var), dict_get_encoding (ssp->dict),
111 var_get_print_format (var));
113 sepp->columns[col].contents[rows - 1] = ss_cstr (ss);
116 if (rows > MAX_PREVIEW_LINES)
124 file->line_cnt = rows;
128 /* Obtains the file to import from the user and initializes IA's
129 file substructure. PARENT_WINDOW must be the window to use
130 as the file chooser window's parent.
132 Returns true if successful, false if the file name could not
133 be obtained or the file could not be read. */
135 init_file (struct import_assistant *ia, GtkWindow *parent_window)
137 enum { MAX_LINE_LEN = 16384 }; /* Max length of an acceptable line. */
138 struct file *file = &ia->file;
139 struct sheet_spec_page *ssp = ia->sheet_spec;
140 struct spreadsheet_read_info sri;
141 struct spreadsheet_read_options opts;
144 file->file_name = choose_file (parent_window, &file->encoding);
145 if (file->file_name == NULL)
148 opts.sheet_name = NULL;
149 opts.cell_range = NULL;
150 opts.sheet_index = 1;
152 sri.read_names = true;
155 if (ssp->spreadsheet == NULL)
156 ssp->spreadsheet = gnumeric_probe (file->file_name);
158 if (ssp->spreadsheet == NULL)
159 ssp->spreadsheet = ods_probe (file->file_name);
161 if (ssp->spreadsheet)
163 // update_assistant (ia);
168 struct line_reader *reader = line_reader_for_file (file->encoding, file->file_name, O_RDONLY);
171 msg (ME, _("Could not open `%s': %s"),
172 file->file_name, strerror (errno));
176 ds_init_empty (&input);
177 file->lines = xnmalloc (MAX_PREVIEW_LINES, sizeof *file->lines);
178 for (; file->line_cnt < MAX_PREVIEW_LINES; file->line_cnt++)
181 if (!line_reader_read (reader, &input, MAX_LINE_LEN + 1)
182 || ds_length (&input) > MAX_LINE_LEN)
184 if (line_reader_eof (reader))
186 else if (line_reader_error (reader))
187 msg (ME, _("Error reading `%s': %s"),
188 file->file_name, strerror (line_reader_error (reader)));
190 msg (ME, _("Failed to read `%s', because it contains a line "
191 "over %d bytes long and therefore appears not to be "
193 file->file_name, MAX_LINE_LEN);
194 line_reader_close (reader);
200 ds_init_cstr (&file->lines[file->line_cnt],
201 recode_string ("UTF-8", line_reader_get_encoding (reader),
202 ds_cstr (&input), ds_length (&input)));
206 if (file->line_cnt == 0)
208 msg (ME, _("`%s' is empty."), file->file_name);
209 line_reader_close (reader);
214 /* Estimate the number of lines in the file. */
215 if (file->line_cnt < MAX_PREVIEW_LINES)
216 file->total_lines = file->line_cnt;
220 off_t position = line_reader_tell (reader);
221 if (fstat (line_reader_fileno (reader), &s) == 0 && position > 0)
222 file->total_lines = (double) file->line_cnt / position * s.st_size;
224 file->total_lines = 0;
227 line_reader_close (reader);
233 /* Frees IA's file substructure. */
235 destroy_file (struct import_assistant *ia)
237 struct file *f = &ia->file;
242 for (i = 0; i < f->line_cnt; i++)
243 ds_destroy (&f->lines[i]);
247 g_free (f->file_name);
248 g_free (f->encoding);
251 /* Obtains the file to read from the user. If successful, returns the name of
252 the file and stores the user's chosen encoding for the file into *ENCODINGP.
253 The caller must free each of these strings with g_free().
255 On failure, stores a null pointer and stores NULL in *ENCODINGP.
257 PARENT_WINDOW must be the window to use as the file chooser window's
260 choose_file (GtkWindow *parent_window, gchar **encodingp)
263 GtkFileFilter *filter = NULL;
265 GtkWidget *dialog = gtk_file_chooser_dialog_new (_("Import Delimited Text Data"),
267 GTK_FILE_CHOOSER_ACTION_OPEN,
268 GTK_STOCK_CANCEL, GTK_RESPONSE_CANCEL,
269 GTK_STOCK_OPEN, GTK_RESPONSE_ACCEPT,
272 g_object_set (dialog, "local-only", FALSE, NULL);
274 filter = gtk_file_filter_new ();
275 gtk_file_filter_set_name (filter, _("Text files"));
276 gtk_file_filter_add_mime_type (filter, "text/*");
277 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
279 filter = gtk_file_filter_new ();
280 gtk_file_filter_set_name (filter, _("Text (*.txt) Files"));
281 gtk_file_filter_add_pattern (filter, "*.txt");
282 gtk_file_filter_add_pattern (filter, "*.TXT");
283 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
285 filter = gtk_file_filter_new ();
286 gtk_file_filter_set_name (filter, _("Plain Text (ASCII) Files"));
287 gtk_file_filter_add_mime_type (filter, "text/plain");
288 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
290 filter = gtk_file_filter_new ();
291 gtk_file_filter_set_name (filter, _("Comma Separated Value Files"));
292 gtk_file_filter_add_mime_type (filter, "text/csv");
293 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
295 /* I've never encountered one of these, but it's listed here:
296 http://www.iana.org/assignments/media-types/text/tab-separated-values */
297 filter = gtk_file_filter_new ();
298 gtk_file_filter_set_name (filter, _("Tab Separated Value Files"));
299 gtk_file_filter_add_mime_type (filter, "text/tab-separated-values");
300 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
302 filter = gtk_file_filter_new ();
303 gtk_file_filter_set_name (filter, _("Gnumeric Spreadsheet Files"));
304 gtk_file_filter_add_mime_type (filter, "application/x-gnumeric");
305 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
307 filter = gtk_file_filter_new ();
308 gtk_file_filter_set_name (filter, _("OpenOffice.Org Spreadsheet Files"));
309 gtk_file_filter_add_mime_type (filter, "application/vnd.oasis.opendocument.spreadsheet");
310 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
312 filter = gtk_file_filter_new ();
313 gtk_file_filter_set_name (filter, _("All Spreadsheet Files"));
314 gtk_file_filter_add_mime_type (filter, "application/x-gnumeric");
315 gtk_file_filter_add_mime_type (filter, "application/vnd.oasis.opendocument.spreadsheet");
316 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
319 gtk_file_chooser_set_extra_widget (
320 GTK_FILE_CHOOSER (dialog), psppire_encoding_selector_new ("Auto", true));
322 filter = gtk_file_filter_new ();
323 gtk_file_filter_set_name (filter, _("All Files"));
324 gtk_file_filter_add_pattern (filter, "*");
325 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
327 switch (gtk_dialog_run (GTK_DIALOG (dialog)))
329 case GTK_RESPONSE_ACCEPT:
330 file_name = gtk_file_chooser_get_filename (GTK_FILE_CHOOSER (dialog));
331 *encodingp = psppire_encoding_selector_get_encoding (
332 gtk_file_chooser_get_extra_widget (GTK_FILE_CHOOSER (dialog)));
339 gtk_widget_destroy (dialog);