1 /* PSPPIRE - a graphical user interface for PSPP.
2 Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "ui/gui/text-data-import-dialog.h"
23 #include <gtk-contrib/psppire-sheet.h>
29 #include "data/data-in.h"
30 #include "data/data-out.h"
31 #include "data/format-guesser.h"
32 #include "data/casereader.h"
33 #include "data/gnumeric-reader.h"
34 #include "data/ods-reader.h"
35 #include "data/spreadsheet-reader.h"
36 #include "data/value-labels.h"
37 #include "language/data-io/data-parser.h"
38 #include "language/lexer/lexer.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/i18n.h"
41 #include "libpspp/line-reader.h"
42 #include "libpspp/message.h"
43 #include "ui/gui/checkbox-treeview.h"
44 #include "ui/gui/dialog-common.h"
45 #include "ui/gui/executor.h"
46 #include "ui/gui/helper.h"
47 #include "ui/gui/builder-wrapper.h"
48 #include "ui/gui/psppire-data-window.h"
49 #include "ui/gui/psppire-dialog.h"
50 #include "ui/gui/psppire-encoding-selector.h"
51 #include "ui/gui/psppire-empty-list-store.h"
52 #include "ui/gui/psppire-var-sheet.h"
53 #include "ui/gui/psppire-var-store.h"
54 #include "ui/gui/psppire-scanf.h"
55 #include "ui/syntax-gen.h"
58 #include "gl/intprops.h"
59 #include "gl/xalloc.h"
62 #define _(msgid) gettext (msgid)
63 #define N_(msgid) msgid
65 struct import_assistant;
67 /* Choosing a file and reading it. */
69 static char *choose_file (GtkWindow *parent_window, gchar **encodingp);
71 /* Obtains the file to import from the user and initializes IA's
72 file substructure. PARENT_WINDOW must be the window to use
73 as the file chooser window's parent.
75 Returns true if successful, false if the file name could not
76 be obtained or the file could not be read. */
78 init_file (struct import_assistant *ia, GtkWindow *parent_window)
/* Visible logic: ask the user for a file through choose_file(), try the
   Gnumeric and ODS spreadsheet readers on it, and read it as text through
   a line_reader.  The spreadsheet path fills the separators page's
   columns; the text path fills file->lines.
   NOTE(review): the guards, early returns and else-arms that connect
   these steps are not visible here; the comments below describe the
   individual statements only -- confirm the control flow. */
80 enum { MAX_PREVIEW_LINES = 1000 }; /* Max number of lines to read. */
81 enum { MAX_LINE_LEN = 16384 }; /* Max length of an acceptable line. */
82 struct file *file = &ia->file;
83 struct separators_page *sepp = &ia->separators;
84 struct casereader *creader = NULL;
85 struct dictionary *dict = NULL;
86 struct spreadsheet_read_info sri;
87 struct spreadsheet_read_options opts;
/* choose_file() yields the file name and stores the chosen encoding in
   file->encoding.  A null file name is tested for explicitly. */
90 file->file_name = choose_file (parent_window, &file->encoding);
91 if (file->file_name == NULL)
/* Spreadsheet read options: no sheet name and no cell range requested. */
94 opts.sheet_name = NULL;
95 opts.cell_range = NULL;
/* Spreadsheet source: the chosen file, with read_names enabled
   (presumably so that names are taken from the sheet itself -- TODO
   confirm against spreadsheet-reader.h). */
98 sri.file_name = file->file_name;
99 sri.read_names = true;
/* Try the Gnumeric reader; the file type is set to FTYPE_GNUMERIC
   alongside the attempt. */
104 creader = gnumeric_open_reader (&sri, &opts, &dict);
105 ia->file.type = FTYPE_GNUMERIC;
/* Try the ODS reader; the file type is set to FTYPE_ODS alongside the
   attempt.
   NOTE(review): presumably this runs only when the Gnumeric attempt
   produced no reader -- the guard is not visible here; confirm. */
110 creader = ods_open_reader (&sri, &opts, &dict);
111 ia->file.type = FTYPE_ODS;
/* Spreadsheet path: one preview column per dictionary variable, named
   after the variable, with no cell contents yet. */
120 sepp->column_cnt = dict_get_var_cnt (dict);
121 sepp->columns = xcalloc (sepp->column_cnt, sizeof (*sepp->columns));
122 for (col = 0; col < sepp->column_cnt ; ++col)
124 const struct variable *var = dict_get_var (dict, col);
125 sepp->columns[col].name = xstrdup (var_get_name (var));
126 sepp->columns[col].contents = NULL;
/* Read cases one at a time until the reader returns NULL; the loop's
   update expression releases each case with case_unref(). */
129 for (; (c = casereader_read (creader)) != NULL; case_unref (c))
132 for (col = 0; col < sepp->column_cnt ; ++col)
135 const struct variable *var = dict_get_var (dict, col);
/* Grow this column's cell array so that it holds rows substrings. */
137 sepp->columns[col].contents = xrealloc (sepp->columns[col].contents,
138 sizeof (struct substring) * rows);
/* Format this case's value for the variable, using the variable's print
   format and the dictionary's encoding. */
140 ss = data_out (case_data (c, var), dict_get_encoding (dict),
141 var_get_print_format (var));
/* Store the formatted text as the newest cell (index rows - 1).
   NOTE(review): ss_cstr() presumably wraps the buffer without copying,
   so the column would take over the data_out() allocation -- confirm
   who frees it. */
143 sepp->columns[col].contents[rows - 1] = ss_cstr (ss);
/* Preview limit for the spreadsheet path.
   NOTE(review): with a strict greater-than test, the condition first
   holds after MAX_PREVIEW_LINES + 1 rows have been stored (assuming
   rows is incremented once per case) -- confirm this is intended. */
146 if (rows > MAX_PREVIEW_LINES)
/* Record the number of rows read and release the case reader. */
153 file->line_cnt = rows;
154 casereader_destroy (creader);
/* Text path: open the file read-only through a line_reader, passing the
   encoding obtained from choose_file(). */
159 struct line_reader *reader = line_reader_for_file (file->encoding, file->file_name, O_RDONLY);
/* Report an open failure, with the reason taken from errno. */
162 msg (ME, _("Could not open `%s': %s"),
163 file->file_name, strerror (errno));
/* input is the scratch buffer for one line; file->lines gets room for
   MAX_PREVIEW_LINES entries up front. */
167 ds_init_empty (&input);
168 file->lines = xnmalloc (MAX_PREVIEW_LINES, sizeof *file->lines);
/* Read at most MAX_PREVIEW_LINES lines.
   NOTE(review): the loop has no initializer, so it relies on
   file->line_cnt being 0 on entry; that initialization is not visible
   here -- confirm. */
169 for (; file->line_cnt < MAX_PREVIEW_LINES; file->line_cnt++)
/* The read is allowed MAX_LINE_LEN + 1 bytes so that a line longer than
   MAX_LINE_LEN can be detected by the length test that follows. */
172 if (!line_reader_read (reader, &input, MAX_LINE_LEN + 1)
173 || ds_length (&input) > MAX_LINE_LEN)
/* A failed or over-long read is classified as end of file, a read error
   (reported via strerror), or a line exceeding MAX_LINE_LEN. */
175 if (line_reader_eof (reader))
177 else if (line_reader_error (reader))
178 msg (ME, _("Error reading `%s': %s"),
179 file->file_name, strerror (line_reader_error (reader)));
181 msg (ME, _("Failed to read `%s', because it contains a line "
182 "over %d bytes long and therefore appears not to be "
184 file->file_name, MAX_LINE_LEN);
185 line_reader_close (reader);
/* Keep the line as UTF-8, recoded from the reader's encoding.
   NOTE(review): recode_string() looks like it returns a newly allocated
   string; if ds_init_cstr() copies its argument, that buffer is never
   freed -- confirm. */
191 ds_init_cstr (&file->lines[file->line_cnt],
192 recode_string ("UTF-8", line_reader_get_encoding (reader),
193 ds_cstr (&input), ds_length (&input)));
/* A file that produced no lines is reported as empty and the reader is
   closed. */
197 if (file->line_cnt == 0)
199 msg (ME, _("`%s' is empty."), file->file_name);
200 line_reader_close (reader);
205 /* Estimate the number of lines in the file. */
/* Fewer lines than the preview limit were read, so the count is used as
   the total. */
206 if (file->line_cnt < MAX_PREVIEW_LINES)
207 file->total_lines = file->line_cnt;
/* Extrapolate: lines read, divided by the reader's current position,
   times the file size reported by fstat().  The position > 0 test also
   keeps the division from dividing by zero. */
211 off_t position = line_reader_tell (reader);
212 if (fstat (line_reader_fileno (reader), &s) == 0 && position > 0)
213 file->total_lines = (double) file->line_cnt / position * s.st_size;
/* Total set to 0 (presumably the fallback when fstat() fails or the
   position is not positive -- the else is not visible here). */
215 file->total_lines = 0;
/* Done with the text reader; mark the file as plain text. */
218 line_reader_close (reader);
219 ia->file.type = FTYPE_TEXT;
225 /* Frees IA's file substructure. */
227 destroy_file (struct import_assistant *ia)
229 struct file *f = &ia->file;
/* Destroy each of the line_cnt stored preview lines.
   NOTE(review): init_file's spreadsheet path sets line_cnt without
   allocating lines, so this loop needs a guard against a null f->lines;
   neither the guard nor the release of the lines array itself is
   visible here -- confirm both exist. */
234 for (i = 0; i < f->line_cnt; i++)
235 ds_destroy (&f->lines[i]);
/* The file name and the encoding string are released with g_free(). */
239 g_free (f->file_name);
240 g_free (f->encoding);
243 /* Obtains the file to read from the user. If successful, returns the name of
244 the file and stores the user's chosen encoding for the file into *ENCODINGP.
245 The caller must free each of these strings with g_free().
247 On failure, returns a null pointer and stores NULL in *ENCODINGP.
249 PARENT_WINDOW must be the window to use as the file chooser window's parent. */
252 choose_file (GtkWindow *parent_window, gchar **encodingp)
// Builds a GTK file chooser dialog in "open" mode with Cancel and Open
// buttons, adds a list of file-type filters and an encoding selector, runs
// the dialog, and on acceptance reads back the chosen file name and encoding.
255 GtkFileFilter *filter = NULL;
257 GtkWidget *dialog = gtk_file_chooser_dialog_new (_("Import Delimited Text Data"),
259 GTK_FILE_CHOOSER_ACTION_OPEN,
260 GTK_STOCK_CANCEL, GTK_RESPONSE_CANCEL,
261 GTK_STOCK_OPEN, GTK_RESPONSE_ACCEPT,
// Turning "local-only" off lets the chooser offer non-local locations too.
264 g_object_set (dialog, "local-only", FALSE, NULL);
// Filter: every MIME type in the text family.
266 filter = gtk_file_filter_new ();
267 gtk_file_filter_set_name (filter, _("Text files"));
268 gtk_file_filter_add_mime_type (filter, "text/*");
269 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
// Filter: names ending in .txt, matched by pattern in lower and upper case.
271 filter = gtk_file_filter_new ();
272 gtk_file_filter_set_name (filter, _("Text (*.txt) Files"));
273 gtk_file_filter_add_pattern (filter, "*.txt");
274 gtk_file_filter_add_pattern (filter, "*.TXT");
275 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
// Filter: the text/plain MIME type.
277 filter = gtk_file_filter_new ();
278 gtk_file_filter_set_name (filter, _("Plain Text (ASCII) Files"));
279 gtk_file_filter_add_mime_type (filter, "text/plain");
280 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
// Filter: the text/csv MIME type.
282 filter = gtk_file_filter_new ();
283 gtk_file_filter_set_name (filter, _("Comma Separated Value Files"));
284 gtk_file_filter_add_mime_type (filter, "text/csv");
285 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
287 /* I've never encountered one of these, but it's listed here:
288 http://www.iana.org/assignments/media-types/text/tab-separated-values */
289 filter = gtk_file_filter_new ();
290 gtk_file_filter_set_name (filter, _("Tab Separated Value Files"));
291 gtk_file_filter_add_mime_type (filter, "text/tab-separated-values");
292 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
// Filter: Gnumeric spreadsheets, by MIME type.
294 filter = gtk_file_filter_new ();
295 gtk_file_filter_set_name (filter, _("Gnumeric Spreadsheet Files"));
296 gtk_file_filter_add_mime_type (filter, "application/x-gnumeric");
297 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
// Filter: OpenDocument spreadsheets, by MIME type.
299 filter = gtk_file_filter_new ();
300 gtk_file_filter_set_name (filter, _("OpenOffice.Org Spreadsheet Files"));
301 gtk_file_filter_add_mime_type (filter, "application/vnd.oasis.opendocument.spreadsheet");
302 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
// Filter: both spreadsheet MIME types above, combined in one entry.
304 filter = gtk_file_filter_new ();
305 gtk_file_filter_set_name (filter, _("All Spreadsheet Files"));
306 gtk_file_filter_add_mime_type (filter, "application/x-gnumeric");
307 gtk_file_filter_add_mime_type (filter, "application/vnd.oasis.opendocument.spreadsheet");
308 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
// Install an encoding selector as the chooser's extra widget.  "Auto" is
// passed as its first argument (presumably the initially selected encoding;
// TODO confirm against psppire-encoding-selector.h).
311 gtk_file_chooser_set_extra_widget (
312 GTK_FILE_CHOOSER (dialog), psppire_encoding_selector_new ("Auto", true));
// Filter: a catch-all pattern that matches every file name.
314 filter = gtk_file_filter_new ();
315 gtk_file_filter_set_name (filter, _("All Files"));
316 gtk_file_filter_add_pattern (filter, "*");
317 gtk_file_chooser_add_filter (GTK_FILE_CHOOSER (dialog), filter);
// Run the dialog and dispatch on the response it returns.
319 switch (gtk_dialog_run (GTK_DIALOG (dialog)))
// Accepted: take the selected file name from the chooser and the encoding
// from the selector installed as the extra widget.
// NOTE(review): the handling of other responses is not visible here; confirm
// that it leaves file_name null and stores NULL in *encodingp.
321 case GTK_RESPONSE_ACCEPT:
322 file_name = gtk_file_chooser_get_filename (GTK_FILE_CHOOSER (dialog));
323 *encodingp = psppire_encoding_selector_get_encoding (
324 gtk_file_chooser_get_extra_widget (GTK_FILE_CHOOSER (dialog)));
// Tear down the dialog widget.
331 gtk_widget_destroy (dialog);