#include "data/casereader-provider.h"
#include "data/data-in.h"
#include "data/format-guesser.h"
+#include "data/value-labels.h"
#include "builder-wrapper.h"
static void choose_column_names (PsppireImportAssistant *ia);
/* Revises the contents of the fields tree view based on the
- currently chosen set of separators. */
+ currently chosen set of separators and quotes.
+
+ The character of every active separator toggle and the active
+ quote text (the empty string when quoting is disabled) are stored
+ on IA's delimiters model before the column names are re-chosen. */
static void
revise_fields_preview (PsppireImportAssistant *ia)
{
- choose_column_names (ia);
-}
-
-
-struct separator_count_node
-{
- struct hmap_node node;
- int occurance; /* The number of times the separator occurs in a line */
- int quantity; /* The number of lines with this occurance */
-};
-
-
-/* Picks the most likely separator and quote characters based on
- IA's file data. */
-static void
-choose_likely_separators (PsppireImportAssistant *ia)
-{
- gint first_line = 0;
- g_object_get (ia->delimiters_model, "first-line", &first_line, NULL);
-
- gboolean valid;
- GtkTreeIter iter;
- int j;
-
- struct hmap count_map[SEPARATOR_CNT];
- for (j = 0; j < SEPARATOR_CNT; ++j)
- hmap_init (count_map + j);
-
- GtkTreePath *p = gtk_tree_path_new_from_indices (first_line, -1);
-
- for (valid = gtk_tree_model_get_iter (GTK_TREE_MODEL (ia->text_file), &iter, p);
- valid;
- valid = gtk_tree_model_iter_next (GTK_TREE_MODEL (ia->text_file), &iter))
+ GSList *delimiters = NULL;
+ for (int i = 0; i < N_SEPARATORS; i++)
{
- gchar *line_text = NULL;
- gtk_tree_model_get (GTK_TREE_MODEL (ia->text_file), &iter, 1, &line_text, -1);
-
- gint *counts = xzalloc (sizeof *counts * SEPARATOR_CNT);
-
- struct substring cs = ss_cstr (line_text);
- for (;
- UINT32_MAX != ss_first_mb (cs);
- ss_get_mb (&cs))
- {
- ucs4_t character = ss_first_mb (cs);
-
- int s;
- for (s = 0; s < SEPARATOR_CNT; ++s)
- {
- if (character == separators[s].c)
- counts[s]++;
- }
- }
-
- int j;
- for (j = 0; j < SEPARATOR_CNT; ++j)
+ const struct separator *s = &separators[i];
+ GtkWidget *button = get_widget_assert (ia->text_builder, s->name);
+ if (gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (button)))
{
- if (counts[j] > 0)
- {
- struct separator_count_node *cn = NULL;
- unsigned int hash = hash_int (counts[j], 0);
- HMAP_FOR_EACH_WITH_HASH (cn, struct separator_count_node, node, hash, &count_map[j])
- {
- if (cn->occurance == counts[j])
- break;
- }
-
- if (cn == NULL)
- {
- struct separator_count_node *new_cn = xzalloc (sizeof *new_cn);
- new_cn->occurance = counts[j];
- new_cn->quantity = 1;
- hmap_insert (&count_map[j], &new_cn->node, hash);
- }
- else
- cn->quantity++;
- }
+ delimiters = g_slist_prepend (delimiters, GINT_TO_POINTER (s->c));
}
-
- free (line_text);
- free (counts);
}
- gtk_tree_path_free (p);
- if (hmap_count (count_map) > 0)
- {
- int most_frequent = -1;
- int largest = 0;
- for (j = 0; j < SEPARATOR_CNT; ++j)
- {
- struct separator_count_node *cn;
- struct separator_count_node *next;
- HMAP_FOR_EACH_SAFE (cn, next, struct separator_count_node, node, &count_map[j])
- {
- if (largest < cn->quantity)
- {
- largest = cn->quantity;
- most_frequent = j;
- }
- free (cn);
- }
- hmap_destroy (&count_map[j]);
- }
+ GtkComboBoxText *cbt = GTK_COMBO_BOX_TEXT (ia->quote_combo);
+ GtkToggleButton *quote_cb = GTK_TOGGLE_BUTTON (ia->quote_cb);
+ /* gtk_combo_box_text_get_active_text() hands back a newly allocated
+ string (or NULL), so keep an owning pointer and release it once
+ the property has been set. */
+ gchar *active_quotes = (gtk_toggle_button_get_active (quote_cb)
+ ? gtk_combo_box_text_get_active_text (cbt)
+ : NULL);
- g_return_if_fail (most_frequent >= 0);
+ /* NOTE(review): DELIMITERS is passed as a bare pointer; whether the
+ model copies or adopts the list is not visible here -- confirm. */
+ g_object_set (ia->delimiters_model,
+ "delimiters", delimiters,
+ "quotes", active_quotes ? active_quotes : "",
+ NULL);
+ g_free (active_quotes);
- GtkWidget *toggle =
- get_widget_assert (ia->text_builder, separators[most_frequent].name);
- gtk_toggle_button_set_active (GTK_TOGGLE_BUTTON (toggle), TRUE);
- }
+ choose_column_names (ia);
}
+
static void
repopulate_delimiter_columns (PsppireImportAssistant *ia)
{
"preview purposes in the following screens. ",
"Only the first %zu lines of the file will be shown for "
"preview purposes in the following screens. ",
- ia->text_file->line_cnt),
- ia->text_file->line_cnt);
+ ia->text_file->n_lines),
+ ia->text_file->n_lines);
}
}
on_separator_toggle (GtkToggleButton *toggle UNUSED,
PsppireImportAssistant *ia)
{
- int i;
- GSList *delimiters = NULL;
- for (i = 0; i < SEPARATOR_CNT; i++)
- {
- const struct separator *s = &separators[i];
- GtkWidget *button = get_widget_assert (ia->text_builder, s->name);
- if (gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (button)))
- {
- delimiters = g_slist_prepend (delimiters, GINT_TO_POINTER (s->c));
- }
- }
-
- g_object_set (ia->delimiters_model, "delimiters", delimiters, NULL);
-
revise_fields_preview (ia);
}
static void
on_quote_combo_change (GtkComboBox *combo, PsppireImportAssistant *ia)
{
- // revise_fields_preview (ia);
+ revise_fields_preview (ia); /* Re-run the fields preview revision for IA; COMBO itself is not consulted here. */
}
/* Called when the user toggles the checkbox that enables
reset_separators_page (PsppireImportAssistant *ia)
{
gtk_toggle_button_set_active (GTK_TOGGLE_BUTTON (ia->custom_cb), FALSE);
- gtk_toggle_button_set_active (GTK_TOGGLE_BUTTON (ia->quote_cb), FALSE);
gtk_entry_set_text (GTK_ENTRY (ia->custom_entry), "");
+ gtk_toggle_button_set_active (GTK_TOGGLE_BUTTON (ia->quote_cb), TRUE);
+ gtk_combo_box_set_active (GTK_COMBO_BOX (ia->quote_combo), 0);
- for (gint i = 0; i < SEPARATOR_CNT; i++)
+ for (gint i = 0; i < N_SEPARATORS; i++)
{
const struct separator *s = &separators[i];
GtkWidget *button = get_widget_assert (ia->text_builder, s->name);
- gtk_toggle_button_set_active (GTK_TOGGLE_BUTTON (button), FALSE);
+ gtk_toggle_button_set_active (GTK_TOGGLE_BUTTON (button), s->c == ',');
}
- repopulate_delimiter_columns (ia);
+ if (ia->delimiters_model)
+ {
+ repopulate_delimiter_columns (ia);
- revise_fields_preview (ia);
- choose_likely_separators (ia);
+ revise_fields_preview (ia);
+ }
}
/* Called just before the separators page becomes visible in the
assistant. */
static void
-prepare_separators_page (PsppireImportAssistant *ia)
+prepare_separators_page (PsppireImportAssistant *ia, GtkWidget *new_page, enum IMPORT_ASSISTANT_DIRECTION dir)
{
+ if (dir != IMPORT_ASSISTANT_FORWARDS)
+ return;
+
gtk_tree_view_set_model (GTK_TREE_VIEW (ia->fields_tree_view),
GTK_TREE_MODEL (ia->delimiters_model));
gtk_widget_set_sensitive (ia->custom_entry,
gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (ia->custom_cb)));
- gtk_combo_box_set_active (GTK_COMBO_BOX (ia->quote_combo), 0);
+ gtk_entry_set_max_length (GTK_ENTRY (gtk_bin_get_child (GTK_BIN (ia->quote_combo))), 1);
if (ia->fields_tree_view == NULL)
{
G_CALLBACK (on_separators_custom_entry_notify), ia);
g_signal_connect (ia->custom_cb, "toggled",
G_CALLBACK (on_separators_custom_cb_toggle), ia);
- for (i = 0; i < SEPARATOR_CNT; i++)
+ for (i = 0; i < N_SEPARATORS; i++)
g_signal_connect (get_widget_assert (builder, separators[i].name),
"toggled", G_CALLBACK (on_separator_toggle), ia);
char *xx = data_in (ss_cstr (ss),
"UTF-8",
var_get_write_format (var)->type,
+ settings_get_fmt_settings (),
v, var_get_width (var), "UTF-8");
free (xx);
}
static void
-my_advance (struct casereader *reader, void *aux, casenumber cnt)
+my_advance (struct casereader *reader, void *aux, casenumber n) /* Stub: ignores READER, AUX and N and only prints this file's name and line number. */
{
g_print ("%s:%d\n", __FILE__, __LINE__);
}
static struct casereader *
textfile_create_reader (PsppireImportAssistant *ia)
{
- int n_vars = dict_get_var_cnt (ia->dict);
+ int n_vars = dict_get_n_vars (ia->dict);
int i;
PsppireImportAssistant *ia = PSPPIRE_IMPORT_ASSISTANT (data);
struct caseproto *proto = caseproto_create();
- for (int i = 0; i < dict_get_var_cnt (ia->dict); i++)
+ for (int i = 0; i < dict_get_n_vars (ia->dict); i++)
{
const struct variable *var = dict_get_var (ia->dict, i);
int width = var_get_width (var);
g_object_set (ia->data_sheet, "data-model", store, NULL);
g_object_set (ia->var_sheet, "data-model", dict, NULL);
}
+
+/* Appends a FIRSTCASE subcommand to S when the "first-line" property
+   of IA's delimiters model is positive.  The case number written is
+   one greater than that property; nothing is appended otherwise. */
+static void
+first_line_append_syntax (const PsppireImportAssistant *ia, struct string *s)
+{
+  gint first_line = 0;
+  g_object_get (ia->delimiters_model, "first-line", &first_line, NULL);
+
+  if (first_line <= 0)
+    return;
+
+  ds_put_format (s, " /FIRSTCASE=%d\n", first_line + 1);
+}
+
+/* Appends to S the PSPP syntax that reproduces the per-variable
+   attributes of every variable in DICT, in dictionary order.
+
+   For each variable the following commands are written, each only
+   when applicable: MISSING VALUES and VALUE LABELS (when the variable
+   has any), VARIABLE LABELS (when it has a label), and VARIABLE
+   LEVEL, ROLE, ALIGNMENT and WIDTH (when the setting differs from the
+   default the code compares it against). */
+static void
+apply_dict (const struct dictionary *dict, struct string *s)
+{
+  const size_t n_vars = dict_get_n_vars (dict);
+
+  for (size_t idx = 0; idx < n_vars; idx++)
+    {
+      struct variable *v = dict_get_var (dict, idx);
+      const char *name = var_get_name (v);
+      const enum val_type type = var_get_type (v);
+      const int width = var_get_width (v);
+      const struct fmt_spec *format = var_get_print_format (v);
+
+      if (var_has_missing_values (v))
+        {
+          const struct missing_values *mv = var_get_missing_values (v);
+
+          syntax_gen_pspp (s, "MISSING VALUES %ss (", name);
+
+          /* Discrete missing values, comma separated. */
+          for (size_t k = 0; k < mv_n_values (mv); k++)
+            {
+              if (k > 0)
+                ds_put_cstr (s, ", ");
+              syntax_gen_value (s, mv_get_value (mv, k), width, format);
+            }
+
+          /* The range, if any, follows the discrete values. */
+          if (mv_has_range (mv))
+            {
+              double low, high;
+              if (mv_has_value (mv))
+                ds_put_cstr (s, ", ");
+              mv_get_range (mv, &low, &high);
+              syntax_gen_num_range (s, low, high, format);
+            }
+          ds_put_cstr (s, ").\n");
+        }
+
+      if (var_has_value_labels (v))
+        {
+          const struct val_labs *vls = var_get_value_labels (v);
+          const struct val_lab **sorted = val_labs_sorted (vls);
+          const size_t n_labels = val_labs_count (vls);
+
+          syntax_gen_pspp (s, "VALUE LABELS %ss", name);
+          for (size_t k = 0; k < n_labels; k++)
+            {
+              const struct val_lab *label = sorted[k];
+              ds_put_cstr (s, "\n ");
+              syntax_gen_value (s, &label->value, width, format);
+              ds_put_byte (s, ' ');
+              syntax_gen_string (s, ss_cstr (val_lab_get_escaped_label (label)));
+            }
+          /* Only the array returned by val_labs_sorted() is released
+             here, not the labels it points to. */
+          free (sorted);
+          ds_put_cstr (s, ".\n");
+        }
+
+      if (var_has_label (v))
+        syntax_gen_pspp (s, "VARIABLE LABELS %ss %sq.\n",
+                         name, var_get_label (v));
+
+      const enum measure measure = var_get_measure (v);
+      if (measure != var_default_measure (type))
+        syntax_gen_pspp (s, "VARIABLE LEVEL %ss (%ss).\n",
+                         name, measure_to_syntax (measure));
+
+      const enum var_role role = var_get_role (v);
+      if (role != ROLE_INPUT)
+        syntax_gen_pspp (s, "VARIABLE ROLE /%ss %ss.\n",
+                         var_role_to_syntax (role), name);
+
+      const enum alignment alignment = var_get_alignment (v);
+      if (alignment != var_default_alignment (type))
+        syntax_gen_pspp (s, "VARIABLE ALIGNMENT %ss (%ss).\n",
+                         name, alignment_to_syntax (alignment));
+
+      if (var_get_display_width (v) != var_default_display_width (width))
+        syntax_gen_pspp (s, "VARIABLE WIDTH %ss (%d).\n",
+                         name, var_get_display_width (v));
+    }
+}
+
+
+/* Appends case-selection syntax to S according to IA's widgets.
+
+   When the n-cases button is active, a SELECT IF on $CASENUM is
+   written whose limit is the n-cases spin value minus the delimiters
+   model's "first-line" property.  Otherwise, when the percent button
+   is active, a SAMPLE command is written with the percent spin value
+   divided by 100.  Nothing is appended when neither is active. */
+static void
+intro_append_syntax (const PsppireImportAssistant *ia, struct string *s)
+{
+  gint skipped_lines = 0;
+  g_object_get (ia->delimiters_model, "first-line", &skipped_lines, NULL);
+
+  if (gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (ia->n_cases_button)))
+    {
+      gint n_cases
+        = gtk_spin_button_get_value_as_int (GTK_SPIN_BUTTON (ia->n_cases_spin));
+      ds_put_format (s, "SELECT IF ($CASENUM <= %d).\n",
+                     n_cases - skipped_lines);
+      return;
+    }
+
+  if (gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (ia->percent_button)))
+    {
+      gdouble percent
+        = gtk_spin_button_get_value (GTK_SPIN_BUTTON (ia->percent_spin));
+      ds_put_format (s, "SAMPLE %.4g.\n", percent / 100.0);
+    }
+}
+
+
+/* Appends the VARIABLES subcommand to S: one line per variable in
+   IA's dictionary giving its name and print format.  The line for the
+   last variable also carries the "." that ends the command.
+
+   Returns without appending anything if IA has no dictionary. */
+static void
+formats_append_syntax (const PsppireImportAssistant *ia, struct string *s)
+{
+  g_return_if_fail (ia->dict);
+
+  ds_put_cstr (s, " /VARIABLES=\n");
+
+  const int n_vars = dict_get_n_vars (ia->dict);
+  const int last = n_vars - 1;
+  for (int idx = 0; idx < n_vars; idx++)
+    {
+      struct variable *v = dict_get_var (ia->dict, idx);
+
+      char format_text[FMT_STRING_LEN_MAX + 1];
+      fmt_to_string (var_get_print_format (v), format_text);
+
+      const char *terminator = idx == last ? "." : "";
+      ds_put_format (s, " %s %s%s\n",
+                     var_get_name (v), format_text, terminator);
+    }
+}
+
+/* Appends the DELIMITERS subcommand to S, followed by the QUALIFIER
+   subcommand when quoting is enabled, based on the separator toggles
+   and quote widgets of IA.
+
+   An active tab toggle is written as the escape sequence \t ahead of
+   every other delimiter; the remaining active separators follow in
+   the order of the separators table.  QUALIFIER is written only when
+   the quote checkbox is active and the quote combo yields non-empty
+   text. */
+static void
+separators_append_syntax (const PsppireImportAssistant *ia, struct string *s)
+{
+  ds_put_cstr (s, " /DELIMITERS=\"");
+
+  /* The tab escape always goes first, which is why the loop below
+     skips the tab entry of the table. */
+  if (gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (get_widget_assert (ia->text_builder, "tab"))))
+    ds_put_cstr (s, "\\t");
+
+  for (int i = 0; i < N_SEPARATORS; i++)
+    {
+      const struct separator *seps = &separators[i];
+      GtkWidget *button = get_widget_assert (ia->text_builder, seps->name);
+      if (gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (button)))
+        {
+          if (seps->c == '\t')
+            continue;
+
+          ds_put_byte (s, seps->c);
+        }
+    }
+  ds_put_cstr (s, "\"\n");
+
+  if (gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (ia->quote_cb)))
+    {
+      GtkComboBoxText *cbt = GTK_COMBO_BOX_TEXT (ia->quote_combo);
+      gchar *quotes = gtk_combo_box_text_get_active_text (cbt);
+      if (quotes && *quotes)
+        syntax_gen_pspp (s, " /QUALIFIER=%sq\n", quotes);
+      /* The text was allocated by GTK, so release it with g_free()
+         rather than free(). */
+      g_free (quotes);
+    }
+}
+
+
+/* Appends to S the complete syntax for importing IA's text file: a
+   GET DATA /TYPE=TXT command (file name, optional encoding,
+   delimited arrangement, first case, delimiters, qualifier and
+   variables), then the dictionary attribute commands and finally the
+   case-selection command.
+
+   The ENCODING subcommand is left out when the encoding is unset or
+   is "Auto".  Nothing at all is appended when the text file has no
+   file name. */
+void
+text_spec_gen_syntax (PsppireImportAssistant *ia, struct string *s)
+{
+  /* g_object_get() returns copies of string properties, so both must
+     be released on every path out of this function. */
+  gchar *file_name = NULL;
+  gchar *encoding = NULL;
+  g_object_get (ia->text_file,
+                "file-name", &file_name,
+                "encoding", &encoding,
+                NULL);
+
+  if (file_name != NULL)
+    {
+      syntax_gen_pspp (s,
+                       "GET DATA"
+                       "\n /TYPE=TXT"
+                       "\n /FILE=%sq\n",
+                       file_name);
+      if (encoding && strcmp (encoding, "Auto"))
+        syntax_gen_pspp (s, " /ENCODING=%sq\n", encoding);
+
+      ds_put_cstr (s,
+                   " /ARRANGEMENT=DELIMITED\n"
+                   " /DELCASE=LINE\n");
+
+      first_line_append_syntax (ia, s);
+      separators_append_syntax (ia, s);
+
+      formats_append_syntax (ia, s);
+      apply_dict (ia->dict, s);
+      intro_append_syntax (ia, s);
+    }
+
+  g_free (file_name);
+  g_free (encoding);
+}