#include "psppire-delimited-text.h"
#include "psppire-text-file.h"
+#include "language/data-io/data-parser.h"
#include "libpspp/str.h"
+#include "libpspp/string-array.h"
#include "libpspp/i18n.h"
#include <gtk/gtk.h>
PROP_0,
PROP_CHILD,
PROP_DELIMITERS,
+ PROP_QUOTE,
PROP_FIRST_LINE
};
-struct enclosure
+/* Creates and returns a new delimited-text data parser configured from TF.
+   A space in TF's delimiter list becomes the parser's soft delimiter, every
+   other delimiter becomes a hard delimiter, and TF's quote character (if
+   nonzero) becomes the parser's quote character.
+
+   The caller is responsible for releasing the result with
+   data_parser_destroy(). */
+static struct data_parser *
+make_data_parser (PsppireDelimitedText *tf)
{
- gunichar opening;
- gunichar closing;
-};
+ struct data_parser *parser = data_parser_create ();
+ data_parser_set_type (parser, DP_DELIMITED);
+ data_parser_set_span (parser, false);
+ data_parser_set_quotes (parser, ss_empty ());
+ data_parser_set_quote_escape (parser, true);
+ data_parser_set_empty_line_has_field (parser, true);
+
+ /* Sort TF's delimiters: a space is handled as a soft delimiter, anything
+    else is accumulated (UTF-8 encoded) as a hard delimiter. */
+ bool space = false;
+ struct string hard_delimiters = DS_EMPTY_INITIALIZER;
+ GSList *del;
+ for (del = tf->delimiters; del; del = g_slist_next (del))
+ {
+ gunichar c = GPOINTER_TO_INT (del->data);
+ if (c == ' ')
+ space = true;
+ else
+ ds_put_unichar (&hard_delimiters, c);
+ }
+ data_parser_set_soft_delimiters (parser, ss_cstr (space ? " " : ""));
+ data_parser_set_hard_delimiters (parser, ds_ss (&hard_delimiters));
+ /* NOTE(review): destroying the buffer here assumes the parser copied the
+    delimiter substring -- confirm against data-parser.c. */
+ ds_destroy (&hard_delimiters);
-static const struct enclosure enclosures[3] =
- {
- {'(', ')'},
- {'"', '"'},
- {'\'', '\''}
- };
+ /* A quote character of 0 leaves quoting disabled (set to empty above). */
+ if (tf->quote)
+ {
+ struct string quote = DS_EMPTY_INITIALIZER;
+ ds_put_unichar (&quote, tf->quote);
+ data_parser_set_quotes (parser, ds_ss (&quote));
+ ds_destroy (&quote);
+ }
+ return parser;
+}
static void
count_delims (PsppireDelimitedText *tf)
if (tf->child == NULL)
return;
- tf->max_delimiters = 0;
+ struct data_parser *parser = make_data_parser (tf); /* Temporary parser; destroyed before returning. */
+
+ tf->max_fields = 0; /* Recomputed below as the largest field count over all child rows. */
GtkTreeIter iter;
gboolean valid;
for (valid = gtk_tree_model_get_iter_first (tf->child, &iter);
valid;
valid = gtk_tree_model_iter_next (tf->child, &iter))
{
- gint enc = -1;
- // FIXME: Box these lines to avoid constant allocation/deallocation
gchar *line = NULL;
gtk_tree_model_get (tf->child, &iter, 1, &line, -1);
- {
- char *p;
- gint count = 0;
- for (p = line; ; p = g_utf8_find_next_char (p, NULL))
- {
- const gunichar c = g_utf8_get_char (p);
- if (c == NULL)
- break;
- if (enc == -1)
- {
- gint i;
- for (i = 0; i < 3; ++i)
- {
- if (c == enclosures[i].opening)
- {
- enc = i;
- break;
- }
- }
- }
- else if (c == enclosures[enc].closing)
- {
- enc = -1;
- }
- if (enc == -1)
- {
- GSList *del;
- for (del = tf->delimiters; del; del = g_slist_next (del))
- {
- if (c == GPOINTER_TO_INT (del->data))
- count++;
- }
- }
- }
- tf->max_delimiters = MAX (tf->max_delimiters, count);
- }
+ size_t n_fields = data_parser_split (parser, ss_cstr (line), NULL); /* NULL output array: only the returned count is used.  NOTE(review): LINE is not checked for NULL before ss_cstr() -- confirm the child model never yields NULL here. */
+ if (n_fields > tf->max_fields)
+ tf->max_fields = n_fields; /* Keep the running maximum. */
g_free (line);
}
+
+ data_parser_destroy (parser); /* Release the temporary parser created above. */
}
static void
cache_invalidate (PsppireDelimitedText *tf)
{
- memset (tf->cache_starts, 0, 512);
- if (tf->const_cache.string)
- {
- ss_dealloc (&tf->const_cache);
- tf->const_cache.string = NULL;
- tf->cache_row = -1;
- }
+ tf->cache_row = -1; /* Mark the cache as holding no row, so the cache-hit test in split_row_into_fields() fails. */
+ data_parser_destroy (tf->parser); /* Discard the parser built from the old settings.  NOTE(review): tf->parser starts out NULL -- confirm data_parser_destroy() accepts NULL. */
+ tf->parser = make_data_parser (tf); /* Rebuild it from TF's current delimiters and quote character. */
}
static void
g_slist_free (tf->delimiters);
tf->delimiters = g_slist_copy (g_value_get_pointer (value));
break;
+ case PROP_QUOTE:
+ tf->quote = g_value_get_uint (value);
+ break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
case PROP_DELIMITERS:
g_value_set_pointer (value, text_file->delimiters);
break;
+ case PROP_QUOTE:
+ g_value_set_uint (value, text_file->quote);
+ break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
};
}
-
-static void psppire_delimited_text_init (PsppireDelimitedText *text_file);
-static void psppire_delimited_text_class_init (PsppireDelimitedTextClass *class);
-
static void psppire_delimited_text_finalize (GObject *object);
static void psppire_delimited_text_dispose (GObject *object);
{
PsppireDelimitedText *tf = PSPPIRE_DELIMITED_TEXT (tree_model);
- /* + 1 for the trailing field and +1 for the leading line number column */
- return tf->max_delimiters + 1 + 1;
+ /* +1 for the leading line number column */
+ return tf->max_fields + 1;
}
return TRUE;
}
-
-static void
-nullify_char (struct substring cs)
-{
- int char_len = ss_first_mblen (cs);
- while (char_len > 0)
- {
- cs.string[char_len - 1] = '\0';
- char_len--;
- }
-}
-
-
/* Split row N into it's delimited fields (if it is not already cached)
and set this row as the current cache. */
static void
split_row_into_fields (PsppireDelimitedText *file, gint n)
{
if (n == file->cache_row) /* Cache hit */
- {
- return;
- }
-
- memset (file->cache_starts, 0, 512);
- /* Cache miss */
- if (file->const_cache.string)
- {
- ss_dealloc (&file->const_cache);
- }
- ss_alloc_substring_pool (&file->const_cache,
- PSPPIRE_TEXT_FILE (file->child)->lines[n], NULL);
- struct substring cs = file->const_cache;
- int field = 0;
- file->cache_starts[0] = cs.string;
- gint enc = -1;
- for (;
- UINT32_MAX != ss_first_mb (cs);
- ss_get_mb (&cs))
- {
- ucs4_t character = ss_first_mb (cs);
- gboolean char_is_quote = FALSE;
- if (enc == -1)
- {
- gint i;
- for (i = 0; i < 3; ++i)
- {
- if (character == enclosures[i].opening)
- {
- enc = i;
- char_is_quote = TRUE;
- file->cache_starts[field] += ss_first_mblen (cs);
- break;
- }
- }
- }
- else if (character == enclosures[enc].closing)
- {
- char_is_quote = TRUE;
- nullify_char (cs);
- enc = -1;
- }
-
- if (enc == -1 && char_is_quote == FALSE)
- {
- GSList *del;
- for (del = file->delimiters; del; del = g_slist_next (del))
- {
- if (character == GPOINTER_TO_INT (del->data))
- {
- field++;
- int char_len = ss_first_mblen (cs);
- file->cache_starts[field] = cs.string + char_len;
- nullify_char (cs);
- break;
- }
- }
- }
- }
+ return; /* Row N is already in file->cache; nothing to do. */
+ if (!file->parser) /* Cache miss: create the parser lazily if none exists yet. */
+ file->parser = make_data_parser (file);
+ string_array_clear (&file->cache); /* Empty the cache before refilling it. */
+ data_parser_split (file->parser, PSPPIRE_TEXT_FILE (file->child)->lines[n],
+ &file->cache); /* Split row N of the child text file, storing its fields in file->cache. */
file->cache_row = n;
}
split_row_into_fields (file, file->first_line - 1);
- return file->cache_starts [column];
+ return column < file->cache.n ? file->cache.strings[column] : "";
}
static void
split_row_into_fields (file, n);
- g_value_set_string (value, file->cache_starts [column - 1]);
+ size_t idx = column - 1;
+ const char *s = idx < file->cache.n ? file->cache.strings[idx] : "";
+ g_value_set_string (value, s);
}
iface->iter_parent = __iter_parent;
}
-
-GType
-psppire_delimited_text_get_type (void)
-{
- static GType text_file_type = 0;
-
- if (!text_file_type)
- {
- static const GTypeInfo text_file_info =
- {
- sizeof (PsppireDelimitedTextClass),
- NULL, /* base_init */
- NULL, /* base_finalize */
- (GClassInitFunc) psppire_delimited_text_class_init,
- NULL, /* class_finalize */
- NULL, /* class_data */
- sizeof (PsppireDelimitedText),
- 0,
- (GInstanceInitFunc) psppire_delimited_text_init,
- };
-
- static const GInterfaceInfo tree_model_info = {
- (GInterfaceInitFunc) __tree_model_init,
- NULL,
- NULL
- };
-
- text_file_type = g_type_register_static (G_TYPE_OBJECT,
- "PsppireDelimitedText",
- &text_file_info, 0);
-
- g_type_add_interface_static (text_file_type, GTK_TYPE_TREE_MODEL,
- &tree_model_info);
- }
-
- return text_file_type;
-}
-
+G_DEFINE_TYPE_WITH_CODE (PsppireDelimitedText, psppire_delimited_text, G_TYPE_OBJECT,
+ G_IMPLEMENT_INTERFACE (GTK_TYPE_TREE_MODEL,
+ __tree_model_init)) /* Replaces the hand-written psppire_delimited_text_get_type(): the macro defines it, deriving from GObject and adding the GtkTreeModel interface initialised by __tree_model_init. */
static void
psppire_delimited_text_class_init (PsppireDelimitedTextClass *class)
GObjectClass *object_class;
parent_class = g_type_class_peek_parent (class);
- object_class = (GObjectClass*) class;
+ object_class = G_OBJECT_CLASS (class);
GParamSpec *first_line_spec =
g_param_spec_int ("first-line",
P_("A GSList of gunichars which delimit the fields."),
G_PARAM_READWRITE);
+ GParamSpec *quote_spec =
+ g_param_spec_unichar ("quote",
+ "Quote Character",
+ P_("A character that quotes the field, or 0 to disable quoting."),
+ 0,
+ G_PARAM_READWRITE);
+
GParamSpec *child_spec =
g_param_spec_object ("child",
"Child Model",
PROP_DELIMITERS,
delimiters_spec);
+ g_object_class_install_property (object_class,
+ PROP_QUOTE,
+ quote_spec);
+
g_object_class_install_property (object_class,
PROP_FIRST_LINE,
first_line_spec);
text_file->first_line = 0;
text_file->delimiters = g_slist_prepend (NULL, GINT_TO_POINTER (':'));
- text_file->const_cache.string = NULL;
- text_file->const_cache.length = 0;
text_file->cache_row = -1;
- memset (text_file->cache_starts, 0, 512);
+ string_array_init (&text_file->cache);
+ text_file->parser = NULL;
+
+ text_file->max_fields = 0;
- text_file->max_delimiters = 0;
+ text_file->quote = 0;
text_file->dispose_has_run = FALSE;
text_file->stamp = g_random_int ();
}
-GtkTreeModel *
+PsppireDelimitedText * /* Returns a new PsppireDelimitedText whose "child" property is CHILD. */
psppire_delimited_text_new (GtkTreeModel *child)
{
- PsppireDelimitedText *retval =
+ return /* The object is now returned as its own type; callers needing a GtkTreeModel must cast it themselves. */
g_object_new (PSPPIRE_TYPE_DELIMITED_TEXT,
"child", child,
NULL);
-
- return GTK_TREE_MODEL (retval);
}
static void
PsppireDelimitedText *tf = PSPPIRE_DELIMITED_TEXT (object);
g_slist_free (tf->delimiters);
-
- ss_dealloc (&tf->const_cache);
+ string_array_destroy (&tf->cache);
+ data_parser_destroy (tf->parser);
/* must chain up */
(* parent_class->finalize) (object);