#include "psppire-delimited-text.h"
#include "psppire-text-file.h"
+#include "language/data-io/data-parser.h"
#include "libpspp/str.h"
+#include "libpspp/string-array.h"
#include "libpspp/i18n.h"
#include <gtk/gtk.h>
PROP_0,
PROP_CHILD,
PROP_DELIMITERS,
+ PROP_QUOTE,
PROP_FIRST_LINE
};
+/* Creates and returns a new parser of type DP_DELIMITED configured from TF.
+
+   A space in TF's delimiter list is registered as the soft delimiter; every
+   other delimiter is registered as a hard delimiter.  Quoting is enabled
+   only when TF->quote is nonzero.
+
+   The callers in this file release the result with data_parser_destroy(). */
+static struct data_parser *
+make_data_parser (PsppireDelimitedText *tf)
+{
+  struct data_parser *dp = data_parser_create ();
+  data_parser_set_type (dp, DP_DELIMITED);
+  data_parser_set_span (dp, false);
+  data_parser_set_quotes (dp, ss_empty ());
+  data_parser_set_quote_escape (dp, true);
+  data_parser_set_empty_line_has_field (dp, true);
+
+  /* Sort the configured delimiters into the soft (space) and hard sets. */
+  const char *soft = "";
+  struct string hard = DS_EMPTY_INITIALIZER;
+  for (GSList *node = tf->delimiters; node != NULL; node = g_slist_next (node))
+    {
+      gunichar uc = GPOINTER_TO_INT (node->data);
+      if (uc != ' ')
+        ds_put_unichar (&hard, uc);
+      else
+        soft = " ";
+    }
+  data_parser_set_soft_delimiters (dp, ss_cstr (soft));
+  data_parser_set_hard_delimiters (dp, ds_ss (&hard));
+  ds_destroy (&hard);
+
+  /* Replace the empty quote set installed above when a quote character
+     has been configured. */
+  if (tf->quote != 0)
+    {
+      struct string q = DS_EMPTY_INITIALIZER;
+      ds_put_unichar (&q, tf->quote);
+      data_parser_set_quotes (dp, ds_ss (&q));
+      ds_destroy (&q);
+    }
+
+  return dp;
+}
+
static void
count_delims (PsppireDelimitedText *tf)
{
- if (tf->child)
+ if (tf->child == NULL) /* Recomputes tf->max_fields; a no-op when no child model is set. */
+ return;
+
+ struct data_parser *parser = make_data_parser (tf); /* Temporary parser; destroyed before returning. */
+
+ tf->max_fields = 0; /* Rebuilt below as the maximum field count over all rows. */
+ GtkTreeIter iter;
+ gboolean valid;
+ for (valid = gtk_tree_model_get_iter_first (tf->child, &iter);
+ valid;
+ valid = gtk_tree_model_iter_next (tf->child, &iter))
{
- tf->max_delimiters = 0;
- GtkTreeIter iter;
- gboolean valid;
- for (valid = gtk_tree_model_get_iter_first (tf->child, &iter);
- valid;
- valid = gtk_tree_model_iter_next (tf->child, &iter))
- {
- // FIXME: Box these lines to avoid constant allocation/deallocation
- gchar *foo = 0;
- gtk_tree_model_get (tf->child, &iter, 1, &foo, -1);
- {
- char *line = foo;
- gint count = 0;
- while (*line)
- {
- GSList *del;
- for (del = tf->delimiters; del; del = g_slist_next (del))
- {
- if (*line == GPOINTER_TO_INT (del->data))
- count++;
- }
- line++;
- }
- tf->max_delimiters = MAX (tf->max_delimiters, count);
- }
- g_free (foo);
- }
+ gchar *line = NULL;
+ gtk_tree_model_get (tf->child, &iter, 1, &line, -1); /* Column 1 of the child is read as this row's string; freed below. */
+ size_t n_fields = data_parser_split (parser, ss_cstr (line), NULL); /* NULL output: only the returned count is used. */
+ if (n_fields > tf->max_fields)
+ tf->max_fields = n_fields;
+ g_free (line);
}
- // g_print ("Max Number of delimiters per row: %d\n", tf->max_delimiters);
+
+ data_parser_destroy (parser);
+}
+
+/* Discards TF's cached row state: replaces TF->parser with a parser rebuilt
+   from TF's current settings and sets TF->cache_row to -1, so that no row
+   number matches the cache. */
+static void
+cache_invalidate (PsppireDelimitedText *tf)
+{
+  struct data_parser *fresh = make_data_parser (tf);
+
+  data_parser_destroy (tf->parser);
+  tf->parser = fresh;
+  tf->cache_row = -1;
}
static void
{
case PROP_FIRST_LINE:
tf->first_line = g_value_get_int (value);
- if (tf->const_cache.string)
- {
- ss_dealloc (&tf->const_cache);
- tf->cache_row = -1;
- }
break;
case PROP_CHILD:
tf->child = g_value_get_object (value);
+ g_return_if_fail (PSPPIRE_IS_TEXT_FILE (tf->child));
break;
case PROP_DELIMITERS:
g_slist_free (tf->delimiters);
tf->delimiters = g_slist_copy (g_value_get_pointer (value));
break;
+ case PROP_QUOTE:
+ tf->quote = g_value_get_uint (value);
+ break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
};
- if (tf->child)
- count_delims (tf);
+ cache_invalidate (tf);
+ count_delims (tf);
}
static void
case PROP_DELIMITERS:
g_value_set_pointer (value, text_file->delimiters);
break;
+ case PROP_QUOTE:
+ g_value_set_uint (value, text_file->quote);
+ break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
};
}
-
-static void psppire_delimited_text_init (PsppireDelimitedText *text_file);
-static void psppire_delimited_text_class_init (PsppireDelimitedTextClass *class);
-
static void psppire_delimited_text_finalize (GObject *object);
static void psppire_delimited_text_dispose (GObject *object);
static GObjectClass *parent_class = NULL;
+/* Returns the maximum_lines member of FILE's child, which is accessed as a
+   PsppireTextFile. */
+static gint
+n_lines (PsppireDelimitedText *file)
+{
+  return PSPPIRE_TEXT_FILE (file->child)->maximum_lines;
+}
static gboolean
__tree_get_iter (GtkTreeModel *tree_model,
if (path == NULL)
return FALSE;
- // g_print ("%s:%d %s %s\n", __FILE__, __LINE__, __FUNCTION__, gtk_tree_path_to_string (path));
-
+
gint *indices = gtk_tree_path_get_indices (path);
if (!indices)
gint n = *indices;
- gint children = gtk_tree_model_iter_n_children (file->child, NULL);
+ gint children = n_lines (file);
if (n >= children - file->first_line)
return FALSE;
-
- // g_print ("%s:%d %s %d Children: %d\n", __FILE__, __LINE__, __FUNCTION__, n, children);
-
+
+
iter->user_data = GINT_TO_POINTER (n);
iter->stamp = file->stamp;
g_return_val_if_fail (file->stamp == iter->stamp, FALSE);
gint n = GPOINTER_TO_INT (iter->user_data);
-
- // g_print ("%s:%d %s %d\n", __FILE__, __LINE__, __FUNCTION__, n);
- gint children = gtk_tree_model_iter_n_children (file->child, NULL);
+
+ gint children = n_lines (file);
if (n + 1 >= children - file->first_line)
return FALSE;
__tree_get_column_type (GtkTreeModel *tree_model,
gint index)
{
- // g_print ("%s:%d %s\n", __FILE__, __LINE__, __FUNCTION__);
if (index == 0)
return G_TYPE_INT;
__iter_has_child (GtkTreeModel *tree_model,
GtkTreeIter *iter)
{
- g_print ("%s:%d %s\n", __FILE__, __LINE__, __FUNCTION__);
return 0;
}
GtkTreeIter *iter,
GtkTreeIter *child)
{
- g_print ("%s:%d %s\n", __FILE__, __LINE__, __FUNCTION__);
return 0;
}
__tree_get_path (GtkTreeModel *tree_model,
GtkTreeIter *iter)
{
- // g_print ("%s:%d %s\n", __FILE__, __LINE__, __FUNCTION__);
PsppireDelimitedText *file = PSPPIRE_DELIMITED_TEXT (tree_model);
g_return_val_if_fail (file->stamp == iter->stamp, FALSE);
gint n = GPOINTER_TO_INT (iter->user_data);
- gint children = gtk_tree_model_iter_n_children (file->child, NULL);
+ gint children = n_lines (file);
if (n >= children - file->first_line)
return NULL;
-
+
return gtk_tree_path_new_from_indices (n, -1);
}
GtkTreeIter *iter,
GtkTreeIter *parent)
{
- g_print ("%s:%d %s\n", __FILE__, __LINE__, __FUNCTION__);
return 0;
}
__tree_model_iter_n_children (GtkTreeModel *tree_model,
GtkTreeIter *iter)
{
- g_print ("%s:%d %s\n", __FILE__, __LINE__, __FUNCTION__);
PsppireDelimitedText *file = PSPPIRE_DELIMITED_TEXT (tree_model);
g_assert (iter == NULL);
- return 0;
+
+ gint children = n_lines (file);
+
+ return children - file->first_line;
}
static GtkTreeModelFlags
__tree_model_get_flags (GtkTreeModel *model)
{
- // g_print ("%s:%d %s\n", __FILE__, __LINE__, __FUNCTION__);
g_return_val_if_fail (PSPPIRE_IS_DELIMITED_TEXT (model), (GtkTreeModelFlags) 0);
return GTK_TREE_MODEL_LIST_ONLY;
static gint
__tree_model_get_n_columns (GtkTreeModel *tree_model)
{
- // g_print ("%s:%d %s\n", __FILE__, __LINE__, __FUNCTION__);
PsppireDelimitedText *tf = PSPPIRE_DELIMITED_TEXT (tree_model);
- /* + 1 for the trailing field and +1 for the leading line number column */
- return tf->max_delimiters + 1 + 1;
+ /* One column per field (tf->max_fields), +1 for the leading line number column. */
+ return tf->max_fields + 1;
}
GtkTreeIter *parent,
gint n)
{
- // g_print ("%s:%d %s %d\n", __FILE__, __LINE__, __FUNCTION__, n);
PsppireDelimitedText *file = PSPPIRE_DELIMITED_TEXT (tree_model);
g_assert (parent == NULL);
return TRUE;
}
+/* Ensures that FILE's cache holds the delimited fields of line N of the
+   child text file.  If row N is already the cached row, nothing is done;
+   otherwise the cache is cleared, line N is split into its fields, and N is
+   recorded as the cached row.  FILE's parser is created on first use. */
+static void
+split_row_into_fields (PsppireDelimitedText *file, gint n)
+{
+  if (n != file->cache_row)
+    {
+      /* Cache miss. */
+      if (file->parser == NULL)
+        file->parser = make_data_parser (file);
+
+      string_array_clear (&file->cache);
+
+      PsppireTextFile *child = PSPPIRE_TEXT_FILE (file->child);
+      data_parser_split (file->parser, child->lines[n], &file->cache);
+
+      file->cache_row = n;
+    }
+}
+
+/* Returns field COLUMN of the line immediately preceding FILE's first line
+   (that is, line FILE->first_line - 1), or an empty string if that line has
+   no such field.  Returns NULL if FILE->first_line is not positive.
+
+   A non-NULL, non-empty result points into FILE's row cache, which is
+   cleared whenever a different row is split. */
+const gchar *
+psppire_delimited_text_get_header_title (PsppireDelimitedText *file, gint column)
+{
+  if (file->first_line > 0)
+    {
+      split_row_into_fields (file, file->first_line - 1);
+
+      if (column < file->cache.n)
+        return file->cache.strings[column];
+      return "";
+    }
+
+  return NULL;
+}
static void
__get_value (GtkTreeModel *tree_model,
gint column,
GValue *value)
{
- // g_print ("%s:%d %s Col: %d\n", __FILE__, __LINE__, __FUNCTION__, column);
PsppireDelimitedText *file = PSPPIRE_DELIMITED_TEXT (tree_model);
g_return_if_fail (iter->stamp == file->stamp);
gint n = GPOINTER_TO_INT (iter->user_data) + file->first_line;
- // g_print ("%s:%d Row: %d\n", __FILE__, __LINE__, n);
-
+
if (column == 0)
{
g_value_init (value, G_TYPE_INT);
g_value_init (value, G_TYPE_STRING);
- if (n != file->cache_row)
- {
- if (file->const_cache.string)
- {
- ss_dealloc (&file->const_cache);
- }
- ss_alloc_substring (&file->const_cache, PSPPIRE_TEXT_FILE (file->child)->lines[n]);
- file->cache = file->const_cache;
- int field = 0;
- file->cache_starts[0] = file->cache.string;
- for (;
- UINT32_MAX != ss_first_mb (file->cache);
- ss_get_mb (&file->cache))
- {
- ucs4_t xx = ss_first_mb (file->cache);
- GSList *del;
- for (del = file->delimiters; del; del = g_slist_next (del))
- {
- if (xx == GPOINTER_TO_INT (del->data))
- {
- field++;
- int char_len = ss_first_mblen (file->cache);
- file->cache_starts[field] = file->cache.string + char_len;
- while (char_len > 0)
- {
- file->cache.string[char_len - 1] = '\0';
- char_len--;
- }
- break;
- }
- }
- }
-
- file->cache_row = n;
- }
-
- g_value_set_string (value, file->cache_starts [column - 1]);
+ split_row_into_fields (file, n);
+
+ size_t idx = column - 1;
+ const char *s = idx < file->cache.n ? file->cache.strings[idx] : "";
+ g_value_set_string (value, s);
}
iface->iter_parent = __iter_parent;
}
-
-GType
-psppire_delimited_text_get_type (void)
-{
- static GType text_file_type = 0;
-
- if (!text_file_type)
- {
- static const GTypeInfo text_file_info =
- {
- sizeof (PsppireDelimitedTextClass),
- NULL, /* base_init */
- NULL, /* base_finalize */
- (GClassInitFunc) psppire_delimited_text_class_init,
- NULL, /* class_finalize */
- NULL, /* class_data */
- sizeof (PsppireDelimitedText),
- 0,
- (GInstanceInitFunc) psppire_delimited_text_init,
- };
-
- static const GInterfaceInfo tree_model_info = {
- (GInterfaceInitFunc) __tree_model_init,
- NULL,
- NULL
- };
-
- text_file_type = g_type_register_static (G_TYPE_OBJECT,
- "PsppireDelimitedText",
- &text_file_info, 0);
-
- g_type_add_interface_static (text_file_type, GTK_TYPE_TREE_MODEL,
- &tree_model_info);
- }
-
- return text_file_type;
-}
-
+G_DEFINE_TYPE_WITH_CODE (PsppireDelimitedText, psppire_delimited_text, G_TYPE_OBJECT,
+ G_IMPLEMENT_INTERFACE (GTK_TYPE_TREE_MODEL,
+ __tree_model_init)) /* Defines the PsppireDelimitedText type (parent G_TYPE_OBJECT) with __tree_model_init as its GtkTreeModel interface initializer. */
static void
psppire_delimited_text_class_init (PsppireDelimitedTextClass *class)
GObjectClass *object_class;
parent_class = g_type_class_peek_parent (class);
- object_class = (GObjectClass*) class;
+ object_class = G_OBJECT_CLASS (class);
GParamSpec *first_line_spec =
g_param_spec_int ("first-line",
P_("The first line to be considered."),
0, 1000, 0,
G_PARAM_READWRITE);
-
+
GParamSpec *delimiters_spec =
g_param_spec_pointer ("delimiters",
"Field Delimiters",
P_("A GSList of gunichars which delimit the fields."),
G_PARAM_READWRITE);
- GParamSpec *child_spec =
+ GParamSpec *quote_spec =
+ g_param_spec_unichar ("quote",
+ "Quote Character",
+ P_("A character that quotes the field, or 0 to disable quoting."),
+ 0,
+ G_PARAM_READWRITE);
+
+ GParamSpec *child_spec =
g_param_spec_object ("child",
"Child Model",
P_("The GtkTextModel which this object wraps."),
GTK_TYPE_TREE_MODEL,
G_PARAM_CONSTRUCT_ONLY |G_PARAM_READWRITE);
-
+
object_class->set_property = psppire_delimited_text_set_property;
object_class->get_property = psppire_delimited_text_get_property;
PROP_DELIMITERS,
delimiters_spec);
+ g_object_class_install_property (object_class,
+ PROP_QUOTE,
+ quote_spec);
+
g_object_class_install_property (object_class,
PROP_FIRST_LINE,
first_line_spec);
-
+
object_class->finalize = psppire_delimited_text_finalize;
object_class->dispose = psppire_delimited_text_dispose;
}
text_file->first_line = 0;
text_file->delimiters = g_slist_prepend (NULL, GINT_TO_POINTER (':'));
- text_file->const_cache.string = NULL;
- text_file->const_cache.length = 0;
text_file->cache_row = -1;
+ string_array_init (&text_file->cache);
+ text_file->parser = NULL;
+
+ text_file->max_fields = 0;
- text_file->max_delimiters = 0;
+ text_file->quote = 0;
text_file->dispose_has_run = FALSE;
text_file->stamp = g_random_int ();
PsppireDelimitedText *
psppire_delimited_text_new (GtkTreeModel *child)
{
- PsppireDelimitedText *retval =
+ return /* A new PSPPIRE_TYPE_DELIMITED_TEXT object constructed with CHILD as its "child" property. */
g_object_new (PSPPIRE_TYPE_DELIMITED_TEXT,
"child", child,
NULL);
-
- return retval;
}
static void
PsppireDelimitedText *tf = PSPPIRE_DELIMITED_TEXT (object);
g_slist_free (tf->delimiters);
-
- ss_dealloc (&tf->const_cache);
+ string_array_destroy (&tf->cache);
+ data_parser_destroy (tf->parser);
/* must chain up */
(* parent_class->finalize) (object);