From e0701896ff3fcdc18a26ca1881d8c657833dad77 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 20 Jul 2015 20:43:00 -0700 Subject: [PATCH] GET DATA/TYPE=TXT: Remove ESCAPE subcommand and make its behavior default. Frans Houweling reported that this is the default and only behavior for GET DATA/TYPE=TXT in SPSS 14, at: http://lists.gnu.org/archive/html/pspp-users/2015-07/msg00027.html Therefore, this commit makes this behavior the default and only behavior in PSPP as well. (In my opinion it's the only sensible behavior, but SPSS didn't document it so I assumed that it didn't do it that way.) --- NEWS | 12 +++++-- doc/files.texi | 15 +++------ src/language/data-io/get-data.c | 6 ++-- src/ui/gui/page-separators.c | 44 ++++++++------------------ src/ui/gui/text-data-import.ui | 16 ---------- tests/language/data-io/get-data-txt.at | 2 +- 6 files changed, 30 insertions(+), 65 deletions(-) diff --git a/NEWS b/NEWS index 6ff6af7ed7..ac9a6beab4 100644 --- a/NEWS +++ b/NEWS @@ -9,9 +9,15 @@ Changes since 0.8.5: * The graphical user interface uses Gtk+ version 3 instead of version 2. Accordingly, it has a somewhat different look and feel. - * A bug, where the correlation coefficient in the paired samples t-test - procedure was incorrectly calculated when presented with weighted data, - has been fixed. + * Bug fixes, including the following notable ones: + + - The correlation coefficient in the paired samples t-test + procedure is now correctly calculated when presented with + weighted data. + + - The ESCAPE subcommand has been removed from GET DATA /TYPE=TXT. + For compatibility, the behavior that ESCAPE enabled is now the + default and only supported behavior. 
Changes from 0.8.4 to 0.8.5: diff --git a/doc/files.texi b/doc/files.texi index eb5a369e78..6b3a78c75b 100644 --- a/doc/files.texi +++ b/doc/files.texi @@ -428,7 +428,7 @@ GET DATA /TYPE=TXT [/IMPORTCASE=@{ALL,FIRST @var{max_cases},PERCENT @var{percent}@}] /DELIMITERS="@var{delimiters}" - [/QUALIFIER="@var{quotes}" [/ESCAPE]] + [/QUALIFIER="@var{quotes}"] [/DELCASE=@{LINE,VARIABLES @var{n_variables}@}] /VARIABLES=@var{del_var1} [@var{del_var2}]@dots{} where each @var{del_var} takes the form: @@ -467,15 +467,10 @@ matching quote. Intervening delimiters become part of the field, instead of terminating it. The ability to specify more than one quote character is a @pspp{} extension. -By default, a character specified on @subcmd{QUALIFIER} cannot itself be -embedded within a field that it quotes, because the quote character -always terminates the quoted field. With ESCAPE, however, a doubled -quote character within a quoted field inserts a single instance of the -quote into the field. For example, if @samp{'} is specified on -@subcmd{QUALIFIER}, then without ESCAPE @code{'a''b'} specifies a pair of -fields that contain @samp{a} and @samp{b}, but with ESCAPE it -specifies a single field that contains @samp{a'b}. ESCAPE is a @pspp{} -extension. +The character specified on @subcmd{QUALIFIER} can be embedded within a +field that it quotes by doubling the qualifier. For example, if +@samp{'} is specified on @subcmd{QUALIFIER}, then @code{'a''b'} +specifies a field that contains @samp{a'b}. The @subcmd{DELCASE} subcommand controls how data may be broken across lines in the data file. With LINE, the default setting, each line must contain diff --git a/src/language/data-io/get-data.c b/src/language/data-io/get-data.c index e6d5eac9f6..01817179b8 100644 --- a/src/language/data-io/get-data.c +++ b/src/language/data-io/get-data.c @@ -1,6 +1,6 @@ /* PSPP - a program for statistical analysis. Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, - 2013 Free Software Foundation, Inc. 
+ 2013, 2015 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -390,6 +390,7 @@ parse_get_txt (struct lexer *lexer, struct dataset *ds) data_parser_set_type (parser, DP_DELIMITED); data_parser_set_span (parser, false); data_parser_set_quotes (parser, ss_empty ()); + data_parser_set_quote_escape (parser, true); data_parser_set_empty_line_has_field (parser, true); for (;;) @@ -562,9 +563,6 @@ parse_get_txt (struct lexer *lexer, struct dataset *ds) data_parser_set_quotes (parser, lex_tokss (lexer)); lex_get (lexer); } - else if (settings_get_syntax () == ENHANCED - && lex_match_id (lexer, "ESCAPE")) - data_parser_set_quote_escape (parser, true); else if (lex_match_id (lexer, "VARIABLES")) break; else diff --git a/src/ui/gui/page-separators.c b/src/ui/gui/page-separators.c index c81785cd30..6e59202369 100644 --- a/src/ui/gui/page-separators.c +++ b/src/ui/gui/page-separators.c @@ -1,5 +1,5 @@ /* PSPPIRE - a graphical user interface for PSPP. - Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation + Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013, 2015 Free Software Foundation This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -62,7 +62,6 @@ struct separators_page /* How to break lines into columns. */ struct string separators; /* Field separators. */ struct string quotes; /* Quote characters. */ - bool escape; /* Doubled quotes yield a quote mark? 
*/ GtkWidget *page; GtkWidget *custom_cb; @@ -70,7 +69,6 @@ struct separators_page GtkWidget *quote_cb; GtkWidget *quote_combo; GtkEntry *quote_entry; - GtkWidget *escape_cb; PsppSheetView *fields_tree_view; }; @@ -163,7 +161,6 @@ separators_page_create (struct import_assistant *ia) p->quote_combo = get_widget_assert (builder, "quote-combo"); p->quote_entry = GTK_ENTRY (gtk_bin_get_child (GTK_BIN (p->quote_combo))); p->quote_cb = get_widget_assert (builder, "quote-cb"); - p->escape_cb = get_widget_assert (builder, "escape"); set_quote_list (GTK_COMBO_BOX (p->quote_combo)); p->fields_tree_view = PSPP_SHEET_VIEW (get_widget_assert (builder, "fields")); @@ -178,8 +175,6 @@ separators_page_create (struct import_assistant *ia) for (i = 0; i < SEPARATOR_CNT; i++) g_signal_connect (get_widget_assert (builder, separators[i].name), "toggled", G_CALLBACK (on_separator_toggle), ia); - g_signal_connect (p->escape_cb, "toggled", - G_CALLBACK (on_separator_toggle), ia); return p; } @@ -291,23 +286,18 @@ split_fields (struct import_assistant *ia) && ds_find_byte (&s->quotes, text.string[0]) != SIZE_MAX) { int quote = ss_get_byte (&text); - if (!s->escape) - ss_get_until (&text, quote, &field); - else - { - struct string s; - int c; - - ds_init_empty (&s); - while ((c = ss_get_byte (&text)) != EOF) - if (c != quote) - ds_put_byte (&s, c); - else if (ss_match_byte (&text, quote)) - ds_put_byte (&s, quote); - else - break; - field = ds_ss (&s); - } + struct string s; + int c; + + ds_init_empty (&s); + while ((c = ss_get_byte (&text)) != EOF) + if (c != quote) + ds_put_byte (&s, c); + else if (ss_match_byte (&text, quote)) + ds_put_byte (&s, quote); + else + break; + field = ds_ss (&s); } else ss_get_bytes (&text, ss_cspan (text, ds_ss (&s->separators)), @@ -388,7 +378,6 @@ choose_likely_separators (struct import_assistant *ia) find_commonest_chars (histogram, "\"'", "", &ia->separators->quotes); find_commonest_chars (histogram, ",;:/|!\t-", ",", &ia->separators->separators); - 
ia->separators->escape = true; } /* Chooses the most common character among those in TARGETS, @@ -498,10 +487,7 @@ set_separators (struct import_assistant *ia) any_quotes ? ds_cstr (&s->quotes) : "\""); gtk_toggle_button_set_active (GTK_TOGGLE_BUTTON (s->quote_cb), any_quotes); - gtk_toggle_button_set_active (GTK_TOGGLE_BUTTON (s->escape_cb), - s->escape); gtk_widget_set_sensitive (s->quote_combo, any_quotes); - gtk_widget_set_sensitive (s->escape_cb, any_quotes); } /* Sets IA's separators substructure to match the widgets. */ @@ -531,7 +517,6 @@ get_separators (struct import_assistant *ia) } else ds_clear (&s->quotes); - s->escape = gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (s->escape_cb)); } /* Called when the user changes the entry field for custom @@ -570,7 +555,6 @@ on_quote_cb_toggle (GtkToggleButton *quote_cb, struct import_assistant *ia) { bool is_active = gtk_toggle_button_get_active (quote_cb); gtk_widget_set_sensitive (ia->separators->quote_combo, is_active); - gtk_widget_set_sensitive (ia->separators->escape_cb, is_active); revise_fields_preview (ia); } @@ -605,6 +589,4 @@ separators_append_syntax (const struct import_assistant *ia, struct string *s) ds_put_cstr (s, "\"\n"); if (!ds_is_empty (&ia->separators->quotes)) syntax_gen_pspp (s, " /QUALIFIER=%sq\n", ds_cstr (&ia->separators->quotes)); - if (!ds_is_empty (&ia->separators->quotes) && ia->separators->escape) - ds_put_cstr (s, " /ESCAPE\n"); } diff --git a/src/ui/gui/text-data-import.ui b/src/ui/gui/text-data-import.ui index 526e0fb93f..28f13e2bdc 100644 --- a/src/ui/gui/text-data-import.ui +++ b/src/ui/gui/text-data-import.ui @@ -409,22 +409,6 @@ The selected file contains N lines of text. 
Only the first M of these will be s 2 6 6 - - - Doubled quote mark treated as escape - True - True - False - GDK_POINTER_MOTION_MASK | GDK_POINTER_MOTION_HINT_MASK | GDK_BUTTON_PRESS_MASK | GDK_BUTTON_RELEASE_MASK - True - - - 2 - 1 - 2 - - - True diff --git a/tests/language/data-io/get-data-txt.at b/tests/language/data-io/get-data-txt.at index 60409df37e..df814f0027 100644 --- a/tests/language/data-io/get-data-txt.at +++ b/tests/language/data-io/get-data-txt.at @@ -263,7 +263,7 @@ AT_DATA([pets.data], [dnl "Gilly", , White, "10 Apr 2007", 10, "3""", "Guinea Pig" ]) AT_DATA([pets.sps], [dnl -GET DATA /TYPE=TXT /FILE='pets.data' /DELIMITERS=', ' /QUALIFIER='''"' /ESCAPE +GET DATA /TYPE=TXT /FILE='pets.data' /DELIMITERS=', ' /QUALIFIER='''"' /FIRSTCASE=3 /VARIABLES=name A10 age F3.1 -- 2.30.2