X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fprint.c;h=0396df55b08cf24fa55e76dd0b6c998a3e7d6f4f;hb=621b1074dec940ae9ebfb1732f1ffa44d5d493b8;hp=d8f95d4e98368540dd77b017f01512f5d0b4e48c;hpb=43b1296aafe7582e7dbe6c2b6a8b478d7d9b0fcf;p=pspp diff --git a/src/language/data-io/print.c b/src/language/data-io/print.c index d8f95d4e98..0396df55b0 100644 --- a/src/language/data-io/print.c +++ b/src/language/data-io/print.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,28 +17,33 @@ #include #include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include + +#include "data/case.h" +#include "data/dataset.h" +#include "data/data-out.h" +#include "data/format.h" +#include "data/transformations.h" +#include "data/variable.h" +#include "language/command.h" +#include "language/data-io/data-writer.h" +#include "language/data-io/file-handle.h" +#include "language/data-io/placement-parser.h" +#include "language/lexer/format-parser.h" +#include "language/lexer/lexer.h" +#include "language/lexer/variable-parser.h" +#include "libpspp/assertion.h" +#include "libpspp/compiler.h" +#include "libpspp/i18n.h" +#include "libpspp/ll.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "libpspp/pool.h" +#include "libpspp/u8-line.h" +#include "output/tab.h" +#include "output/text-item.h" + +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -67,24 +72,19 @@ struct prt_out_spec /* PRT_LITERAL only. */ struct string string; /* String to output. */ + int width; /* Width of 'string', in display columns. */ }; -static inline struct prt_out_spec * -ll_to_prt_out_spec (struct ll *ll) -{ - return ll_data (ll, struct prt_out_spec, ll); -} - /* PRINT, PRINT EJECT, WRITE private data structure. */ struct print_trns { struct pool *pool; /* Stores related data. */ bool eject; /* Eject page before printing? */ bool include_prefix; /* Prefix lines with space? */ + const char *encoding; /* Encoding to use for output. */ struct dfm_writer *writer; /* Output file, NULL=listing file. */ struct ll_list specs; /* List of struct prt_out_specs. */ size_t record_cnt; /* Number of records to write. */ - struct string line; /* Output buffer. */ }; enum which_formats @@ -95,7 +95,7 @@ enum which_formats static int internal_cmd_print (struct lexer *, struct dataset *ds, enum which_formats, bool eject); -static trns_proc_func print_trns_proc; +static trns_proc_func print_text_trns_proc, print_binary_trns_proc; static trns_free_func print_trns_free; static bool parse_specs (struct lexer *, struct pool *tmp_pool, struct print_trns *, struct dictionary *dict, enum which_formats); @@ -129,10 +129,13 @@ static int internal_cmd_print (struct lexer *lexer, struct dataset *ds, enum which_formats which_formats, bool eject) { - bool print_table = 0; + bool print_table = false; + const struct prt_out_spec *spec; struct print_trns *trns; struct file_handle *fh = NULL; + char *encoding = NULL; struct pool *tmp_pool; + bool binary; /* Fill in prt to facilitate error-handling. */ trns = pool_create_container (struct print_trns, pool); @@ -140,31 +143,40 @@ internal_cmd_print (struct lexer *lexer, struct dataset *ds, trns->writer = NULL; trns->record_cnt = 0; ll_init (&trns->specs); - ds_init_empty (&trns->line); - ds_register_pool (&trns->line, trns->pool); tmp_pool = pool_create_subpool (trns->pool); /* Parse the command options. */ - while (lex_token (lexer) != '/' && lex_token (lexer) != '.') + while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD) { if (lex_match_id (lexer, "OUTFILE")) { - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); - fh = fh_parse (lexer, FH_REF_FILE); + fh = fh_parse (lexer, FH_REF_FILE, NULL); if (fh == NULL) goto error; } + else if (lex_match_id (lexer, "ENCODING")) + { + lex_match (lexer, T_EQUALS); + if (!lex_force_string (lexer)) + goto error; + + free (encoding); + encoding = ss_xstrdup (lex_tokss (lexer)); + + lex_get (lexer); + } else if (lex_match_id (lexer, "RECORDS")) { - lex_match (lexer, '='); - lex_match (lexer, '('); + lex_match (lexer, T_EQUALS); + lex_match (lexer, T_LPAREN); if (!lex_force_int (lexer)) goto error; trns->record_cnt = lex_integer (lexer); lex_get (lexer); - lex_match (lexer, ')'); + lex_match (lexer, T_RPAREN); } else if (lex_match_id (lexer, "TABLE")) print_table = true; @@ -185,29 +197,59 @@ internal_cmd_print (struct lexer *lexer, struct dataset *ds, if (!parse_specs (lexer, tmp_pool, trns, dataset_dict (ds), which_formats)) goto error; + /* Are there any binary formats? + + There are real difficulties figuring out what to do when both binary + formats and nontrivial encodings enter the picture. So when binary + formats are present we fall back to much simpler handling. */ + binary = false; + ll_for_each (spec, struct prt_out_spec, ll, &trns->specs) + { + if (spec->type == PRT_VAR + && fmt_get_category (spec->format.type) == FMT_CAT_BINARY) + { + binary = true; + break; + } + } + if (binary && fh == NULL) + { + msg (SE, _("%s is required when binary formats are specified."), "OUTFILE"); + goto error; + } + if (lex_end_of_command (lexer) != CMD_SUCCESS) goto error; if (fh != NULL) { - trns->writer = dfm_open_writer (fh); + trns->writer = dfm_open_writer (fh, encoding); if (trns->writer == NULL) goto error; + trns->encoding = dfm_writer_get_encoding (trns->writer); } + else + trns->encoding = UTF8; /* Output the variable table if requested. */ if (print_table) dump_table (trns, fh); /* Put the transformation in the queue. */ - add_transformation (ds, print_trns_proc, print_trns_free, trns); + add_transformation (ds, + (binary + ? print_binary_trns_proc + : print_text_trns_proc), + print_trns_free, trns); pool_destroy (tmp_pool); + fh_unref (fh); return CMD_SUCCESS; error: print_trns_free (trns); + fh_unref (fh); return CMD_FAILURE; } @@ -230,34 +272,34 @@ parse_specs (struct lexer *lexer, struct pool *tmp_pool, struct print_trns *trns int record = 0; int column = 1; - if (lex_token (lexer) == '.') + if (lex_token (lexer) == T_ENDCMD) { trns->record_cnt = 1; return true; } - while (lex_token (lexer) != '.') + while (lex_token (lexer) != T_ENDCMD) { bool ok; if (!parse_record_placement (lexer, &record, &column)) return false; - if (lex_token (lexer) == T_STRING) + if (lex_is_string (lexer)) ok = parse_string_argument (lexer, trns, record, &column); else - ok = parse_variable_argument (lexer, dict, trns, tmp_pool, &record, &column, - which_formats); + ok = parse_variable_argument (lexer, dict, trns, tmp_pool, &record, + &column, which_formats); if (!ok) return 0; - lex_match (lexer, ','); + lex_match (lexer, T_COMMA); } if (trns->record_cnt != 0 && trns->record_cnt != record) - msg (SW, _("Output calls for %d records but %u specified on RECORDS " + msg (SW, _("Output calls for %d records but %zu specified on RECORDS " "subcommand."), - record, (unsigned int) trns->record_cnt); + record, trns->record_cnt); trns->record_cnt = record; return true; @@ -271,7 +313,7 @@ parse_string_argument (struct lexer *lexer, struct print_trns *trns, int record, spec->type = PRT_LITERAL; spec->record = record; spec->first_column = *column; - ds_init_string (&spec->string, lex_tokstr (lexer)); + ds_init_substring (&spec->string, lex_tokss (lexer)); ds_register_pool (&spec->string, trns->pool); lex_get (lexer); @@ -281,14 +323,19 @@ parse_string_argument (struct lexer *lexer, struct print_trns *trns, int record, int first_column, last_column; bool range_specified; - if (!parse_column_range (lexer, &first_column, &last_column, &range_specified)) + if (!parse_column_range (lexer, 1, + &first_column, &last_column, &range_specified)) return false; spec->first_column = first_column; if (range_specified) ds_set_length (&spec->string, last_column - first_column + 1, ' '); } - *column = spec->first_column + ds_length (&spec->string); + + spec->width = u8_strwidth (CHAR_CAST (const uint8_t *, + ds_cstr (&spec->string)), + UTF8); + *column = spec->first_column + spec->width; ll_push_tail (&trns->specs, &spec->ll); return true; @@ -313,9 +360,9 @@ parse_variable_argument (struct lexer *lexer, const struct dictionary *dict, &vars, &var_cnt, PV_DUPLICATE)) return false; - if (lex_is_number (lexer) || lex_token (lexer) == '(') + if (lex_is_number (lexer) || lex_token (lexer) == T_LPAREN) { - if (!parse_var_placements (lexer, tmp_pool, var_cnt, false, + if (!parse_var_placements (lexer, tmp_pool, var_cnt, FMT_FOR_OUTPUT, &formats, &format_cnt)) return false; add_space = false; @@ -324,7 +371,7 @@ parse_variable_argument (struct lexer *lexer, const struct dictionary *dict, { size_t i; - lex_match (lexer, '*'); + lex_match (lexer, T_ASTERISK); formats = pool_nmalloc (tmp_pool, var_cnt, sizeof *formats); format_cnt = var_cnt; @@ -386,8 +433,7 @@ dump_table (struct print_trns *trns, const struct file_handle *fh) int row; spec_cnt = ll_count (&trns->specs); - t = tab_create (4, spec_cnt + 1, 0); - tab_columns (t, TAB_COL_DOWN, 1); + t = tab_create (4, spec_cnt + 1); tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, spec_cnt); tab_hline (t, TAL_2, 0, 3, 1); tab_headers (t, 0, 0, 1, 0); @@ -395,7 +441,6 @@ dump_table (struct print_trns *trns, const struct file_handle *fh) tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record")); tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns")); tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format")); - tab_dim (t, tab_natural_dimensions); row = 1; ll_for_each (spec, struct prt_out_spec, ll, &trns->specs) { @@ -404,8 +449,9 @@ dump_table (struct print_trns *trns, const struct file_handle *fh) switch (spec->type) { case PRT_LITERAL: - tab_text (t, 0, row, TAB_LEFT | TAB_FIX | TAT_PRINTF, "\"%.*s\"", - (int) ds_length (&spec->string), ds_data (&spec->string)); + tab_text_format (t, 0, row, TAB_LEFT | TAB_FIX, "`%.*s'", + (int) ds_length (&spec->string), + ds_data (&spec->string)); width = ds_length (&spec->string); break; case PRT_VAR: @@ -417,59 +463,90 @@ dump_table (struct print_trns *trns, const struct file_handle *fh) default: NOT_REACHED (); } - tab_text (t, 1, row, TAT_PRINTF, "%d", spec->record); - tab_text (t, 2, row, TAT_PRINTF, "%3d-%3d", - spec->first_column, spec->first_column + width - 1); + tab_text_format (t, 1, row, 0, "%d", spec->record); + tab_text_format (t, 2, row, 0, "%3d-%3d", + spec->first_column, spec->first_column + width - 1); row++; } if (fh != NULL) - tab_title (t, ngettext ("Writing %d record to %s.", - "Writing %d records to %s.", trns->record_cnt), + tab_title (t, ngettext ("Writing %zu record to %s.", + "Writing %zu records to %s.", trns->record_cnt), trns->record_cnt, fh_get_name (fh)); else - tab_title (t, ngettext ("Writing %d record.", - "Writing %d records.", trns->record_cnt), + tab_title (t, ngettext ("Writing %zu record.", + "Writing %zu records.", trns->record_cnt), trns->record_cnt); tab_submit (t); } -/* Transformation. */ +/* Transformation, for all-text output. */ -static void flush_records (struct print_trns *, int target_record, - bool *eject, int *record); +static void print_text_flush_records (struct print_trns *, struct u8_line *, + int target_record, + bool *eject, int *record); /* Performs the transformation inside print_trns T on case C. */ static int -print_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED) +print_text_trns_proc (void *trns_, struct ccase **c, + casenumber case_num UNUSED) { struct print_trns *trns = trns_; + struct prt_out_spec *spec; + struct u8_line line; + bool eject = trns->eject; int record = 1; - struct prt_out_spec *spec; - ds_clear (&trns->line); - ds_put_char (&trns->line, ' '); + u8_line_init (&line); ll_for_each (spec, struct prt_out_spec, ll, &trns->specs) { - flush_records (trns, spec->record, &eject, &record); + int x0 = spec->first_column; + + print_text_flush_records (trns, &line, spec->record, &eject, &record); - ds_set_length (&trns->line, spec->first_column, ' '); + u8_line_set_length (&line, spec->first_column); if (spec->type == PRT_VAR) { - const union value *input = case_data (c, spec->var); - char *output = ds_put_uninit (&trns->line, spec->format.w); + const union value *input = case_data (*c, spec->var); + int x1; + if (!spec->sysmis_as_spaces || input->f != SYSMIS) - data_out (input, &spec->format, output); + { + size_t len; + int width; + char *s; + + s = data_out (input, var_get_encoding (spec->var), + &spec->format); + len = strlen (s); + width = u8_width (CHAR_CAST (const uint8_t *, s), len, UTF8); + x1 = x0 + width; + u8_line_put (&line, x0, x1, s, len); + free (s); + } else - memset (output, ' ', spec->format.w); + { + int n = spec->format.w; + + x1 = x0 + n; + memset (u8_line_reserve (&line, x0, x1, n), ' ', n); + } + if (spec->add_space) - ds_put_char (&trns->line, ' '); + *u8_line_reserve (&line, x1, x1 + 1, 1) = ' '; } else - ds_put_substring (&trns->line, ds_ss (&spec->string)); + { + const struct string *s = &spec->string; + + u8_line_put (&line, x0, x0 + spec->width, + ds_data (s), ds_length (s)); + } } - flush_records (trns, trns->record_cnt + 1, &eject, &record); + print_text_flush_records (trns, &line, trns->record_cnt + 1, + &eject, &record); + u8_line_destroy (&line); if (trns->writer != NULL && dfm_write_error (trns->writer)) return TRNS_ERROR; @@ -481,41 +558,138 @@ print_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED) output is preceded by ejecting the page (and *EJECT is set false). */ static void -flush_records (struct print_trns *trns, int target_record, - bool *eject, int *record) +print_text_flush_records (struct print_trns *trns, struct u8_line *line, + int target_record, bool *eject, int *record) { for (; target_record > *record; (*record)++) { - char *line = ds_cstr (&trns->line); - size_t length = ds_length (&trns->line); char leader = ' '; if (*eject) { *eject = false; if (trns->writer == NULL) - som_eject_page (); + text_item_submit (text_item_create (TEXT_ITEM_EJECT_PAGE, "")); else leader = '1'; } - line[0] = leader; + *u8_line_reserve (line, 0, 1, 1) = leader; if (trns->writer == NULL) - tab_output_text (TAB_FIX | TAT_NOWRAP, &line[1]); + tab_output_text (TAB_FIX, ds_cstr (&line->s) + 1); else { + size_t len = ds_length (&line->s); + char *s = ds_cstr (&line->s); + if (!trns->include_prefix) { - line++; - length--; + s++; + len--; + } + + if (is_encoding_utf8 (trns->encoding)) + dfm_put_record (trns->writer, s, len); + else + { + char *recoded = recode_string (trns->encoding, UTF8, s, len); + dfm_put_record (trns->writer, recoded, strlen (recoded)); + free (recoded); } - dfm_put_record (trns->writer, line, length); } + } +} + +/* Transformation, for output involving binary. */ + +static void print_binary_flush_records (struct print_trns *, + struct string *line, int target_record, + bool *eject, int *record); + +/* Performs the transformation inside print_trns T on case C. */ +static int +print_binary_trns_proc (void *trns_, struct ccase **c, + casenumber case_num UNUSED) +{ + struct print_trns *trns = trns_; + bool eject = trns->eject; + char encoded_space = recode_byte (trns->encoding, C_ENCODING, ' '); + int record = 1; + struct prt_out_spec *spec; + struct string line; + + ds_init_empty (&line); + ds_put_byte (&line, ' '); + ll_for_each (spec, struct prt_out_spec, ll, &trns->specs) + { + print_binary_flush_records (trns, &line, spec->record, &eject, &record); - ds_truncate (&trns->line, 1); + ds_set_length (&line, spec->first_column, encoded_space); + if (spec->type == PRT_VAR) + { + const union value *input = case_data (*c, spec->var); + if (!spec->sysmis_as_spaces || input->f != SYSMIS) + data_out_recode (input, var_get_encoding (spec->var), + &spec->format, &line, trns->encoding); + else + ds_put_byte_multiple (&line, encoded_space, spec->format.w); + if (spec->add_space) + ds_put_byte (&line, encoded_space); + } + else + { + ds_put_substring (&line, ds_ss (&spec->string)); + if (0 != strcmp (trns->encoding, UTF8)) + { + size_t length = ds_length (&spec->string); + char *data = ss_data (ds_tail (&line, length)); + char *s = recode_string (trns->encoding, UTF8, data, length); + memcpy (data, s, length); + free (s); + } + } } + print_binary_flush_records (trns, &line, trns->record_cnt + 1, + &eject, &record); + ds_destroy (&line); + + if (trns->writer != NULL && dfm_write_error (trns->writer)) + return TRNS_ERROR; + return TRNS_CONTINUE; } +/* Advance from *RECORD to TARGET_RECORD, outputting records + along the way. If *EJECT is true, then the first record + output is preceded by ejecting the page (and *EJECT is set + false). */ +static void +print_binary_flush_records (struct print_trns *trns, struct string *line, + int target_record, bool *eject, int *record) +{ + for (; target_record > *record; (*record)++) + { + char *s = ds_cstr (line); + size_t length = ds_length (line); + char leader = ' '; + + if (*eject) + { + *eject = false; + leader = '1'; + } + s[0] = recode_byte (trns->encoding, C_ENCODING, leader); + + if (!trns->include_prefix) + { + s++; + length--; + } + dfm_put_record (trns->writer, s, length); + + ds_truncate (line, 1); + } +} + /* Frees TRNS. */ static bool print_trns_free (void *trns_)