From: Ben Pfaff Date: Sun, 22 Apr 2012 23:29:56 +0000 (-0700) Subject: work on print encodings X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=e3f69c13478f07af6a956a9e4f1ea6eddb0d2986 work on print encodings --- diff --git a/src/language/data-io/print.c b/src/language/data-io/print.c index cffa3bd49f..71791ba595 100644 --- a/src/language/data-io/print.c +++ b/src/language/data-io/print.c @@ -88,7 +88,11 @@ struct print_trns struct dfm_writer *writer; /* Output file, NULL=listing file. */ struct ll_list specs; /* List of struct prt_out_specs. */ size_t record_cnt; /* Number of records to write. */ - struct string line; /* Output buffer. */ + struct u8_line line; /* Output buffer. */ + + int unit; /* Unit width, in bytes. */ + char one[MAX_UNIT]; /* '1' in encoding, 'unit' bytes long. */ + char space[MAX_UNIT]; /* \n in encoding, 'unit' bytes long. */ }; enum which_formats @@ -145,8 +149,8 @@ internal_cmd_print (struct lexer *lexer, struct dataset *ds, trns->writer = NULL; trns->record_cnt = 0; ll_init (&trns->specs); - ds_init_empty (&trns->line); - ds_register_pool (&trns->line, trns->pool); + u8_line_init (&trns->line); + u8_line_register_pool (&trns->line, trns->pool); tmp_pool = pool_create_subpool (trns->pool); @@ -470,20 +474,33 @@ print_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED) int record = 1; struct prt_out_spec *spec; - ds_clear (&trns->line); - ds_put_byte (&trns->line, ' '); + u8_line_clear (&trns->line); + + ds_put_byte (&trns->line.s, ' '); + trns->line.width = 0; + ll_for_each (spec, struct prt_out_spec, ll, &trns->specs) { flush_records (trns, spec->record, &eject, &record); - ds_set_length (&trns->line, spec->first_column, encoded_space); if (spec->type == PRT_VAR) { const union value *input = case_data (*c, spec->var); if (!spec->sysmis_as_spaces || input->f != SYSMIS) - data_out_recode (input, var_get_encoding (spec->var), - &spec->format, &trns->line, trns->encoding); + { + char *s = 
data_out (input, var_get_encoding (spec->var), + &spec->format); + int width = u8_strwidth (s); + size_t n = strlen (s); + u8_line_put (&trns->line, spec->first_column, + spec->first_column + width, s, n); + free (s); + } else + { + memset (u8_line-record + + } ds_put_byte_multiple (&trns->line, encoded_space, spec->format.w); if (spec->add_space) ds_put_byte (&trns->line, encoded_space); @@ -491,11 +508,11 @@ print_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED) else { ds_put_substring (&trns->line, ds_ss (&spec->string)); - if (0 != strcmp (trns->encoding, C_ENCODING)) + if (0 != strcmp (trns->encoding, UTF8)) { size_t length = ds_length (&spec->string); char *data = ss_data (ds_tail (&trns->line, length)); - char *s = recode_string (trns->encoding, C_ENCODING, data, length); + char *s = recode_string (trns->encoding, UTF8, data, length); memcpy (data, s, length); free (s); } diff --git a/src/libpspp/automake.mk b/src/libpspp/automake.mk index 2f81243cf9..77e34860ec 100644 --- a/src/libpspp/automake.mk +++ b/src/libpspp/automake.mk @@ -92,6 +92,8 @@ src_libpspp_liblibpspp_la_SOURCES = \ src/libpspp/tower.h \ src/libpspp/u8-istream.c \ src/libpspp/u8-istream.h \ + src/libpspp/u8-line.c \ + src/libpspp/u8-line.h \ src/libpspp/version.h \ src/libpspp/zip-private.h \ src/libpspp/zip-reader.c \ diff --git a/src/libpspp/u8-line.c b/src/libpspp/u8-line.c new file mode 100644 index 0000000000..36516b4369 --- /dev/null +++ b/src/libpspp/u8-line.c @@ -0,0 +1,171 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2011, 2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include + +#include "libpspp/u8-line.h" +#include +#include +#include "libpspp/cast.h" +#include "libpspp/str.h" + +void +u8_line_init (struct u8_line *line) +{ + ds_init_empty (&line->s); + line->width = 0; +} + +void +u8_line_clear (struct u8_line *line) +{ + ds_clear (&line->s); + line->width = 0; +} + +void +u8_line_destroy (struct u8_line *line) +{ + ds_destroy (&line->s); +} + +static int +u8_mb_to_display (int *wp, const uint8_t *s, size_t n) +{ + size_t ofs; + ucs4_t uc; + int w; + + ofs = u8_mbtouc (&uc, s, n); + if (ofs < n && s[ofs] == '\b') + { + ofs++; + ofs += u8_mbtouc (&uc, s + ofs, n - ofs); + } + + w = uc_width (uc, "UTF-8"); + if (w <= 0) + { + *wp = 0; + return ofs; + } + + while (ofs < n) + { + int mblen = u8_mbtouc (&uc, s + ofs, n - ofs); + if (uc_width (uc, "UTF-8") > 0) + break; + ofs += mblen; + } + + *wp = w; + return ofs; +} + +struct u8_pos + { + int x0; + int x1; + size_t ofs0; + size_t ofs1; + }; + +static void +u8_line_find_pos (struct u8_line *line, int target_x, struct u8_pos *c) +{ + const uint8_t *s = CHAR_CAST (const uint8_t *, ds_cstr (&line->s)); + size_t length = ds_length (&line->s); + size_t ofs; + int mblen; + int x; + + x = 0; + for (ofs = 0; ; ofs += mblen) + { + int w; + + mblen = u8_mb_to_display (&w, s + ofs, length - ofs); + if (x + w > target_x) + { + c->x0 = x; + c->x1 = x + w; + c->ofs0 = ofs; + c->ofs1 = ofs + mblen; + return; + } + x += w; + } +} + +char * +u8_line_reserve (struct u8_line *line, int x0, int x1, int n) +{ + if (x0 >= line->width) + { + /* The common case: adding new characters at the end of a line. 
*/ + ds_put_byte_multiple (&line->s, ' ', x0 - line->width); + line->width = x1; + return ds_put_uninit (&line->s, n); + } + else if (x0 == x1) + return NULL; + else + { + /* An unusual case: overwriting characters in the middle of a line. We + don't keep any kind of mapping from bytes to display positions, so we + have to iterate over the whole line starting from the beginning. */ + struct u8_pos p0, p1; + char *s; + + /* Find the positions of the first and last character. We must find both + characters' positions before changing the line, because that would + prevent finding the other character's position. */ + u8_line_find_pos (line, x0, &p0); + if (x1 < line->width) + u8_line_find_pos (line, x1, &p1); + + /* If a double-width character occupies both x0 - 1 and x0, then replace + its first character width by '?'. */ + s = ds_data (&line->s); + while (p0.x0 < x0) + { + s[p0.ofs0++] = '?'; + p0.x0++; + } + + if (x1 >= line->width) + { + ds_truncate (&line->s, p0.ofs0); + line->width = x1; + return ds_put_uninit (&line->s, n); + } + + /* If a double-width character occupies both x1 - 1 and x1, then replace + its second character width by '?'. */ + if (p1.x0 < x1) + { + do + { + s[--p1.ofs1] = '?'; + p1.x0++; + } + while (p1.x0 < x1); + return ds_splice_uninit (&line->s, p0.ofs0, p1.ofs1 - p0.ofs0, n); + } + + return ds_splice_uninit (&line->s, p0.ofs0, p1.ofs0 - p0.ofs0, n); + } +} diff --git a/src/libpspp/u8-line.h b/src/libpspp/u8-line.h new file mode 100644 index 0000000000..dee727fc58 --- /dev/null +++ b/src/libpspp/u8-line.h @@ -0,0 +1,34 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2011, 2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef LIBPSPP_U8_LINE_H +#define LIBPSPP_U8_LINE_H 1 + +#include "libpspp/str.h" + +/* A line of text encoded in UTF-8, designed to make appending text fast. */ +struct u8_line + { + struct string s; /* Content, in UTF-8. */ + size_t width; /* Display width, in character positions. */ + }; + +void u8_line_init (struct u8_line *); +void u8_line_destroy (struct u8_line *); +void u8_line_clear (struct u8_line *); +char *u8_line_reserve (struct u8_line *, int x0, int x1, int n); + +#endif /* libpspp/u8-line.h */ diff --git a/src/output/ascii.c b/src/output/ascii.c index 1688fd1bf1..616a51538d 100644 --- a/src/output/ascii.c +++ b/src/output/ascii.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2007, 2009, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2007, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,8 +22,8 @@ #include #include #include -#include #include +#include #include #include @@ -35,6 +35,7 @@ #include "libpspp/message.h" #include "libpspp/start-date.h" #include "libpspp/string-map.h" +#include "libpspp/u8-line.h" #include "libpspp/version.h" #include "output/ascii.h" #include "output/cairo.h" @@ -128,13 +129,6 @@ make_box_index (int left, int right, int top, int bottom) return ((right * 3 + bottom) * 3 + left) * 3 + top; } -/* A line of text. */ -struct ascii_line - { - struct string s; /* Content, in UTF-8. */ - size_t width; /* Display width, in character positions. 
*/ - }; - /* How to emphasize text. */ enum emphasis_style { @@ -174,7 +168,7 @@ struct ascii_driver FILE *file; /* Output file. */ bool error; /* Output error? */ int page_number; /* Current page number. */ - struct ascii_line *lines; /* Page content. */ + struct u8_line *lines; /* Page content. */ int allocated_lines; /* Number of lines allocated. */ int chart_cnt; /* Number of charts so far. */ int y; @@ -210,11 +204,7 @@ reallocate_lines (struct ascii_driver *a) int i; a->lines = xnrealloc (a->lines, a->length, sizeof *a->lines); for (i = a->allocated_lines; i < a->length; i++) - { - struct ascii_line *line = &a->lines[i]; - ds_init_empty (&line->s); - line->width = 0; - } + u8_line_init (&a->lines[i]); a->allocated_lines = a->length; } } @@ -380,7 +370,7 @@ ascii_destroy (struct output_driver *driver) free (a->file_name); free (a->chart_file_name); for (i = 0; i < a->allocated_lines; i++) - ds_destroy (&a->lines[i].s); + u8_line_destroy (&a->lines[i]); free (a->lines); free (a); } @@ -698,136 +688,11 @@ ascii_draw_cell (void *a_, const struct table_cell *cell, ascii_layout_cell (a, cell, bb, clip, &w, &h); } -static int -u8_mb_to_display (int *wp, const uint8_t *s, size_t n) -{ - size_t ofs; - ucs4_t uc; - int w; - - ofs = u8_mbtouc (&uc, s, n); - if (ofs < n && s[ofs] == '\b') - { - ofs++; - ofs += u8_mbtouc (&uc, s + ofs, n - ofs); - } - - w = uc_width (uc, "UTF-8"); - if (w <= 0) - { - *wp = 0; - return ofs; - } - - while (ofs < n) - { - int mblen = u8_mbtouc (&uc, s + ofs, n - ofs); - if (uc_width (uc, "UTF-8") > 0) - break; - ofs += mblen; - } - - *wp = w; - return ofs; -} - -struct ascii_pos - { - int x0; - int x1; - size_t ofs0; - size_t ofs1; - }; - -static void -find_ascii_pos (struct ascii_line *line, int target_x, struct ascii_pos *c) -{ - const uint8_t *s = CHAR_CAST (const uint8_t *, ds_cstr (&line->s)); - size_t length = ds_length (&line->s); - size_t ofs; - int mblen; - int x; - - x = 0; - for (ofs = 0; ; ofs += mblen) - { - int w; - - mblen = 
u8_mb_to_display (&w, s + ofs, length - ofs); - if (x + w > target_x) - { - c->x0 = x; - c->x1 = x + w; - c->ofs0 = ofs; - c->ofs1 = ofs + mblen; - return; - } - x += w; - } -} - static char * ascii_reserve (struct ascii_driver *a, int y, int x0, int x1, int n) { - struct ascii_line *line; assert (y < a->allocated_lines); - line = &a->lines[y]; - - if (x0 >= line->width) - { - /* The common case: adding new characters at the end of a line. */ - ds_put_byte_multiple (&line->s, ' ', x0 - line->width); - line->width = x1; - return ds_put_uninit (&line->s, n); - } - else if (x0 == x1) - return NULL; - else - { - /* An unusual case: overwriting characters in the middle of a line. We - don't keep any kind of mapping from bytes to display positions, so we - have to iterate over the whole line starting from the beginning. */ - struct ascii_pos p0, p1; - char *s; - - /* Find the positions of the first and last character. We must find the - both characters' positions before changing the line, because that - would prevent finding the other character's position. */ - find_ascii_pos (line, x0, &p0); - if (x1 < line->width) - find_ascii_pos (line, x1, &p1); - - /* If a double-width character occupies both x0 - 1 and x0, then replace - its first character width by '?'. */ - s = ds_data (&line->s); - while (p0.x0 < x0) - { - s[p0.ofs0++] = '?'; - p0.x0++; - } - - if (x1 >= line->width) - { - ds_truncate (&line->s, p0.ofs0); - line->width = x1; - return ds_put_uninit (&line->s, n); - } - - /* If a double-width character occupies both x1 - 1 and x1, then we need - to replace its second character width by '?'. 
*/ - if (p1.x0 < x1) - { - do - { - s[--p1.ofs1] = '?'; - p1.x0++; - } - while (p1.x0 < x1); - return ds_splice_uninit (&line->s, p0.ofs0, p1.ofs1 - p0.ofs0, n); - } - - return ds_splice_uninit (&line->s, p0.ofs0, p1.ofs0 - p0.ofs0, n); - } + return u8_line_reserve (&a->lines[y], x0, x1, n); } static void @@ -1132,11 +997,7 @@ ascii_open_page (struct ascii_driver *a) reallocate_lines (a); for (i = 0; i < a->length; i++) - { - struct ascii_line *line = &a->lines[i]; - ds_clear (&line->s); - line->width = 0; - } + u8_line_clear (&a->lines[i]); return true; } @@ -1195,7 +1056,7 @@ ascii_close_page (struct ascii_driver *a) any_blank = false; for (y = 0; y < a->allocated_lines; y++) { - struct ascii_line *line = &a->lines[y]; + struct u8_line *line = &a->lines[y]; if (a->squeeze_blank_lines && y > 0 && line->width == 0) any_blank = true;