struct dfm_writer *writer; /* Output file, NULL=listing file. */
struct ll_list specs; /* List of struct prt_out_specs. */
size_t record_cnt; /* Number of records to write. */
- struct string line; /* Output buffer. */
+ struct u8_line line; /* Output buffer. */
+
+ int unit; /* Unit width, in bytes. */
+ char one[MAX_UNIT]; /* '1' in encoding, 'unit' bytes long. */
+ char space[MAX_UNIT]; /* ' ' in encoding, 'unit' bytes long. */
};
enum which_formats
trns->writer = NULL;
trns->record_cnt = 0;
ll_init (&trns->specs);
- ds_init_empty (&trns->line);
- ds_register_pool (&trns->line, trns->pool);
+ u8_line_init (&trns->line);
+ u8_line_register_pool (&trns->line, trns->pool);
tmp_pool = pool_create_subpool (trns->pool);
int record = 1;
struct prt_out_spec *spec;
- ds_clear (&trns->line);
- ds_put_byte (&trns->line, ' ');
+ u8_line_clear (&trns->line);
+
+ ds_put_byte (&trns->line.s, ' ');
+ trns->line.width = 0;
+
ll_for_each (spec, struct prt_out_spec, ll, &trns->specs)
{
flush_records (trns, spec->record, &eject, &record);
- ds_set_length (&trns->line, spec->first_column, encoded_space);
if (spec->type == PRT_VAR)
{
const union value *input = case_data (*c, spec->var);
if (!spec->sysmis_as_spaces || input->f != SYSMIS)
- data_out_recode (input, var_get_encoding (spec->var),
- &spec->format, &trns->line, trns->encoding);
+ {
+ char *s = data_out (input, var_get_encoding (spec->var),
+ &spec->format);
+ int width = u8_strwidth (s);
+ size_t n = strlen (s);
+ u8_line_put (&trns->line, spec->first_column,
+ spec->first_column + width, s, n);
+ free (s);
+ }
else
+ {
+ memset (u8_line-record
+
+ }
ds_put_byte_multiple (&trns->line, encoded_space, spec->format.w);
if (spec->add_space)
ds_put_byte (&trns->line, encoded_space);
else
{
ds_put_substring (&trns->line, ds_ss (&spec->string));
- if (0 != strcmp (trns->encoding, C_ENCODING))
+ if (0 != strcmp (trns->encoding, UTF8))
{
size_t length = ds_length (&spec->string);
char *data = ss_data (ds_tail (&trns->line, length));
- char *s = recode_string (trns->encoding, C_ENCODING, data, length);
+ char *s = recode_string (trns->encoding, UTF8, data, length);
memcpy (data, s, length);
free (s);
}
src/libpspp/tower.h \
src/libpspp/u8-istream.c \
src/libpspp/u8-istream.h \
+ src/libpspp/u8-line.c \
+ src/libpspp/u8-line.h \
src/libpspp/version.h \
src/libpspp/zip-private.h \
src/libpspp/zip-reader.c \
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2011, 2012 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "libpspp/u8-line.h"
+#include <unistr.h>
+#include <uniwidth.h>
+#include "libpspp/cast.h"
+#include "libpspp/str.h"
+
+/* Initializes LINE: sets its display width to zero and initializes its
+   content string with ds_init_empty(). */
+void
+u8_line_init (struct u8_line *line)
+{
+  line->width = 0;
+  ds_init_empty (&line->s);
+}
+
+/* Resets LINE for reuse: sets its display width back to zero and clears its
+   content string with ds_clear(). */
+void
+u8_line_clear (struct u8_line *line)
+{
+  line->width = 0;
+  ds_clear (&line->s);
+}
+
+/* Destroys the content string held by LINE.  LINE itself is not freed
+   here. */
+void
+u8_line_destroy (struct u8_line *line)
+{
+  struct string *content = &line->s;
+
+  ds_destroy (content);
+}
+
+/* Measures the first display unit in the N bytes of UTF-8 text at S.
+
+   A display unit is one character, optionally overstruck (CHAR '\b' CHAR, in
+   which case the character after the backspace determines the width),
+   followed by any characters whose width is not positive.
+
+   Stores the unit's display width in *WP (0 if the determining character has
+   zero or negative width, in which case no following characters are
+   absorbed) and returns the number of bytes that the unit occupies.
+
+   N must be greater than 0. */
+static int
+u8_mb_to_display (int *wp, const uint8_t *s, size_t n)
+{
+  size_t ofs;
+  ucs4_t uc;
+  int w;
+
+  ofs = u8_mbtouc (&uc, s, n);
+  if (ofs < n && s[ofs] == '\b')
+    {
+      ofs++;
+
+      /* Decode the overstriking character only if one is actually present.
+         u8_mbtouc() requires at least one available byte; calling it at the
+         very end of the text could yield a byte count that pushes OFS past
+         N, which callers then subtract from the remaining length. */
+      if (ofs < n)
+        ofs += u8_mbtouc (&uc, s + ofs, n - ofs);
+    }
+
+  w = uc_width (uc, "UTF-8");
+  if (w <= 0)
+    {
+      *wp = 0;
+      return ofs;
+    }
+
+  /* Absorb following characters that do not advance the display column, so
+     that they stay attached to the character they modify. */
+  while (ofs < n)
+    {
+      int mblen = u8_mbtouc (&uc, s + ofs, n - ofs);
+      if (uc_width (uc, "UTF-8") > 0)
+        break;
+      ofs += mblen;
+    }
+
+  *wp = w;
+  return ofs;
+}
+
+/* Location of one display unit within a line, expressed both in display
+   columns and in bytes. */
+struct u8_pos
+  {
+    int x0, x1;                 /* Columns: first occupied, one past last. */
+    size_t ofs0, ofs1;          /* Byte offsets: start, one past end. */
+  };
+
+/* Scans LINE from its first byte to find the display unit that covers
+   display column TARGET_X, then stores that unit's column range and byte
+   range in *C.
+
+   The scan has no end-of-line check, so the caller must ensure that TARGET_X
+   lies within the text actually stored in LINE. */
+static void
+u8_line_find_pos (struct u8_line *line, int target_x, struct u8_pos *c)
+{
+  const uint8_t *text = CHAR_CAST (const uint8_t *, ds_cstr (&line->s));
+  size_t n_bytes = ds_length (&line->s);
+  size_t ofs = 0;
+  int x = 0;
+
+  for (;;)
+    {
+      int w;
+      int mblen = u8_mb_to_display (&w, text + ofs, n_bytes - ofs);
+
+      if (x + w > target_x)
+        {
+          /* This unit spans TARGET_X: report where it starts and ends. */
+          c->x0 = x;
+          c->x1 = x + w;
+          c->ofs0 = ofs;
+          c->ofs1 = ofs + mblen;
+          return;
+        }
+
+      x += w;
+      ofs += mblen;
+    }
+}
+/* Makes room in LINE for N bytes of UTF-8 text that will occupy display
+   columns X0 (inclusive) through X1 (exclusive).
+
+   If X0 is at or beyond the current width of LINE, the gap up to X0 is padded
+   with spaces and the N bytes are appended.  Otherwise the bytes that
+   currently occupy columns X0...X1 are replaced by N bytes; any half of a
+   double-width character left straddling either boundary is replaced
+   by '?'.
+
+   Returns a pointer to the N reserved bytes, as obtained from
+   ds_put_uninit() or ds_splice_uninit() (presumably uninitialized and meant
+   to be filled in by the caller).  Returns NULL, changing nothing, if
+   X0 == X1 and X0 lies inside the existing line. */
+char *
+u8_line_reserve (struct u8_line *line, int x0, int x1, int n)
+{
+ if (x0 >= line->width)
+ {
+ /* The common case: adding new characters at the end of a line. */
+ ds_put_byte_multiple (&line->s, ' ', x0 - line->width);
+ line->width = x1;
+ return ds_put_uninit (&line->s, n);
+ }
+ else if (x0 == x1)
+ return NULL; /* Zero-width request inside the line: nothing to reserve. */
+ else
+ {
+ /* An unusual case: overwriting characters in the middle of a line. We
+ don't keep any kind of mapping from bytes to display positions, so we
+ have to iterate over the whole line starting from the beginning. */
+ struct u8_pos p0, p1;
+ char *s;
+
+ /* Find the positions of the first and last character. We must find both
+ characters' positions before changing the line, because that would
+ prevent finding the other character's position. */
+ u8_line_find_pos (line, x0, &p0);
+ if (x1 < line->width)
+ u8_line_find_pos (line, x1, &p1);
+
+ /* If a double-width character occupies both x0 - 1 and x0, then replace
+ its first character width by '?'. */
+ s = ds_data (&line->s);
+ while (p0.x0 < x0)
+ {
+ s[p0.ofs0++] = '?';
+ p0.x0++;
+ }
+
+ if (x1 >= line->width)
+ {
+ ds_truncate (&line->s, p0.ofs0);
+ line->width = x1;
+ return ds_put_uninit (&line->s, n);
+ }
+
+ /* If a double-width character occupies both x1 - 1 and x1, then replace
+ its second character width by '?'. */
+ if (p1.x0 < x1)
+ {
+ do
+ {
+ s[--p1.ofs1] = '?';
+ p1.x0++;
+ }
+ while (p1.x0 < x1);
+ return ds_splice_uninit (&line->s, p0.ofs0, p1.ofs1 - p0.ofs0, n);
+ }
+
+ return ds_splice_uninit (&line->s, p0.ofs0, p1.ofs0 - p0.ofs0, n);
+ }
+}
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2011, 2012 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef LIBPSPP_U8_LINE_H
+#define LIBPSPP_U8_LINE_H 1
+
+#include "libpspp/str.h"
+
+/* A line of text encoded in UTF-8, together with its display width.
+   Designed to make appending text at the end of the line fast. */
+struct u8_line
+ {
+ struct string s; /* Content, in UTF-8. */
+ size_t width; /* Display width, in character positions. */
+ };
+
+void u8_line_init (struct u8_line *);
+void u8_line_destroy (struct u8_line *);
+void u8_line_clear (struct u8_line *);
+char *u8_line_reserve (struct u8_line *, int x0, int x1, int n);
+
+#endif /* libpspp/u8-line.h */
/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2007, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <stdint.h>
#include <stdlib.h>
#include <signal.h>
-#include <unistd.h>
#include <unilbrk.h>
+#include <unistd.h>
#include <unistr.h>
#include <uniwidth.h>
#include "libpspp/message.h"
#include "libpspp/start-date.h"
#include "libpspp/string-map.h"
+#include "libpspp/u8-line.h"
#include "libpspp/version.h"
#include "output/ascii.h"
#include "output/cairo.h"
return ((right * 3 + bottom) * 3 + left) * 3 + top;
}
-/* A line of text. */
-struct ascii_line
- {
- struct string s; /* Content, in UTF-8. */
- size_t width; /* Display width, in character positions. */
- };
-
/* How to emphasize text. */
enum emphasis_style
{
FILE *file; /* Output file. */
bool error; /* Output error? */
int page_number; /* Current page number. */
- struct ascii_line *lines; /* Page content. */
+ struct u8_line *lines; /* Page content. */
int allocated_lines; /* Number of lines allocated. */
int chart_cnt; /* Number of charts so far. */
int y;
int i;
a->lines = xnrealloc (a->lines, a->length, sizeof *a->lines);
for (i = a->allocated_lines; i < a->length; i++)
- {
- struct ascii_line *line = &a->lines[i];
- ds_init_empty (&line->s);
- line->width = 0;
- }
+ u8_line_init (&a->lines[i]);
a->allocated_lines = a->length;
}
}
free (a->file_name);
free (a->chart_file_name);
for (i = 0; i < a->allocated_lines; i++)
- ds_destroy (&a->lines[i].s);
+ u8_line_destroy (&a->lines[i]);
free (a->lines);
free (a);
}
ascii_layout_cell (a, cell, bb, clip, &w, &h);
}
-static int
-u8_mb_to_display (int *wp, const uint8_t *s, size_t n)
-{
- size_t ofs;
- ucs4_t uc;
- int w;
-
- ofs = u8_mbtouc (&uc, s, n);
- if (ofs < n && s[ofs] == '\b')
- {
- ofs++;
- ofs += u8_mbtouc (&uc, s + ofs, n - ofs);
- }
-
- w = uc_width (uc, "UTF-8");
- if (w <= 0)
- {
- *wp = 0;
- return ofs;
- }
-
- while (ofs < n)
- {
- int mblen = u8_mbtouc (&uc, s + ofs, n - ofs);
- if (uc_width (uc, "UTF-8") > 0)
- break;
- ofs += mblen;
- }
-
- *wp = w;
- return ofs;
-}
-
-struct ascii_pos
- {
- int x0;
- int x1;
- size_t ofs0;
- size_t ofs1;
- };
-
-static void
-find_ascii_pos (struct ascii_line *line, int target_x, struct ascii_pos *c)
-{
- const uint8_t *s = CHAR_CAST (const uint8_t *, ds_cstr (&line->s));
- size_t length = ds_length (&line->s);
- size_t ofs;
- int mblen;
- int x;
-
- x = 0;
- for (ofs = 0; ; ofs += mblen)
- {
- int w;
-
- mblen = u8_mb_to_display (&w, s + ofs, length - ofs);
- if (x + w > target_x)
- {
- c->x0 = x;
- c->x1 = x + w;
- c->ofs0 = ofs;
- c->ofs1 = ofs + mblen;
- return;
- }
- x += w;
- }
-}
-
static char *
ascii_reserve (struct ascii_driver *a, int y, int x0, int x1, int n)
{
- struct ascii_line *line;
assert (y < a->allocated_lines);
- line = &a->lines[y];
-
- if (x0 >= line->width)
- {
- /* The common case: adding new characters at the end of a line. */
- ds_put_byte_multiple (&line->s, ' ', x0 - line->width);
- line->width = x1;
- return ds_put_uninit (&line->s, n);
- }
- else if (x0 == x1)
- return NULL;
- else
- {
- /* An unusual case: overwriting characters in the middle of a line. We
- don't keep any kind of mapping from bytes to display positions, so we
- have to iterate over the whole line starting from the beginning. */
- struct ascii_pos p0, p1;
- char *s;
-
- /* Find the positions of the first and last character. We must find the
- both characters' positions before changing the line, because that
- would prevent finding the other character's position. */
- find_ascii_pos (line, x0, &p0);
- if (x1 < line->width)
- find_ascii_pos (line, x1, &p1);
-
- /* If a double-width character occupies both x0 - 1 and x0, then replace
- its first character width by '?'. */
- s = ds_data (&line->s);
- while (p0.x0 < x0)
- {
- s[p0.ofs0++] = '?';
- p0.x0++;
- }
-
- if (x1 >= line->width)
- {
- ds_truncate (&line->s, p0.ofs0);
- line->width = x1;
- return ds_put_uninit (&line->s, n);
- }
-
- /* If a double-width character occupies both x1 - 1 and x1, then we need
- to replace its second character width by '?'. */
- if (p1.x0 < x1)
- {
- do
- {
- s[--p1.ofs1] = '?';
- p1.x0++;
- }
- while (p1.x0 < x1);
- return ds_splice_uninit (&line->s, p0.ofs0, p1.ofs1 - p0.ofs0, n);
- }
-
- return ds_splice_uninit (&line->s, p0.ofs0, p1.ofs0 - p0.ofs0, n);
- }
+ return u8_line_reserve (&a->lines[y], x0, x1, n);
}
static void
reallocate_lines (a);
for (i = 0; i < a->length; i++)
- {
- struct ascii_line *line = &a->lines[i];
- ds_clear (&line->s);
- line->width = 0;
- }
+ u8_line_clear (&a->lines[i]);
return true;
}
any_blank = false;
for (y = 0; y < a->allocated_lines; y++)
{
- struct ascii_line *line = &a->lines[y];
+ struct u8_line *line = &a->lines[y];
if (a->squeeze_blank_lines && y > 0 && line->width == 0)
any_blank = true;