/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-2000, 2006-2012 Free Software Foundation, Inc.
+ Copyright (C) 1997-2000, 2006-2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
/* Compression buffering.
- Compressed data is output as groups of 8 1-byte opcodes
- followed by up to 8 (depending on the opcodes) 8-byte data
- items. Data items and opcodes arrive at the same time but
- must be reordered for writing to disk, thus a small amount
- of buffering here. */
- uint8_t opcodes[8]; /* Buffered opcodes. */
- int opcode_cnt; /* Number of buffered opcodes. */
- uint8_t data[8][8]; /* Buffered data. */
- int data_cnt; /* Number of buffered data items. */
+ Compressed data is output as a series of 8-byte elements, with 1 to 9
+ such elements clustered together. The first element in a cluster is 8
+ 1-byte opcodes. Some opcodes call for an additional element in the
+ cluster (hence, if there are eight such opcodes, then the cluster
+ contains a full 9 elements).
+
+ cbuf[] holds a cluster at a time. */
+ uint8_t cbuf[9][8];
+ int n_opcodes; /* Number of opcodes in cbuf[0] so far. */
+ int n_elements; /* Number of elements in cbuf[] so far. */
/* Variables. */
struct sfm_var *sfm_vars; /* Variables. */
static void write_long_string_value_labels (struct sfm_writer *,
const struct dictionary *);
+static void write_long_string_missing_values (struct sfm_writer *,
+ const struct dictionary *);
static void write_mrsets (struct sfm_writer *, const struct dictionary *,
bool pre_v14);
w->compress = opts.compress;
w->case_cnt = 0;
- w->opcode_cnt = w->data_cnt = 0;
+ w->n_opcodes = w->n_elements = 0;
+ memset (w->cbuf[0], 0, 8);
/* Figure out how to map in-memory case data to on-disk case
data. Also count the number of segments. Very long strings
write_vls_length_table (w, d);
write_long_string_value_labels (w, d);
+ write_long_string_missing_values (w, d);
- if (attrset_count (dict_get_attributes (d)))
- write_data_file_attributes (w, d);
- write_variable_attributes (w, d);
+ if (opts.version >= 3)
+ {
+ if (attrset_count (dict_get_attributes (d)))
+ write_data_file_attributes (w, d);
+ write_variable_attributes (w, d);
+ }
write_mrsets (w, d, false);
int segment_cnt = sfm_width_to_segments (width);
int seg0_width = sfm_segment_alloc_width (width, 0);
const char *encoding = var_get_encoding (v);
- struct missing_values mv;
int i;
/* Record type. */
/* Number of missing values. If there is a range, then the
range counts as 2 missing values and causes the number to be
- negated. */
- mv_copy (&mv, var_get_missing_values (v));
- if (mv_get_width (&mv) > 8)
- mv_resize (&mv, 8);
- if (mv_has_range (&mv))
- write_int (w, -2 - mv_n_values (&mv));
+ negated.
+
+ Missing values for long string variables are written in a separate
+ record. */
+ if (width <= MAX_SHORT_STRING)
+ {
+ const struct missing_values *mv = var_get_missing_values (v);
+ if (mv_has_range (mv))
+ write_int (w, -2 - mv_n_values (mv));
+ else
+ write_int (w, mv_n_values (mv));
+ }
else
- write_int (w, mv_n_values (&mv));
+ write_int (w, 0);
/* Print and write formats. */
write_format (w, *var_get_print_format (v), seg0_width);
}
/* Write the missing values, if any, range first. */
- if (mv_has_range (&mv))
+ if (width <= MAX_SHORT_STRING)
{
- double x, y;
- mv_get_range (&mv, &x, &y);
- write_float (w, x);
- write_float (w, y);
+ const struct missing_values *mv = var_get_missing_values (v);
+ if (mv_has_range (mv))
+ {
+ double x, y;
+ mv_get_range (mv, &x, &y);
+ write_float (w, x);
+ write_float (w, y);
+ }
+ for (i = 0; i < mv_n_values (mv); i++)
+ write_value (w, mv_get_value (mv, i), width);
}
- for (i = 0; i < mv_n_values (&mv); i++)
- write_value (w, mv_get_value (&mv, i), mv_get_width (&mv));
write_variable_continuation_records (w, seg0_width);
write_variable_continuation_records (w, seg_width);
}
-
- mv_destroy (&mv);
}
/* Writes the value labels to system file W.
ds_destroy (&s);
}
+/* Replaces any existing "$@Role" attribute in ATTRS with a new one whose
+   single value is the string code for ROLE: "1" for ROLE_TARGET, "2" for
+   ROLE_BOTH, "3" for ROLE_NONE, "4" for ROLE_PARTITION, "5" for
+   ROLE_SPLIT, and "0" for ROLE_INPUT or any other value. */
+static void
+add_role_attribute (enum var_role role, struct attrset *attrs)
+{
+  /* ROLE_INPUT and unrecognized roles share the code "0". */
+  const char *code = "0";
+  struct attribute *role_attr;
+
+  if (role == ROLE_TARGET)
+    code = "1";
+  else if (role == ROLE_BOTH)
+    code = "2";
+  else if (role == ROLE_NONE)
+    code = "3";
+  else if (role == ROLE_PARTITION)
+    code = "4";
+  else if (role == ROLE_SPLIT)
+    code = "5";
+
+  /* Drop any old role attribute before adding the fresh one. */
+  attrset_delete (attrs, "$@Role");
+
+  role_attr = attribute_create ("$@Role");
+  attribute_add_value (role_attr, code);
+  attrset_add (attrs, role_attr);
+}
+
static void
write_variable_attributes (struct sfm_writer *w, const struct dictionary *d)
{
for (i = 0; i < n_vars; i++)
{
struct variable *v = dict_get_var (d, i);
- struct attrset *attrs = var_get_attributes (v);
- if (attrset_count (attrs))
- {
- if (n_attrsets++)
- ds_put_byte (&s, '/');
- ds_put_format (&s, "%s:", var_get_name (v));
- put_attrset (&s, attrs);
- }
+ struct attrset attrs;
+
+ attrset_clone (&attrs, var_get_attributes (v));
+
+ add_role_attribute (var_get_role (v), &attrs);
+ if (n_attrsets++)
+ ds_put_byte (&s, '/');
+ ds_put_format (&s, "%s:", var_get_name (v));
+ put_attrset (&s, &attrs);
+ attrset_destroy (&attrs);
}
if (n_attrsets)
write_utf8_record (w, dict_get_encoding (d), &s, 18);
ds_destroy (&map);
}
-
static void
write_long_string_value_labels (struct sfm_writer *w,
const struct dictionary *dict)
{
+ const char *encoding = dict_get_encoding (dict);
size_t n_vars = dict_get_var_cnt (dict);
size_t size, i;
off_t start UNUSED;
{
struct variable *var = dict_get_var (dict, i);
const struct val_labs *val_labs = var_get_value_labels (var);
- const char *encoding = var_get_encoding (var);
int width = var_get_width (var);
const struct val_lab *val_lab;
{
struct variable *var = dict_get_var (dict, i);
const struct val_labs *val_labs = var_get_value_labels (var);
- const char *encoding = var_get_encoding (var);
int width = var_get_width (var);
const struct val_lab *val_lab;
char *var_name;
assert (ftello (w->file) == start + size);
}
+/* Writes a record of type 7, subtype 22, to system file W, holding the
+   missing values of the variables in DICT that are wider than
+   MAX_SHORT_STRING.  Variables of width MAX_SHORT_STRING or less, and
+   variables without missing values, are skipped; if no variable
+   qualifies, nothing at all is written.
+
+   Each entry consists of the length of the variable's name (as
+   returned by recode_string() for the dictionary encoding and
+   "UTF-8"), the name itself, a one-byte count of missing values, and
+   then, for each missing value, the integer 8 followed by 8 bytes of
+   the value.
+
+   NOTE(review): the PSPP system file format documentation appears to
+   describe the value length as occurring once per variable rather
+   than before every value -- confirm against the reader before
+   changing the layout written here. */
+static void
+write_long_string_missing_values (struct sfm_writer *w,
+                                  const struct dictionary *dict)
+{
+  const char *encoding = dict_get_encoding (dict);
+  size_t n_vars = dict_get_var_cnt (dict);
+  size_t size, i;
+  off_t start UNUSED;
+
+  /* Figure out the size in advance, because it goes in the record
+     header ahead of the data. */
+  size = 0;
+  for (i = 0; i < n_vars; i++)
+    {
+      struct variable *var = dict_get_var (dict, i);
+      const struct missing_values *mv = var_get_missing_values (var);
+      int width = var_get_width (var);
+
+      /* Use the same threshold as the variable record, so that every
+         variable's missing values are written in exactly one place. */
+      if (mv_is_empty (mv) || width <= MAX_SHORT_STRING)
+        continue;
+
+      size += 4;                              /* Name length. */
+      size += recode_string_len (encoding, "UTF-8", var_get_name (var), -1);
+      size += 1;                              /* Number of missing values. */
+      size += mv_n_values (mv) * (4 + 8);     /* Length plus 8 bytes each. */
+    }
+  if (size == 0)
+    return;
+
+  write_int (w, 7);             /* Record type. */
+  write_int (w, 22);            /* Record subtype. */
+  write_int (w, 1);             /* Data item (byte) size. */
+  write_int (w, size);          /* Number of data items. */
+
+  start = ftello (w->file);
+  for (i = 0; i < n_vars; i++)
+    {
+      struct variable *var = dict_get_var (dict, i);
+      const struct missing_values *mv = var_get_missing_values (var);
+      int width = var_get_width (var);
+      uint8_t n_missing_values;
+      size_t name_len;
+      char *var_name;
+      int j;
+
+      /* Must skip exactly the variables that the sizing pass skipped. */
+      if (mv_is_empty (mv) || width <= MAX_SHORT_STRING)
+        continue;
+
+      var_name = recode_string (encoding, "UTF-8", var_get_name (var), -1);
+      name_len = strlen (var_name);
+      write_int (w, name_len);
+      write_bytes (w, var_name, name_len);
+      free (var_name);
+
+      n_missing_values = mv_n_values (mv);
+      write_bytes (w, &n_missing_values, 1);
+
+      for (j = 0; j < n_missing_values; j++)
+        {
+          const union value *value = mv_get_value (mv, j);
+
+          write_int (w, 8);
+          write_bytes (w, value_str (value, width), 8);
+        }
+    }
+  /* The bytes written must match the size announced in the header. */
+  assert (ftello (w->file) == start + size);
+}
+
static void
write_encoding_record (struct sfm_writer *w,
const struct dictionary *d)
if (w->file != NULL)
{
/* Flush buffer. */
- if (w->opcode_cnt > 0)
- flush_compressed (w);
+ flush_compressed (w);
fflush (w->file);
ok = !write_error (w);
&& d == (int) d)
put_cmp_opcode (w, (int) d + COMPRESSION_BIAS);
else
- {
- put_cmp_opcode (w, 253);
- put_cmp_number (w, d);
- }
+ put_cmp_number (w, d);
}
else
{
if (!memcmp (data, " ", chunk_size))
put_cmp_opcode (w, 254);
else
- {
- put_cmp_opcode (w, 253);
- put_cmp_string (w, data, chunk_size);
- }
+ put_cmp_string (w, data, chunk_size);
}
/* This code deals properly with padding that is not a
}
}
-/* Flushes buffered compressed opcodes and data to W.
The compression buffer must not be empty. */
+/* Flushes buffered compressed opcodes and data to W.
+   Does nothing when no opcodes are buffered.  Otherwise writes the
+   8-byte opcode element plus the N_ELEMENTS data elements that follow
+   it in cbuf[] with a single call, then resets both counters and
+   zeroes the opcode element so that unused opcode slots in the next
+   cluster are written as zero bytes. */
static void
flush_compressed (struct sfm_writer *w)
{
- assert (w->opcode_cnt > 0 && w->opcode_cnt <= 8);
-
- write_bytes (w, w->opcodes, w->opcode_cnt);
- write_zeros (w, 8 - w->opcode_cnt);
-
- write_bytes (w, w->data, w->data_cnt * sizeof *w->data);
-
- w->opcode_cnt = w->data_cnt = 0;
+ if (w->n_opcodes)
+ {
+ write_bytes (w, w->cbuf, 8 * (1 + w->n_elements)); /* cbuf[0] plus the data elements. */
+ w->n_opcodes = w->n_elements = 0;
+ memset (w->cbuf[0], 0, 8); /* Start the next cluster with all-zero opcodes. */
+ }
}
/* Appends OPCODE to the buffered set of compression opcodes in
static void
put_cmp_opcode (struct sfm_writer *w, uint8_t opcode)
{
- if (w->opcode_cnt >= 8)
+ if (w->n_opcodes >= 8) /* Opcode element is full: write the cluster out first. */
flush_compressed (w);
- w->opcodes[w->opcode_cnt++] = opcode;
+ w->cbuf[0][w->n_opcodes++] = opcode; /* Opcodes occupy element 0 of the cluster. */
}
-/* Appends NUMBER to the buffered compression data in W. The
- buffer must not be full; the way to assure that is to call
- this function only just after a call to put_cmp_opcode, which
- will flush the buffer as necessary. */
+/* Appends NUMBER to the buffered compression data in W.
+   Emits opcode 253 itself, so callers must not emit it separately;
+   that call flushes a full cluster first.  NUMBER is then converted
+   to the output format directly into the next data element of
+   cbuf[]. */
static void
put_cmp_number (struct sfm_writer *w, double number)
{
- assert (w->opcode_cnt > 0);
- assert (w->data_cnt < 8);
-
- convert_double_to_output_format (number, w->data[w->data_cnt++]);
+ put_cmp_opcode (w, 253);
+ convert_double_to_output_format (number, w->cbuf[++w->n_elements]); /* Pre-increment: element 0 holds the opcodes. */
}
/* Appends SIZE bytes of DATA to the buffered compression data in
W, followed by enough spaces to pad the output data to exactly
- 8 bytes (thus, SIZE must be no greater than 8). The buffer
- must not be full; the way to assure that is to call this
- function only just after a call to put_cmp_opcode, which will
- flush the buffer as necessary. */
+ 8 bytes (thus, SIZE must be no greater than 8).  Emits opcode 253
+   itself, so callers must not emit it separately; that call flushes
+   a full cluster before the new data element is claimed. */
static void
put_cmp_string (struct sfm_writer *w, const void *data, size_t size)
{
- assert (w->opcode_cnt > 0);
- assert (w->data_cnt < 8);
assert (size <= 8);
- memset (w->data[w->data_cnt], w->space, 8);
- memcpy (w->data[w->data_cnt], data, size);
- w->data_cnt++;
+ put_cmp_opcode (w, 253);
+ w->n_elements++; /* Claim the next data element; element 0 holds the opcodes. */
+ memset (w->cbuf[w->n_elements], w->space, 8); /* Pre-fill with the pad byte... */
+ memcpy (w->cbuf[w->n_elements], data, size); /* ...then overlay the SIZE data bytes. */
}
\f
/* Writes 32-bit integer X to the output file for writer W. */