#include <config.h>
#include "sys-file-writer.h"
#include "sfm-private.h"
-#include "message.h"
+#include <libpspp/message.h>
#include <stdlib.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <time.h>
-#if HAVE_UNISTD_H
-#include <unistd.h> /* Required by SunOS4. */
-#endif
-#include "alloc.h"
+#include <unistd.h>
+#include <libpspp/alloc.h>
#include "case.h"
#include "dictionary.h"
-#include "message.h"
+#include <libpspp/message.h>
#include "file-handle-def.h"
-#include "hash.h"
-#include "magic.h"
-#include "misc.h"
+#include <libpspp/hash.h>
+#include <libpspp/magic.h>
+#include <libpspp/misc.h>
#include "settings.h"
#include "stat-macros.h"
-#include "str.h"
+#include <libpspp/str.h>
#include "value-labels.h"
#include "variable.h"
-#include "version.h"
+#include <libpspp/version.h>
+#include <minmax.h>
#include "gettext.h"
#define _(msgid) gettext (msgid)
-#include "debug-print.h"
-
/* Compression bias used by PSPP. Values between (1 -
COMPRESSION_BIAS) and (251 - COMPRESSION_BIAS) inclusive can be
compressed. */
int compress; /* 1=compressed, 0=not compressed. */
int case_cnt; /* Number of cases written so far. */
size_t flt64_cnt; /* Number of flt64 elements in case. */
+ bool has_vls; /* Does the dict have very long strings? */
/* Compression buffering. */
flt64 *buf; /* Buffered data. */
/* Variables. */
struct sfm_var *vars; /* Variables. */
size_t var_cnt; /* Number of variables. */
+ size_t var_cnt_vls; /* Number of variables including
+ very long string components. */
};
/* A variable in a system file. */
static char *append_string_max (char *, const char *, const char *);
static void write_header (struct sfm_writer *, const struct dictionary *);
static void buf_write (struct sfm_writer *, const void *, size_t);
-static void write_variable (struct sfm_writer *, struct variable *);
+static void write_variable (struct sfm_writer *, const struct variable *);
static void write_value_labels (struct sfm_writer *,
struct variable *, int idx);
static void write_rec_7_34 (struct sfm_writer *);
static void write_longvar_table (struct sfm_writer *w,
const struct dictionary *dict);
+static void write_vls_length_table (struct sfm_writer *w,
+ const struct dictionary *dict);
+
+
static void write_variable_display_parameters (struct sfm_writer *w,
const struct dictionary *dict);
static void write_documents (struct sfm_writer *, const struct dictionary *);
-static int does_dict_need_translation (const struct dictionary *);
static inline int
var_flt64_cnt (const struct variable *v)
+{
+ assert(sizeof(flt64) == MAX_SHORT_STRING); /* The division below treats one flt64 as exactly MAX_SHORT_STRING bytes. */
+ return width_to_bytes(v->width) / MAX_SHORT_STRING ; /* V's size as reported by width_to_bytes, expressed in flt64 units. */
+}
+/* Returns 1 if V is numeric; otherwise returns V's width rounded up
+   to a whole number of flt64 units.
+   NOTE(review): "nom" presumably means "nominal" -- confirm. */
+static inline int
+var_flt64_cnt_nom (const struct variable *v)
{
return v->type == NUMERIC ? 1 : DIV_RND_UP (v->width, sizeof (flt64));
}
+
/* Returns default options for writing a system file. */
struct sfm_write_options
sfm_writer_default_options (void)
return opts;
}
+
+/* Builds the short name for continuation segment IDX of the variable
+   whose short name is SN: at most SHORT_NAME_LEN - 3 leading
+   characters of SN, followed by IDX in decimal, zero-padded to at
+   least three digits.  The whole result is truncated to
+   SHORT_NAME_LEN characters.
+
+   The result lives in a static buffer that the next call overwrites,
+   so the caller must copy it before calling this function again.
+
+   FIXME: Need to resolve clashes somehow. */
+static const char *
+cont_var_name (const char *sn, int idx)
+{
+  static char name[SHORT_NAME_LEN + 1];
+
+  /* The precision caps how much of SN is used, which leaves room for
+     the numeric suffix. */
+  snprintf (name, sizeof name, "%.*s%03d",
+            (int) (SHORT_NAME_LEN - 3), sn, idx);
+
+  return name;
+}
+
+
/* Opens the system file designated by file handle FH for writing
cases from dictionary D according to the given OPTS. If
COMPRESS is nonzero, the system file will be compressed.
mode = S_IRUSR | S_IRGRP | S_IROTH;
if (opts.create_writeable)
mode |= S_IWUSR | S_IWGRP | S_IWOTH;
- fd = open (fh_get_filename (fh), O_WRONLY | O_CREAT | O_TRUNC, mode);
+ fd = open (fh_get_file_name (fh), O_WRONLY | O_CREAT | O_TRUNC, mode);
if (fd < 0)
goto open_error;
w->fh = fh;
w->file = fdopen (fd, "w");
- w->needs_translation = does_dict_need_translation (d);
+ w->needs_translation = dict_compacting_would_change (d);
w->compress = opts.compress;
w->case_cnt = 0;
w->flt64_cnt = 0;
+ w->has_vls = false;
w->buf = w->end = w->ptr = NULL;
w->x = w->y = NULL;
w->var_cnt = dict_get_var_cnt (d);
+ w->var_cnt_vls = w->var_cnt;
w->vars = xnmalloc (w->var_cnt, sizeof *w->vars);
for (i = 0; i < w->var_cnt; i++)
{
const struct variable *dv = dict_get_var (d, i);
struct sfm_var *sv = &w->vars[i];
sv->width = dv->width;
+ /* SPSS compatibility: note whether any variable is wider than MAX_LONG_STRING. */
+ if ( dv->width > MAX_LONG_STRING )
+ w->has_vls = true;
+
sv->fv = dv->fv;
sv->flt64_cnt = var_flt64_cnt (dv);
}
/* Write basic variable info. */
dict_assign_short_names (d);
for (i = 0; i < dict_get_var_cnt (d); i++)
- write_variable (w, dict_get_var (d, i));
+ {
+ int count = 0;
+ const struct variable *v = dict_get_var(d, i);
+ int wcount = v->width;
+
+ do {
+ struct variable var_cont = *v;
+ if ( v->type == ALPHA)
+ {
+ if ( 0 != count )
+ {
+ mv_init(&var_cont.miss, 0);
+ strcpy(var_cont.short_name,
+ cont_var_name(v->short_name, count));
+ var_cont.label = NULL;
+ w->var_cnt_vls++;
+ }
+ count++;
+ if ( wcount > MAX_LONG_STRING )
+ {
+ var_cont.width = MAX_LONG_STRING;
+ wcount -= EFFECTIVE_LONG_STRING_LENGTH;
+ }
+ else
+ {
+ var_cont.width = wcount;
+ wcount -= var_cont.width;
+ }
+
+ var_cont.write.w = var_cont.width;
+ var_cont.print.w = var_cont.width;
+ }
+
+ write_variable (w, &var_cont);
+ } while(wcount > 0);
+ }
/* Write out value labels. */
for (idx = i = 0; i < dict_get_var_cnt (d); i++)
if (opts.version >= 3)
write_longvar_table (w, d);
+ write_vls_length_table(w, d);
+
/* Write end-of-headers record. */
{
struct
{
- int32 rec_type P;
- int32 filler P;
+ int32_t rec_type P;
+ int32_t filler P;
}
rec_999;
open_error:
msg (ME, _("Error opening \"%s\" for writing as a system file: %s."),
- fh_get_filename (fh), strerror (errno));
+ fh_get_file_name (fh), strerror (errno));
goto error;
}
-static int
-does_dict_need_translation (const struct dictionary *d)
-{
- size_t case_idx;
- size_t i;
-
- case_idx = 0;
- for (i = 0; i < dict_get_var_cnt (d); i++)
- {
- struct variable *v = dict_get_var (d, i);
- if (v->fv != case_idx)
- return 0;
- case_idx += v->nv;
- }
- return 1;
-}
-
/* Returns value of X truncated to two least-significant digits. */
static int
rerange (int x)
w->flt64_cnt = 0;
for (i = 0; i < dict_get_var_cnt (d); i++)
- w->flt64_cnt += var_flt64_cnt (dict_get_var (d, i));
- hdr.case_size = w->flt64_cnt;
+ {
+ w->flt64_cnt += var_flt64_cnt (dict_get_var (d, i));
+ }
+ hdr.nominal_case_size = w->flt64_cnt;
hdr.compress = w->compress;
/* Translates format spec from internal form in SRC to system file
format in DEST. */
static inline void
-write_format_spec (struct fmt_spec *src, int32 *dest)
+write_format_spec (const struct fmt_spec *src, int32_t *dest) /* SRC is only read; the packed result is stored in *DEST. */
{
+ assert(check_output_specifier(src, true)); /* Compiled out under NDEBUG. NOTE(review): presumably checks that SRC is a valid output format -- confirm. */
*dest = (formats[src->type].spss << 16) | (src->w << 8) | src->d;
}
/* Write the variable record(s) for primary variable P and secondary
variable S to system file W. */
static void
-write_variable (struct sfm_writer *w, struct variable *v)
+write_variable (struct sfm_writer *w, const struct variable *v)
{
struct sysfile_variable sv;
int nm; /* Number of missing values, possibly negative. */
sv.rec_type = 2;
- sv.type = v->width;
+ sv.type = min(v->width, MAX_LONG_STRING);
sv.has_var_label = (v->label != NULL);
mv_copy (&mv, &v->miss);
{
struct label
{
- int32 label_len P;
+ int32_t label_len P;
char label[255] P;
}
l;
memset (&sv.write, 0, sizeof sv.write);
memset (&sv.name, 0, sizeof sv.name);
- pad_count = DIV_RND_UP (v->width, (int) sizeof (flt64)) - 1;
+ pad_count = DIV_RND_UP (min(v->width, MAX_LONG_STRING),
+ (int) sizeof (flt64)) - 1;
for (i = 0; i < pad_count; i++)
buf_write (w, &sv, sizeof sv);
}
{
struct value_label_rec
{
- int32 rec_type P;
- int32 n_labels P;
+ int32_t rec_type P;
+ int32_t n_labels P;
flt64 labels[1] P;
};
struct var_idx_rec
{
- int32 rec_type P;
- int32 n_vars P;
- int32 vars[1] P;
+ int32_t rec_type P;
+ int32_t n_vars P;
+ int32_t vars[1] P;
};
struct val_labs_iterator *i;
{
struct
{
- int32 rec_type P; /* Always 6. */
- int32 n_lines P; /* Number of lines of documents. */
+ int32_t rec_type P; /* Always 6. */
+ int32_t n_lines P; /* Number of lines of documents. */
}
rec_6;
struct
{
- int32 rec_type P;
- int32 subtype P;
- int32 elem_size P;
- int32 n_elem P;
+ int32_t rec_type P;
+ int32_t subtype P;
+ int32_t elem_size P;
+ int32_t n_elem P;
} vdp_hdr;
vdp_hdr.rec_type = 7;
vdp_hdr.subtype = 11;
vdp_hdr.elem_size = 4;
- vdp_hdr.n_elem = w->var_cnt * 3;
+ vdp_hdr.n_elem = w->var_cnt_vls * 3;
buf_write (w, &vdp_hdr, sizeof vdp_hdr);
struct variable *v;
struct
{
- int32 measure P;
- int32 width P;
- int32 align P;
+ int32_t measure P;
+ int32_t width P;
+ int32_t align P;
}
params;
params.align = v->alignment;
buf_write (w, ¶ms, sizeof(params));
+
+ if ( v->width > MAX_LONG_STRING )
+ {
+ int wcount = v->width - EFFECTIVE_LONG_STRING_LENGTH ;
+
+ while (wcount > 0)
+ {
+ params.width = wcount > MAX_LONG_STRING ? 32 : wcount;
+
+ buf_write (w, ¶ms, sizeof(params));
+
+ wcount -= EFFECTIVE_LONG_STRING_LENGTH ;
+ }
+ }
}
}
+/* Writes the table of lengths for very long string variables to
+   system file W as a record of type 7, subtype 14, with one-byte
+   elements.
+
+   For each variable in DICT whose width exceeds MAX_LONG_STRING, the
+   payload holds "SHORTNAME=WIDTH" (width zero-padded to five digits)
+   followed by a null byte and a tab.  Nothing at all is written if
+   DICT has no such variable. */
+static void
+write_vls_length_table (struct sfm_writer *w,
+			const struct dictionary *dict)
+{
+  struct
+    {
+      int32_t rec_type P;
+      int32_t subtype P;
+      int32_t elem_size P;
+      int32_t n_elem P;
+    }
+  vls_hdr;
+
+  struct string vls_length_map;
+  size_t i;	/* Unsigned, like the other dictionary loops in this file. */
+
+  ds_init_empty (&vls_length_map);
+
+  /* Collect one entry per very long string variable. */
+  for (i = 0; i < dict_get_var_cnt (dict); ++i)
+    {
+      const struct variable *v = dict_get_var (dict, i);
+
+      if ( v->width <= MAX_LONG_STRING )
+	continue;
+
+      ds_put_format (&vls_length_map, "%s=%05d", v->short_name, v->width);
+      ds_put_char (&vls_length_map, '\0');
+      ds_put_char (&vls_length_map, '\t');
+    }
+
+  vls_hdr.rec_type = 7;
+  vls_hdr.subtype = 14;
+  vls_hdr.elem_size = 1;
+  vls_hdr.n_elem = ds_length (&vls_length_map);
+
+  /* Only emit the record when there is something to put in it. */
+  if ( vls_hdr.n_elem > 0 )
+    {
+      buf_write (w, &vls_hdr, sizeof vls_hdr);
+      buf_write (w, ds_data (&vls_length_map), ds_length (&vls_length_map));
+    }
+
+  ds_destroy (&vls_length_map);
+}
+
/* Writes the long variable name table */
static void
write_longvar_table (struct sfm_writer *w, const struct dictionary *dict)
{
struct
{
- int32 rec_type P;
- int32 subtype P;
- int32 elem_size P;
- int32 n_elem P;
+ int32_t rec_type P;
+ int32_t subtype P;
+ int32_t elem_size P;
+ int32_t n_elem P;
}
lv_hdr;
struct string long_name_map;
size_t i;
- ds_init (&long_name_map, 10 * dict_get_var_cnt (dict));
+ ds_init_empty (&long_name_map);
for (i = 0; i < dict_get_var_cnt (dict); i++)
{
struct variable *v = dict_get_var (dict, i);
if (i)
- ds_putc (&long_name_map, '\t');
- ds_printf (&long_name_map, "%s=%s", v->short_name, v->name);
+ ds_put_char (&long_name_map, '\t');
+ ds_put_format (&long_name_map, "%s=%s", v->short_name, v->name);
}
lv_hdr.rec_type = 7;
{
struct
{
- int32 rec_type_3 P;
- int32 subtype_3 P;
- int32 data_type_3 P;
- int32 n_elem_3 P;
- int32 elem_3[8] P;
- int32 rec_type_4 P;
- int32 subtype_4 P;
- int32 data_type_4 P;
- int32 n_elem_4 P;
+ int32_t rec_type_3 P;
+ int32_t subtype_3 P;
+ int32_t data_type_3 P;
+ int32_t n_elem_3 P;
+ int32_t elem_3[8] P;
+ int32_t rec_type_4 P;
+ int32_t subtype_4 P;
+ int32_t data_type_4 P;
+ int32_t n_elem_4 P;
flt64 elem_4[3] P;
}
rec_7;
rec_7.rec_type_3 = 7;
rec_7.subtype_3 = 3;
- rec_7.data_type_3 = sizeof (int32);
+ rec_7.data_type_3 = sizeof (int32_t);
rec_7.n_elem_3 = 8;
rec_7.elem_3[0] = version_component[0];
rec_7.elem_3[1] = version_component[1];
w->case_cnt++;
if (!w->needs_translation && !w->compress
- && sizeof (flt64) == sizeof (union value))
+ && sizeof (flt64) == sizeof (union value) && ! w->has_vls )
{
/* Fast path: external and internal representations are the
same and the dictionary is properly ordered. Write
Write into a bounce buffer, then write to W. */
flt64 *bounce;
flt64 *bounce_cur;
+ flt64 *bounce_end;
size_t bounce_size;
size_t i;
bounce_size = sizeof *bounce * w->flt64_cnt;
bounce = bounce_cur = local_alloc (bounce_size);
+ bounce_end = bounce + bounce_size;
for (i = 0; i < w->var_cnt; i++)
{
struct sfm_var *v = &w->vars[i];
+ memset(bounce_cur, ' ', v->flt64_cnt * sizeof (flt64));
+
if (v->width == 0)
- *bounce_cur = case_num (c, v->fv);
+ {
+ *bounce_cur = case_num (c, v->fv);
+ bounce_cur += v->flt64_cnt;
+ }
else
- memcpy (bounce_cur, case_data (c, v->fv)->s, v->width);
- bounce_cur += v->flt64_cnt;
+ { int ofs = 0;
+ while (ofs < v->width)
+ {
+ int chunk = MIN (MAX_LONG_STRING, v->width - ofs);
+ int nv = DIV_RND_UP (chunk, sizeof (flt64));
+ buf_copy_rpad ((char *) bounce_cur, nv * sizeof (flt64),
+ case_data (c, v->fv)->s + ofs, chunk);
+ bounce_cur += nv;
+ ofs += chunk;
+ }
+ }
+
}
if (!w->compress)
if (ok && !fseek (w->file, offsetof (struct sysfile_header, case_cnt),
SEEK_SET))
{
- int32 case_cnt = w->case_cnt;
+ int32_t case_cnt = w->case_cnt;
fwrite (&case_cnt, sizeof case_cnt, 1, w->file);
clearerr (w->file);
}
if (!ok)
msg (ME, _("An I/O error occurred writing system file \"%s\"."),
- fh_get_filename (w->fh));
+ fh_get_file_name (w->fh));
}
fh_close (w->fh, "system file", "we");