+2007-07-23 Ben Pfaff <blp@gnu.org>
+
+ Improvements to system file reader and writer.
+
+ First, move all detailed knowledge of very long strings into
+ sys-file-private.[ch], so that this nasty stuff can be isolated.
+
+ * sys-file-private.c (REAL_VLS_CHUNK): New macro.
+ (EFFECTIVE_VLS_CHUNK): New macro.
+ (is_very_long): New function.
+ (min_int): New function.
+ (max_int): New function.
+ (sfm_width_to_bytes): Rewrite.
+ (sfm_width_to_octs): New function.
+ (sfm_segment_alloc_width): New function.
+ (sfm_segment_alloc_bytes): New function.
+ (sfm_segment_used_bytes): New function.
+ (sfm_segment_padding): New function.
+ (sfm_segment_offset): New function.
+ (sfm_segment_effective_offset): New function.
+ (sfm_dictionary_to_sfm_vars): New function.
+
+ * sys-file-private.h (MIN_VERY_LONG_STRING): Removed.
+ (EFFECTIVE_LONG_STRING_LENGTH): Removed.
+ (struct sfm_var): New structure.
+
+ Next, improvements to the system file reader.
+
+ * sys-file-reader.h (struct sfm_read_info): Changed `case_cnt' to
+ type casenumber. Added `version_major', `version_minor',
+ `version_revision'.
+
+ * sys-file-reader.c (struct sfm_reader): Replaced `flt64_cnt' by
+ `oct_cnt'. Rename `vars', `var_cnt' to `sfm_vars', `sfm_var_cnt'.
+ Change `case_cnt' to type casenumber. Removed `has_vls'.
+ (struct sfm_var): Removed.
+ (sfm_open_reader): Don't warn on wrong case size if the file was
+ written by SPSS 13, which tends to get it wrong. Use
+ sfm_dictionary_to_sfm_vars.
+ (read_header): Always output system file info.
+ (read_variable_record): Simplify code for reading missing values.
+ (read_machine_int32_info): Renamed read_machine_integer_info.
+ Save version numbers from system file into info struct passed as
+ new argument.
+ (read_machine_flt64_info): Renamed read_machine_float_info.
+ (read_long_string_map): Restructured to use new sys-file-private
+ functions.
+ (read_value_labels): Use size_overflow_p.
+ (sys_file_casereader_read): Get rid of distinction between fast
+ and slow paths. Use information provided by sys-file-private's
+ struct sfm_var to simplify code.
+ (skip_whole_strings): New function.
+ (read_int32): Renamed read_int. Changed return value to int.
+ Updated all callers.
+ (read_flt64): Renamed read_float. Changed return value to
+ double. Updated all callers.
+ (int32_to_native): Removed. Changed callers to use
+ integer_convert.
+ (flt64_to_double): Removed. Changed callers to use float_convert.
+
+ Finally, get rid of int32, flt64 terminology and types in system
+ file writer. The former wasn't very useful since a POSIX "int"
+ can hold the whole range of int32 and we generally didn't have a
+ need for it to be exactly-32-bits, just at-least-32-bits. The
+ latter was inconvenient because we had to assume that it could be
+ different from double and thereby convert special values SYSMIS,
+ HIGHEST, LOWEST to and from it in multiple places. Instead, now
+ we just use "int" and "double" in most places, and do conversions,
+ if necessary, very close to where we do I/O. This change meant
+ that the writer code couldn't represent records in the file as C
+ structs any longer, but that's no great loss. The code actually
+ seems to be more readable without them.
+
+ Simplify the compression buffering code: only buffer as much as
+ necessary, which is no more than eight 8-byte units at any given
+ time.
+
+ * sys-file-writer.c (typedef flt64): Removed.
+ (macro second_lowest_flt64): Removed.
+ (struct sysfile_header): Removed.
+ (struct sysfile_variable): Removed.
+ (struct sfm_writer): Removed `needs_translation', `has_vls',
+ `flt64_cnt'. Changed `compress' to type bool and `case_cnt' to
+ type casenumber. Renamed `vars' to `sfm_vars', `var_cnt' to
+ `sfm_var_cnt'. Replaced `buf', `end', `ptr', `x', `y' for
+ compression buffering by `opcodes', `opcode_cnt', `data',
+ `data_cnt'. Renamed `var_cnt_vls' as `segment_cnt'.
+ (sfm_open_writer): Use sfm_dictionary_to_sfm_vars. Use simple
+ data writer functions instead of structures.
+ (calc_oct_idx): New function.
+ (write_header): Use simple data writer functions instead of
+ structures.
+ (write_format_spec): Renamed write_format. New argument.
+ (write_variable_continuation_records): New function.
+ (write_variable): Use simple data writer functions instead of
+ structures. Use write_variable_continuation_records. Write
+ entire very long string instead of requiring caller to understand
+ them.
+ (write_value_labels): Use simple data writer functions instead of
+ structures.
+ (write_documents): Ditto.
+ (write_variable_display_parameters): Use sys-file-private
+ functions to simplify. Use simple data writer functions instead
+ of structures.
+ (write_vls_length_table): Use simple data writer functions instead
+ of structures.
+ (write_longvar_table): Ditto.
+ (write_rec_7_34): Break into new functions
+ write_integer_info_record, write_float_info_record. Use simple
+ data writer functions instead of structures.
+ (buf_write): Removed.
+ (append_string_max): Removed.
+ (ensure_buf_space): Removed.
+ (sys_file_casewriter_write): Get rid of the distinction between
+ fast and slow paths, which didn't seem to be too useful. Use new
+ functions write_case_uncompressed, write_case_compressed.
+ (put_instruction): Removed.
+ (put_element): Removed.
+ (write_compressed_data): Removed.
+ (close_writer): Use flush_compressed. Only write case count to
+ system file if it will fit in the field.
+ (write_case_compressed): New function.
+ (write_case_uncompressed): New function.
+ (flush_compressed): New function.
+ (put_cmp_opcode): New function.
+ (put_cmp_number): New function.
+ (write_int): New function.
+ (convert_double_to_output_format): New function.
+ (write_float): New function.
+ (write_value): New function.
+ (write_string): New function.
+ (write_bytes): New function.
+ (write_zeros): New function.
+ (write_spaces): New function.
+
2007-07-22 Ben Pfaff <blp@gnu.org>
Don't try to write very long strings to portable files. The
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
-#include "sys-file-private.h"
+#include <data/sys-file-private.h>
+
+#include <data/dictionary.h>
#include <data/value.h>
+#include <data/variable.h>
#include <libpspp/assertion.h>
-/* Return the number of bytes used when writing case_data for a variable
- of WIDTH */
-int
+#include "minmax.h"
+#include "xalloc.h"
+
+/* Number of bytes really stored in each segment of a very long
+ string variable. */
+#define REAL_VLS_CHUNK 255
+
+/* Number of bytes per segment by which the amount of space for
+ very long string variables is allocated. */
+#define EFFECTIVE_VLS_CHUNK 252
+
+/* Returns true if WIDTH is a very long string width, that is,
+ a width of 256 bytes or more, false otherwise. */
+static bool
+is_very_long (int width)
+{
+ return width >= 256;
+}
+
+/* Returns the smaller of A and B.
+ (Defined as a function, rather than using MIN directly at
+ each call site, to avoid evaluating A or B more than once.) */
+static int
+min_int (int a, int b)
+{
+ return MIN (a, b);
+}
+
+/* Returns the larger of A and B.
+ (Defined as a function, rather than using MAX directly at
+ each call site, to avoid evaluating A or B more than once.) */
+static int
+max_int (int a, int b)
+{
+ return MAX (a, b);
+}
+
+/* Returns the number of bytes of uncompressed case data used for
+ writing a variable of the given WIDTH to a system file. All
+ required space is included, including trailing padding and
+ internal padding.
+
+ WIDTH must be nonnegative. A numeric variable (WIDTH of 0)
+ takes 8 bytes. A string that is not very long takes WIDTH
+ bytes. A very long string is counted in chunks of
+ EFFECTIVE_VLS_CHUNK bytes of WIDTH, each of which occupies
+ REAL_VLS_CHUNK bytes rounded up to a multiple of 8, plus the
+ bytes of WIDTH left over after the last whole chunk. In every
+ case the result is rounded up to a multiple of 8. */
+static int
sfm_width_to_bytes (int width)
{
+ int bytes;
+
assert (width >= 0);
if (width == 0)
- return MAX_SHORT_STRING;
- else if (width < MIN_VERY_LONG_STRING)
- return ROUND_UP (width, MAX_SHORT_STRING);
+ bytes = 8;
+ else if (!is_very_long (width))
+ bytes = width;
else
{
- int chunks = width / EFFECTIVE_LONG_STRING_LENGTH ;
- int remainder = width % EFFECTIVE_LONG_STRING_LENGTH ;
- int bytes = remainder + (chunks * MIN_VERY_LONG_STRING);
- return ROUND_UP (bytes, MAX_SHORT_STRING);
+ int chunks = width / EFFECTIVE_VLS_CHUNK;
+ int remainder = width % EFFECTIVE_VLS_CHUNK;
+ bytes = remainder + (chunks * ROUND_UP (REAL_VLS_CHUNK, 8));
}
+ return ROUND_UP (bytes, 8);
+}
+
+/* Returns the number of 8-byte units (octs) used to write data
+ for a variable of the given WIDTH. This is the byte count
+ computed by sfm_width_to_bytes, which is always a multiple of
+ 8, divided by 8. */
+int
+sfm_width_to_octs (int width)
+{
+ return sfm_width_to_bytes (width) / 8;
}
/* Returns the number of "segments" used for writing case data
{
assert (width >= 0);
- return (width < MIN_VERY_LONG_STRING ? 1
- : DIV_RND_UP (width, EFFECTIVE_LONG_STRING_LENGTH));
+ return !is_very_long (width) ? 1 : DIV_RND_UP (width, EFFECTIVE_VLS_CHUNK);
+}
+
+/* Returns the width to allocate to the given SEGMENT within a
+ variable of the given WIDTH. A segment is a physical variable
+ in the system file that represents some piece of a logical
+ variable as seen by a PSPP user.
+
+ SEGMENT must be less than sfm_width_to_segments (WIDTH). A
+ variable that is not a very long string has a single segment
+ whose allocated width is WIDTH itself. In a very long
+ string, every segment but the last is allocated a width of
+ 255, and the last segment is allocated whatever remains of
+ WIDTH after counting EFFECTIVE_VLS_CHUNK bytes for each
+ earlier segment. */
+int
+sfm_segment_alloc_width (int width, int segment)
+{
+ assert (segment < sfm_width_to_segments (width));
+
+ return (!is_very_long (width) ? width
+ : segment < sfm_width_to_segments (width) - 1 ? 255
+ : width - segment * EFFECTIVE_VLS_CHUNK);
+}
+
+/* Returns the number of bytes to allocate to the given SEGMENT
+ within a variable of the given width. This is the same as
+ sfm_segment_alloc_width, except that a numeric value takes up
+ 8 bytes despite having a width of 0, and that the width of a
+ string segment is rounded up to a multiple of 8 bytes.
+ SEGMENT must be less than sfm_width_to_segments (WIDTH). */
+static int
+sfm_segment_alloc_bytes (int width, int segment)
+{
+ assert (segment < sfm_width_to_segments (width));
+ return (width == 0 ? 8
+ : ROUND_UP (sfm_segment_alloc_width (width, segment), 8));
+}
+
+/* Returns the number of bytes in the given SEGMENT within a
+ variable of the given WIDTH that are actually used to store
+ data. For a numeric value (WIDTH of 0), this is 8 bytes; for
+ a string value less than 256 bytes wide, it is WIDTH bytes.
+ For very long string values, the calculation is more
+ complicated and ranges between 255 bytes for the first segment
+ to as little as 0 bytes for final segments: it is the part of
+ WIDTH that lies beyond REAL_VLS_CHUNK * SEGMENT bytes, limited
+ to at most REAL_VLS_CHUNK bytes and never less than 0.
+ SEGMENT must be less than sfm_width_to_segments (WIDTH). */
+static int
+sfm_segment_used_bytes (int width, int segment)
+{
+ assert (segment < sfm_width_to_segments (width));
+ return (width == 0 ? 8
+ : !is_very_long (width) ? width
+ : max_int (0, min_int (width - REAL_VLS_CHUNK * segment,
+ REAL_VLS_CHUNK)));
+}
+
+/* Returns the number of bytes at the end of the given SEGMENT
+ within a variable of the given WIDTH that are not used for
+ data; that is, the number of bytes that must be padded with
+ data that a reader ignores. This is the difference between
+ the bytes allocated to the segment (sfm_segment_alloc_bytes)
+ and the bytes it uses for data (sfm_segment_used_bytes). */
+static int
+sfm_segment_padding (int width, int segment)
+{
+ return (sfm_segment_alloc_bytes (width, segment)
+ - sfm_segment_used_bytes (width, segment));
+}
+
+/* Returns the byte offset of the start of the given SEGMENT
+ within a variable of the given WIDTH. The first segment
+ starts at offset 0; only very long string variables have any
+ other segments. Each later segment starts REAL_VLS_CHUNK
+ bytes after the previous one, except that the offset is never
+ greater than WIDTH. SEGMENT must be less than
+ sfm_width_to_segments (WIDTH). */
+static int
+sfm_segment_offset (int width, int segment)
+{
+ assert (segment < sfm_width_to_segments (width));
+ return min_int (REAL_VLS_CHUNK * segment, width);
+}
+
+/* Returns the byte offset of the start of the given SEGMENT
+ within a variable of the given WIDTH, given the (incorrect)
+ assumption that there are EFFECTIVE_VLS_CHUNK bytes per
+ segment. (Use of this function is questionable at best.)
+ WIDTH is used only to check that SEGMENT is less than
+ sfm_width_to_segments (WIDTH); it does not otherwise affect
+ the result. */
+int
+sfm_segment_effective_offset (int width, int segment)
+{
+ assert (segment < sfm_width_to_segments (width));
+ return EFFECTIVE_VLS_CHUNK * segment;
+}
+
+/* Creates and initializes an array of struct sfm_vars that
+ describe how a case drawn from dictionary DICT is laid out in
+ a system file. Returns the number of segments in a case. A
+ segment is a physical variable in the system file that
+ represents some piece of a logical variable as seen by a PSPP
+ user.
+
+ The array is allocated with malloc and stored in *SFM_VARS,
+ and its number of elements is stored in *SFM_VAR_CNT. The
+ caller is responsible for freeing it when it is no longer
+ needed.
+
+ The return value can be greater than *SFM_VAR_CNT: a segment
+ of a very long string that consists entirely of padding is
+ counted in the return value but does not get an element of
+ its own; its bytes are added to the padding of the preceding
+ element instead. */
+int
+sfm_dictionary_to_sfm_vars (const struct dictionary *dict,
+ struct sfm_var **sfm_vars, size_t *sfm_var_cnt)
+{
+ size_t var_cnt = dict_get_var_cnt (dict);
+ size_t segment_cnt;
+ size_t i;
+
+ /* Estimate the number of sfm_vars that will be needed.
+ We might not need all of these, because very long string
+ variables can have segments that are all padding, which do
+ not need sfm_vars of their own. */
+ segment_cnt = 0;
+ for (i = 0; i < var_cnt; i++)
+ {
+ const struct variable *v = dict_get_var (dict, i);
+ segment_cnt += sfm_width_to_segments (var_get_width (v));
+ }
+
+ /* Compose the sfm_vars. The array is sized for the estimate
+ above, one element per segment, so it is always large
+ enough. */
+ *sfm_vars = xnmalloc (segment_cnt, sizeof **sfm_vars);
+ *sfm_var_cnt = 0;
+ for (i = 0; i < var_cnt; i++)
+ {
+ const struct variable *dv = dict_get_var (dict, i);
+ int width = var_get_width (dv);
+ int j;
+
+ for (j = 0; j < sfm_width_to_segments (width); j++)
+ {
+ int used_bytes = sfm_segment_used_bytes (width, j);
+ int padding = sfm_segment_padding (width, j);
+ struct sfm_var *sv;
+ if (used_bytes != 0)
+ {
+ sv = &(*sfm_vars)[(*sfm_var_cnt)++];
+ sv->width = width == 0 ? 0 : used_bytes;
+ sv->case_index = var_get_case_index (dv);
+ sv->offset = sfm_segment_offset (width, j);
+ sv->padding = padding;
+ }
+ else
+ {
+ /* Segment is all padding. Just add it to the
+ previous segment. (Otherwise we'd have an
+ ambiguity whether ->width of 0 indicates a
+ numeric variable or an all-padding segment.) */
+ sv = &(*sfm_vars)[*sfm_var_cnt - 1];
+ sv->padding += padding;
+ }
+ assert ((sv->width + sv->padding) % 8 == 0);
+ }
+ }
+
+ return segment_cnt;
}
/* PSPP - a program for statistical analysis.
- Copyright (C) 2006 Free Software Foundation, Inc.
+ Copyright (C) 2006, 2007 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
+/* Infrastructure common to system file reader and writer.
+
+ Old versions of SPSS limited string variables to a width of
+ 255 bytes. For backward compatibility with these older
+ versions, the system file format represents a string longer
+ than 255 bytes, called a "very long string", as a collection
+ of strings no longer than 255 bytes each. The strings
+ concatenated to make a very long string are called its
+ "segments"; for consistency, variables other than very long
+ strings are considered to have a single segment.
+
+ The interfaces in this file primarily provide support for
+ dealing with very long strings. */
+
#ifndef DATA_SYS_FILE_PRIVATE_H
#define DATA_SYS_FILE_PRIVATE_H 1
-/* This nonsense is required for SPSS compatibility. */
+#include <stddef.h>
+
+struct dictionary;
-#define MIN_VERY_LONG_STRING 256
-#define EFFECTIVE_LONG_STRING_LENGTH (MIN_VERY_LONG_STRING - 4)
+/* A variable in a system file. sfm_dictionary_to_sfm_vars
+ creates an array of these from a dictionary; a very long
+ string variable gets one of these for each of its segments
+ that holds data. */
+struct sfm_var
+ {
+ int width; /* Value width (0=numeric, else string). */
+ int case_index; /* Index into case. */
-int sfm_width_to_bytes (int width);
+ /* The following members are interesting only for string
+ variables (width != 0). For numeric variables (width ==
+ 0) their values are always 0.
+
+ Note: width + padding is always a multiple of 8. */
+ int offset; /* Offset within string variable in case. */
+ int padding; /* Number of padding bytes following data. */
+ };
+
+int sfm_dictionary_to_sfm_vars (const struct dictionary *,
+ struct sfm_var **, size_t *);
+
+int sfm_width_to_octs (int width);
int sfm_width_to_segments (int width);
+int sfm_segment_effective_offset (int width, int segment);
+int sfm_segment_alloc_width (int width, int segment);
+
#endif /* data/sys-file-private.h */
/* File format. */
enum integer_format integer_format; /* On-disk integer format. */
enum float_format float_format; /* On-disk floating point format. */
- int flt64_cnt; /* Number of 8-byte units per case. */
- struct sfm_var *vars; /* Variables. */
- size_t var_cnt; /* Number of variables. */
- int32_t case_cnt; /* Number of cases */
+ int oct_cnt; /* Number of 8-byte units per case. */
+ struct sfm_var *sfm_vars; /* Variables. */
+ size_t sfm_var_cnt; /* Number of variables. */
+ casenumber case_cnt; /* Number of cases */
bool has_long_var_names; /* File has a long variable name map */
- bool has_vls; /* File has one or more very long strings? */
/* Decompression. */
bool compressed; /* File is compressed? */
size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
};
-/* A variable in a system file. */
-struct sfm_var
- {
- int width; /* 0=numeric, otherwise string width. */
- int case_index; /* Index into case. */
- };
-
static struct casereader_class sys_file_casereader_class;
static bool close_reader (struct sfm_reader *);
static void read_bytes (struct sfm_reader *, void *, size_t);
static bool try_read_bytes (struct sfm_reader *, void *, size_t);
-static int32_t read_int32 (struct sfm_reader *);
-static double read_flt64 (struct sfm_reader *);
+static int read_int (struct sfm_reader *);
+static double read_float (struct sfm_reader *);
static void read_string (struct sfm_reader *, char *, size_t);
static void skip_bytes (struct sfm_reader *, size_t);
-static int32_t int32_to_native (const struct sfm_reader *, const uint8_t[4]);
-static double flt64_to_double (const struct sfm_reader *, const uint8_t[8]);
-
static struct variable_to_value_map *open_variable_to_value_map (
struct sfm_reader *, size_t size);
static void close_variable_to_value_map (struct sfm_reader *r,
};
static void read_header (struct sfm_reader *, struct dictionary *,
- int *weight_idx, int *claimed_flt64_cnt,
+ int *weight_idx, int *claimed_oct_cnt,
struct sfm_read_info *);
static void read_variable_record (struct sfm_reader *, struct dictionary *,
int *format_warning_cnt);
-static void parse_format_spec (struct sfm_reader *, uint32_t,
+static void parse_format_spec (struct sfm_reader *, unsigned int,
enum which_format, struct variable *,
int *format_warning_cnt);
static void setup_weight (struct sfm_reader *, int weight_idx,
static void read_value_labels (struct sfm_reader *, struct dictionary *,
struct variable **var_by_value_idx);
-static void read_extension_record (struct sfm_reader *, struct dictionary *);
-static void read_machine_int32_info (struct sfm_reader *,
- size_t size, size_t count);
-static void read_machine_flt64_info (struct sfm_reader *,
+static void read_extension_record (struct sfm_reader *, struct dictionary *,
+ struct sfm_read_info *);
+static void read_machine_integer_info (struct sfm_reader *,
+ size_t size, size_t count,
+ struct sfm_read_info *);
+static void read_machine_float_info (struct sfm_reader *,
size_t size, size_t count);
static void read_display_parameters (struct sfm_reader *,
size_t size, size_t count,
system file. */
struct casereader *
sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
- struct sfm_read_info *info)
+ struct sfm_read_info *volatile info)
{
struct sfm_reader *volatile r = NULL;
struct variable **var_by_value_idx;
+ struct sfm_read_info local_info;
int format_warning_cnt = 0;
int weight_idx;
- int claimed_flt64_cnt;
+ int claimed_oct_cnt;
int rec_type;
- size_t i;
if (!fh_open (fh, FH_REF_FILE, "system file", "rs"))
return NULL;
r->fh = fh;
r->file = fn_open (fh_get_file_name (fh), "rb");
r->error = false;
- r->flt64_cnt = 0;
- r->has_vls = false;
+ r->oct_cnt = 0;
r->has_long_var_names = false;
r->opcode_idx = sizeof r->opcodes;
+ /* Initialize info. */
+ if (info == NULL)
+ info = &local_info;
+ memset (info, 0, sizeof *info);
+
if (setjmp (r->bail_out))
{
close_reader (r);
}
/* Read header. */
- read_header (r, *dict, &weight_idx, &claimed_flt64_cnt, info);
+ read_header (r, *dict, &weight_idx, &claimed_oct_cnt, info);
/* Read all the variable definition records. */
- rec_type = read_int32 (r);
+ rec_type = read_int (r);
while (rec_type == 2)
{
read_variable_record (r, *dict, &format_warning_cnt);
- rec_type = read_int32 (r);
+ rec_type = read_int (r);
}
/* Figure out the case format. */
break;
case 7:
- read_extension_record (r, *dict);
+ read_extension_record (r, *dict, info);
break;
default:
sys_error (r, _("Unrecognized record type %d."), rec_type);
}
- rec_type = read_int32 (r);
+ rec_type = read_int (r);
}
}
/* Read record 999 data, which is just filler. */
- read_int32 (r);
-
- if (claimed_flt64_cnt != -1 && claimed_flt64_cnt != r->flt64_cnt)
+ read_int (r);
+
+ /* Warn if the actual amount of data per case differs from the
+ amount that the header claims. SPSS version 13 gets this
+ wrong when very long strings are involved, so don't warn in
+ that case. */
+ if (claimed_oct_cnt != -1 && claimed_oct_cnt != r->oct_cnt
+ && info->version_major != 13)
sys_warn (r, _("File header claims %d variable positions but "
"%d were read from file."),
- claimed_flt64_cnt, r->flt64_cnt);
+ claimed_oct_cnt, r->oct_cnt);
/* Create an index of dictionary variable widths for
sfm_read_case to use. We cannot use the `struct variable's
from the dictionary we created, because the caller owns the
dictionary and may destroy or modify its variables. */
- r->var_cnt = dict_get_var_cnt (*dict);
- r->vars = pool_nalloc (r->pool, r->var_cnt, sizeof *r->vars);
- for (i = 0; i < r->var_cnt; i++)
- {
- struct variable *v = dict_get_var (*dict, i);
- struct sfm_var *sv = &r->vars[i];
- sv->width = var_get_width (v);
- sv->case_index = var_get_case_index (v);
- }
+ sfm_dictionary_to_sfm_vars (*dict, &r->sfm_vars, &r->sfm_var_cnt);
+ pool_register (r->pool, free, r->sfm_vars);
pool_free (r->pool, var_by_value_idx);
r->value_cnt = dict_get_next_value_idx (*dict);
Sets DICT's file label to the system file's label.
Sets *WEIGHT_IDX to 0 if the system file is unweighted,
or to the value index of the weight variable otherwise.
- Sets *CLAIMED_FLT64_CNT to the number of values that the file
- claims to have (although it is not always correct).
- If INFO is non-null, initializes *INFO with header
- information. */
+ Sets *CLAIMED_OCT_CNT to the number of "octs" (8-byte units)
+ per case that the file claims to have (although it is not
+ always correct).
+ Initializes INFO with header information. */
static void
read_header (struct sfm_reader *r, struct dictionary *dict,
- int *weight_idx, int *claimed_flt64_cnt,
+ int *weight_idx, int *claimed_oct_cnt,
struct sfm_read_info *info)
{
char rec_type[5];
char creation_time[9];
char file_label[65];
struct substring file_label_ss;
+ struct substring product;
read_string (r, rec_type, sizeof rec_type);
read_string (r, eye_catcher, sizeof eye_catcher);
&& r->integer_format != INTEGER_LSB_FIRST))
sys_error (r, _("This is not an SPSS system file."));
- *claimed_flt64_cnt = read_int32 (r);
- if (*claimed_flt64_cnt < 0 || *claimed_flt64_cnt > INT_MAX / 16)
- *claimed_flt64_cnt = -1;
+ *claimed_oct_cnt = read_int (r);
+ if (*claimed_oct_cnt < 0 || *claimed_oct_cnt > INT_MAX / 16)
+ *claimed_oct_cnt = -1;
- r->compressed = read_int32 (r) != 0;
+ r->compressed = read_int (r) != 0;
- *weight_idx = read_int32 (r);
+ *weight_idx = read_int (r);
- r->case_cnt = read_int32 (r);
+ r->case_cnt = read_int (r);
if ( r->case_cnt > INT_MAX / 2)
r->case_cnt = -1;
dict_set_label (dict, ss_data (file_label_ss));
}
- if (info)
- {
- struct substring product;
-
- strcpy (info->creation_date, creation_date);
- strcpy (info->creation_time, creation_time);
- info->integer_format = r->integer_format;
- info->float_format = r->float_format;
- info->compressed = r->compressed;
- info->case_cnt = r->case_cnt;
-
- product = ss_cstr (eye_catcher);
- ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
- ss_trim (&product, ss_cstr (" "));
- str_copy_buf_trunc (info->product, sizeof info->product,
- ss_data (product), ss_length (product));
- }
+ strcpy (info->creation_date, creation_date);
+ strcpy (info->creation_time, creation_time);
+ info->integer_format = r->integer_format;
+ info->float_format = r->float_format;
+ info->compressed = r->compressed;
+ info->case_cnt = r->case_cnt;
+
+ product = ss_cstr (eye_catcher);
+ ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
+ ss_trim (&product, ss_cstr (" "));
+ str_copy_buf_trunc (info->product, sizeof info->product,
+ ss_data (product), ss_length (product));
}
/* Reads a variable (type 2) record from R and adds the
struct variable *var;
int nv;
- width = read_int32 (r);
- has_variable_label = read_int32 (r);
- missing_value_code = read_int32 (r);
- print_format = read_int32 (r);
- write_format = read_int32 (r);
+ width = read_int (r);
+ has_variable_label = read_int (r);
+ missing_value_code = read_int (r);
+ print_format = read_int (r);
+ write_format = read_int (r);
read_string (r, name, sizeof name);
name[strcspn (name, " ")] = '\0';
size_t len;
char label[255 + 1];
- len = read_int32 (r);
+ len = read_int (r);
if (len >= sizeof label)
sys_error (r, _("Variable %s has label of invalid length %u."),
name, (unsigned int) len);
}
/* Set missing values. */
- if (missing_value_code < -3 || missing_value_code > 3
- || missing_value_code == -1)
- sys_error (r, _("Missing value indicator field is not "
- "-3, -2, 0, 1, 2, or 3."));
if (missing_value_code != 0)
{
struct missing_values mv;
+ int i;
+
mv_init (&mv, var_get_width (var));
if (var_is_numeric (var))
{
- if (missing_value_code > 0)
+ if (missing_value_code < -3 || missing_value_code > 3
+ || missing_value_code == -1)
+ sys_error (r, _("Numeric missing value indicator field is not "
+ "-3, -2, 0, 1, 2, or 3."));
+ if (missing_value_code < 0)
{
- int i;
- for (i = 0; i < missing_value_code; i++)
- mv_add_num (&mv, read_flt64 (r));
- }
- else
- {
- double low = read_flt64 (r);
- double high = read_flt64 (r);
+ double low = read_float (r);
+ double high = read_float (r);
mv_add_num_range (&mv, low, high);
- if (missing_value_code == -3)
- mv_add_num (&mv, read_flt64 (r));
+ missing_value_code = -missing_value_code - 2;
}
+ for (i = 0; i < missing_value_code; i++)
+ mv_add_num (&mv, read_float (r));
}
else if (var_get_width (var) <= MAX_SHORT_STRING)
{
- if (missing_value_code > 0)
+ if (missing_value_code < 1 || missing_value_code > 3)
+ sys_error (r, _("String missing value indicator field is not "
+ "0, 1, 2, or 3."));
+ for (i = 0; i < missing_value_code; i++)
{
- int i;
- for (i = 0; i < missing_value_code; i++)
- {
- char string[9];
- read_string (r, string, sizeof string);
- mv_add_str (&mv, string);
- }
+ char string[9];
+ read_string (r, string, sizeof string);
+ mv_add_str (&mv, string);
}
- else
- sys_error (r, _("String variable %s may not have missing "
- "values specified as a range."),
- name);
}
- else /* var->width > MAX_SHORT_STRING */
+ else
sys_error (r, _("Long string variable %s may not have missing "
- "values."),
- name);
+ "values."), name);
var_set_missing_values (var, &mv);
}
/* Account for values.
Skip long string continuation records, if any. */
nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
- r->flt64_cnt += nv;
+ r->oct_cnt += nv;
if (width > 8)
{
int i;
for (i = 1; i < nv; i++)
{
/* Check for record type 2 and width -1. */
- if (read_int32 (r) != 2 || read_int32 (r) != -1)
+ if (read_int (r) != 2 || read_int (r) != -1)
sys_error (r, _("Missing string continuation record."));
/* Skip and ignore remaining continuation data. */
- has_variable_label = read_int32 (r);
- missing_value_code = read_int32 (r);
- print_format = read_int32 (r);
- write_format = read_int32 (r);
+ has_variable_label = read_int (r);
+ missing_value_code = read_int (r);
+ print_format = read_int (r);
+ write_format = read_int (r);
read_string (r, name, sizeof name);
/* Variable label fields on continuation records have
been spotted in system files created by "SPSS Power
Macintosh Release 6.1". */
if (has_variable_label)
- skip_bytes (r, ROUND_UP (read_int32 (r), 4));
+ skip_bytes (r, ROUND_UP (read_int (r), 4));
}
}
}
/* Translates the format spec from sysfile format to internal
format. */
static void
-parse_format_spec (struct sfm_reader *r, uint32_t s,
+parse_format_spec (struct sfm_reader *r, unsigned int s,
enum which_format which, struct variable *v,
int *format_warning_cnt)
{
if (dict_get_documents (dict) != NULL)
sys_error (r, _("Multiple type 6 (document) records."));
- line_cnt = read_int32 (r);
+ line_cnt = read_int (r);
if (line_cnt <= 0)
sys_error (r, _("Number of document lines (%d) "
"must be greater than 0."), line_cnt);
/* Read a type 7 extension record. */
static void
-read_extension_record (struct sfm_reader *r, struct dictionary *dict)
+read_extension_record (struct sfm_reader *r, struct dictionary *dict,
+ struct sfm_read_info *info)
{
- int subtype = read_int32 (r);
- size_t size = read_int32 (r);
- size_t count = read_int32 (r);
+ int subtype = read_int (r);
+ size_t size = read_int (r);
+ size_t count = read_int (r);
size_t bytes = size * count;
/* Check that SIZE * COUNT + 1 doesn't overflow. Adding 1
switch (subtype)
{
case 3:
- read_machine_int32_info (r, size, count);
+ read_machine_integer_info (r, size, count, info);
return;
case 4:
- read_machine_flt64_info (r, size, count);
+ read_machine_float_info (r, size, count);
return;
case 5:
/* Read record type 7, subtype 3. */
static void
-read_machine_int32_info (struct sfm_reader *r, size_t size, size_t count)
+read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count,
+ struct sfm_read_info *info)
{
- int version_major UNUSED = read_int32 (r);
- int version_minor UNUSED = read_int32 (r);
- int version_revision UNUSED = read_int32 (r);
- int machine_code UNUSED = read_int32 (r);
- int float_representation = read_int32 (r);
- int compression_code UNUSED = read_int32 (r);
- int integer_representation = read_int32 (r);
- int character_code UNUSED = read_int32 (r);
+ int version_major = read_int (r);
+ int version_minor = read_int (r);
+ int version_revision = read_int (r);
+ int machine_code UNUSED = read_int (r);
+ int float_representation = read_int (r);
+ int compression_code UNUSED = read_int (r);
+ int integer_representation = read_int (r);
+ int character_code UNUSED = read_int (r);
int expected_float_format;
int expected_integer_format;
"subtype 3."),
(unsigned int) size, (unsigned int) count);
+ /* Save version info. */
+ info->version_major = version_major;
+ info->version_minor = version_minor;
+ info->version_revision = version_revision;
+
/* Check floating point format. */
if (r->float_format == FLOAT_IEEE_DOUBLE_BE
|| r->float_format == FLOAT_IEEE_DOUBLE_LE)
/* Read record type 7, subtype 4. */
static void
-read_machine_flt64_info (struct sfm_reader *r, size_t size, size_t count)
+read_machine_float_info (struct sfm_reader *r, size_t size, size_t count)
{
- double sysmis = read_flt64 (r);
- double highest = read_flt64 (r);
- double lowest = read_flt64 (r);
+ double sysmis = read_float (r);
+ double highest = read_float (r);
+ double lowest = read_float (r);
if (size != 8 || count != 3)
sys_error (r, _("Bad size (%u) or count (%u) on extension 4."),
for (i = 0; i < n_vars; ++i)
{
- int measure = read_int32 (r);
- int width = read_int32 (r);
- int align = read_int32 (r);
struct variable *v = dict_get_var (dict, i);
+ int measure = read_int (r);
+ int width = read_int (r);
+ int align = read_int (r);
- /* spss v14 sometimes seems to set string variables' measure to zero */
- if ( 0 == measure && var_is_alpha (v) ) measure = 1;
-
+ /* SPSS 14 sometimes seems to set string variables' measure
+ to zero. */
+ if (0 == measure && var_is_alpha (v))
+ measure = 1;
if (measure < 1 || measure > 3 || align < 0 || align > 2)
{
afterward. */
short_name_cnt = var_get_short_name_cnt (var);
short_names = xnmalloc (short_name_cnt, sizeof *short_names);
- for (i = 0; i < short_name_cnt; i++)
+ for (i = 0; i < short_name_cnt; i++)
{
const char *s = var_get_short_name (var, i);
short_names[i] = s != NULL ? xstrdup (s) : NULL;
dict_rename_var (dict, var, long_name);
/* Restore short names. */
- for (i = 0; i < short_name_cnt; i++)
+ for (i = 0; i < short_name_cnt; i++)
{
var_set_short_name (var, i, short_names[i]);
free (short_names[i]);
char *length_s;
int warning_cnt = 0;
- r->has_vls = true;
-
map = open_variable_to_value_map (r, size * count);
while (read_variable_to_value_map (r, dict, map, &var, &length_s,
&warning_cnt))
{
- long length, remaining_length;
- size_t idx;
+ size_t idx = var_get_dict_index (var);
+ long int length;
+ int segment_cnt;
+ int i;
/* Get length. */
length = strtol (length_s, NULL, 10);
- if (length < MIN_VERY_LONG_STRING || length == LONG_MAX)
+ if (length < 1 || length > MAX_STRING)
{
- sys_warn (r, _("%s listed as string of length %s "
- "in length table."),
+ sys_warn (r, _("%s listed as string of invalid length %s "
+ "in very length string record."),
var_get_name (var), length_s);
continue;
}
- /* Group multiple variables into single variable
- and delete all but the first. */
- remaining_length = length;
- for (idx = var_get_dict_index (var); remaining_length > 0; idx++)
- if (idx < dict_get_var_cnt (dict))
- remaining_length -= MIN (var_get_width (dict_get_var (dict, idx)),
- EFFECTIVE_LONG_STRING_LENGTH);
- else
- sys_error (r, _("Very long string %s overflows dictionary."),
- var_get_name (var));
- dict_delete_consecutive_vars (dict,
- var_get_dict_index (var) + 1,
- idx - var_get_dict_index (var) - 1);
-
- /* Assign all the length to the first variable. */
+ /* Check segments. */
+ segment_cnt = sfm_width_to_segments (length);
+ if (segment_cnt == 1)
+ {
+ sys_warn (r, _("%s listed in very long string record with width %s, "
+ "which requires only one segment."),
+ var_get_name (var), length_s);
+ continue;
+ }
+ if (idx + segment_cnt > dict_get_var_cnt (dict))
+ sys_error (r, _("Very long string %s overflows dictionary."),
+ var_get_name (var));
+
+ /* Get the short names from the segments and check their
+ lengths. */
+ for (i = 0; i < segment_cnt; i++)
+ {
+ struct variable *seg = dict_get_var (dict, idx + i);
+ int alloc_width = sfm_segment_alloc_width (length, i);
+ int width = var_get_width (seg);
+
+ if (i > 0)
+ var_set_short_name (var, i, var_get_short_name (seg, 0));
+ if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
+ sys_error (r, _("Very long string with width %ld has segment %d "
+ "of width %d (expected %d)"),
+ length, i, width, alloc_width);
+ }
+ dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
var_set_width (var, length);
}
close_variable_to_value_map (r, map);
of numeric or string type. */
/* Read number of labels. */
- label_cnt = read_int32 (r);
+ label_cnt = read_int (r);
- if (label_cnt >= INT32_MAX / sizeof *labels)
+ if (size_overflow_p (xtimes (label_cnt, sizeof *labels)))
{
sys_warn (r, _("Invalid number of labels: %d. Ignoring labels."),
label_cnt);
to which the value labels are to be applied. */
/* Read record type of type 4 record. */
- if (read_int32 (r) != 4)
+ if (read_int (r) != 4)
sys_error (r, _("Variable index record (type 4) does not immediately "
"follow value label record (type 3) as it should."));
/* Read number of variables associated with value label from type 4
record. */
- var_cnt = read_int32 (r);
+ var_cnt = read_int (r);
if (var_cnt < 1 || var_cnt > dict_get_var_cnt (dict))
sys_error (r, _("Number of variables associated with a value label (%d) "
"is not between 1 and the number of variables (%u)."),
var = pool_nalloc (subpool, var_cnt, sizeof *var);
for (i = 0; i < var_cnt; i++)
{
- var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int32 (r));
+ var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int (r));
if (var_is_long_string (var[i]))
sys_error (r, _("Value labels are not allowed on long string "
"variables (%s)."), var_get_name (var[i]));
buf_copy_rpad (label->value.s, sizeof label->value.s,
label->raw_value, sizeof label->raw_value);
else
- label->value.f = flt64_to_double (r, (uint8_t *) label->raw_value);
+ label->value.f = float_get_double (r->float_format, label->raw_value);
}
/* Assign the `value_label's to each variable. */
static void read_error (struct casereader *, const struct sfm_reader *);
-
static bool read_case_number (struct sfm_reader *, double *);
static bool read_case_string (struct sfm_reader *, char *, size_t);
static int read_opcode (struct sfm_reader *);
static bool read_compressed_number (struct sfm_reader *, double *);
static bool read_compressed_string (struct sfm_reader *, char *);
static bool read_whole_strings (struct sfm_reader *, char *, size_t);
+static bool skip_whole_strings (struct sfm_reader *, size_t);
/* Reads one case from READER's file into C. Returns true only
if successful. */
struct ccase *c)
{
struct sfm_reader *r = r_;
+ int i;
+
if (r->error)
return false;
return false;
}
- if (!r->compressed && sizeof (double) == 8 && !r->has_vls)
+ for (i = 0; i < r->sfm_var_cnt; i++)
{
- /* Fast path. Read the whole case directly. */
- if (!try_read_bytes (r, case_data_all_rw (c),
- sizeof (union value) * r->flt64_cnt))
- {
- case_destroy (c);
- if ( r->case_cnt != -1 )
- read_error (reader, r);
- return false;
- }
+ struct sfm_var *sv = &r->sfm_vars[i];
+ union value *v = case_data_rw_idx (c, sv->case_index);
- /* Convert floating point numbers to native format if needed. */
- if (r->float_format != FLOAT_NATIVE_DOUBLE)
+ if (sv->width == 0)
{
- int i;
-
- for (i = 0; i < r->var_cnt; i++)
- if (r->vars[i].width == 0)
- {
- double *d = &case_data_rw_idx (c, r->vars[i].case_index)->f;
- float_convert (r->float_format, d, FLOAT_NATIVE_DOUBLE, d);
- }
+ if (!read_case_number (r, &v->f))
+ goto eof;
}
- return true;
- }
- else
- {
- /* Slow path. Convert from external to internal format. */
- int i;
-
- for (i = 0; i < r->var_cnt; i++)
+ else
{
- struct sfm_var *sv = &r->vars[i];
- union value *v = case_data_rw_idx (c, sv->case_index);
-
- if (sv->width == 0)
- {
- if (!read_case_number (r, &v->f))
- goto eof;
- }
- else
- {
- /* Read the string data in segments up to 255 bytes
- at a time, packed into 8-byte units. */
- const int max_chunk = MIN_VERY_LONG_STRING - 1;
- int ofs, chunk_size;
- for (ofs = 0; ofs < sv->width; ofs += chunk_size)
- {
- chunk_size = MIN (max_chunk, sv->width - ofs);
- if (!read_case_string (r, v->s + ofs, chunk_size))
- {
- if (ofs)
- partial_record (r);
- goto eof;
- }
- }
-
- /* Very long strings have trailing wasted space
- that we must skip. */
- if (sv->width >= MIN_VERY_LONG_STRING)
- {
- int bytes_read = (sv->width / max_chunk * 256
- + ROUND_UP (sv->width % max_chunk, 8));
- int total_bytes = sfm_width_to_bytes (sv->width);
- int excess_bytes = total_bytes - bytes_read;
-
- while (excess_bytes > 0)
- {
- char buffer[1024];
- size_t chunk = MIN (sizeof buffer, excess_bytes);
- if (!read_whole_strings (r, buffer, chunk))
- partial_record (r);
- excess_bytes -= chunk;
- }
- }
- }
+ if (!read_case_string (r, v->s + sv->offset, sv->width))
+ goto eof;
+ if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8)))
+ partial_record (r);
}
- return true;
-
- eof:
- case_destroy (c);
- if (i != 0)
- partial_record (r);
- if ( r->case_cnt != -1 )
- read_error (reader, r);
- return false;
}
+ return true;
+
+eof:
+ case_destroy (c);
+ if (i != 0)
+ partial_record (r);
+ if (r->case_cnt != -1)
+ read_error (reader, r);
+ return false;
}
/* Issues an error that R ends in a partial record. */
sys_error (r, _("File ends in partial case."));
}
+/* Issues an error that an unspecified error occurred while
+ reading a case from SFM, and marks R tainted. */
static void
read_error (struct casereader *r, const struct sfm_reader *sfm)
{
- msg (ME, _("Error reading case from file %s"), fh_get_name (sfm->fh));
+ msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
casereader_force_error (r);
}
{
if (!r->compressed)
{
- uint8_t flt64[8];
- if (!try_read_bytes (r, flt64, sizeof flt64))
+ uint8_t number[8];
+ if (!try_read_bytes (r, number, sizeof number))
return false;
- *d = flt64_to_double (r, flt64);
+ float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
return true;
}
else
return false;
case 253:
- *d = read_flt64 (r);
+ *d = read_float (r);
break;
case 254:
return true;
}
}
+
+/* Skips LENGTH string bytes from R by reading them into a
+   scratch buffer that is then discarded.
+   LENGTH must be a multiple of 8.
+   (LENGTH is also limited to 1024, the size of the scratch
+   buffer, but that's only because the current caller never
+   needs more than that many bytes.)
+   Returns the result of read_whole_strings: true if
+   successful, false if end of file is reached immediately. */
+static bool
+skip_whole_strings (struct sfm_reader *r, size_t length)
+{
+  char buffer[1024];
+
+  /* A LENGTH equal to sizeof buffer still fits, so the
+     documented limit of 1024 is inclusive. */
+  assert (length <= sizeof buffer);
+  return read_whole_strings (r, buffer, length);
+}
\f
/* Creates and returns a table that can be used for translating a value
index into a case to a "struct variable *" for DICT. Multiple
int i;
var_by_value_idx = pool_nmalloc (r->pool,
- r->flt64_cnt, sizeof *var_by_value_idx);
+ r->oct_cnt, sizeof *var_by_value_idx);
for (i = 0; i < dict_get_var_cnt (dict); i++)
{
struct variable *v = dict_get_var (dict, i);
for (j = 1; j < nv; j++)
var_by_value_idx[value_idx++] = NULL;
}
- assert (value_idx == r->flt64_cnt);
+ assert (value_idx == r->oct_cnt);
return var_by_value_idx;
}
{
struct variable *var;
- if (value_idx < 1 || value_idx > r->flt64_cnt)
+ if (value_idx < 1 || value_idx > r->oct_cnt)
sys_error (r, _("Variable index %d not in valid range 1...%d."),
- value_idx, r->flt64_cnt);
+ value_idx, r->oct_cnt);
var = var_by_value_idx[value_idx - 1];
if (var == NULL)
/* Reads a 32-bit signed integer from R and returns its value in
host format. */
-static int32_t
-read_int32 (struct sfm_reader *r)
+static int
+read_int (struct sfm_reader *r)
{
- uint8_t int32[4];
- read_bytes (r, int32, sizeof int32);
- return int32_to_native (r, int32);
+ uint8_t integer[4];
+ read_bytes (r, integer, sizeof integer);
+ /* Interpret the 4 raw bytes in the integer format recorded
+ in R. */
+ return integer_get (r->integer_format, integer, sizeof integer);
}
/* Reads a 64-bit floating-point number from R and returns its
value in host format. */
static double
-read_flt64 (struct sfm_reader *r)
+read_float (struct sfm_reader *r)
{
- uint8_t flt64[8];
- read_bytes (r, flt64, sizeof flt64);
- return flt64_to_double (r, flt64);
+ uint8_t number[8];
+ read_bytes (r, number, sizeof number);
+ /* Interpret the 8 raw bytes in the floating-point format
+ recorded in R. */
+ return float_get_double (r->float_format, number);
}
/* Reads exactly SIZE - 1 bytes into BUFFER
}
}
\f
-/* Returns the value of the 32-bit signed integer at INT32,
- converted from the format used by R to the host format. */
-static int32_t
-int32_to_native (const struct sfm_reader *r, const uint8_t int32[4])
-{
- int32_t x;
- if (r->integer_format == INTEGER_NATIVE)
- memcpy (&x, int32, sizeof x);
- else
- x = integer_get (r->integer_format, int32, sizeof x);
- return x;
-}
-
-/* Returns the value of the 64-bit floating point number at
- FLT64, converted from the format used by R to the host
- format. */
-static double
-flt64_to_double (const struct sfm_reader *r, const uint8_t flt64[8])
-{
- double x;
- if (r->float_format == FLOAT_NATIVE_DOUBLE)
- memcpy (&x, flt64, sizeof x);
- else
- float_convert (r->float_format, flt64, FLOAT_NATIVE_DOUBLE, &x);
- return x;
-}
-\f
static struct casereader_class sys_file_casereader_class =
{
sys_file_casereader_read,
#include <stdbool.h>
#include <stdio.h>
+#include <data/case.h>
#include <libpspp/float-format.h>
#include <libpspp/integer-format.h>
enum integer_format integer_format;
enum float_format float_format;
bool compressed; /* 0=no, 1=yes. */
- int case_cnt; /* -1 if unknown. */
+ casenumber case_cnt; /* -1 if unknown. */
char product[61]; /* Product name plus a null. */
+
+ /* Writer's version number in X.Y.Z format.
+ The version number is not always present; if not, then
+ all of these are set to 0. */
+ int version_major; /* X. */
+ int version_minor; /* Y. */
+ int version_revision; /* Z. */
};
struct dictionary;
/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
+#include <stdint.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <time.h>
#include <unistd.h>
#include <libpspp/alloc.h>
-#include <libpspp/hash.h>
+#include <libpspp/float-format.h>
+#include <libpspp/integer-format.h>
#include <libpspp/magic.h>
#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <data/variable.h>
#include "minmax.h"
+#include "unlocked-io.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
-/* Find 64-bit floating-point type. */
-#if SIZEOF_FLOAT == 8
- #define flt64 float
- #define FLT64_MAX FLT_MAX
-#elif SIZEOF_DOUBLE == 8
- #define flt64 double
- #define FLT64_MAX DBL_MAX
-#elif SIZEOF_LONG_DOUBLE == 8
- #define flt64 long double
- #define FLT64_MAX LDBL_MAX
-#else
- #error Which one of your basic types is 64-bit floating point?
-#endif
-
-/* Figure out SYSMIS value for flt64. */
-#include <libpspp/magic.h>
-#if SIZEOF_DOUBLE == 8
-#define second_lowest_flt64 second_lowest_value
-#else
-#error Must define second_lowest_flt64 for your architecture.
-#endif
-
-/* Record Type 1: General Information. */
-struct sysfile_header
- {
- char rec_type[4] ; /* 00: Record-type code, "$FL2". */
- char prod_name[60] ; /* 04: Product identification. */
- int32_t layout_code ; /* 40: 2. */
- int32_t nominal_case_size ; /* 44: Number of `value's per case.
- Note: some systems set this to -1 */
- int32_t compress ; /* 48: 1=compressed, 0=not compressed. */
- int32_t weight_idx ; /* 4c: 1-based index of weighting var, or 0. */
- int32_t case_cnt ; /* 50: Number of cases, -1 if unknown. */
- flt64 bias ; /* 54: Compression bias (100.0). */
- char creation_date[9] ; /* 5c: `dd mmm yy' creation date of file. */
- char creation_time[8] ; /* 65: `hh:mm:ss' 24-hour creation time. */
- char file_label[64] ; /* 6d: File label. */
- char padding[3] ; /* ad: Ignored padding. */
- } ATTRIBUTE((packed)) ;
-
-/* Record Type 2: Variable. */
-struct sysfile_variable
- {
- int32_t rec_type ; /* 2. */
- int32_t type ; /* 0=numeric, 1-255=string width,
- -1=continued string. */
- int32_t has_var_label ; /* 1=has a variable label, 0=doesn't. */
- int32_t n_missing_values ; /* Missing value code of -3,-2,0,1,2, or 3. */
- int32_t print ; /* Print format. */
- int32_t write ; /* Write format. */
- char name[SHORT_NAME_LEN] ; /* Variable name. */
- /* The rest of the structure varies. */
- } ATTRIBUTE((packed)) ;
-
/* Compression bias used by PSPP. Values between (1 -
COMPRESSION_BIAS) and (251 - COMPRESSION_BIAS) inclusive can be
compressed. */
struct file_handle *fh; /* File handle. */
FILE *file; /* File stream. */
- int needs_translation; /* 0=use fast path, 1=translation needed. */
- int compress; /* 1=compressed, 0=not compressed. */
- int case_cnt; /* Number of cases written so far. */
- size_t flt64_cnt; /* Number of flt64 elements in case. */
- bool has_vls; /* Does the dict have very long strings? */
+ bool compress; /* 1=compressed, 0=not compressed. */
+ casenumber case_cnt; /* Number of cases written so far. */
- /* Compression buffering. */
- flt64 *buf; /* Buffered data. */
- flt64 *end; /* Buffer end. */
- flt64 *ptr; /* Current location in buffer. */
- unsigned char *x; /* Location in current instruction octet. */
- unsigned char *y; /* End of instruction octet. */
+ /* Compression buffering.
- /* Variables. */
- struct sfm_var *vars; /* Variables. */
- size_t var_cnt; /* Number of variables. */
- size_t var_cnt_vls; /* Number of variables including
- very long string components. */
- };
+ Compressed data is output as groups of 8 1-byte opcodes
+ followed by up to 8 (depending on the opcodes) 8-byte data
+ items. Data items and opcodes arrive at the same time but
+ must be reordered for writing to disk, thus a small amount
+ of buffering here. */
+ uint8_t opcodes[8]; /* Buffered opcodes. */
+ int opcode_cnt; /* Number of buffered opcodes. */
+ uint8_t data[8][8]; /* Buffered data. */
+ int data_cnt; /* Number of buffered data items. */
-/* A variable in a system file. */
-struct sfm_var
- {
- int width; /* 0=numeric, otherwise string width. */
- int fv; /* Index into case. */
- size_t flt64_cnt; /* Number of flt64 elements. */
+ /* Variables. */
+ struct sfm_var *sfm_vars; /* Variables. */
+ size_t sfm_var_cnt; /* Number of variables. */
+ size_t segment_cnt; /* Number of variables including extra segments
+ for long string variables. */
};
static struct casewriter_class sys_file_casewriter_class;
-static char *append_string_max (char *, const char *, const char *);
static void write_header (struct sfm_writer *, const struct dictionary *);
-static void buf_write (struct sfm_writer *, const void *, size_t);
static void write_variable (struct sfm_writer *, const struct variable *);
static void write_value_labels (struct sfm_writer *,
struct variable *, int idx);
-static void write_rec_7_34 (struct sfm_writer *);
+static void write_integer_info_record (struct sfm_writer *);
+static void write_float_info_record (struct sfm_writer *);
static void write_longvar_table (struct sfm_writer *w,
const struct dictionary *dict);
static void write_documents (struct sfm_writer *, const struct dictionary *);
+static void write_int (struct sfm_writer *, int32_t);
+static inline void convert_double_to_output_format (double, uint8_t[8]);
+static void write_float (struct sfm_writer *, double);
+static void write_string (struct sfm_writer *, const char *, size_t);
+static void write_bytes (struct sfm_writer *, const void *, size_t);
+static void write_zeros (struct sfm_writer *, size_t);
+static void write_spaces (struct sfm_writer *, size_t);
+static void write_value (struct sfm_writer *, const union value *, int width);
+
+static void write_case_uncompressed (struct sfm_writer *, struct ccase *);
+static void write_case_compressed (struct sfm_writer *, struct ccase *);
+static void flush_compressed (struct sfm_writer *);
+static void put_cmp_opcode (struct sfm_writer *, uint8_t);
+static void put_cmp_number (struct sfm_writer *, double);
+static void put_cmp_string (struct sfm_writer *, const void *, size_t);
+
bool write_error (const struct sfm_writer *);
bool close_writer (struct sfm_writer *);
-static inline int
-var_flt64_cnt (const struct variable *v)
-{
- assert(sizeof(flt64) == MAX_SHORT_STRING);
- return sfm_width_to_bytes(var_get_width (v)) / MAX_SHORT_STRING ;
-}
-
-static inline int
-var_flt64_cnt_nom (const struct variable *v)
-{
- return (var_is_numeric (v)
- ? 1 : DIV_RND_UP (var_get_width (v), sizeof (flt64)));
-}
-
-
/* Returns default options for writing a system file. */
struct sfm_write_options
sfm_writer_default_options (void)
w->fh = fh;
w->file = fdopen (fd, "w");
- w->needs_translation = dict_compacting_would_change (d);
w->compress = opts.compress;
w->case_cnt = 0;
- w->flt64_cnt = 0;
- w->has_vls = false;
- w->buf = w->end = w->ptr = NULL;
- w->x = w->y = NULL;
+ w->opcode_cnt = w->data_cnt = 0;
- w->var_cnt = dict_get_var_cnt (d);
- w->var_cnt_vls = w->var_cnt;
- w->vars = xnmalloc (w->var_cnt, sizeof *w->vars);
- for (i = 0; i < w->var_cnt; i++)
- {
- const struct variable *dv = dict_get_var (d, i);
- struct sfm_var *sv = &w->vars[i];
- sv->width = var_get_width (dv);
- /* spss compatibility nonsense */
- if ( var_get_width (dv) >= MIN_VERY_LONG_STRING )
- w->has_vls = true;
-
- sv->fv = var_get_case_index (dv);
- sv->flt64_cnt = var_flt64_cnt (dv);
- }
+ /* Figure out how to map in-memory case data to on-disk case
+ data. Also count the number of segments. Very long strings
+ occupy multiple segments, otherwise each variable only takes
+ one segment. */
+ w->segment_cnt = sfm_dictionary_to_sfm_vars (d, &w->sfm_vars,
+ &w->sfm_var_cnt);
/* Check that file create succeeded. */
if (w->file == NULL)
/* Write basic variable info. */
short_names_assign (d);
for (i = 0; i < dict_get_var_cnt (d); i++)
- {
- int count = 0;
- const struct variable *v = dict_get_var(d, i);
- int wcount = var_get_width (v);
-
- do {
- struct variable *var_cont = var_clone (v);
- var_set_short_name (var_cont, 0, var_get_short_name (v, 0));
- if ( var_is_alpha (v))
- {
- if ( 0 != count )
- {
- var_clear_missing_values (var_cont);
- var_set_short_name (var_cont, 0,
- var_get_short_name (v, count));
- var_clear_label (var_cont);
- w->var_cnt_vls++;
- }
- count++;
- if ( wcount >= MIN_VERY_LONG_STRING )
- {
- var_set_width (var_cont, MIN_VERY_LONG_STRING - 1);
- wcount -= EFFECTIVE_LONG_STRING_LENGTH;
- }
- else
- {
- var_set_width (var_cont, wcount);
- wcount -= var_get_width (var_cont);
- }
- }
-
- write_variable (w, var_cont);
- var_destroy (var_cont);
- } while(wcount > 0);
- }
+ write_variable (w, dict_get_var (d, i));
/* Write out value labels. */
- for (idx = i = 0; i < dict_get_var_cnt (d); i++)
+ idx = 0;
+ for (i = 0; i < dict_get_var_cnt (d); i++)
{
struct variable *v = dict_get_var (d, i);
write_value_labels (w, v, idx);
- idx += var_flt64_cnt (v);
+ idx += sfm_width_to_octs (var_get_width (v));
}
if (dict_get_documents (d) != NULL)
write_documents (w, d);
- write_rec_7_34 (w);
+ write_integer_info_record (w);
+ write_float_info_record (w);
write_variable_display_parameters (w, d);
if (opts.version >= 3)
write_longvar_table (w, d);
- write_vls_length_table(w, d);
+ write_vls_length_table (w, d);
/* Write end-of-headers record. */
- {
- struct
- {
- int32_t rec_type ;
- int32_t filler ;
- } ATTRIBUTE((packed))
- rec_999;
-
- rec_999.rec_type = 999;
- rec_999.filler = 0;
-
- buf_write (w, &rec_999, sizeof rec_999);
- }
-
- if (w->compress)
- {
- w->buf = xnmalloc (128, sizeof *w->buf);
- w->ptr = w->buf;
- w->end = &w->buf[128];
- w->x = (unsigned char *) w->ptr++;
- w->y = (unsigned char *) w->ptr;
- }
+ write_int (w, 999);
+ write_int (w, 0);
if (write_error (w))
goto error;
return x;
}
+/* Returns the position, in "octs" (8-byte units), at which the
+   data for TARGET_VAR begins within each case's data for
+   dictionary D.  If no variable in D matches TARGET_VAR (as
+   when the caller passes NULL), the result is the total over
+   every variable in D. */
+static int
+calc_oct_idx (const struct dictionary *d, struct variable *target_var)
+{
+  int octs_before = 0;
+  int idx = 0;
+
+  while (idx < dict_get_var_cnt (d))
+    {
+      struct variable *cur = dict_get_var (d, idx);
+
+      /* Stop as soon as the target is reached; only the
+         variables that precede it contribute. */
+      if (cur == target_var)
+        return octs_before;
+      octs_before += sfm_width_to_octs (var_get_width (cur));
+      idx++;
+    }
+  return octs_before;
+}
+
/* Write the sysfile_header header to system file W. */
static void
write_header (struct sfm_writer *w, const struct dictionary *d)
{
- struct sysfile_header hdr;
- char *p;
- int i;
+ char prod_name[61];
+ char creation_date[10];
+ char creation_time[9];
+ const char *file_label;
+ struct variable *weight;
time_t t;
- memcpy (hdr.rec_type, "$FL2", 4);
+ /* Record-type code. */
+ write_string (w, "$FL2", 4);
- p = stpcpy (hdr.prod_name, "@(#) SPSS DATA FILE ");
- p = append_string_max (p, version, &hdr.prod_name[60]);
- p = append_string_max (p, " - ", &hdr.prod_name[60]);
- p = append_string_max (p, host_system, &hdr.prod_name[60]);
- memset (p, ' ', &hdr.prod_name[60] - p);
+ /* Product identification. */
+ snprintf (prod_name, sizeof prod_name, "@(#) SPSS DATA FILE %s - %s",
+ version, host_system);
+ write_string (w, prod_name, 60);
- hdr.layout_code = 2;
+ /* Layout code. */
+ write_int (w, 2);
- w->flt64_cnt = 0;
- for (i = 0; i < dict_get_var_cnt (d); i++)
- {
- w->flt64_cnt += var_flt64_cnt (dict_get_var (d, i));
- }
- hdr.nominal_case_size = w->flt64_cnt;
+ /* Number of `union value's per case. */
+ write_int (w, calc_oct_idx (d, NULL));
- hdr.compress = w->compress;
+ /* Compressed? */
+ write_int (w, w->compress);
- if (dict_get_weight (d) != NULL)
- {
- const struct variable *weight_var;
- int recalc_weight_idx = 1;
- int i;
+ /* Weight variable. */
+ weight = dict_get_weight (d);
+ write_int (w, weight != NULL ? calc_oct_idx (d, weight) + 1 : 0);
- weight_var = dict_get_weight (d);
- for (i = 0; ; i++)
- {
- struct variable *v = dict_get_var (d, i);
- if (v == weight_var)
- break;
- recalc_weight_idx += var_flt64_cnt (v);
- }
- hdr.weight_idx = recalc_weight_idx;
- }
- else
- hdr.weight_idx = 0;
+ /* Number of cases. We don't know this in advance, so we write
+ -1 to indicate an unknown number of cases. Later we can
+ come back and overwrite it with the true value. */
+ write_int (w, -1);
- hdr.case_cnt = -1;
- hdr.bias = COMPRESSION_BIAS;
+ /* Compression bias. */
+ write_float (w, COMPRESSION_BIAS);
+ /* Creation date and time. */
if (time (&t) == (time_t) -1)
{
- memcpy (hdr.creation_date, "01 Jan 70", 9);
- memcpy (hdr.creation_time, "00:00:00", 8);
+ strcpy (creation_date, "01 Jan 70");
+ strcpy (creation_time, "00:00:00");
}
else
{
int hour = rerange (tmp->tm_hour + 1);
int min = rerange (tmp->tm_min + 1);
int sec = rerange (tmp->tm_sec + 1);
- char buf[10];
- sprintf (buf, "%02d %s %02d", day, month_name[mon - 1], year);
- memcpy (hdr.creation_date, buf, sizeof hdr.creation_date);
- sprintf (buf, "%02d:%02d:%02d", hour - 1, min - 1, sec - 1);
- memcpy (hdr.creation_time, buf, sizeof hdr.creation_time);
+ snprintf (creation_date, sizeof creation_date,
+ "%02d %s %02d", day, month_name[mon - 1], year);
+ snprintf (creation_time, sizeof creation_time,
+ "%02d:%02d:%02d", hour - 1, min - 1, sec - 1);
}
+ write_string (w, creation_date, 9);
+ write_string (w, creation_time, 8);
- {
- const char *label = dict_get_label (d);
- if (label == NULL)
- label = "";
+ /* File label. */
+ file_label = dict_get_label (d);
+ if (file_label == NULL)
+ file_label = "";
+ write_string (w, file_label, 64);
- buf_copy_str_rpad (hdr.file_label, sizeof hdr.file_label, label);
- }
+ /* Padding. */
+ write_zeros (w, 3);
+}
- memset (hdr.padding, 0, sizeof hdr.padding);
+/* Write format spec FMT to W, after adjusting it to be
+ compatible with the given WIDTH.
+
+ FMT is passed by value, so the adjustment made here does not
+ touch the caller's copy. WIDTH must fit in a single
+ segment. */
+static void
+write_format (struct sfm_writer *w, struct fmt_spec fmt, int width)
+{
+ assert (fmt_check_output (&fmt));
+ assert (sfm_width_to_segments (width) == 1);
- buf_write (w, &hdr, sizeof hdr);
+ /* FMT is resized only for a nonzero WIDTH. */
+ if (width > 0)
+ fmt_resize (&fmt, width);
+ /* Pack the format into one integer: fmt.type (converted by
+ fmt_to_io) shifted left 16 bits, fmt.w shifted left 8 bits,
+ and fmt.d in the low bits. */
+ write_int (w, (fmt_to_io (fmt.type) << 16) | (fmt.w << 8) | fmt.d);
}
-/* Translates format spec from internal form in SRC to system file
- format in DEST. */
-static inline void
-write_format_spec (const struct fmt_spec *src, int32_t *dest)
+/* Write a string continuation variable record for each 8-byte
+ section beyond the initial 8 bytes, for a variable of the
+ given WIDTH.
+
+ Nothing is written when WIDTH is 8 or less. WIDTH must fit
+ in a single segment. */
+static void
+write_variable_continuation_records (struct sfm_writer *w, int width)
{
- assert (fmt_check_output (src));
- *dest = (fmt_to_io (src->type) << 16) | (src->w << 8) | src->d;
+ int position;
+
+ assert (sfm_width_to_segments (width) == 1);
+ /* POSITION starts at 8 because the first 8 bytes are not
+ covered by a continuation record. */
+ for (position = 8; position < width; position += 8)
+ {
+ write_int (w, 2); /* Record type. */
+ write_int (w, -1); /* Width: -1 marks a continued string. */
+ write_int (w, 0); /* No variable label. */
+ write_int (w, 0); /* No missing values. */
+ write_int (w, 0); /* Print format. */
+ write_int (w, 0); /* Write format. */
+ write_zeros (w, 8); /* Name. */
+ }
}
-/* Write the variable record(s) for primary variable P and secondary
- variable S to system file W. */
+/* Write the variable record(s) for variable V to system file
+ W.
+
+ The main record describes the first segment of V: width,
+ label flag, missing value count, formats, short name, then
+ the optional label and missing values. It is followed by
+ V's string continuation records. For a very long string
+ variable, one further variable record plus continuation
+ records is written for each additional segment. */
static void
write_variable (struct sfm_writer *w, const struct variable *v)
{
- struct sysfile_variable sv;
+ int width = var_get_width (v);
+ int segment_cnt = sfm_width_to_segments (width);
+ int seg0_width = sfm_segment_alloc_width (width, 0);
+ const struct missing_values *mv = var_get_missing_values (v);
+ int i;
- /* Missing values. */
- struct missing_values mv;
- flt64 m[3]; /* Missing value values. */
- int nm; /* Number of missing values, possibly negative. */
- const char *label = var_get_label (v);
+ /* Record type. */
+ write_int (w, 2);
- sv.rec_type = 2;
- sv.type = MIN (var_get_width (v), MIN_VERY_LONG_STRING - 1);
- sv.has_var_label = label != NULL;
+ /* Width. */
+ write_int (w, seg0_width);
- mv_copy (&mv, var_get_missing_values (v));
- nm = 0;
- if (mv_has_range (&mv))
- {
- double x, y;
- mv_pop_range (&mv, &x, &y);
- m[nm++] = x == LOWEST ? second_lowest_flt64 : x;
- m[nm++] = y == HIGHEST ? FLT64_MAX : y;
- }
- while (mv_has_value (&mv))
- {
- union value value;
- mv_pop_value (&mv, &value);
- if (var_is_numeric (v))
- m[nm] = value.f;
- else
- buf_copy_rpad ((char *) &m[nm], sizeof m[nm], value.s,
- var_get_width (v));
- nm++;
- }
- if (mv_has_range (var_get_missing_values (v)))
- nm = -nm;
+ /* Variable has a variable label? */
+ write_int (w, var_has_label (v));
- sv.n_missing_values = nm;
- write_format_spec (var_get_print_format (v), &sv.print);
- write_format_spec (var_get_write_format (v), &sv.write);
- buf_copy_str_rpad (sv.name, sizeof sv.name, var_get_short_name (v, 0));
- buf_write (w, &sv, sizeof sv);
+ /* Number of missing values. If there is a range, then the
+ range counts as 2 missing values and causes the number to be
+ negated, so a range plus N discrete values is written as
+ -(2 + N). */
+ write_int (w, mv_has_range (mv) ? -2 - mv_n_values (mv) : mv_n_values (mv));
- if (label != NULL)
- {
- struct label
- {
- int32_t label_len ;
- char label[255] ;
- } ATTRIBUTE((packed))
- l;
+ /* Print and write formats. */
+ write_format (w, *var_get_print_format (v), seg0_width);
+ write_format (w, *var_get_write_format (v), seg0_width);
- int ext_len;
+ /* Short name.
+ The full name is in a translation table written
+ separately. */
+ write_string (w, var_get_short_name (v, 0), 8);
- l.label_len = MIN (strlen (label), 255);
- ext_len = ROUND_UP (l.label_len, sizeof l.label_len);
- memcpy (l.label, label, l.label_len);
- memset (&l.label[l.label_len], ' ', ext_len - l.label_len);
+ /* Variable label. The length field holds the label's true
+ length (at most 255); only the label bytes that follow are
+ padded to a multiple of 4. */
+ if (var_has_label (v))
+ {
+ const char *label = var_get_label (v);
+ size_t label_len = MIN (strlen (label), 255);
+ size_t padded_len = ROUND_UP (label_len, 4);
+ write_int (w, label_len);
+ write_string (w, label, padded_len);
+ }
- buf_write (w, &l, offsetof (struct label, label) + ext_len);
+ /* Write the missing values, if any, range first. */
+ if (mv_has_range (mv))
+ {
+ double x, y;
+ mv_peek_range (mv, &x, &y);
+ write_float (w, x);
+ write_float (w, y);
+ }
+ for (i = 0; i < mv_n_values (mv); i++)
+ {
+ union value value;
+ mv_peek_value (mv, &value, i);
+ write_value (w, &value, seg0_width);
}
- if (nm)
- buf_write (w, m, sizeof *m * abs (nm));
+ write_variable_continuation_records (w, seg0_width);
- if (var_is_alpha (v) && var_get_width (v) > (int) sizeof (flt64))
+ /* Write additional segments for very long string variables. */
+ for (i = 1; i < segment_cnt; i++)
{
- int i;
- int pad_count;
-
- sv.type = -1;
- sv.has_var_label = 0;
- sv.n_missing_values = 0;
- memset (&sv.print, 0, sizeof sv.print);
- memset (&sv.write, 0, sizeof sv.write);
- memset (&sv.name, 0, sizeof sv.name);
-
- pad_count = DIV_RND_UP (MIN(var_get_width (v), MIN_VERY_LONG_STRING - 1),
- (int) sizeof (flt64)) - 1;
- for (i = 0; i < pad_count; i++)
- buf_write (w, &sv, sizeof sv);
+ int seg_width = sfm_segment_alloc_width (width, i);
+ struct fmt_spec fmt = fmt_for_output (FMT_A, MAX (seg_width, 1), 0);
+
+ write_int (w, 2); /* Variable record. */
+ write_int (w, seg_width); /* Width. */
+ write_int (w, 0); /* No variable label. */
+ write_int (w, 0); /* No missing values. */
+ write_format (w, fmt, seg_width); /* Print format. */
+ write_format (w, fmt, seg_width); /* Write format. */
+ write_string (w, var_get_short_name (v, i), 8);
+
+ write_variable_continuation_records (w, seg_width);
}
}
static void
write_value_labels (struct sfm_writer *w, struct variable *v, int idx)
{
- struct value_label_rec
- {
- int32_t rec_type ;
- int32_t n_labels ;
- flt64 labels[1] ;
- } ATTRIBUTE((packed));
-
- struct var_idx_rec
- {
- int32_t rec_type ;
- int32_t n_vars ;
- int32_t vars[1] ;
- } ATTRIBUTE((packed));
-
const struct val_labs *val_labs;
struct val_labs_iterator *i;
- struct value_label_rec *vlr;
- struct var_idx_rec vir;
struct val_lab *vl;
- size_t vlr_size;
- flt64 *loc;
val_labs = var_get_value_labels (v);
if (val_labs == NULL)
return;
- /* Pass 1: Count bytes. */
- vlr_size = (sizeof (struct value_label_rec)
- + sizeof (flt64) * (val_labs_count (val_labs) - 1));
- for (vl = val_labs_first (val_labs, &i); vl != NULL;
- vl = val_labs_next (val_labs, &i))
- vlr_size += ROUND_UP (strlen (vl->label) + 1, sizeof (flt64));
-
- /* Pass 2: Copy bytes. */
- vlr = xmalloc (vlr_size);
- vlr->rec_type = 3;
- vlr->n_labels = val_labs_count (val_labs);
- loc = vlr->labels;
+ /* Value label record. */
+ write_int (w, 3); /* Record type. */
+ write_int (w, val_labs_count (val_labs)); /* Number of labels. */
for (vl = val_labs_first_sorted (val_labs, &i); vl != NULL;
vl = val_labs_next (val_labs, &i))
{
- size_t len = strlen (vl->label);
-
- *loc++ = vl->value.f;
- *(unsigned char *) loc = len;
- memcpy (&((char *) loc)[1], vl->label, len);
- memset (&((char *) loc)[1 + len], ' ',
- REM_RND_UP (len + 1, sizeof (flt64)));
- loc += DIV_RND_UP (len + 1, sizeof (flt64));
- }
+ /* The label length is written as a single byte, so labels
+ are cut off at 255 bytes. */
+ uint8_t len = MIN (strlen (vl->label), 255);
- buf_write (w, vlr, vlr_size);
- free (vlr);
+ write_value (w, &vl->value, var_get_width (v));
+ write_bytes (w, &len, 1);
+ write_bytes (w, vl->label, len);
+ /* Zero-fill after the length byte and label text;
+ presumably REM_RND_UP yields the bytes needed to reach the
+ next multiple of 8. */
+ write_zeros (w, REM_RND_UP (len + 1, 8));
+ }
- vir.rec_type = 4;
- vir.n_vars = 1;
- vir.vars[0] = idx + 1;
- buf_write (w, &vir, sizeof vir);
+ /* Value label variable record. */
+ write_int (w, 4); /* Record type. */
+ write_int (w, 1); /* Number of variables. */
+ write_int (w, idx + 1); /* IDX converted to a 1-based index. */
}
/* Writes record type 6, document record. */
static void
write_documents (struct sfm_writer *w, const struct dictionary *d)
{
- struct
- {
- int32_t rec_type ; /* Always 6. */
- int32_t n_lines ; /* Number of lines of documents. */
- } ATTRIBUTE((packed)) rec_6;
+ size_t line_cnt = dict_get_document_line_cnt (d);
- const char * documents = dict_get_documents (d);
- size_t doc_bytes = strlen (documents);
-
- assert (doc_bytes % 80 == 0);
-
- rec_6.rec_type = 6;
- rec_6.n_lines = doc_bytes / 80;
- buf_write (w, &rec_6, sizeof rec_6);
- buf_write (w, documents, 80 * rec_6.n_lines);
+ write_int (w, 6); /* Record type. */
+ write_int (w, line_cnt);
+ write_bytes (w, dict_get_documents (d), line_cnt * DOC_LINE_LENGTH);
}
-/* Write the alignment, width and scale values */
+/* Writes the variable display parameter record (record type 7,
+ subtype 11) for DICT to W. One (measure, width, alignment)
+ triple of 32-bit integers is written for each segment of each
+ variable, so a variable that occupies several segments
+ contributes several triples. */
static void
write_variable_display_parameters (struct sfm_writer *w,
const struct dictionary *dict)
{
int i;
- struct
- {
- int32_t rec_type ;
- int32_t subtype ;
- int32_t elem_size ;
- int32_t n_elem ;
- } ATTRIBUTE((packed)) vdp_hdr;
-
- vdp_hdr.rec_type = 7;
- vdp_hdr.subtype = 11;
- vdp_hdr.elem_size = 4;
- vdp_hdr.n_elem = w->var_cnt_vls * 3;
+ write_int (w, 7); /* Record type. */
+ write_int (w, 11); /* Record subtype. */
+ write_int (w, 4); /* Data item (int32) size. */
+ write_int (w, w->segment_cnt * 3); /* Number of data items. */
- buf_write (w, &vdp_hdr, sizeof vdp_hdr);
-
- for ( i = 0 ; i < w->var_cnt ; ++i )
+ for (i = 0; i < dict_get_var_cnt (dict); ++i)
{
- struct variable *v;
- struct
- {
- int32_t measure ;
- int32_t width ;
- int32_t align ;
- } ATTRIBUTE((packed)) params;
-
- v = dict_get_var(dict, i);
-
- params.measure = (var_get_measure (v) == MEASURE_NOMINAL ? 1
- : var_get_measure (v) == MEASURE_ORDINAL ? 2
- : 3);
- params.width = var_get_display_width (v);
- params.align = (var_get_alignment (v) == ALIGN_LEFT ? 0
- : var_get_alignment (v) == ALIGN_RIGHT ? 1
- : 2);
-
- buf_write (w, &params, sizeof(params));
-
- if (var_is_long_string (v))
- {
- int wcount = var_get_width (v) - EFFECTIVE_LONG_STRING_LENGTH ;
-
- while (wcount > 0)
- {
- params.width = wcount >= MIN_VERY_LONG_STRING ? 32 : wcount;
-
- buf_write (w, &params, sizeof(params));
-
- wcount -= EFFECTIVE_LONG_STRING_LENGTH ;
- }
- }
+ struct variable *v = dict_get_var (dict, i);
+ int width = var_get_width (v);
+ int segment_cnt = sfm_width_to_segments (width);
+ int measure = (var_get_measure (v) == MEASURE_NOMINAL ? 1
+ : var_get_measure (v) == MEASURE_ORDINAL ? 2
+ : 3);
+ int alignment = (var_get_alignment (v) == ALIGN_LEFT ? 0
+ : var_get_alignment (v) == ALIGN_RIGHT ? 1
+ : 2);
+ int seg; /* Segment index; distinct name so the outer `i' is not shadowed. */
+
+ for (seg = 0; seg < segment_cnt; seg++)
+ {
+ int width_left = width - sfm_segment_effective_offset (width, seg);
+ write_int (w, measure);
+ /* The first segment carries the variable's own display
+ width. A later segment gets 32 while more than one
+ segment's worth of width remains, and otherwise the
+ width that is left. */
+ write_int (w, (seg == 0 ? var_get_display_width (v)
+ : sfm_width_to_segments (width_left) > 1 ? 32
+ : width_left));
+ write_int (w, alignment);
+ }
}
}
-/* Writes the table of lengths for Very Long String Variables */
+/* Writes the table of lengths for very long string variables. */
static void
write_vls_length_table (struct sfm_writer *w,
const struct dictionary *dict)
{
+ struct string map;
int i;
- struct
- {
- int32_t rec_type ;
- int32_t subtype ;
- int32_t elem_size ;
- int32_t n_elem ;
- } ATTRIBUTE((packed)) vls_hdr;
-
- struct string vls_length_map;
-
- ds_init_empty (&vls_length_map);
-
- vls_hdr.rec_type = 7;
- vls_hdr.subtype = 14;
- vls_hdr.elem_size = 1;
-
+ ds_init_empty (&map);
for (i = 0; i < dict_get_var_cnt (dict); ++i)
{
const struct variable *v = dict_get_var (dict, i);
-
- if ( var_get_width (v) < MIN_VERY_LONG_STRING )
- continue;
-
- ds_put_format (&vls_length_map, "%s=%05d",
- var_get_short_name (v, 0), var_get_width (v));
- ds_put_char (&vls_length_map, '\0');
- ds_put_char (&vls_length_map, '\t');
+ if (sfm_width_to_segments (var_get_width (v)) > 1)
+ ds_put_format (&map, "%s=%05d%c\t",
+ var_get_short_name (v, 0), var_get_width (v), 0);
}
-
- vls_hdr.n_elem = ds_length (&vls_length_map);
-
- if ( vls_hdr.n_elem > 0 )
+ if (!ds_is_empty (&map))
{
- buf_write (w, &vls_hdr, sizeof vls_hdr);
- buf_write (w, ds_data (&vls_length_map), ds_length (&vls_length_map));
+ write_int (w, 7); /* Record type. */
+ write_int (w, 14); /* Record subtype. */
+ write_int (w, 1); /* Data item (char) size. */
+ write_int (w, ds_length (&map)); /* Number of data items. */
+ write_bytes (w, ds_data (&map), ds_length (&map));
}
-
- ds_destroy (&vls_length_map);
+ ds_destroy (&map);
}
-/* Writes the long variable name table */
+/* Writes the long variable name table. */
static void
write_longvar_table (struct sfm_writer *w, const struct dictionary *dict)
{
- struct
- {
- int32_t rec_type ;
- int32_t subtype ;
- int32_t elem_size ;
- int32_t n_elem ;
- } ATTRIBUTE((packed)) lv_hdr;
-
- struct string long_name_map;
+ struct string map;
size_t i;
- ds_init_empty (&long_name_map);
+ ds_init_empty (&map);
for (i = 0; i < dict_get_var_cnt (dict); i++)
{
struct variable *v = dict_get_var (dict, i);
if (i)
- ds_put_char (&long_name_map, '\t');
- ds_put_format (&long_name_map, "%s=%s",
+ ds_put_char (&map, '\t');
+ ds_put_format (&map, "%s=%s",
var_get_short_name (v, 0), var_get_name (v));
}
- lv_hdr.rec_type = 7;
- lv_hdr.subtype = 13;
- lv_hdr.elem_size = 1;
- lv_hdr.n_elem = ds_length (&long_name_map);
+ write_int (w, 7); /* Record type. */
+ write_int (w, 13); /* Record subtype. */
+ write_int (w, 1); /* Data item (char) size. */
+ write_int (w, ds_length (&map)); /* Number of data items. */
+ write_bytes (w, ds_data (&map), ds_length (&map));
- buf_write (w, &lv_hdr, sizeof lv_hdr);
- buf_write (w, ds_data (&long_name_map), ds_length (&long_name_map));
-
- ds_destroy (&long_name_map);
+ ds_destroy (&map);
}
-/* Writes record type 7, subtypes 3 and 4. */
+/* Write integer information record. */
static void
-write_rec_7_34 (struct sfm_writer *w)
+write_integer_info_record (struct sfm_writer *w)
{
- struct
- {
- int32_t rec_type_3 ;
- int32_t subtype_3 ;
- int32_t data_type_3 ;
- int32_t n_elem_3 ;
- int32_t elem_3[8] ;
- int32_t rec_type_4 ;
- int32_t subtype_4 ;
- int32_t data_type_4 ;
- int32_t n_elem_4 ;
- flt64 elem_4[3] ;
- } ATTRIBUTE((packed)) rec_7;
-
- /* Components of the version number, from major to minor. */
int version_component[3];
-
- /* Used to step through the version string. */
- char *p;
-
- /* Parses the version string, which is assumed to be of the form
- #.#x, where each # is a string of digits, and x is a single
- letter. */
- version_component[0] = strtol (bare_version, &p, 10);
- if (*p == '.')
- p++;
- version_component[1] = strtol (bare_version, &p, 10);
- version_component[2] = (isalpha ((unsigned char) *p)
- ? tolower ((unsigned char) *p) - 'a' : 0);
-
- rec_7.rec_type_3 = 7;
- rec_7.subtype_3 = 3;
- rec_7.data_type_3 = sizeof (int32_t);
- rec_7.n_elem_3 = 8;
- rec_7.elem_3[0] = version_component[0];
- rec_7.elem_3[1] = version_component[1];
- rec_7.elem_3[2] = version_component[2];
- rec_7.elem_3[3] = -1;
-
- /* PORTME: 1=IEEE754, 2=IBM 370, 3=DEC VAX E. */
-#ifdef FPREP_IEEE754
- rec_7.elem_3[4] = 1;
-#endif
-
- rec_7.elem_3[5] = 1;
-
- /* PORTME: 1=big-endian, 2=little-endian. */
-#if WORDS_BIGENDIAN
- rec_7.elem_3[6] = 1;
-#else
- rec_7.elem_3[6] = 2;
-#endif
-
- /* PORTME: 1=EBCDIC, 2=7-bit ASCII, 3=8-bit ASCII, 4=DEC Kanji. */
- rec_7.elem_3[7] = 2;
-
- rec_7.rec_type_4 = 7;
- rec_7.subtype_4 = 4;
- rec_7.data_type_4 = sizeof (flt64);
- rec_7.n_elem_4 = 3;
- rec_7.elem_4[0] = -FLT64_MAX;
- rec_7.elem_4[1] = FLT64_MAX;
- rec_7.elem_4[2] = second_lowest_flt64;
-
- buf_write (w, &rec_7, sizeof rec_7);
-}
-
-/* Write NBYTES starting at BUF to the system file represented by
- H. */
-static void
-buf_write (struct sfm_writer *w, const void *buf, size_t nbytes)
-{
- assert (buf != NULL);
- fwrite (buf, nbytes, 1, w->file);
-}
-
-/* Copies string DEST to SRC with the proviso that DEST does not reach
- byte END; no null terminator is copied. Returns a pointer to the
- byte after the last byte copied. */
-static char *
-append_string_max (char *dest, const char *src, const char *end)
-{
- int nbytes = MIN (end - dest, (int) strlen (src));
- memcpy (dest, src, nbytes);
- return dest + nbytes;
+ int float_format;
+
+ /* Parse the version string. */
+ memset (version_component, 0, sizeof version_component);
+ sscanf (bare_version, "%d.%d.%d",
+ &version_component[0], &version_component[1], &version_component[2]);
+
+ /* Figure out the floating-point format. */
+ if (FLOAT_NATIVE_64_BIT == FLOAT_IEEE_DOUBLE_LE
+ || FLOAT_NATIVE_64_BIT == FLOAT_IEEE_DOUBLE_BE)
+ float_format = 1;
+ else if (FLOAT_NATIVE_64_BIT == FLOAT_Z_LONG)
+ float_format = 2;
+ else if (FLOAT_NATIVE_64_BIT == FLOAT_VAX_D)
+ float_format = 3;
+ else
+ abort ();
+
+ /* Write record. */
+ write_int (w, 7); /* Record type. */
+ write_int (w, 3); /* Record subtype. */
+ write_int (w, 4); /* Data item (int32) size. */
+ write_int (w, 8); /* Number of data items. */
+ write_int (w, version_component[0]);
+ write_int (w, version_component[1]);
+ write_int (w, version_component[2]);
+ write_int (w, -1); /* Machine code. */
+ write_int (w, float_format);
+ write_int (w, 1); /* Compression code. */
+ write_int (w, INTEGER_NATIVE == INTEGER_MSB_FIRST ? 1 : 2);
+ write_int (w, 2); /* 7-bit ASCII. */
}
-/* Makes certain that the compression buffer of H has room for another
- element. If there's not room, pads out the current instruction
- octet with zero and dumps out the buffer. */
+/* Write floating-point information record. */
static void
-ensure_buf_space (struct sfm_writer *w)
+write_float_info_record (struct sfm_writer *w)
{
- if (w->ptr >= w->end)
- {
- memset (w->x, 0, w->y - w->x);
- w->x = w->y;
- w->ptr = w->buf;
- buf_write (w, w->buf, sizeof *w->buf * 128);
- }
+ write_int (w, 7); /* Record type. */
+ write_int (w, 4); /* Record subtype. */
+ write_int (w, 8); /* Data item (flt64) size. */
+ write_int (w, 3); /* Number of data items. */
+ write_float (w, SYSMIS); /* System-missing value. */
+ write_float (w, HIGHEST); /* Value used for HIGHEST in missing values. */
+ write_float (w, LOWEST); /* Value used for LOWEST in missing values. */
}
-
-static void write_compressed_data (struct sfm_writer *w, const flt64 *elem);
-
+\f
/* Writes case C to system file W. */
static void
sys_file_casewriter_write (struct casewriter *writer, void *w_,
struct ccase *c)
{
struct sfm_writer *w = w_;
+
if (ferror (w->file))
{
casewriter_force_error (writer);
w->case_cnt++;
- if (!w->needs_translation && !w->compress
- && sizeof (flt64) == sizeof (union value) && ! w->has_vls )
- {
- /* Fast path: external and internal representations are the
- same and the dictionary is properly ordered. Write
- directly to file. */
- buf_write (w, case_data_all (c), sizeof (union value) * w->flt64_cnt);
- }
+ if (!w->compress)
+ write_case_uncompressed (w, c);
else
- {
- /* Slow path: internal and external representations differ.
- Write into a bounce buffer, then write to W. */
- flt64 *bounce;
- flt64 *bounce_cur;
- flt64 *bounce_end;
- size_t bounce_size;
- size_t i;
-
- bounce_size = sizeof *bounce * w->flt64_cnt;
- bounce = bounce_cur = local_alloc (bounce_size);
- bounce_end = bounce + bounce_size;
-
- for (i = 0; i < w->var_cnt; i++)
- {
- struct sfm_var *v = &w->vars[i];
-
- memset(bounce_cur, ' ', v->flt64_cnt * sizeof (flt64));
-
- if (v->width == 0)
- {
- *bounce_cur = case_num_idx (c, v->fv);
- bounce_cur += v->flt64_cnt;
- }
- else
- { int ofs = 0;
- while (ofs < v->width)
- {
- int chunk = MIN (MIN_VERY_LONG_STRING - 1, v->width - ofs);
- int nv = DIV_RND_UP (chunk, sizeof (flt64));
- buf_copy_rpad ((char *) bounce_cur, nv * sizeof (flt64),
- case_data_idx (c, v->fv)->s + ofs, chunk);
- bounce_cur += nv;
- ofs += chunk;
- }
- }
-
- }
-
- if (!w->compress)
- buf_write (w, bounce, bounce_size);
- else
- write_compressed_data (w, bounce);
-
- local_free (bounce);
- }
+ write_case_compressed (w, c);
case_destroy (c);
}
+/* Destroys system file writer W. */
static void
sys_file_casewriter_destroy (struct casewriter *writer, void *w_)
{
casewriter_force_error (writer);
}
-static void
-put_instruction (struct sfm_writer *w, unsigned char instruction)
-{
- if (w->x >= w->y)
- {
- ensure_buf_space (w);
- w->x = (unsigned char *) w->ptr++;
- w->y = (unsigned char *) w->ptr;
- }
- *w->x++ = instruction;
-}
-
-static void
-put_element (struct sfm_writer *w, const flt64 *elem)
-{
- ensure_buf_space (w);
- memcpy (w->ptr++, elem, sizeof *elem);
-}
-
-static void
-write_compressed_data (struct sfm_writer *w, const flt64 *elem)
-{
- size_t i;
-
- for (i = 0; i < w->var_cnt; i++)
- {
- struct sfm_var *v = &w->vars[i];
-
- if (v->width == 0)
- {
- if (*elem == -FLT64_MAX)
- put_instruction (w, 255);
- else if (*elem >= 1 - COMPRESSION_BIAS
- && *elem <= 251 - COMPRESSION_BIAS
- && *elem == (int) *elem)
- put_instruction (w, (int) *elem + COMPRESSION_BIAS);
- else
- {
- put_instruction (w, 253);
- put_element (w, elem);
- }
- elem++;
- }
- else
- {
- size_t j;
-
- for (j = 0; j < v->flt64_cnt; j++, elem++)
- {
- if (!memcmp (elem, " ", sizeof (flt64)))
- put_instruction (w, 254);
- else
- {
- put_instruction (w, 253);
- put_element (w, elem);
- }
- }
- }
- }
-}
-
/* Returns true if an I/O error has occurred on WRITER, false otherwise. */
bool
write_error (const struct sfm_writer *writer)
if (w->file != NULL)
{
/* Flush buffer. */
- if (w->buf != NULL && w->ptr > w->buf)
- {
- memset (w->x, 0, w->y - w->x);
- buf_write (w, w->buf, (w->ptr - w->buf) * sizeof *w->buf);
- }
+ if (w->opcode_cnt > 0)
+ flush_compressed (w);
fflush (w->file);
ok = !write_error (w);
/* Seek back to the beginning and update the number of cases.
This is just a courtesy to later readers, so there's no need
to check return values or report errors. */
- if (ok && !fseek (w->file, offsetof (struct sysfile_header, case_cnt),
- SEEK_SET))
+ if (ok && w->case_cnt <= INT32_MAX && !fseek (w->file, 80, SEEK_SET))
{
- int32_t case_cnt = w->case_cnt;
- fwrite (&case_cnt, sizeof case_cnt, 1, w->file);
+ write_int (w, w->case_cnt);
clearerr (w->file);
}
fh_close (w->fh, "system file", "we");
- free (w->buf);
- free (w->vars);
+ free (w->sfm_vars);
free (w);
return ok;
}
-\f
+
+/* System file writer casewriter class. */
static struct casewriter_class sys_file_casewriter_class =
{
sys_file_casewriter_write,
sys_file_casewriter_destroy,
NULL,
};
+\f
+/* Writes case C to system file W, without compressing it.
+   Every segment in W's segment table is emitted in order: a
+   zero-width segment as a single number, any other segment as
+   its string bytes followed by its space padding. */
+static void
+write_case_uncompressed (struct sfm_writer *w, struct ccase *c)
+{
+  size_t idx = 0;
+
+  while (idx < w->sfm_var_cnt)
+    {
+      struct sfm_var *seg = &w->sfm_vars[idx++];
+
+      if (seg->width == 0)
+        {
+          write_float (w, case_num_idx (c, seg->case_index));
+          continue;
+        }
+
+      write_bytes (w, case_str_idx (c, seg->case_index) + seg->offset,
+                   seg->width);
+      write_spaces (w, seg->padding);
+    }
+}
+
+/* Writes case C to system file W, with compression.
+
+   One opcode is queued per 8-byte unit ("oct") of each segment:
+   255 for a number equal to SYSMIS, the biased value itself for
+   an integral number in the compressible range, 254 for an oct
+   of all spaces, and 253 followed by 8 bytes of literal data
+   for anything else. */
+static void
+write_case_compressed (struct sfm_writer *w, struct ccase *c)
+{
+  /* A full oct of spaces.  Spelled out element by element so
+     that memcmp below always has 8 valid bytes to compare
+     against, whatever happens to whitespace in this file. */
+  static const char blank_oct[8] =
+    {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '};
+  size_t i;
+
+  for (i = 0; i < w->sfm_var_cnt; i++)
+    {
+      struct sfm_var *v = &w->sfm_vars[i];
+
+      if (v->width == 0)
+        {
+          double d = case_num_idx (c, v->case_index);
+          if (d == SYSMIS)
+            put_cmp_opcode (w, 255);
+          else if (d >= 1 - COMPRESSION_BIAS
+                   && d <= 251 - COMPRESSION_BIAS
+                   && d == (int) d)
+            put_cmp_opcode (w, (int) d + COMPRESSION_BIAS);
+          else
+            {
+              put_cmp_opcode (w, 253);
+              put_cmp_number (w, d);
+            }
+        }
+      else
+        {
+          int offset = v->offset;
+          int width, padding;
+
+          /* This code properly deals with a width that is not a
+             multiple of 8, by ensuring that the final partial
+             oct (8 byte unit) is treated as padded with spaces
+             on the right. */
+          for (width = v->width; width > 0; width -= 8, offset += 8)
+            {
+              const void *data = case_str_idx (c, v->case_index) + offset;
+              int chunk_size = MIN (width, 8);
+
+              /* memcmp examines CHUNK_SIZE (up to 8) bytes of
+                 both operands, so the blank pattern must be a
+                 full 8 bytes long. */
+              if (!memcmp (data, blank_oct, chunk_size))
+                put_cmp_opcode (w, 254);
+              else
+                {
+                  put_cmp_opcode (w, 253);
+                  put_cmp_string (w, data, chunk_size);
+                }
+            }
+
+          /* This code deals properly with padding that is not a
+             multiple of 8 bytes, by discarding the remainder,
+             which was already effectively padded with spaces in
+             the previous loop.  (Note that v->width + v->padding
+             is always a multiple of 8.) */
+          for (padding = v->padding / 8; padding > 0; padding--)
+            put_cmp_opcode (w, 254);
+        }
+    }
+}
+
+/* Writes out W's pending compression buffer and empties it.
+   The opcodes go first, zero-filled to a full 8 bytes, followed
+   by the data items queued alongside them.  At least one opcode
+   must be pending. */
+static void
+flush_compressed (struct sfm_writer *w)
+{
+  assert (w->opcode_cnt > 0 && w->opcode_cnt <= 8);
+
+  /* Opcode octet, padded out with zero bytes. */
+  write_bytes (w, w->opcodes, w->opcode_cnt);
+  write_zeros (w, 8 - w->opcode_cnt);
+
+  /* Data items that accompany the opcodes just written. */
+  write_bytes (w, w->data, w->data_cnt * sizeof *w->data);
+
+  /* Both buffers are now empty. */
+  w->data_cnt = 0;
+  w->opcode_cnt = 0;
+}
+
+/* Queues OPCODE in W's compression buffer.  If all 8 opcode
+   slots are already taken, the buffer is flushed first to make
+   room. */
+static void
+put_cmp_opcode (struct sfm_writer *w, uint8_t opcode)
+{
+  if (!(w->opcode_cnt < 8))
+    flush_compressed (w);
+
+  w->opcodes[w->opcode_cnt] = opcode;
+  w->opcode_cnt++;
+}
+
+/* Queues NUMBER, converted to the output file's 64-bit format,
+   as the next data item in W's compression buffer.  Call this
+   only right after put_cmp_opcode, which flushes as needed and
+   thereby guarantees a free data slot. */
+static void
+put_cmp_number (struct sfm_writer *w, double number)
+{
+  assert (w->opcode_cnt > 0);
+  assert (w->data_cnt < 8);
+
+  convert_double_to_output_format (number, w->data[w->data_cnt]);
+  w->data_cnt++;
+}
+
+/* Queues the SIZE bytes at DATA as the next data item in W's
+   compression buffer, filling the rest of the 8-byte item with
+   spaces (so SIZE may not exceed 8).  Call this only right
+   after put_cmp_opcode, which flushes as needed and thereby
+   guarantees a free data slot. */
+static void
+put_cmp_string (struct sfm_writer *w, const void *data, size_t size)
+{
+  void *slot;
+
+  assert (w->opcode_cnt > 0);
+  assert (w->data_cnt < 8);
+  assert (size <= 8);
+
+  slot = w->data[w->data_cnt];
+  w->data_cnt++;
+
+  /* Caller's bytes first, then blanks over whatever remains. */
+  memcpy (slot, data, size);
+  memset ((char *) slot + size, ' ', 8 - size);
+}
+\f
+/* Writes the 4 bytes of 32-bit integer X, exactly as they are
+   laid out in memory, to the output file for writer W. */
+static void
+write_int (struct sfm_writer *w, int32_t x)
+{
+  const int32_t native = x;
+
+  write_bytes (w, &native, sizeof native);
+}
+
+/* Stores NATIVE into OUTPUT in the 64-bit floating-point format
+   used in output files. */
+static inline void
+convert_double_to_output_format (double native, uint8_t output[8])
+{
+  if (FLOAT_NATIVE_DOUBLE == FLOAT_NATIVE_64_BIT)
+    {
+      /* "double" already is the 64-bit format: plain copy. */
+      memcpy (output, &native, sizeof native);
+    }
+  else
+    {
+      /* "double" is some other format, so convert it. */
+      float_convert (FLOAT_NATIVE_DOUBLE, &native,
+                     FLOAT_NATIVE_64_BIT, output);
+    }
+}
+
+/* Converts floating-point number X to the 8-byte output
+   representation and writes it to the output file for writer
+   W. */
+static void
+write_float (struct sfm_writer *w, double x)
+{
+  uint8_t encoded[8];
+
+  convert_double_to_output_format (x, encoded);
+  write_bytes (w, encoded, sizeof encoded);
+}
+
+/* Writes VALUE, interpreted according to WIDTH, to W as one
+   8-byte unit: a WIDTH of 0 writes the numeric member as a
+   float, any other WIDTH writes that many string bytes
+   followed by zero bytes up to 8.
+   WIDTH greater than 8 is not supported (and is not needed),
+   which keeps this function free of a multi-unit branch. */
+static void
+write_value (struct sfm_writer *w, const union value *value, int width)
+{
+  assert (width <= 8);
+
+  if (width != 0)
+    {
+      write_bytes (w, value->s, width);
+      write_zeros (w, 8 - width);
+    }
+  else
+    write_float (w, value->f);
+}
+
+/* Writes null-terminated STRING in a field of the given WIDTH to
+   W.  If STRING is longer than WIDTH, it is truncated; if STRING
+   is shorter, it is padded on the right with spaces.  Exactly
+   WIDTH bytes are written either way; the null terminator is
+   never written. */
+static void
+write_string (struct sfm_writer *w, const char *string, size_t width)
+{
+  size_t data_bytes = MIN (strlen (string), width);
+  size_t pad_bytes = width - data_bytes;
+
+  write_bytes (w, string, data_bytes);
+  /* Use the shared padding helper instead of a private putc loop. */
+  write_spaces (w, pad_bytes);
+}
+
+/* Writes the SIZE bytes starting at DATA to W's output file.
+   The result of fwrite is not inspected here; other code in
+   this file polls the stream's error state with ferror. */
+static void
+write_bytes (struct sfm_writer *w, const void *data, size_t size)
+{
+  FILE *out = w->file;
+
+  fwrite (data, 1, size, out);
+}
+
+/* Writes N bytes of value 0 to W's output file. */
+static void
+write_zeros (struct sfm_writer *w, size_t n)
+{
+  for (; n > 0; n--)
+    putc (0, w->file);
+}
+
+/* Writes N space characters to W's output file. */
+static void
+write_spaces (struct sfm_writer *w, size_t n)
+{
+  for (; n > 0; n--)
+    putc (' ', w->file);
+}
(unsigned int) dict_get_var_cnt (d));
tab_text (t, 0, 6, TAB_LEFT, _("Cases:"));
tab_text (t, 1, 6, TAB_LEFT | TAT_PRINTF,
- info.case_cnt == -1 ? _("Unknown") : "%d", info.case_cnt);
+ info.case_cnt == -1 ? _("Unknown") : "%ld",
+ (long int) info.case_cnt);
tab_text (t, 0, 7, TAB_LEFT, _("Type:"));
tab_text (t, 1, 7, TAB_LEFT, _("System File."));
tab_text (t, 0, 8, TAB_LEFT, _("Weight:"));