1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <data/sys-file-private.h>

#include <stdbool.h>

#include <data/dictionary.h>
#include <data/value.h>
#include <data/variable.h>
#include <libpspp/assertion.h>
/* Number of bytes really stored in each segment of a very long
   string variable. */
#define REAL_VLS_CHUNK 255

/* Number of bytes per segment by which the amount of space for
   very long string variables is allocated. */
#define EFFECTIVE_VLS_CHUNK 252
/* Returns true if WIDTH is a very long string width,
   false otherwise.

   The sizing functions in this file treat string values less
   than 256 bytes wide as ordinary single-segment strings, so a
   width counts as very long from 256 bytes upward.

   NOTE(review): the body of this function was not visible in the
   reviewed extract; the 256-byte threshold is taken from the
   comment on sfm_segment_used_bytes.  Confirm against the
   upstream file. */
static bool
is_very_long (int width)
{
  return width >= 256;
}
/* Returns the smaller of A or B.
   (Defined as a function to avoid evaluating A or B more than
   once.)

   NOTE(review): the body was not visible in the reviewed
   extract; it is written here directly from the contract
   above. */
static int
min_int (int a, int b)
{
  return a < b ? a : b;
}
/* Returns the larger of A or B.
   (Defined as a function to avoid evaluating A or B more than
   once.)

   NOTE(review): the body was not visible in the reviewed
   extract; it is written here directly from the contract
   above. */
static int
max_int (int a, int b)
{
  return a > b ? a : b;
}
63 /* Returns the number of bytes of uncompressed case data used for
64 writing a variable of the given WIDTH to a system file. All
65 required space is included, including trailing padding and
68 sfm_width_to_bytes (int width)
76 else if (!is_very_long (width))
80 int chunks = width / EFFECTIVE_VLS_CHUNK;
81 int remainder = width % EFFECTIVE_VLS_CHUNK;
82 bytes = remainder + (chunks * ROUND_UP (REAL_VLS_CHUNK, 8));
84 return ROUND_UP (bytes, 8);
/* Returns the number of 8-byte units (octs) used to write data
   for a variable of the given WIDTH.  This is the byte count
   from sfm_width_to_bytes divided by 8. */
int
sfm_width_to_octs (int width)
{
  return sfm_width_to_bytes (width) / 8;
}
95 /* Returns the number of "segments" used for writing case data
96 for a variable of the given WIDTH. A segment is a physical
97 variable in the system file that represents some piece of a
98 logical variable as seen by a PSPP user. Only very long
99 string variables have more than one segment. */
101 sfm_width_to_segments (int width)
105 return !is_very_long (width) ? 1 : DIV_RND_UP (width, EFFECTIVE_VLS_CHUNK);
108 /* Returns the width to allocate to the given SEGMENT within a
109 variable of the given WIDTH. A segment is a physical variable
110 in the system file that represents some piece of a logical
111 variable as seen by a PSPP user. */
113 sfm_segment_alloc_width (int width, int segment)
115 assert (segment < sfm_width_to_segments (width));
117 return (!is_very_long (width) ? width
118 : segment < sfm_width_to_segments (width) - 1 ? 255
119 : width - segment * EFFECTIVE_VLS_CHUNK);
/* Returns the number of bytes to allocate to the given SEGMENT
   within a variable of the given width.  This is the same as
   sfm_segment_alloc_width, except that a numeric value takes up
   8 bytes despite having a width of 0, and that the result for a
   string segment is passed through ROUND_UP (..., 8).

   SEGMENT must be less than sfm_width_to_segments (WIDTH). */
int
sfm_segment_alloc_bytes (int width, int segment)
{
  assert (segment < sfm_width_to_segments (width));

  if (width == 0)
    return 8;

  return ROUND_UP (sfm_segment_alloc_width (width, segment), 8);
}
134 /* Returns the number of bytes in the given SEGMENT within a
135 variable of the given WIDTH that are actually used to store
136 data. For a numeric value (WIDTH of 0), this is 8 bytes; for
137 a string value less than 256 bytes wide, it is WIDTH bytes.
138 For very long string values, the calculation is more
139 complicated and ranges between 255 bytes for the first segment
140 to as little as 0 bytes for final segments. */
142 sfm_segment_used_bytes (int width, int segment)
144 assert (segment < sfm_width_to_segments (width));
145 return (width == 0 ? 8
146 : !is_very_long (width) ? width
147 : max_int (0, min_int (width - REAL_VLS_CHUNK * segment,
/* Returns the number of bytes at the end of the given SEGMENT
   within a variable of the given WIDTH that are not used for
   data; that is, the number of bytes that must be padded with
   data that a reader ignores.  This is the segment's allocated
   byte count minus its used byte count. */
int
sfm_segment_padding (int width, int segment)
{
  int allocated = sfm_segment_alloc_bytes (width, segment);
  int used = sfm_segment_used_bytes (width, segment);

  return allocated - used;
}
162 /* Returns the byte offset of the start of the given SEGMENT
163 within a variable of the given WIDTH. The first segment
164 starts at offset 0; only very long string variables have any
167 sfm_segment_offset (int width, int segment)
169 assert (segment < sfm_width_to_segments (width));
170 return min_int (REAL_VLS_CHUNK * segment, width);
173 /* Returns the byte offset of the start of the given SEGMENT
174 within a variable of the given WIDTH, given the (incorrect)
175 assumption that there are EFFECTIVE_VLS_CHUNK bytes per
176 segment. (Use of this function is questionable at best.) */
178 sfm_segment_effective_offset (int width, int segment)
180 assert (segment < sfm_width_to_segments (width));
181 return EFFECTIVE_VLS_CHUNK * segment;
/* Creates and initializes an array of struct sfm_vars that
   describe how a case drawn from dictionary DICT is laid out in
   a system file.  Returns the number of segments in a case.  A
   segment is a physical variable in the system file that
   represents some piece of a logical variable as seen by a PSPP
   user.

   The array is allocated with malloc and stored in *SFM_VARS,
   and its number of elements is stored in *SFM_VAR_CNT.  The
   caller is responsible for freeing it when it is no longer
   needed. */
196 sfm_dictionary_to_sfm_vars (const struct dictionary *dict,
197 struct sfm_var **sfm_vars, size_t *sfm_var_cnt)
199 size_t var_cnt = dict_get_var_cnt (dict);
203 /* Estimate the number of sfm_vars that will be needed.
204 We might not need all of these, because very long string
205 variables can have segments that are all padding, which do
206 not need sfm_vars of their own. */
208 for (i = 0; i < var_cnt; i++)
210 const struct variable *v = dict_get_var (dict, i);
211 segment_cnt += sfm_width_to_segments (var_get_width (v));
214 /* Compose the sfm_vars. */
215 *sfm_vars = xnmalloc (segment_cnt, sizeof **sfm_vars);
217 for (i = 0; i < var_cnt; i++)
219 const struct variable *dv = dict_get_var (dict, i);
220 int width = var_get_width (dv);
223 for (j = 0; j < sfm_width_to_segments (width); j++)
225 int used_bytes = sfm_segment_used_bytes (width, j);
226 int padding = sfm_segment_padding (width, j);
230 sv = &(*sfm_vars)[(*sfm_var_cnt)++];
231 sv->width = width == 0 ? 0 : used_bytes;
232 sv->case_index = var_get_case_index (dv);
233 sv->offset = sfm_segment_offset (width, j);
234 sv->padding = padding;
238 /* Segment is all padding. Just add it to the
239 previous segment. (Otherwise we'd have an
240 ambiguity whether ->width of 0 indicates a
241 numeric variable or an all-padding segment.) */
242 sv = &(*sfm_vars)[*sfm_var_cnt - 1];
243 sv->padding += padding;
245 assert ((sv->width + sv->padding) % 8 == 0);