1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2006, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <data/sys-file-private.h>

#include <stdbool.h>

#include <data/dictionary.h>
#include <data/value.h>
#include <data/variable.h>
#include <libpspp/assertion.h>
#include <libpspp/misc.h>
/* Number of bytes really stored in each segment of a very long
   string variable. */
#define REAL_VLS_CHUNK 255

/* Number of bytes per segment by which the amount of space for
   very long string variables is allocated. */
#define EFFECTIVE_VLS_CHUNK 252
/* Returns true if WIDTH is a very long string width, that is, a
   string 256 or more bytes wide, false otherwise.  Numeric
   variables (WIDTH of 0) and strings less than 256 bytes wide are
   not very long. */
static bool
is_very_long (int width)
{
  return width >= 256;
}
/* Returns the smaller of A or B.
   (Defined as a function to avoid evaluating A or B more than
   once.) */
static int
min_int (int a, int b)
{
  return a < b ? a : b;
}
/* Returns the larger of A or B.
   (Defined as a function to avoid evaluating A or B more than
   once.) */
static int
max_int (int a, int b)
{
  return a > b ? a : b;
}
64 /* Returns the number of bytes of uncompressed case data used for
65 writing a variable of the given WIDTH to a system file. All
66 required space is included, including trailing padding and
69 sfm_width_to_bytes (int width)
77 else if (!is_very_long (width))
81 int chunks = width / EFFECTIVE_VLS_CHUNK;
82 int remainder = width % EFFECTIVE_VLS_CHUNK;
83 bytes = remainder + (chunks * ROUND_UP (REAL_VLS_CHUNK, 8));
85 return ROUND_UP (bytes, 8);
/* Returns the number of 8-byte units (octs) used to write data
   for a variable of the given WIDTH.  This is
   sfm_width_to_bytes (WIDTH), which is always a multiple of 8,
   divided by 8. */
int
sfm_width_to_octs (int width)
{
  return sfm_width_to_bytes (width) / 8;
}
96 /* Returns the number of "segments" used for writing case data
97 for a variable of the given WIDTH. A segment is a physical
98 variable in the system file that represents some piece of a
99 logical variable as seen by a PSPP user. Only very long
100 string variables have more than one segment. */
102 sfm_width_to_segments (int width)
106 return !is_very_long (width) ? 1 : DIV_RND_UP (width, EFFECTIVE_VLS_CHUNK);
109 /* Returns the width to allocate to the given SEGMENT within a
110 variable of the given WIDTH. A segment is a physical variable
111 in the system file that represents some piece of a logical
112 variable as seen by a PSPP user. */
114 sfm_segment_alloc_width (int width, int segment)
116 assert (segment < sfm_width_to_segments (width));
118 return (!is_very_long (width) ? width
119 : segment < sfm_width_to_segments (width) - 1 ? 255
120 : width - segment * EFFECTIVE_VLS_CHUNK);
/* Returns the number of bytes to allocate to the given SEGMENT
   within a variable of the given width.  This is the same as
   sfm_segment_alloc_width, rounded up to a multiple of 8, except
   that a numeric value takes up 8 bytes despite having a width
   of 0.

   SEGMENT must be less than sfm_width_to_segments (WIDTH). */
int
sfm_segment_alloc_bytes (int width, int segment)
{
  assert (segment < sfm_width_to_segments (width));
  return (width == 0 ? 8
          : ROUND_UP (sfm_segment_alloc_width (width, segment), 8));
}
135 /* Returns the number of bytes in the given SEGMENT within a
136 variable of the given WIDTH that are actually used to store
137 data. For a numeric value (WIDTH of 0), this is 8 bytes; for
138 a string value less than 256 bytes wide, it is WIDTH bytes.
139 For very long string values, the calculation is more
140 complicated and ranges between 255 bytes for the first segment
141 to as little as 0 bytes for final segments. */
143 sfm_segment_used_bytes (int width, int segment)
145 assert (segment < sfm_width_to_segments (width));
146 return (width == 0 ? 8
147 : !is_very_long (width) ? width
148 : max_int (0, min_int (width - REAL_VLS_CHUNK * segment,
/* Returns the number of bytes at the end of the given SEGMENT
   within a variable of the given WIDTH that are not used for
   data; that is, the number of bytes that must be padded with
   data that a reader ignores.  This is the difference between
   the bytes allocated to the segment and the bytes it actually
   uses. */
int
sfm_segment_padding (int width, int segment)
{
  return (sfm_segment_alloc_bytes (width, segment)
          - sfm_segment_used_bytes (width, segment));
}
163 /* Returns the byte offset of the start of the given SEGMENT
164 within a variable of the given WIDTH. The first segment
165 starts at offset 0; only very long string variables have any
168 sfm_segment_offset (int width, int segment)
170 assert (segment < sfm_width_to_segments (width));
171 return min_int (REAL_VLS_CHUNK * segment, width);
174 /* Returns the byte offset of the start of the given SEGMENT
175 within a variable of the given WIDTH, given the (incorrect)
176 assumption that there are EFFECTIVE_VLS_CHUNK bytes per
177 segment. (Use of this function is questionable at best.) */
179 sfm_segment_effective_offset (int width, int segment)
181 assert (segment < sfm_width_to_segments (width));
182 return EFFECTIVE_VLS_CHUNK * segment;
185 /* Creates and initializes an array of struct sfm_vars that
186 describe how a case drawn from dictionary DICT is laid out in
187 a system file. Returns the number of segments in a case. A
188 segment is a physical variable in the system file that
189 represents some piece of a logical variable as seen by a PSPP
192 The array is allocated with malloc and stored in *SFM_VARS,
193 and its number of elements is stored in *SFM_VAR_CNT. The
194 caller is responsible for freeing it when it is no longer
197 sfm_dictionary_to_sfm_vars (const struct dictionary *dict,
198 struct sfm_var **sfm_vars, size_t *sfm_var_cnt)
200 size_t var_cnt = dict_get_var_cnt (dict);
204 /* Estimate the number of sfm_vars that will be needed.
205 We might not need all of these, because very long string
206 variables can have segments that are all padding, which do
207 not need sfm_vars of their own. */
209 for (i = 0; i < var_cnt; i++)
211 const struct variable *v = dict_get_var (dict, i);
212 segment_cnt += sfm_width_to_segments (var_get_width (v));
215 /* Compose the sfm_vars. */
216 *sfm_vars = xnmalloc (segment_cnt, sizeof **sfm_vars);
218 for (i = 0; i < var_cnt; i++)
220 const struct variable *dv = dict_get_var (dict, i);
221 int width = var_get_width (dv);
224 for (j = 0; j < sfm_width_to_segments (width); j++)
226 int used_bytes = sfm_segment_used_bytes (width, j);
227 int padding = sfm_segment_padding (width, j);
231 sv = &(*sfm_vars)[(*sfm_var_cnt)++];
232 sv->var_width = width;
233 sv->segment_width = width == 0 ? 0 : used_bytes;
234 sv->case_index = var_get_case_index (dv);
235 sv->offset = sfm_segment_offset (width, j);
236 sv->padding = padding;
240 /* Segment is all padding. Just add it to the
242 sv = &(*sfm_vars)[*sfm_var_cnt - 1];
243 sv->padding += padding;
245 assert ((sv->segment_width + sv->padding) % 8 == 0);