1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25 #include "group_proc.h"
28 /* Frequency tables. */
30 /* Frequency table entry: one value paired with its count. */
33 union value v; /* The value. */
34 double c; /* The number of occurrences of the value; a double, presumably so that weighted counts fit -- TODO confirm. */
37 /* Types of frequency tables. */
44 /* Entire frequency table. */
47 int mode; /* FRQM_GENERAL or FRQM_INTEGER. */
50 struct hsh_table *data; /* Undifferentiated data (a hash table; presumably the FRQM_GENERAL representation -- TODO confirm). */
53 double *vector; /* Frequencies proper (presumably the FRQM_INTEGER representation -- TODO confirm). */
54 int min, max; /* The boundaries of the table. */
55 double out_of_range; /* Sum of weights of out-of-range values. */
56 double sysmis; /* Sum of weights of SYSMIS values. */
59 struct freq *valid; /* Valid freqs. */
60 int n_valid; /* Number of valid freqs. */
62 struct freq *missing; /* Missing freqs. */
63 int n_missing; /* Number of missing freqs. */
66 double total_cases; /* Sum of weights of all cases. */
67 double valid_cases; /* Sum of weights of valid cases. */
70 /* Procedures' private per-variable data. */
72 /* Structure name suffixes for private data:
73 _proc: for a procedure (e.g., LIST -> list_proc).
74 _trns: for a transformation (e.g., COMPUTE -> compute_trns).
75 _pgm: for an input program (e.g., DATA LIST -> data_list_pgm). */
77 /* CROSSTABS private data. */
80 /* Integer mode only. */
81 int min; /* Minimum value. */
82 int max; /* Maximum value + 1 (so an exclusive upper bound). */
83 int count; /* max - min, i.e. the number of integer values in the half-open range [min, max). */
87 /* FREQUENCIES private data. */
90 frq_mean = 0, frq_semean, frq_median, frq_mode, frq_stddev, frq_variance,
91 frq_kurt, frq_sekurt, frq_skew, frq_seskew, frq_range, frq_min, frq_max, /* Statistic identifiers counted up from frq_mean = 0; presumably the indexes into stat[] in struct frequencies_proc -- TODO confirm. */
95 struct frequencies_proc
97 int used; /* 1=This variable already used. */
100 struct freq_tab tab; /* Frequencies table to use. */
103 int n_groups; /* Number of groups. */
104 double *groups; /* Groups (presumably n_groups entries -- TODO confirm). */
107 double stat[frq_n_stats]; /* One slot per statistic; presumably indexed by the frq_* constants -- TODO confirm. */
110 /* LIST private per-variable data. */
113 int newline; /* Whether a new line begins here. */
114 int width; /* Field width. */
115 int vert; /* Whether to print the variable name vertically. */
118 /* GET private data. */
121 int fv, nv; /* First value (presumably an index, like fv in the variable dictionary entry -- TODO confirm), number of values. */
124 /* MEANS private data. */
127 double min, max; /* Range for integer mode. */
130 /* Different types of variables for MATRIX DATA procedure. Order is
131 important: these are used for sort keys, and the enumerators listed
here carry no explicit values, so they increase in the order written. */
134 MXD_SPLIT, /* SPLIT FILE variables. */
135 MXD_ROWTYPE, /* ROWTYPE_. */
136 MXD_FACTOR, /* Factor variables. */
137 MXD_VARNAME, /* VARNAME_. */
138 MXD_CONTINUOUS, /* Continuous variables. */
143 /* MATRIX DATA private data. */
144 struct matrix_data_proc
146 int vartype; /* Variable type; presumably one of the MXD_* constants -- TODO confirm. */
147 int subtype; /* Subtype; its meaning is not documented in this header. */
150 /* MATCH FILES private data. */
151 struct match_files_proc
153 struct variable *master; /* Corresponding master file variable. */
157 /* Script variables. */
162 NUMERIC, /* A numeric variable. */
163 ALPHA /* A string variable. (STRING is pre-empted by lexer.h) */
166 /* Types of missing values. Order is significant; see
167 mis-val.c:parse_numeric(), sfm-read.c:sfm_read_dictionary(),
168 sfm-write.c:sfm_write_dictionary(),
169 sysfile-info.c:cmd_sysfile_info(), mis-val.c:copy_missing_values(),
170 pfm-read.c:read_variables(), pfm-write.c:write_variables(),
171 apply-dict.c:cmd_apply_dictionary(), and more (?). */
174 MISSING_NONE, /* No user-missing values. */
175 MISSING_1, /* One user-missing value. */
176 MISSING_2, /* Two user-missing values. */
177 MISSING_3, /* Three user-missing values. */
178 MISSING_RANGE, /* [a,b]. */
179 MISSING_LOW, /* (-inf,a]. */
180 MISSING_HIGH, /* (a,+inf). NOTE(review): confirm whether a itself counts as missing. */
181 MISSING_RANGE_1, /* [a,b], c. */
182 MISSING_LOW_1, /* (-inf,a], b. */
183 MISSING_HIGH_1, /* (a,+inf), b. */
187 /* A variable's dictionary entry. */
190 char name[9]; /* Name as a string; 9 bytes, presumably 8 characters plus a terminating NUL -- TODO confirm. */
191 int index; /* Index into its dictionary's var[]. */
192 int type; /* NUMERIC or ALPHA. */
194 int width; /* Size of string variables in chars. */
195 int fv, nv; /* Index into `value's, number of values. */
196 unsigned init : 1; /* 1=VFM must init and possibly reinit. */
197 unsigned reinit : 1; /* Cases are: 1=reinitialized; 0=left. */
199 /* Missing values. */
200 int miss_type; /* One of the MISSING_* constants. */
201 union value missing[3]; /* User-missing values; presumably miss_type determines how many are in use -- TODO confirm. */
203 /* Display formats. */
204 struct fmt_spec print; /* Default format for PRINT. */
205 struct fmt_spec write; /* Default format for WRITE. */
208 struct val_labs *val_labs; /* Value labels. */
209 char *label; /* Variable label. */
211 /* Per-procedure info: one member per procedure that keeps private per-variable data. */
216 struct crosstab_proc crs;
217 struct frequencies_proc frq;
218 struct list_proc lst;
219 struct means_proc mns;
220 struct matrix_data_proc mxd;
221 struct match_files_proc mtf;
222 struct group_proc grp_data;
227 int compare_variables (const void *, const void *, void *); /* Comparison callback over two opaque pointers plus an auxiliary pointer; presumably the opaque pointers are struct variable objects -- TODO confirm. */
228 unsigned hash_variable (const void *, void *); /* Hash callback with the same opaque-pointer plus auxiliary-pointer shape. */
230 /* Classes of variables. */
233 DC_ORDINARY, /* Ordinary identifier. */
234 DC_SYSTEM, /* System variable. */
235 DC_SCRATCH /* Scratch variable. */
238 enum dict_class dict_class_from_id (const char *name); /* Class for the identifier NAME; presumably decided from its spelling -- TODO confirm. */
239 const char *dict_class_to_name (enum dict_class dict_class); /* String form of DICT_CLASS; the result is const, so callers must not modify it. */
241 /* Vector of variables. */
244 int idx; /* Index for dict_get_vector(). */
245 char name[9]; /* Name. */
246 struct variable **var; /* Vector of variables. */
247 int cnt; /* Number of variables. */
252 /* Complete dictionary state. */
255 struct dictionary *dict_create (void); /* Returns a new dictionary (presumably empty -- TODO confirm). */
256 struct dictionary *dict_clone (const struct dictionary *); /* Returns a new dictionary based on the argument, which is only read (const). */
257 void dict_clear (struct dictionary *);
258 void dict_destroy (struct dictionary *);
260 size_t dict_get_var_cnt (const struct dictionary *);
261 struct variable *dict_get_var (const struct dictionary *, size_t idx);
262 void dict_get_vars (const struct dictionary *,
263 struct variable ***vars, size_t *cnt,
264 unsigned exclude_classes); /* Results come back through VARS and CNT; EXCLUDE_CLASSES is presumably a bit mask built from enum dict_class values -- TODO confirm. */
266 struct variable *dict_create_var (struct dictionary *, const char *,
268 struct variable *dict_create_var_assert (struct dictionary *, const char *,
270 struct variable *dict_clone_var (struct dictionary *, const struct variable *,
272 void dict_rename_var (struct dictionary *, struct variable *, const char *);
274 struct variable *dict_lookup_var (const struct dictionary *, const char *); /* Looks a variable up by name; presumably returns a null pointer when absent, unlike the _assert variant -- TODO confirm. */
275 struct variable *dict_lookup_var_assert (const struct dictionary *,
277 int dict_contains_var (const struct dictionary *, const struct variable *);
278 void dict_delete_var (struct dictionary *, struct variable *);
279 void dict_delete_vars (struct dictionary *,
280 struct variable *const *, size_t count);
281 void dict_reorder_vars (struct dictionary *,
282 struct variable *const *, size_t count);
283 int dict_rename_vars (struct dictionary *,
284 struct variable **, char **new_names,
285 size_t count, char **err_name); /* Renames COUNT variables in one call; the int result and ERR_NAME presumably report a name that could not be used -- TODO confirm. */
288 struct variable *dict_get_weight (const struct dictionary *);
289 double dict_get_case_weight (const struct dictionary *,
290 const struct ccase *, int *); /* Weight of one case; the role of the int pointer argument is not visible in this header -- TODO confirm. */
291 void dict_set_weight (struct dictionary *, struct variable *);
293 struct variable *dict_get_filter (const struct dictionary *);
294 void dict_set_filter (struct dictionary *, struct variable *);
296 int dict_get_case_limit (const struct dictionary *);
297 void dict_set_case_limit (struct dictionary *, int);
299 int dict_get_next_value_idx (const struct dictionary *);
300 size_t dict_get_case_size (const struct dictionary *);
302 void dict_compact_values (struct dictionary *);
303 size_t dict_get_compacted_value_cnt (const struct dictionary *);
304 int *dict_get_compacted_idx_to_fv (const struct dictionary *); /* Returns a non-const int pointer; who owns and frees it is not visible in this header -- TODO confirm. */
306 struct variable *const *dict_get_split_vars (const struct dictionary *); /* The returned array is read-only (const elements); its length comes from dict_get_split_cnt. */
307 size_t dict_get_split_cnt (const struct dictionary *);
308 void dict_set_split_vars (struct dictionary *,
309 struct variable *const *, size_t cnt);
311 const char *dict_get_label (const struct dictionary *);
312 void dict_set_label (struct dictionary *, const char *);
314 const char *dict_get_documents (const struct dictionary *);
315 void dict_set_documents (struct dictionary *, const char *);
317 int dict_create_vector (struct dictionary *,
319 struct variable **, size_t cnt); /* The int result presumably reports success or failure -- TODO confirm. */
320 const struct vector *dict_get_vector (const struct dictionary *,
322 size_t dict_get_vector_cnt (const struct dictionary *);
323 const struct vector *dict_lookup_vector (const struct dictionary *,
325 void dict_clear_vectors (struct dictionary *);
327 void discard_variables (void); /* Takes no dictionary argument, so it presumably acts on global state such as default_dict -- TODO confirm. */
329 /* This is the active file dictionary. */
330 extern struct dictionary *default_dict;
332 /* Transformation state. */
334 /* Default file handle for DATA LIST, REREAD, REPEATING DATA
336 extern struct file_handle *default_handle;
338 /* PROCESS IF expression. */
339 extern struct expression *process_if_expr;
341 /* TEMPORARY support. */
343 /* 1=TEMPORARY has been executed at some point. */
344 extern int temporary;
346 /* If temporary!=0, the saved dictionary. */
347 extern struct dictionary *temp_dict;
349 /* If temporary!=0, index into t_trns[] (declared far below) that
350 gives the point at which data should be written out. -1 means that
351 the data shouldn't be changed since all transformations are
353 extern int temp_trns;
355 /* If FILTER is active, whether it was executed before or after
357 extern int FILTER_before_TEMPORARY;
359 void cancel_temporary (void);
363 void dump_split_vars (const struct ccase *);
364 typedef int (* is_missing_func )(const union value *, const struct variable *); /* Pointer type whose signature matches is_missing, is_system_missing and is_user_missing below. */
366 int is_num_user_missing (double, const struct variable *); /* Takes the number directly rather than a union value. */
367 int is_str_user_missing (const unsigned char[], const struct variable *); /* Takes the string bytes directly rather than a union value. */
368 int is_missing (const union value *, const struct variable *);
369 int is_system_missing (const union value *, const struct variable *);
370 int is_user_missing (const union value *, const struct variable *);
371 void copy_missing_values (struct variable *dest, const struct variable *src); /* DEST is written; SRC is only read (const). */
373 /* Transformations. */
376 typedef int trns_proc_func (struct trns_header *, struct ccase *, int); /* Function type of the proc member of a transformation header; the meaning of the int argument and of the result is not visible in this header -- TODO confirm. */
377 typedef void trns_free_func (struct trns_header *); /* Function type of the free member of a transformation header. */
379 /* Header for all transformations; presumably placed at the start of each transformation structure so that it can be passed as a struct trns_header pointer -- TODO confirm. */
382 int index; /* Index into t_trns[]. */
383 trns_proc_func *proc; /* Transformation proc. */
384 trns_free_func *free; /* Garbage collector proc. */
387 /* Array of transformations (an array of pointers to transformation headers). */
388 extern struct trns_header **t_trns;
390 /* n_trns: number of transformations; m_trns: maximum number the array can currently hold. */
391 extern int n_trns, m_trns;
393 /* Index of first transformation that is really a transformation. Any
394 transformations before this belong to INPUT PROGRAM. */
397 void add_transformation (struct trns_header *trns); /* Presumably appends TRNS to t_trns[] -- TODO confirm. */
398 void cancel_transformations (void); /* Takes no arguments, so it presumably operates on the global t_trns[] state -- TODO confirm. */
402 struct var_set *var_set_create_from_dict (const struct dictionary *d); /* Builds a var_set from dictionary D; the next declaration builds one from an array of variables instead. */
403 struct var_set *var_set_create_from_array (struct variable *const *var,
406 size_t var_set_get_cnt (const struct var_set *vs);
407 struct variable *var_set_get_var (const struct var_set *vs, size_t idx);
408 struct variable *var_set_lookup_var (const struct var_set *vs,
410 void var_set_destroy (struct var_set *vs); /* Presumably releases a set made by either var_set_create_* function -- TODO confirm. */
412 /* Variable parsers. */
416 PV_NONE = 0, /* No options. The nonzero values below are octal constants, each a distinct single bit. */
417 PV_SINGLE = 0001, /* Restrict to a single name or TO use. */
418 PV_DUPLICATE = 0002, /* Don't merge duplicates. */
419 PV_APPEND = 0004, /* Append to existing list. */
420 PV_NO_DUPLICATE = 0010, /* Error on duplicates. */
421 PV_NUMERIC = 0020, /* Vars must be numeric. */
422 PV_STRING = 0040, /* Vars must be string. */
423 PV_SAME_TYPE = 00100, /* All vars must be the same type. */
424 PV_NO_SCRATCH = 00200 /* Disallow scratch variables. */
427 struct variable *parse_variable (void);
428 struct variable *parse_dict_variable (const struct dictionary *); /* Same result type as parse_variable but with an explicit dictionary argument. */
429 int parse_variables (const struct dictionary *, struct variable ***, int *,
431 int parse_var_set_vars (const struct var_set *, struct variable ***, int *,
433 int parse_DATA_LIST_vars (char ***names, int *cnt, int opts); /* Results come back through NAMES and CNT; OPTS is presumably a combination of the PV_* flags -- TODO confirm. */
434 int parse_mixed_vars (char ***names, int *cnt, int opts); /* Same parameter list as parse_DATA_LIST_vars. */
438 /* Return a string representing this variable, in the form most
439 appropriate from a human factors perspective.
440 (i.e., the label if it has one, otherwise the name.)
442 const char * var_to_string(const struct variable *var);