1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
27 /* Definition of the max length of a short string value, generally
29 #define MAX_SHORT_STRING ((SIZEOF_DOUBLE)>=8 ? ((SIZEOF_DOUBLE)+1)/2*2 : 8)
30 #define MIN_LONG_STRING (MAX_SHORT_STRING+1)
32 /* FYI: It is a bad situation if sizeof(flt64) < MAX_SHORT_STRING:
33 then short string missing values can be truncated in system files
34 because there's only room for as many characters as can fit in a
36 #if MAX_SHORT_STRING > 8
37 #error MAX_SHORT_STRING must be less than 8.
41 #define SYSMIS (-DBL_MAX)
42 #define LOWEST second_lowest_value
43 #define HIGHEST DBL_MAX
45 /* Describes one value, which is either a floating-point number or a
49 /* A numeric value. */
52 /* A short-string value. */
53 unsigned char s[MAX_SHORT_STRING];
55 /* This member is used by data-in.c to return a string result,
56 since it may need to return a long string. As currently
57 implemented, it's a pointer to a static internal buffer in
60 Also used by evaluate_expression() to return a string result.
61 As currently implemented, it's a pointer to a dynamic buffer in
62 the appropriate expression.
64 Also used by the AGGREGATE procedure in handling string
68 /* Sometimes we insert `value's in a hash table. */
69 unsigned long hash[SIZEOF_DOUBLE / SIZEOF_LONG];
72 /* Describes one value label. */
75 union value v; /* The value being labeled. */
76 char *s; /* Pointer to malloc()'d label. */
77 int ref_count; /* Reference count. */
80 /* Frequency tables. */
82 /* Frequency table entry. */
85 union value v; /* The value. */
86 double c; /* The number of occurrences of the value. */
89 /* Types of frequency tables. */
96 /* Entire frequency table. */
99 int mode; /* FRQM_GENERAL or FRQM_INTEGER. */
102 struct avl_tree *tree; /* Undifferentiated data. */
105 double *vector; /* Frequencies proper. */
106 int min, max; /* The boundaries of the table. */
107 double out_of_range; /* Sum of weights of out-of-range values. */
108 double sysmis; /* Sum of weights of SYSMIS values. */
111 struct freq *valid; /* Valid freqs. */
112 int n_valid; /* Number of valid freqs. */
114 struct freq *missing; /* Missing freqs. */
115 int n_missing; /* Number of missing freqs. */
118 double total_cases; /* Sum of weights of all cases. */
119 double valid_cases; /* Sum of weights of valid cases. */
122 /* A complete set of 3 frequency tables. */
125 struct freq_tab miss; /* Includes user-missing values. */
126 struct freq_tab no_miss; /* Excludes user-missing values. */
127 struct freq_tab sel; /* Identical to either miss or no_miss. */
130 /* Procedures' private per-variable data. */
132 /* Structure name suffixes for private data:
133 _proc: for a procedure (e.g., LIST -> list_proc).
134 _trns: for a transformation (e.g., COMPUTE -> compute_trns).
135 _pgm: for an input program (e.g., DATA LIST -> data_list_pgm). */
137 /* CROSSTABS private data. */
140 /* Integer mode only. */
141 int min; /* Minimum value. */
142 int max; /* Maximum value + 1. */
143 int count; /* max - min. */
146 /* FREQUENCIES private data. */
149 frq_mean = 0, frq_semean, frq_median, frq_mode, frq_stddev, frq_variance,
150 frq_kurt, frq_sekurt, frq_skew, frq_seskew, frq_range, frq_min, frq_max,
154 struct frequencies_proc
157 struct freq_tab tab; /* Frequencies table to use. */
160 int n_groups; /* Number of groups. */
161 double *groups; /* Groups. */
164 double stat[frq_n_stats];
167 /* LIST private data. */
170 int newline; /* Whether a new line begins here. */
171 int width; /* Field width. */
172 int vert; /* Whether to print the varname vertically. */
175 /* DESCRIPTIVES private data. Note that the DESCRIPTIVES procedure also
176 has a transformation, descriptives_trns. */
179 /* As these are used as bit indexes, there must be 32 or fewer.
180 Be very careful in adjusting these, see the structure below
181 and the table in descriptives.q. */
182 dsc_mean = 0, dsc_semean, dsc_stddev, dsc_variance, dsc_kurt,
183 dsc_sekurt, dsc_skew, dsc_seskew, dsc_range, dsc_min,
184 dsc_max, dsc_sum, dsc_n_stats
187 struct descriptives_proc
190 int dup; /* Finds duplicates in list of
192 char zname[10]; /* Name for z-score variable. */
195 double valid, miss; /* Valid, missing--general. */
197 /* Mean, moments about the mean. */
198 double X_bar, M2, M3, M4;
202 double stats[dsc_n_stats]; /* Everything glommed together. */
205 /* GET private data. */
208 int fv, nv; /* First value, # of values (presumably as fv, nv in a variable's dictionary entry -- confirm). */
214 SRT_ASCEND, /* A, B, C, ..., X, Y, Z. */
215 SRT_DESCEND /* Z, Y, X, ..., C, B, A. */
218 /* SORT CASES private data. */
219 struct sort_cases_proc
221 int order; /* SRT_ASCEND or SRT_DESCEND. */
224 /* MODIFY VARS private data. */
225 struct modify_vars_proc
227 char new_name[9]; /* Variable's new name. */
228 int drop_this_var; /* 0=keep this var, 1=drop this var. */
229 struct variable *next; /* Next in linked list. */
232 /* MEANS private data. */
235 double min, max; /* Range for integer mode. */
238 /* Different types of variables for MATRIX DATA procedure. Order is
239 important: these are used for sort keys. */
242 MXD_SPLIT, /* SPLIT FILE variables. */
243 MXD_ROWTYPE, /* ROWTYPE_. */
244 MXD_FACTOR, /* Factor variables. */
245 MXD_VARNAME, /* VARNAME_. */
246 MXD_CONTINUOUS, /* Continuous variables. */
251 /* MATRIX DATA private data. */
252 struct matrix_data_proc
254 int vartype; /* Variable type. */
255 int subtype; /* Subtype. */
258 /* MATCH FILES private data. */
259 struct match_files_proc
261 struct variable *master; /* Corresponding master file variable. */
265 /* Script variables. */
270 NUMERIC, /* A numeric variable. */
271 ALPHA /* A string variable. (STRING is pre-empted by lexer.h) */
274 /* Types of missing values. Order is significant, see
275 mis-val.c:parse_numeric(), sfm-read.c:sfm_read_dictionary()
276 sfm-write.c:sfm_write_dictionary(),
277 sysfile-info.c:cmd_sysfile_info(), mis-val.c:copy_missing_values(),
278 pfm-read.c:read_variables(), pfm-write.c:write_variables(),
279 apply-dict.c:cmd_apply_dictionary(), and more (?). */
282 MISSING_NONE, /* No user-missing values. */
283 MISSING_1, /* One user-missing value. */
284 MISSING_2, /* Two user-missing values. */
285 MISSING_3, /* Three user-missing values. */
286 MISSING_RANGE, /* [a,b]. */
287 MISSING_LOW, /* (-inf,a]. */
288 MISSING_HIGH, /* (a,+inf). NOTE(review): LOW is closed at a but HIGH is written as open at a; confirm against mis-val.c. */
289 MISSING_RANGE_1, /* [a,b], c. */
290 MISSING_LOW_1, /* (-inf,a], b. */
291 MISSING_HIGH_1, /* (a,+inf), b. */
295 /* A variable's dictionary entry. Note: don't reorder name[] from the
296 first element; a pointer to `variable' should be a pointer to
300 /* Required by parse_variables() to be in this order. */
301 char name[9]; /* As a string. */
302 int index; /* Index into its dictionary's var[]. */
303 int type; /* NUMERIC or ALPHA. */
304 int foo; /* Used for temporary storage. */
306 /* Also important but parse_variables() doesn't need it. Still,
307 check before reordering. */
308 int width; /* Size of string variables in chars. */
309 int fv, nv; /* Index into `value's, number of values. */
310 int left; /* 0=do not LEAVE, 1=LEAVE. */
312 /* Missing values. */
313 int miss_type; /* One of the MISSING_* constants. */
314 union value missing[3]; /* User-missing value. */
316 /* Display formats. */
317 struct fmt_spec print; /* Default format for PRINT. */
318 struct fmt_spec write; /* Default format for WRITE. */
321 struct avl_tree *val_lab; /* Avltree of value_label structures. */
322 char *label; /* Variable label. */
324 /* Per-procedure info. */
328 struct crosstab_proc crs;
329 struct descriptives_proc dsc;
330 struct frequencies_proc frq;
331 struct list_proc lst;
332 struct means_proc mns;
333 struct sort_cases_proc srt;
334 struct modify_vars_proc mfv;
335 struct matrix_data_proc mxd;
336 struct match_files_proc mtf;
343 /* A single case. (This doesn't need to be a struct anymore, but it
344 remains so for hysterical raisins.) */
352 /* Complete dictionary state. */
355 struct variable **var; /* Variable descriptions. */
356 struct avl_tree *var_by_name; /* Variables arranged by name. */
357 int nvar; /* Number of variables. */
359 int N; /* Current case limit (N command). */
360 int nval; /* Number of value structures per case. */
362 int n_splits; /* Number of SPLIT FILE variables. */
363 struct variable **splits; /* List of SPLIT FILE vars. */
365 char *label; /* File label. */
367 int n_documents; /* Number of lines of documents. */
368 char *documents; /* Documents; 80*n_documents bytes in size. */
370 int weight_index; /* `value' index of $WEIGHT, or -1 if none.
371 Call update_weighting() before using! */
372 char weight_var[9]; /* Name of WEIGHT variable. */
374 char filter_var[9]; /* Name of FILTER variable. */
375 /* filter_var must remain the last field; before adding a field
376 after it, see temporary.c:restore_dictionary(). */
379 /* This is the active file dictionary. */
380 extern struct dictionary default_dict;
382 /* Transformation state. */
384 /* Default file handle for DATA LIST, REREAD, REPEATING DATA
386 extern struct file_handle *default_handle;
388 /* PROCESS IF expression. */
389 extern struct expression *process_if_expr;
391 /* TEMPORARY support. */
393 /* 1=TEMPORARY has been executed at some point. */
394 extern int temporary;
396 /* If temporary!=0, the saved dictionary. */
397 extern struct dictionary *temp_dict;
399 /* If temporary!=0, index into t_trns[] (declared far below) that
400 gives the point at which data should be written out. -1 means that
401 the data shouldn't be changed since all transformations are
403 extern int temp_trns;
405 /* If FILTER is active, whether it was executed before or after
407 extern int FILTER_before_TEMPORARY;
409 void cancel_temporary (void);
413 int is_varname (const char *);
414 int is_dict_varname (const struct dictionary *, const char *);
416 /* Flags for passing to fill_all_vars(). */
419 FV_NONE = 0, /* No flags. */
420 FV_NO_SYSTEM = 001, /* Don't include system variables. */
421 FV_NO_SCRATCH = 002 /* Don't include scratch variables. */
424 void fill_all_vars (struct variable ***, int *, int flags);
426 int val_lab_cmp (const void *, const void *, void *);
427 char *get_val_lab (const struct variable *, union value, int);
428 void free_val_lab (void *, void *);
429 void free_value_label (struct value_label *);
430 struct avl_tree *copy_value_labels (struct avl_tree *);
432 void dump_split_vars (const struct ccase *);
434 int is_num_user_missing (double, const struct variable *);
435 int is_str_user_missing (const unsigned char[], const struct variable *);
436 int is_missing (const union value *, const struct variable *);
437 int is_system_missing (const union value *, const struct variable *);
438 int is_user_missing (const union value *, const struct variable *);
439 void copy_missing_values (struct variable *dest, const struct variable *src);
441 int cmp_variable (const void *, const void *, void *);
444 struct variable *force_create_variable (struct dictionary *, const char *name,
445 int type, int width);
446 struct variable *force_dup_variable (struct dictionary *,
447 const struct variable *src,
450 #define force_create_variable(A, B, C, D) \
451 create_variable (A, B, C, D)
452 #define force_dup_variable(A, B, C) \
453 dup_variable (A, B, C)
456 struct variable *create_variable (struct dictionary *, const char *name,
457 int type, int width);
458 void delete_variable (struct dictionary *, struct variable *v);
459 struct variable *find_variable (const char *name);
460 struct variable *find_dict_variable (const struct dictionary *,
462 void init_variable (struct dictionary *, struct variable *, const char *name,
463 int type, int width);
464 void replace_variable (struct variable *, const char *name,
465 int type, int width);
466 void clear_variable (struct dictionary *, struct variable *);
467 void rename_variable (struct dictionary *, struct variable *v,
468 const char *new_name);
469 void discard_variables (void);
470 void clear_default_dict (void);
471 void copy_variable (struct variable *dest, const struct variable *src);
472 struct variable *dup_variable (struct dictionary *dict,
473 const struct variable *src, const char *name);
475 struct variable *update_weighting (struct dictionary *);
476 void stop_weighting (struct dictionary *);
478 struct dictionary *save_dictionary (void);
479 void restore_dictionary (struct dictionary *);
480 void free_dictionary (struct dictionary *);
481 struct dictionary *new_dictionary (int copy);
483 /* Transformations. */
485 /* Header for all transformations. */
488 /* Index into t_trns[]. */
491 /* Transformation proc. */
492 int (*proc) (struct trns_header *, struct ccase *);
494 /* Garbage collector proc. */
495 void (*free) (struct trns_header *);
498 /* Array of transformations */
499 extern struct trns_header **t_trns;
501 /* Number of transformations, maximum number in array currently. */
502 extern int n_trns, m_trns;
504 /* Index of first transformation that is really a transformation. Any
505 transformations before this belong to INPUT PROGRAM. */
508 void add_transformation (struct trns_header *trns);
509 void cancel_transformations (void);
511 /* Variable parsers. */
513 /* Only parse_variables() supports options other than PV_APPEND,
517 PV_NONE = 0, /* No options. */
518 PV_SINGLE = 0001, /* Restrict to a single varname or TO use. */
519 PV_DUPLICATE = 0002, /* Don't merge duplicates. */
520 PV_APPEND = 0004, /* Append to existing list. */
521 PV_NO_DUPLICATE = 0010, /* Error on duplicates. */
522 PV_NUMERIC = 0020, /* Vars must be numeric. */
523 PV_STRING = 0040, /* Vars must be string. */
524 PV_SAME_TYPE = 00100, /* All vars must be the same type. */
525 PV_NO_SCRATCH = 00200 /* Disallow scratch variables. */
528 struct variable *parse_variable (void);
529 struct variable *parse_dict_variable (struct dictionary *);
530 int parse_variables (struct dictionary *dict, struct variable ***v,
531 int *nv, int pv_opts);
532 int parse_DATA_LIST_vars (char ***names, int *nnames, int pv_opts);
533 int parse_mixed_vars (char ***names, int *nnames, int pv_opts);