projects
/
pspp
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Added confidence and prediction intervals to model export
[pspp]
/
src
/
descript.c
diff --git
a/src/descript.c
b/src/descript.c
index 0492b33ed2378a232418f368326974eab88eaf00..a7152301abfdfcfcefb93af2dfea68b6d1b3205f 100644
(file)
--- a/
src/descript.c
+++ b/
src/descript.c
@@
-14,8
+14,8
@@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA. */
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
/* FIXME: Many possible optimizations. */
/* FIXME: Many possible optimizations. */
@@
-26,8
+26,10
@@
#include <stdlib.h>
#include "algorithm.h"
#include "alloc.h"
#include <stdlib.h>
#include "algorithm.h"
#include "alloc.h"
+#include "case.h"
#include "casefile.h"
#include "command.h"
#include "casefile.h"
#include "command.h"
+#include "dictionary.h"
#include "lexer.h"
#include "error.h"
#include "magic.h"
#include "lexer.h"
#include "error.h"
#include "magic.h"
@@
-37,6
+39,10
@@
#include "var.h"
#include "vfm.h"
#include "var.h"
#include "vfm.h"
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+#define N_(msgid) msgid
+
/* DESCRIPTIVES private data. */
struct dsc_proc;
/* DESCRIPTIVES private data. */
struct dsc_proc;
@@
-62,7
+68,6
@@
struct dsc_z_score
/* DESCRIPTIVES transformation (for calculating Z-scores). */
struct dsc_trns
{
/* DESCRIPTIVES transformation (for calculating Z-scores). */
struct dsc_trns
{
- struct trns_header h;
struct dsc_z_score *z_scores; /* Array of Z-scores. */
int z_score_cnt; /* Number of Z-scores. */
struct variable **vars; /* Variables for listwise missing checks. */
struct dsc_z_score *z_scores; /* Array of Z-scores. */
int z_score_cnt; /* Number of Z-scores. */
struct variable **vars; /* Variables for listwise missing checks. */
@@
-118,7
+123,7
@@
static const struct dsc_statistic_info dsc_info[DSC_N_STATS] =
struct dsc_var
{
struct variable *v; /* Variable to calculate on. */
struct dsc_var
{
struct variable *v; /* Variable to calculate on. */
- char z_name[
9];
/* Name for z-score variable. */
+ char z_name[
LONG_NAME_LEN + 1];
/* Name for z-score variable. */
double valid, missing; /* Valid, missing counts. */
struct moments *moments; /* Moments. */
double min, max; /* Maximum and minimum values. */
double valid, missing; /* Valid, missing counts. */
struct moments *moments; /* Moments. */
double min, max; /* Maximum and minimum values. */
@@
-164,7
+169,7
@@
static void free_dsc_proc (struct dsc_proc *);
/* Z-score functions. */
static int try_name (struct dsc_proc *dsc, char *name);
static int generate_z_varname (struct dsc_proc *dsc, char *z_name,
/* Z-score functions. */
static int try_name (struct dsc_proc *dsc, char *name);
static int generate_z_varname (struct dsc_proc *dsc, char *z_name,
- const char *name,
in
t *z_cnt);
+ const char *name,
size_
t *z_cnt);
static void dump_z_table (struct dsc_proc *);
static void setup_z_trns (struct dsc_proc *);
static void dump_z_table (struct dsc_proc *);
static void setup_z_trns (struct dsc_proc *);
@@
-180,10
+185,10
@@
cmd_descriptives (void)
{
struct dsc_proc *dsc;
struct variable **vars = NULL;
{
struct dsc_proc *dsc;
struct variable **vars = NULL;
-
in
t var_cnt = 0;
+
size_
t var_cnt = 0;
int save_z_scores = 0;
int save_z_scores = 0;
-
in
t z_cnt = 0;
-
in
t i;
+
size_
t z_cnt = 0;
+
size_
t i;
/* Create and initialize dsc. */
dsc = xmalloc (sizeof *dsc);
/* Create and initialize dsc. */
dsc = xmalloc (sizeof *dsc);
@@
-261,11
+266,7
@@
cmd_descriptives (void)
else if (lex_match_id ("DEFAULT"))
dsc->show_stats |= DEFAULT_STATS;
else
else if (lex_match_id ("DEFAULT"))
dsc->show_stats |= DEFAULT_STATS;
else
- {
- dsc->show_stats |= 1ul << (match_statistic ());
- if (dsc->show_stats == DSC_NONE)
- dsc->show_stats = DEFAULT_STATS;
- }
+ dsc->show_stats |= 1ul << (match_statistic ());
lex_match (',');
}
if (dsc->show_stats == 0)
lex_match (',');
}
if (dsc->show_stats == 0)
@@
-309,7
+310,7
@@
cmd_descriptives (void)
PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
goto error;
PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
goto error;
- dsc->vars = x
realloc (dsc->vars, sizeof *dsc->vars * var_cnt
);
+ dsc->vars = x
nrealloc (dsc->vars, var_cnt, sizeof *dsc->vars
);
for (i = dsc->var_cnt; i < var_cnt; i++)
{
struct dsc_var *dv = &dsc->vars[i];
for (i = dsc->var_cnt; i < var_cnt; i++)
{
struct dsc_var *dv = &dsc->vars[i];
@@
-359,7
+360,7
@@
cmd_descriptives (void)
{
if (save_z_scores)
{
{
if (save_z_scores)
{
-
in
t gen_cnt = 0;
+
size_
t gen_cnt = 0;
for (i = 0; i < dsc->var_cnt; i++)
if (dsc->vars[i].z_name[0] == 0)
for (i = 0; i < dsc->var_cnt; i++)
if (dsc->vars[i].z_name[0] == 0)
@@
-462,12
+463,12
@@
free_dsc_proc (struct dsc_proc *dsc)
static int
try_name (struct dsc_proc *dsc, char *name)
{
static int
try_name (struct dsc_proc *dsc, char *name)
{
-
in
t i;
+
size_
t i;
if (dict_lookup_var (default_dict, name) != NULL)
return 0;
for (i = 0; i < dsc->var_cnt; i++)
if (dict_lookup_var (default_dict, name) != NULL)
return 0;
for (i = 0; i < dsc->var_cnt; i++)
- if (!strcmp (dsc->vars[i].z_name, name))
+ if (!strc
asec
mp (dsc->vars[i].z_name, name))
return 0;
return 1;
}
return 0;
return 1;
}
@@
-478,14
+479,13
@@
try_name (struct dsc_proc *dsc, char *name)
copies the new name into Z_NAME. On failure, returns zero. */
static int
generate_z_varname (struct dsc_proc *dsc, char *z_name,
copies the new name into Z_NAME. On failure, returns zero. */
static int
generate_z_varname (struct dsc_proc *dsc, char *z_name,
- const char *var_name,
in
t *z_cnt)
+ const char *var_name,
size_
t *z_cnt)
{
{
- char name[
10
];
+ char name[
LONG_NAME_LEN + 1
];
/* Try a name based on the original variable name. */
name[0] = 'Z';
/* Try a name based on the original variable name. */
name[0] = 'Z';
- strcpy (name + 1, var_name);
- name[8] = '\0';
+ str_copy_trunc (name + 1, sizeof name - 1, var_name);
if (try_name (dsc, name))
{
strcpy (z_name, name);
if (try_name (dsc, name))
{
strcpy (z_name, name);
@@
-526,11
+526,11
@@
generate_z_varname (struct dsc_proc *dsc, char *z_name,
static void
dump_z_table (struct dsc_proc *dsc)
{
static void
dump_z_table (struct dsc_proc *dsc)
{
-
in
t cnt = 0;
+
size_
t cnt = 0;
struct tab_table *t;
{
struct tab_table *t;
{
-
in
t i;
+
size_
t i;
for (i = 0; i < dsc->var_cnt; i++)
if (dsc->vars[i].z_name[0] != '\0')
for (i = 0; i < dsc->var_cnt; i++)
if (dsc->vars[i].z_name[0] != '\0')
@@
-548,7
+548,7
@@
dump_z_table (struct dsc_proc *dsc)
tab_dim (t, tab_natural_dimensions);
{
tab_dim (t, tab_natural_dimensions);
{
-
in
t i, y;
+
size_
t i, y;
for (i = 0, y = 1; i < dsc->var_cnt; i++)
if (dsc->vars[i].z_name[0] != '\0')
for (i = 0, y = 1; i < dsc->var_cnt; i++)
if (dsc->vars[i].z_name[0] != '\0')
@@
-568,10
+568,10
@@
dump_z_table (struct dsc_proc *dsc)
(either system or user-missing values that weren't included).
*/
static int
(either system or user-missing values that weren't included).
*/
static int
-descriptives_trns_proc (
struct trns_header *trns
, struct ccase * c,
- int case_
num
UNUSED)
+descriptives_trns_proc (
void *trns_
, struct ccase * c,
+ int case_
idx
UNUSED)
{
{
- struct dsc_trns *t =
(struct dsc_trns *) trns
;
+ struct dsc_trns *t =
trns_
;
struct dsc_z_score *z;
struct variable **vars;
int all_sysmis = 0;
struct dsc_z_score *z;
struct variable **vars;
int all_sysmis = 0;
@@
-581,9
+581,10
@@
descriptives_trns_proc (struct trns_header *trns, struct ccase * c,
assert(t->vars);
for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
{
assert(t->vars);
for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
{
- double score = c->data[(*vars)->fv].f;
- if ( score == SYSMIS || (!t->include_user_missing
- && is_num_user_missing(score, *vars)) )
+ double score = case_num (c, (*vars)->fv);
+ if ( score == SYSMIS
+ || (!t->include_user_missing
+ && mv_is_num_user_missing (&(*vars)->miss, score)))
{
all_sysmis = 1;
break;
{
all_sysmis = 1;
break;
@@
-593,23
+594,25
@@
descriptives_trns_proc (struct trns_header *trns, struct ccase * c,
for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
{
for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++)
{
- double score = c->data[z->src_idx].f;
+ double input = case_num (c, z->src_idx);
+ double *output = &case_data_rw (c, z->dst_idx)->f;
if (z->mean == SYSMIS || z->std_dev == SYSMIS
if (z->mean == SYSMIS || z->std_dev == SYSMIS
- || all_sysmis || score == SYSMIS
- || (!t->include_user_missing && is_num_user_missing(score, z->v)))
- c->data[z->dst_idx].f = SYSMIS;
+ || all_sysmis || input == SYSMIS
+ || (!t->include_user_missing
+ && mv_is_num_user_missing (&z->v->miss, input)))
+ *output = SYSMIS;
else
else
-
c->data[z->dst_idx].f = (score
- z->mean) / z->std_dev;
+
*output = (input
- z->mean) / z->std_dev;
}
return -1;
}
/* Frees a descriptives_trns struct. */
static void
}
return -1;
}
/* Frees a descriptives_trns struct. */
static void
-descriptives_trns_free (
struct trns_header * trns
)
+descriptives_trns_free (
void *trns_
)
{
{
- struct dsc_trns *t =
(struct dsc_trns *) trns
;
+ struct dsc_trns *t =
trns_
;
free (t->z_scores);
assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
free (t->z_scores);
assert((t->missing_type != DSC_LISTWISE) ^ (t->vars != NULL));
@@
-621,23
+624,21
@@
static void
setup_z_trns (struct dsc_proc *dsc)
{
struct dsc_trns *t;
setup_z_trns (struct dsc_proc *dsc)
{
struct dsc_trns *t;
-
in
t cnt, i;
+
size_
t cnt, i;
for (cnt = i = 0; i < dsc->var_cnt; i++)
if (dsc->vars[i].z_name[0] != '\0')
cnt++;
t = xmalloc (sizeof *t);
for (cnt = i = 0; i < dsc->var_cnt; i++)
if (dsc->vars[i].z_name[0] != '\0')
cnt++;
t = xmalloc (sizeof *t);
- t->h.proc = descriptives_trns_proc;
- t->h.free = descriptives_trns_free;
- t->z_scores = xmalloc (cnt * sizeof *t->z_scores);
+ t->z_scores = xnmalloc (cnt, sizeof *t->z_scores);
t->z_score_cnt = cnt;
t->missing_type = dsc->missing_type;
t->include_user_missing = dsc->include_user_missing;
if ( t->missing_type == DSC_LISTWISE )
{
t->var_cnt = dsc->var_cnt;
t->z_score_cnt = cnt;
t->missing_type = dsc->missing_type;
t->include_user_missing = dsc->include_user_missing;
if ( t->missing_type == DSC_LISTWISE )
{
t->var_cnt = dsc->var_cnt;
- t->vars = x
malloc(t->var_cnt *
sizeof *t->vars);
+ t->vars = x
nmalloc (t->var_cnt,
sizeof *t->vars);
for (i = 0; i < t->var_cnt; i++)
t->vars[i] = dsc->vars[i].v;
}
for (i = 0; i < t->var_cnt; i++)
t->vars[i] = dsc->vars[i].v;
}
@@
-646,7
+647,6
@@
setup_z_trns (struct dsc_proc *dsc)
t->var_cnt = 0;
t->vars = NULL;
}
t->var_cnt = 0;
t->vars = NULL;
}
-
for (cnt = i = 0; i < dsc->var_cnt; i++)
{
for (cnt = i = 0; i < dsc->var_cnt; i++)
{
@@
-681,7
+681,7
@@
setup_z_trns (struct dsc_proc *dsc)
}
}
}
}
- add_transformation (
(struct trns_header *)
t);
+ add_transformation (
descriptives_trns_proc, descriptives_trns_free,
t);
}
\f
/* Statistical calculation. */
}
\f
/* Statistical calculation. */
@@
-695,8
+695,8
@@
calc_descriptives (const struct casefile *cf, void *dsc_)
{
struct dsc_proc *dsc = dsc_;
struct casereader *reader;
{
struct dsc_proc *dsc = dsc_;
struct casereader *reader;
-
const struct ccase *
c;
-
in
t i;
+
struct ccase
c;
+
size_
t i;
for (i = 0; i < dsc->var_cnt; i++)
{
for (i = 0; i < dsc->var_cnt; i++)
{
@@
-712,15
+712,16
@@
calc_descriptives (const struct casefile *cf, void *dsc_)
dsc->valid = 0.;
/* First pass to handle most of the work. */
dsc->valid = 0.;
/* First pass to handle most of the work. */
- reader = casefile_get_reader (cf);
- while (casereader_read (reader, &c))
+ for (reader = casefile_get_reader (cf);
+ casereader_read (reader, &c);
+ case_destroy (&c))
{
{
- double weight = dict_get_case_weight (default_dict, c, &dsc->bad_warn);
+ double weight = dict_get_case_weight (default_dict,
&
c, &dsc->bad_warn);
if (weight <= 0.0)
if (weight <= 0.0)
-
continue;
+ continue;
/* Check for missing values. */
/* Check for missing values. */
- if (listwise_missing (dsc, c))
+ if (listwise_missing (dsc,
&
c))
{
dsc->missing_listwise += weight;
if (dsc->missing_type == DSC_LISTWISE)
{
dsc->missing_listwise += weight;
if (dsc->missing_type == DSC_LISTWISE)
@@
-731,24
+732,19
@@
calc_descriptives (const struct casefile *cf, void *dsc_)
for (i = 0; i < dsc->var_cnt; i++)
{
struct dsc_var *dv = &dsc->vars[i];
for (i = 0; i < dsc->var_cnt; i++)
{
struct dsc_var *dv = &dsc->vars[i];
- double x = c
->data[dv->v->fv].f
;
+ double x = c
ase_num (&c, dv->v->fv)
;
if (dsc->missing_type != DSC_LISTWISE
&& (x == SYSMIS
|| (!dsc->include_user_missing
if (dsc->missing_type != DSC_LISTWISE
&& (x == SYSMIS
|| (!dsc->include_user_missing
- &&
is_num_user_missing (x, dv->v
))))
+ &&
mv_is_num_user_missing (&dv->v->miss, x
))))
{
dv->missing += weight;
continue;
}
if (dv->moments != NULL)
{
dv->missing += weight;
continue;
}
if (dv->moments != NULL)
- {
- if (dsc->max_moment > MOMENT_MEAN)
- moments_pass_one (dv->moments, x, weight);
- else
- moments_pass_two (dv->moments, x, weight);
- }
+ moments_pass_one (dv->moments, x, weight);
if (x < dv->min)
dv->min = x;
if (x < dv->min)
dv->min = x;
@@
-761,28
+757,29
@@
calc_descriptives (const struct casefile *cf, void *dsc_)
/* Second pass for higher-order moments. */
if (dsc->max_moment > MOMENT_MEAN)
{
/* Second pass for higher-order moments. */
if (dsc->max_moment > MOMENT_MEAN)
{
- reader = casefile_get_reader (cf);
- while (casereader_read (reader, &c))
+ for (reader = casefile_get_reader (cf);
+ casereader_read (reader, &c);
+ case_destroy (&c))
{
{
- double weight = dict_get_case_weight (default_dict, c,
+ double weight = dict_get_case_weight (default_dict,
&
c,
&dsc->bad_warn);
if (weight <= 0.0)
continue;
/* Check for missing values. */
&dsc->bad_warn);
if (weight <= 0.0)
continue;
/* Check for missing values. */
- if (listwise_missing (dsc, c)
+ if (listwise_missing (dsc,
&
c)
&& dsc->missing_type == DSC_LISTWISE)
continue;
for (i = 0; i < dsc->var_cnt; i++)
{
struct dsc_var *dv = &dsc->vars[i];
&& dsc->missing_type == DSC_LISTWISE)
continue;
for (i = 0; i < dsc->var_cnt; i++)
{
struct dsc_var *dv = &dsc->vars[i];
- double x = c
->data[dv->v->fv].f
;
+ double x = c
ase_num (&c, dv->v->fv)
;
if (dsc->missing_type != DSC_LISTWISE
&& (x == SYSMIS
|| (!dsc->include_user_missing
if (dsc->missing_type != DSC_LISTWISE
&& (x == SYSMIS
|| (!dsc->include_user_missing
- &&
is_num_user_missing (x, dv->v
))))
+ &&
mv_is_num_user_missing (&dv->v->miss, x
))))
continue;
if (dv->moments != NULL)
continue;
if (dv->moments != NULL)
@@
-837,15
+834,16
@@
calc_descriptives (const struct casefile *cf, void *dsc_)
static int
listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
{
static int
listwise_missing (struct dsc_proc *dsc, const struct ccase *c)
{
-
in
t i;
+
size_
t i;
for (i = 0; i < dsc->var_cnt; i++)
{
struct dsc_var *dv = &dsc->vars[i];
for (i = 0; i < dsc->var_cnt; i++)
{
struct dsc_var *dv = &dsc->vars[i];
- double x = c
->data[dv->v->fv].f
;
+ double x = c
ase_num (c, dv->v->fv)
;
if (x == SYSMIS
if (x == SYSMIS
- || (!dsc->include_user_missing && is_num_user_missing (x, dv->v)))
+ || (!dsc->include_user_missing
+ && mv_is_num_user_missing (&dv->v->miss, x)))
return 1;
}
return 0;
return 1;
}
return 0;
@@
-859,7
+857,7
@@
static algo_compare_func descriptives_compare_dsc_vars;
static void
display (struct dsc_proc *dsc)
{
static void
display (struct dsc_proc *dsc)
{
-
int i, j
;
+
size_t i
;
int nc;
struct tab_table *t;
int nc;
struct tab_table *t;
@@
-900,6
+898,7
@@
display (struct dsc_proc *dsc)
for (i = 0; i < dsc->var_cnt; i++)
{
struct dsc_var *dv = &dsc->vars[i];
for (i = 0; i < dsc->var_cnt; i++)
{
struct dsc_var *dv = &dsc->vars[i];
+ size_t j;
nc = 0;
tab_text (t, nc++, i + 1, TAB_LEFT, dv->v->name);
nc = 0;
tab_text (t, nc++, i + 1, TAB_LEFT, dv->v->name);
@@
-929,7
+928,7
@@
descriptives_compare_dsc_vars (const void *a_, const void *b_, void *dsc_)
int result;
if (dsc->sort_by_stat == DSC_NAME)
int result;
if (dsc->sort_by_stat == DSC_NAME)
- result = strcmp (a->v->name, b->v->name);
+ result = strc
asec
mp (a->v->name, b->v->name);
else
{
double as = a->stats[dsc->sort_by_stat];
else
{
double as = a->stats[dsc->sort_by_stat];