Fully implement arbitrary delimiters on DATA LIST, extending the half
[pspp-builds.git] / src / aggregate.c
index 3821de78b822f9043676c6ec1c4b8cc87dbf03af..4f2f1f45800fad3ad719ba32e57e1421b3c4c77d 100644 (file)
 #include "file-handle.h"
 #include "lexer.h"
 #include "misc.h"
+#include "moments.h"
 #include "pool.h"
 #include "settings.h"
 #include "sfm.h"
 #include "sort.h"
-#include "stats.h"
 #include "str.h"
 #include "var.h"
 #include "vfm.h"
@@ -53,6 +53,7 @@ struct agr_var
     int int1, int2;
     char *string;
     int missing;
+    struct moments1 *moments;
   };
 
 /* Aggregation functions. */
@@ -386,8 +387,8 @@ parse_aggregate_functions (struct agr_proc *agr)
          
          if (token == T_STRING)
            {
-             ds_truncate (&tokstr, 120);
-             dest_label[n_dest - 1] = xstrdup (ds_value (&tokstr));
+             ds_truncate (&tokstr, 255);
+             dest_label[n_dest - 1] = xstrdup (ds_c_str (&tokstr));
              lex_get ();
            }
        }
@@ -453,7 +454,7 @@ parse_aggregate_functions (struct agr_proc *agr)
                lex_match (',');
                if (token == T_STRING)
                  {
-                   arg[i].c = xstrdup (ds_value (&tokstr));
+                   arg[i].c = xstrdup (ds_c_str (&tokstr));
                    type = ALPHA;
                  }
                else if (token == T_NUM)
@@ -511,6 +512,7 @@ parse_aggregate_functions (struct agr_proc *agr)
            agr->vars = v;
           tail = v;
          tail->next = NULL;
+          v->moments = NULL;
          
          /* Create the target variable in the aggregate
              dictionary. */
@@ -663,6 +665,8 @@ agr_destroy (struct agr_proc *agr)
            free (iter->arg[i].c);
          free (iter->string);
        }
+      else if (iter->function == SD)
+        moments1_destroy (iter->moments);
       free (iter);
     }
   free (agr->prev_break);
@@ -790,8 +794,9 @@ accumulate_aggregate_info (struct agr_proc *agr,
 {
   struct agr_var *iter;
   double weight;
+  int bad_warn = 1;
 
-  weight = dict_get_case_weight (default_dict, input);
+  weight = dict_get_case_weight (default_dict, input, &bad_warn);
 
   for (iter = agr->vars; iter; iter = iter->next)
     if (iter->src)
@@ -825,14 +830,9 @@ accumulate_aggregate_info (struct agr_proc *agr,
             iter->dbl[0] += v->f * weight;
             iter->dbl[1] += weight;
             break;
-         case SD: 
-            {
-              double product = v->f * weight;
-              iter->dbl[0] += product;
-              iter->dbl[1] += product * v->f;
-              iter->dbl[2] += weight;
-              break; 
-            }
+         case SD:
+            moments1_add (iter->moments, v->f, weight);
+            break;
          case MAX:
            iter->dbl[0] = max (iter->dbl[0], v->f);
            iter->int1 = 1;
@@ -992,9 +992,17 @@ dump_aggregate_info (struct agr_proc *agr, struct ccase *output)
            v->f = i->dbl[1] != 0.0 ? i->dbl[0] / i->dbl[1] : SYSMIS;
            break;
          case SD:
-           v->f = ((i->dbl[2] > 1.0)
-                   ? calc_stddev (calc_variance (i->dbl, i->dbl[2]))
-                   : SYSMIS);
+            {
+              double variance;
+
+              /* FIXME: we should use two passes. */
+              moments1_calculate (i->moments, NULL, NULL, &variance,
+                                 NULL, NULL);
+              if (variance != SYSMIS)
+                v->f = sqrt (variance);
+              else
+                v->f = SYSMIS; 
+            }
            break;
          case MAX:
          case MIN:
@@ -1088,6 +1096,12 @@ initialize_aggregate_info (struct agr_proc *agr)
        case MAX | FSTRING:
          memset (iter->string, 0, iter->src->width);
          break;
+        case SD:
+          if (iter->moments == NULL)
+            iter->moments = moments1_create (MOMENT_VARIANCE);
+          else
+            moments1_clear (iter->moments);
+          break;
        default:
          iter->dbl[0] = iter->dbl[1] = iter->dbl[2] = 0.0;
          iter->int1 = iter->int2 = 0;