-/* Structure for replacement selection tree. */
-struct repl_sel_tree
- {
- struct repl_sel_tree *loser;/* Loser associated w/this internal node. */
- int rn; /* Run number of `loser'. */
- struct repl_sel_tree *fe; /* Internal node above this external node. */
- struct repl_sel_tree *fi; /* Internal node above this internal node. */
- union value record[1]; /* The case proper. */
- };
-
-/* Static variables used for sorting. */
-static struct repl_sel_tree **x; /* Buffers. */
-static int x_max; /* Size of buffers, in records. */
-static int records_per_buffer; /* Number of records in each buffer. */
-
-/* In the merge phase, the first N_INPUT_BUFFERS handle[] elements are
- input files and the last element is the output file. Before that,
- they're all used as output files, although the last one is
- segregated. */
-static FILE *handle[MAX_FILE_HANDLES]; /* File handles. */
-
-/* Now, MAX_FILE_HANDLES is the maximum number of files we will *try*
- to open. But if we can't open that many, max_handles will be set
- to the number we apparently can open. */
-static int max_handles; /* Maximum number of handles. */
-
-/* When we create temporary files, they are all put in the same
- directory and numbered sequentially from zero. tmp_basename is the
- drive/directory, etc., and tmp_extname can be sprintf() with "%08x"
- to the file number, then tmp_basename used to open the file. */
-static char *tmp_basename; /* Temporary file basename. */
-static char *tmp_extname; /* Temporary file extension name. */
-
-/* We use Huffman's method to determine the merge pattern. This means
- that we need to know which runs are the shortest at any given time.
- Priority queues as implemented by heap.c are a natural for this
- task (probably because I wrote the code specifically for it). */
-static struct heap *huffman_queue; /* Huffman priority queue. */
-
-/* Prototypes for helper functions. */
-static void sort_stream_write (void);
-static int write_initial_runs (int separate);
-static int allocate_cases (void);
-static int allocate_file_handles (void);
-static int merge (void);
-static void rmdir_temp_dir (void);
-
-/* Performs an external sort of the active file. A description of the
- procedure follows. All page references refer to Knuth's _Art of
- Computer Programming, Vol. 3: Sorting and Searching_, which is the
- canonical resource for sorting.
-
- 1. The data is read and S initial runs are formed through the
- action of algorithm 5.4.1R (replacement selection).
-
- 2. Huffman's method (p. 365-366) is used to determine the optimum
- merge pattern.
-
- 3. If an OS that supports overlapped reading, writing, and
- computing is being run, we should use 5.4.6F for forecasting.
- Otherwise, buffers are filled only when they run out of data.
- FIXME: Since the author of PSPP uses GNU/Linux, which does not
- yet implement overlapped r/w/c, 5.4.6F is not used.
-
- 4. We perform P-way merges:
-
- (a) The desired P is the smallest P such that ceil(ln(S)/ln(P))
- is minimized. (FIXME: Since I don't have an algorithm for
- minimizing this, it's just set to MAX_MERGE_ORDER.)
-
- (b) P is reduced if the selected value would make input buffers
- less than 4096 bytes each, or 16 records, whichever is larger.
-
- (c) P is reduced if we run out of available file handles or space
- for file handles.