/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <string.h>
#include "data/dataset.h"
-#include "data/file-name.h"
#include "data/variable.h"
+#include "libpspp/cast.h"
#include "libpspp/compiler.h"
+#include "libpspp/hash-functions.h"
#include "libpspp/hmap.h"
#include "libpspp/i18n.h"
#include "libpspp/message.h"
#include "libpspp/str.h"
-#include "libpspp/hash-functions.h"
+#include <sys/stat.h>
+
+#include "gl/dirname.h"
#include "gl/xalloc.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
+#if defined _WIN32 || defined __WIN32__
+#define WIN32_LEAN_AND_MEAN /* avoid including junk */
+#include <windows.h>
+#endif
+
/* File handle. */
struct file_handle
{
/* FH_REF_FILE only. */
char *file_name; /* File name as provided by user. */
+ char *file_name_encoding; /* The character encoding of file_name,
+ This is NOT the encoding of the file contents! */
enum fh_mode mode; /* File mode. */
- const char *encoding; /* File encoding. */
+ enum fh_line_ends line_ends; /* Line ends for text files. */
/* FH_REF_FILE and FH_REF_INLINE only. */
size_t record_width; /* Length of fixed-format records. */
size_t tab_width; /* Tab width, 0=do not expand tabs. */
+ char *encoding; /* Charset for contents. */
/* FH_REF_DATASET only. */
struct dataset *ds; /* Dataset. */
static struct file_handle *inline_file;
static struct file_handle *create_handle (const char *id,
- char *name, enum fh_referent);
+ char *name, enum fh_referent,
+ const char *encoding);
static void free_handle (struct file_handle *);
static void unname_handle (struct file_handle *);
/* Hash table of all active locks. */
static struct hmap locks = HMAP_INITIALIZER (locks);
+static struct file_identity *fh_get_identity (const struct file_handle *);
+static void fh_free_identity (struct file_identity *);
+static int fh_compare_file_identities (const struct file_identity *,
+ const struct file_identity *);
+static unsigned int fh_hash_identity (const struct file_identity *);
+
/* File handle initialization routine. */
void
fh_init (void)
{
- inline_file = create_handle ("INLINE", xstrdup ("INLINE"), FH_REF_INLINE);
+ inline_file = create_handle ("INLINE", xstrdup ("INLINE"), FH_REF_INLINE,
+ "Auto"); /* Encoding argument; create_handle() stores its own copy of it. */
inline_file->record_width = 80;
inline_file->tab_width = 8;
}
free (handle->id);
free (handle->name);
free (handle->file_name);
+ free (handle->file_name_encoding);
+ free (handle->encoding);
free (handle);
}
struct file_handle *handle;
HMAP_FOR_EACH_WITH_HASH (handle, struct file_handle, name_node,
- hash_case_string (id, 0), &named_handles)
- if (!strcasecmp (id, handle->id))
+ utf8_hash_case_string (id, 0), &named_handles)
+ if (!utf8_strcasecmp (id, handle->id))
{
- handle->ref_cnt++;
- return handle;
+ return fh_ref (handle);
}
return NULL;
The new handle is not fully initialized. The caller is
responsible for completing its initialization. */
static struct file_handle *
-create_handle (const char *id, char *handle_name, enum fh_referent referent)
+create_handle (const char *id, char *handle_name, enum fh_referent referent,
+ const char *encoding)
{
struct file_handle *handle = xzalloc (sizeof *handle);
handle->id = id != NULL ? xstrdup (id) : NULL;
handle->name = handle_name;
handle->referent = referent;
+ handle->encoding = xstrdup (encoding);
if (id != NULL)
{
- assert (fh_from_id (id) == NULL);
hmap_insert (&named_handles, &handle->name_node,
- hash_case_string (handle->id, 0));
- handle->ref_cnt++;
+ utf8_hash_case_string (handle->id, 0));
}
return handle;
struct file_handle *
fh_inline_file (void)
{
- fh_ref (inline_file);
return inline_file;
}
existing file identifiers. The new handle is associated with file FILE_NAME
and the given PROPERTIES. */
struct file_handle *
-fh_create_file (const char *id, const char *file_name,
+fh_create_file (const char *id, const char *file_name, const char *file_name_encoding,
const struct fh_properties *properties)
{
char *handle_name;
struct file_handle *handle;
handle_name = id != NULL ? xstrdup (id) : xasprintf ("`%s'", file_name);
- handle = create_handle (id, handle_name, FH_REF_FILE);
+ handle = create_handle (id, handle_name, FH_REF_FILE, properties->encoding); /* create_handle() duplicates the encoding string, so the handle owns its copy. */
handle->file_name = xstrdup (file_name);
+ handle->file_name_encoding = file_name_encoding ? xstrdup (file_name_encoding) : NULL; /* Stays null when the caller passes no file-name encoding. */
handle->mode = properties->mode;
+ handle->line_ends = properties->line_ends;
handle->record_width = properties->record_width;
handle->tab_width = properties->tab_width;
- handle->encoding = properties->encoding;
return handle;
}
if (name[0] == '\0')
name = _("active dataset");
- handle = create_handle (NULL, xstrdup (name), FH_REF_DATASET);
+ handle = create_handle (NULL, xstrdup (name), FH_REF_DATASET, C_ENCODING);
handle->ds = ds;
return handle;
}
const struct fh_properties *
fh_default_properties (void)
{
+#if defined _WIN32 || defined __WIN32__ /* Windows builds default to CRLF line ends, all others to LF. */
+#define DEFAULT_LINE_ENDS FH_END_CRLF
+#else
+#define DEFAULT_LINE_ENDS FH_END_LF
+#endif
+
static const struct fh_properties default_properties
- = {FH_MODE_TEXT, 1024, 4, C_ENCODING};
+ = {FH_MODE_TEXT, DEFAULT_LINE_ENDS, 1024, 4, (char *) "Auto"}; /* NOTE(review): presumably mode, line ends, record width, tab width, encoding, in that order; confirm against struct fh_properties. */
return &default_properties;
}
return handle->file_name;
}
+
+/* Reports which character encoding the file name stored in HANDLE is written
+   in.  This describes the name only, never the contents of the file.  HANDLE
+   must be associated with a file, which the assertion enforces.  The result
+   is null if no file-name encoding was supplied when the handle was
+   created. */
+const char *
+fh_get_file_name_encoding (const struct file_handle *handle)
+{
+  const char *name_encoding;
+
+  assert (handle->referent == FH_REF_FILE);
+  name_encoding = handle->file_name_encoding;
+  return name_encoding;
+}
+
+
/* Returns the mode of HANDLE. */
enum fh_mode
fh_get_mode (const struct file_handle *handle)
return handle->mode;
}
+/* Reports the line-end convention recorded for HANDLE.  HANDLE must be
+   associated with a file, which the assertion enforces. */
+enum fh_line_ends
+fh_get_line_ends (const struct file_handle *handle)
+{
+  enum fh_line_ends ends;
+
+  assert (handle->referent == FH_REF_FILE);
+  ends = handle->line_ends;
+  return ends;
+}
+
+
/* Returns the width of a logical record on HANDLE. */
size_t
fh_get_record_width (const struct file_handle *handle)
/* Returns the encoding of characters read from HANDLE. */
const char *
-fh_get_legacy_encoding (const struct file_handle *handle)
+fh_get_encoding (const struct file_handle *handle)
{
- assert (handle->referent & (FH_REF_FILE | FH_REF_INLINE));
- return (handle->referent == FH_REF_FILE ? handle->encoding : C_ENCODING);
+ return handle->encoding; /* No referent check: create_handle() stores an encoding in every handle it builds. */
}
/* Returns the dataset handle associated with HANDLE.
struct file_handle *
fh_get_default_handle (void)
{
- return default_handle ? fh_ref (default_handle) : fh_inline_file ();
+ return default_handle ? default_handle : fh_inline_file (); /* Returned as-is: unlike the line this replaces, no fh_ref() is taken here. */
}
/* Sets NEW_DEFAULT_HANDLE as the default handle. */
{
assert (new_default_handle == NULL
|| (new_default_handle->referent & (FH_REF_INLINE | FH_REF_FILE)));
- if (default_handle != NULL)
+ if (default_handle != NULL && default_handle != inline_file)
fh_unref (default_handle);
default_handle = new_default_handle;
if (default_handle != NULL)
lock->referent = fh_get_referent (h);
lock->access = access;
if (lock->referent == FH_REF_FILE)
- lock->u.file = fn_get_identity (fh_get_file_name (h));
+ lock->u.file = fh_get_identity (h);
else if (lock->referent == FH_REF_DATASET)
lock->u.unique_id = dataset_seqno (fh_get_dataset (h));
}
free_key (struct fh_lock *lock)
{
if (lock->referent == FH_REF_FILE)
- fn_free_identity (lock->u.file);
+ fh_free_identity (lock->u.file);
}
/* Compares the key fields in struct fh_lock objects A and B and
else if (a->access != b->access)
return a->access < b->access ? -1 : 1;
else if (a->referent == FH_REF_FILE)
- return fn_compare_file_identities (a->u.file, b->u.file);
+ return fh_compare_file_identities (a->u.file, b->u.file);
else if (a->referent == FH_REF_DATASET)
return (a->u.unique_id < b->u.unique_id ? -1
: a->u.unique_id > b->u.unique_id);
{
unsigned int basis;
if (lock->referent == FH_REF_FILE)
- basis = fn_hash_identity (lock->u.file);
+ basis = fh_hash_identity (lock->u.file);
else if (lock->referent == FH_REF_DATASET)
basis = lock->u.unique_id;
else
basis = 0;
return hash_int ((lock->referent << 3) | lock->access, basis);
}
+
+\f
+
+
+
+
+/* A file's identity:
+
+ - For a file that exists, this is its device and inode.
+
+ - For a file that does not exist, but which has a directory
+ name that exists, this is the device and inode of the
+ directory, plus the file's base name.
+
+ - For a file that does not exist and has a nonexistent
+ directory, this is the file name.
+
+ On Windows, fh_get_identity() fills DEVICE with the volume serial
+ number and INODE with the 64-bit file index when the file can be
+ opened and queried.  Otherwise DEVICE and INODE are both zero and
+ NAME holds the lowercased result of GetFullPathName(). */
+struct file_identity
+{
+ unsigned long long device; /* Device number (volume serial number on Windows). */
+ unsigned long long inode; /* Inode number (file index on Windows). */
+ char *name; /* File name, where needed, otherwise NULL.  Freed by fh_free_identity(). */
+};
+
+/* Returns a pointer to a dynamically allocated structure whose
+   value can be used to tell whether two files are actually the
+   same file.  The file examined is the one named by
+   fh_get_file_name (FH).  The caller is responsible for freeing the
+   structure with fh_free_identity() when finished.
+
+   Every path through this function sets all three members of the
+   result, so the structure never holds indeterminate values. */
+static struct file_identity *
+fh_get_identity (const struct file_handle *fh)
+{
+  struct file_identity *identity = xmalloc (sizeof *identity);
+
+  const char *file_name = fh_get_file_name (fh);
+
+#if !(defined _WIN32 || defined __WIN32__)
+  struct stat s;
+  if (lstat (file_name, &s) == 0)
+    {
+      /* The file exists, so device and inode identify it.  lstat() does
+         not follow a final symbolic link, so a link and the file it
+         points to get different identities. */
+      identity->device = s.st_dev;
+      identity->inode = s.st_ino;
+      identity->name = NULL;
+    }
+  else
+    {
+      char *dir = dir_name (file_name);
+      if (last_component (file_name) != NULL && stat (dir, &s) == 0)
+        {
+          /* The file is missing but its directory exists: identify it
+             by the directory plus the base name. */
+          identity->device = s.st_dev;
+          identity->inode = s.st_ino;
+          identity->name = base_name (file_name);
+        }
+      else
+        {
+          /* Neither file nor directory can be examined: the name as
+             given is all there is. */
+          identity->device = 0;
+          identity->inode = 0;
+          identity->name = xstrdup (file_name);
+        }
+      free (dir);
+    }
+#else /* Windows */
+  bool ok = false;
+  /* NOTE(review): a share mode of 0 requests exclusive access, so this
+     open fails while the file is open elsewhere and the identity then
+     falls back to the path name below.  Confirm that this is intended. */
+  HANDLE h = CreateFile (file_name, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL);
+  if (h != INVALID_HANDLE_VALUE)
+    {
+      BY_HANDLE_FILE_INFORMATION fi;
+      ok = GetFileInformationByHandle (h, &fi);
+      if (ok)
+        {
+          /* Volume serial number plus the 64-bit file index play the
+             role of device and inode. */
+          identity->device = fi.dwVolumeSerialNumber;
+          identity->inode = fi.nFileIndexHigh;
+          identity->inode <<= (sizeof fi.nFileIndexLow) * CHAR_BIT;
+          identity->inode |= fi.nFileIndexLow;
+          identity->name = NULL;
+        }
+      CloseHandle (h);
+    }
+
+  if (!ok)
+    {
+      identity->device = 0;
+      identity->inode = 0;
+
+      /* Ask for the full path name, growing the buffer to the size the
+         call reports until the result fits. */
+      size_t bufsize;
+      size_t pathlen = 255;
+      char *cname = NULL;
+      do
+        {
+          bufsize = pathlen;
+          cname = xrealloc (cname, bufsize);
+          pathlen = GetFullPathName (file_name, bufsize, cname, NULL);
+        }
+      while (pathlen > bufsize);
+
+      /* GetFullPathName() returns 0 on failure and then leaves CNAME
+         unwritten, so copying it would read indeterminate memory.  Fall
+         back to the name exactly as the user gave it. */
+      identity->name = xstrdup (pathlen > 0 ? cname : file_name);
+      free (cname);
+      str_lowercase (identity->name);
+    }
+#endif /* Windows */
+
+  return identity;
+}
+
+/* Frees IDENTITY obtained from fh_get_identity(), together with the name
+   string it owns.  A null IDENTITY is accepted and ignored.
+
+   Defined `static' to agree with the forward declaration earlier in this
+   file. */
+static void
+fh_free_identity (struct file_identity *identity)
+{
+  if (identity == NULL)
+    return;
+
+  free (identity->name);
+  free (identity);
+}
+
+/* Compares A and B, returning a strcmp()-type result.  Identities are
+   ordered by device, then by inode, then by name.  An identity without a
+   name sorts before one that has a name, and two identities without names
+   compare equal.
+
+   Defined `static' to agree with the forward declaration earlier in this
+   file. */
+static int
+fh_compare_file_identities (const struct file_identity *a,
+                            const struct file_identity *b)
+{
+  if (a->device != b->device)
+    return a->device < b->device ? -1 : 1;
+
+  if (a->inode != b->inode)
+    return a->inode < b->inode ? -1 : 1;
+
+  /* Device and inode agree, so only the optional names can differ. */
+  if (a->name == NULL)
+    return b->name != NULL ? -1 : 0;
+
+  return b->name != NULL ? strcmp (a->name, b->name) : 1;
+}
+
+/* Returns a hash value for IDENTITY, computed from its device, its inode
+   and, when present, its name.  These are the same fields that
+   fh_compare_file_identities() examines.
+
+   Defined `static' to agree with the forward declaration earlier in this
+   file. */
+static unsigned int
+fh_hash_identity (const struct file_identity *identity)
+{
+  unsigned int hash = hash_int (identity->device, identity->inode);
+
+  /* The name only takes part when the identity carries one. */
+  return identity->name != NULL ? hash_string (identity->name, hash) : hash;
+}