1 /* Copyright (c) 2009, 2010, 2011 Nicira Networks
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
31 #include "ovsdb-error.h"
33 #include "socket-util.h"
36 #include "transaction.h"
/* Logging module declaration for this file.  NOTE(review): presumably this is
 * the module under which the VLOG_INFO() and VLOG_WARN() calls further down
 * are reported -- confirm against the vlog header. */
41 VLOG_DEFINE_THIS_MODULE(ovsdb_file);
43 /* Minimum number of milliseconds between database compactions. */
/* Added to a file's 'oldest_commit' to obtain its 'next_compact' deadline. */
44 #define COMPACT_MIN_MSEC (10 * 60 * 1000) /* 10 minutes. */
46 /* Minimum number of milliseconds between trying to compact the database if
47 * compacting fails. */
/* ovsdb_file_commit() adds this to the current time to reschedule
 * 'next_compact' after reporting a failed compaction. */
48 #define COMPACT_RETRY_MSEC (60 * 1000) /* 1 minute. */
50 /* A transaction being converted to JSON for writing to a file. */
/* Rows are added one at a time by ovsdb_file_txn_add_row(); the resulting
 * 'json' is what callers hand to ovsdb_file_txn_commit().  'table' remembers
 * the table of the most recently added row so that a new per-table JSON
 * object is only started when the table changes. */
51 struct ovsdb_file_txn {
52 struct json *json; /* JSON for the whole transaction. */
53 struct json *table_json; /* JSON for 'table''s transaction. */
54 struct ovsdb_table *table; /* Table described in 'table_json'. */
/* Forward declarations of file-local helpers that are used before their
 * definitions further down in this file. */
57 static void ovsdb_file_txn_init(struct ovsdb_file_txn *);
58 static void ovsdb_file_txn_add_row(struct ovsdb_file_txn *,
59 const struct ovsdb_row *old,
60 const struct ovsdb_row *new,
61 const unsigned long int *changed);
62 static struct ovsdb_error *ovsdb_file_txn_commit(struct json *,
67 static struct ovsdb_error *ovsdb_file_open__(const char *file_name,
68 const struct ovsdb_schema *,
69 bool read_only, struct ovsdb **,
70 struct ovsdb_file **);
71 static struct ovsdb_error *ovsdb_file_txn_from_json(
72 struct ovsdb *, const struct json *, bool converting,
73 long long int *date, struct ovsdb_txn **);
74 static struct ovsdb_error *ovsdb_file_create(struct ovsdb *,
76 const char *file_name,
77 long long int oldest_commit,
78 unsigned int n_transactions,
79 struct ovsdb_file **filep);
81 /* Opens database 'file_name' and stores a pointer to the new database in
82 * '*dbp'. If 'read_only' is false, then the database will be locked and
83 * changes to the database will be written to disk. If 'read_only' is true,
84 * the database will not be locked and changes to the database will persist
85 * only as long as the "struct ovsdb".
87 * If 'filep' is nonnull and 'read_only' is false, then on success sets
88 * '*filep' to an ovsdb_file that represents the open file. This ovsdb_file
89 * persists until '*dbp' is destroyed.
91 * On success, returns NULL. On failure, returns an ovsdb_error (which the
92 * caller must destroy) and sets '*dbp' and '*filep' to NULL. */
94 ovsdb_file_open(const char *file_name, bool read_only,
95 struct ovsdb **dbp, struct ovsdb_file **filep)
/* A null alternate schema makes ovsdb_file_open__() take the schema from the
 * database file itself. */
97 return ovsdb_file_open__(file_name, NULL, read_only, dbp, filep);
100 /* Opens database 'file_name' with an alternate schema. The specified 'schema'
101 * is used to interpret the data in 'file_name', ignoring the schema actually
102 * stored in the file. Data in the file for tables or columns that do not
103 * exist in 'schema' are ignored, but the ovsdb file format must otherwise be
104 * observed, including column constraints.
106 * This function can be useful for upgrading or downgrading databases to
107 * "almost-compatible" formats.
109 * The database will not be locked. Changes to the database will persist only
110 * as long as the "struct ovsdb".
112 * On success, stores a pointer to the new database in '*dbp' and returns a
113 * null pointer. On failure, returns an ovsdb_error (which the caller must
114 * destroy) and sets '*dbp' to NULL. */
116 ovsdb_file_open_as_schema(const char *file_name,
117 const struct ovsdb_schema *schema,
/* Always opens read-only, and therefore passes a null 'filep':
 * ovsdb_file_open__() asserts that 'filep' is null in read-only mode. */
120 return ovsdb_file_open__(file_name, schema, true, dbp, NULL);
/* Opens the log 'file_name' in 'open_mode', reads a record from it and parses
 * that record as the database schema.
 *
 * At least one of 'logp' and 'schemap' must be nonnull (this is asserted).
 * Callers in this file pass a null 'schemap' when they supply their own
 * schema, and a null 'logp' when they only want the schema.
 *
 * Errors are produced when the log cannot be opened or read, when a record
 * cannot be parsed as a schema, and with the message "database file contains
 * no schema".
 *
 * NOTE(review): the lines that store results through 'logp' and 'schemap' and
 * that choose between the two ovsdb_log_close() calls are not visible in this
 * excerpt; confirm ownership of the log and schema on each path. */
123 static struct ovsdb_error *
124 ovsdb_file_open_log(const char *file_name, enum ovsdb_log_open_mode open_mode,
125 struct ovsdb_log **logp, struct ovsdb_schema **schemap)
127 struct ovsdb_schema *schema = NULL;
128 struct ovsdb_log *log = NULL;
129 struct ovsdb_error *error;
130 struct json *json = NULL;
132 assert(logp || schemap);
134 error = ovsdb_log_open(file_name, open_mode, -1, &log);
/* The record read immediately after opening is treated as the schema. */
139 error = ovsdb_log_read(log, &json);
143 error = ovsdb_io_error(EOF, "%s: database file contains no schema",
149 error = ovsdb_schema_from_json(json, &schema);
/* Add context to a schema parse error. */
151 error = ovsdb_wrap_error(error,
152 "failed to parse \"%s\" as ovsdb schema",
162 ovsdb_log_close(log);
170 ovsdb_log_close(log);
/* Common implementation of ovsdb_file_open() and ovsdb_file_open_as_schema().
 *
 * Opens the log for 'file_name' read-only or read-write according to
 * 'read_only', creates a database from either the schema read from the file
 * or a clone of 'alternate_schema', and then reads the remaining log records
 * one at a time, converting each into a transaction and committing it to the
 * new database.  The smallest transaction date seen is kept in
 * 'oldest_commit', which is later passed to ovsdb_file_create() together with
 * 'n_transactions'.
 *
 * NOTE(review): the error checks, the 'n_transactions' bookkeeping and the
 * stores through 'dbp' and 'filep' are on lines not visible in this excerpt
 * and are therefore not described here. */
181 static struct ovsdb_error *
182 ovsdb_file_open__(const char *file_name,
183 const struct ovsdb_schema *alternate_schema,
184 bool read_only, struct ovsdb **dbp,
185 struct ovsdb_file **filep)
187 enum ovsdb_log_open_mode open_mode;
188 long long int oldest_commit;
189 unsigned int n_transactions;
190 struct ovsdb_schema *schema = NULL;
191 struct ovsdb_error *error;
192 struct ovsdb_log *log;
194 struct ovsdb *db = NULL;
196 /* In read-only mode there is no ovsdb_file so 'filep' must be null. */
197 assert(!(read_only && filep));
199 open_mode = read_only ? OVSDB_LOG_READ_ONLY : OVSDB_LOG_READ_WRITE;
/* Only ask for the file's own schema when the caller did not supply one. */
200 error = ovsdb_file_open_log(file_name, open_mode, &log,
201 alternate_schema ? NULL : &schema);
/* The database is built on the file's schema if one was read, otherwise on a
 * clone of the caller's 'alternate_schema'. */
206 db = ovsdb_create(schema ? schema : ovsdb_schema_clone(alternate_schema));
/* LLONG_MAX is also what ovsdb_file_txn_from_json() reports for a
 * transaction without a date, so it acts as "no date seen yet". */
208 oldest_commit = LLONG_MAX;
/* Replay the log: the loop ends when a read reports an error or yields no
 * more JSON. */
210 while ((error = ovsdb_log_read(log, &json)) == NULL && json) {
211 struct ovsdb_txn *txn;
/* Supplying an alternate schema switches the conversion into 'converting'
 * mode. */
214 error = ovsdb_file_txn_from_json(db, json, alternate_schema != NULL,
218 ovsdb_log_unread(log);
223 if (date < oldest_commit) {
224 oldest_commit = date;
227 error = ovsdb_txn_commit(txn, false);
229 ovsdb_log_unread(log);
234 /* Log error but otherwise ignore it. Probably the database just got
235 * truncated due to power failure etc. and we should use its current
237 char *msg = ovsdb_error_to_string(error);
241 ovsdb_error_destroy(error);
245 struct ovsdb_file *file;
247 error = ovsdb_file_create(db, log, file_name, oldest_commit,
248 n_transactions, &file);
256 ovsdb_log_close(log);
268 ovsdb_log_close(log);
/* Updates 'row' from the JSON object 'json'.
 *
 * Each member name of 'json' is looked up as a column in the schema of the
 * row's table, the member value is parsed using that column's type, and the
 * parsed value replaces the row's current value for that column.
 *
 * Returns a syntax error if 'json' is not a JSON object, or with the
 * "unknown column" tag when a name does not match a column.
 *
 * NOTE(review): the lines that test the column lookup result and consult
 * 'converting' are not visible in this excerpt; the comment on
 * ovsdb_file_txn_from_json() says unknown columns are ignored when
 * 'converting' is true. */
272 static struct ovsdb_error *
273 ovsdb_file_update_row_from_json(struct ovsdb_row *row, bool converting,
274 const struct json *json)
276 struct ovsdb_table_schema *schema = row->table->schema;
277 struct ovsdb_error *error;
278 struct shash_node *node;
280 if (json->type != JSON_OBJECT) {
281 return ovsdb_syntax_error(json, NULL, "row must be JSON object");
284 SHASH_FOR_EACH (node, json_object(json)) {
285 const char *column_name = node->name;
286 const struct ovsdb_column *column;
287 struct ovsdb_datum datum;
289 column = ovsdb_table_schema_get_column(schema, column_name);
294 return ovsdb_syntax_error(json, "unknown column",
295 "No column %s in table %s.",
296 column_name, schema->name);
299 error = ovsdb_datum_from_json(&datum, &column->type, node->data, NULL);
/* Swap the parsed value into the row; 'datum' then holds the row's previous
 * value for this column, which is what gets destroyed. */
303 ovsdb_datum_swap(&row->fields[column->index], &datum);
304 ovsdb_datum_destroy(&datum, &column->type);
/* Applies one row record from a logged transaction to 'txn'.
 *
 * 'row_uuid' identifies the row in 'table' and 'json' is the record for it.
 * Three outcomes are visible:
 *
 *   - A JSON null record deletes the row.  The error message shows that
 *     deleting a row that does not exist is reported as a syntax error.
 *
 *   - The existing row is modified in 'txn' and updated from 'json'.
 *
 *   - A new row with UUID 'row_uuid' is created, filled in from 'json', and
 *     inserted into 'txn'.
 *
 * NOTE(review): the conditions choosing between these outcomes are on lines
 * not visible in this excerpt; presumably they depend on whether the row
 * lookup at the top found a row. */
310 static struct ovsdb_error *
311 ovsdb_file_txn_row_from_json(struct ovsdb_txn *txn, struct ovsdb_table *table,
313 const struct uuid *row_uuid, struct json *json)
315 const struct ovsdb_row *row = ovsdb_table_get_row(table, row_uuid);
316 if (json->type == JSON_NULL) {
318 return ovsdb_syntax_error(NULL, NULL, "transaction deletes "
319 "row "UUID_FMT" that does not exist",
320 UUID_ARGS(row_uuid));
322 ovsdb_txn_row_delete(txn, row);
325 return ovsdb_file_update_row_from_json(ovsdb_txn_row_modify(txn, row),
328 struct ovsdb_error *error;
329 struct ovsdb_row *new;
/* Build the new row under the UUID recorded in the log. */
331 new = ovsdb_row_create(table);
332 *ovsdb_row_get_uuid_rw(new) = *row_uuid;
333 error = ovsdb_file_update_row_from_json(new, converting, json);
335 ovsdb_row_destroy(new);
337 ovsdb_txn_row_insert(txn, new);
/* Applies the per-table part of a logged transaction to 'txn'.
 *
 * 'json' must be a JSON object, otherwise a syntax error is returned.  Each
 * member name is parsed as a row UUID (a name that is not a valid UUID is a
 * syntax error) and each member value is passed, with that UUID, to
 * ovsdb_file_txn_row_from_json() for 'table'.
 *
 * NOTE(review): the handling of an error from the per-row call and the final
 * return are on lines not visible in this excerpt. */
343 static struct ovsdb_error *
344 ovsdb_file_txn_table_from_json(struct ovsdb_txn *txn,
345 struct ovsdb_table *table,
346 bool converting, struct json *json)
348 struct shash_node *node;
350 if (json->type != JSON_OBJECT) {
351 return ovsdb_syntax_error(json, NULL, "object expected");
354 SHASH_FOR_EACH (node, json->u.object) {
355 const char *uuid_string = node->name;
356 struct json *txn_row_json = node->data;
357 struct ovsdb_error *error;
358 struct uuid row_uuid;
360 if (!uuid_from_string(&row_uuid, uuid_string)) {
361 return ovsdb_syntax_error(json, NULL, "\"%s\" is not a valid UUID",
365 error = ovsdb_file_txn_row_from_json(txn, table, converting,
366 &row_uuid, txn_row_json);
375 /* Converts 'json' to an ovsdb_txn for 'db', storing the new transaction in
376 * '*txnp'. Returns NULL if successful, otherwise an error.
378 * If 'converting' is true, then unknown table and column names are ignored
379 * (which can ease upgrading and downgrading schemas); otherwise, they are reported as syntax errors.
382 * If successful, the date associated with the transaction, as the number of
383 * milliseconds since the epoch, is stored in '*date'. If the transaction does
384 * not include a date, LLONG_MAX is stored. */
385 static struct ovsdb_error *
386 ovsdb_file_txn_from_json(struct ovsdb *db, const struct json *json,
387 bool converting, long long int *date,
388 struct ovsdb_txn **txnp)
390 struct ovsdb_error *error;
391 struct shash_node *node;
392 struct ovsdb_txn *txn;
397 if (json->type != JSON_OBJECT) {
398 return ovsdb_syntax_error(json, NULL, "object expected");
401 txn = ovsdb_txn_create(db);
/* Each member of the transaction object is keyed either by a table name or
 * by one of the special names "_date" and "_comment". */
402 SHASH_FOR_EACH (node, json->u.object) {
403 const char *table_name = node->name;
404 struct json *node_json = node->data;
405 struct ovsdb_table *table;
407 table = shash_find_data(&db->tables, table_name);
/* "_date" is only honored when its value is a JSON integer. */
409 if (!strcmp(table_name, "_date")
410 && node_json->type == JSON_INTEGER) {
411 *date = json_integer(node_json);
/* NOTE(review): the body of this branch is not visible in this excerpt;
 * presumably "_comment", and any other unmatched name when 'converting', is
 * skipped rather than reported. */
413 } else if (!strcmp(table_name, "_comment") || converting) {
417 error = ovsdb_syntax_error(json, "unknown table",
418 "No table named %s.", table_name);
422 error = ovsdb_file_txn_table_from_json(txn, table, converting,
/* NOTE(review): presumably the error path -- the partially built transaction
 * is aborted; the surrounding label and return are not visible here. */
432 ovsdb_txn_abort(txn);
/* Writes a snapshot of 'db' to a newly created log named 'file_name'.
 *
 * The log is opened with OVSDB_LOG_CREATE, passing 'locking' through.  The
 * schema, converted to JSON, is written first.  Then every row of every table
 * is added to a single file transaction, which is committed with 'comment'
 * and with the 'durable' argument of ovsdb_file_txn_commit() set to true.
 *
 * NOTE(review): the use of 'logp' and the conditions guarding the final
 * ovsdb_log_close() are on lines not visible in this excerpt.
 * ovsdb_file_save_copy() passes a null 'logp'. */
436 static struct ovsdb_error *
437 ovsdb_file_save_copy__(const char *file_name, int locking,
438 const char *comment, const struct ovsdb *db,
439 struct ovsdb_log **logp)
441 const struct shash_node *node;
442 struct ovsdb_file_txn ftxn;
443 struct ovsdb_error *error;
444 struct ovsdb_log *log;
447 error = ovsdb_log_open(file_name, OVSDB_LOG_CREATE, locking, &log);
/* The schema is the first thing written to the new log. */
453 json = ovsdb_schema_to_json(db->schema);
454 error = ovsdb_log_write(log, json);
461 ovsdb_file_txn_init(&ftxn);
462 SHASH_FOR_EACH (node, &db->tables) {
463 const struct ovsdb_table *table = node->data;
464 const struct ovsdb_row *row;
466 HMAP_FOR_EACH (row, hmap_node, &table->rows) {
/* No old row and no change bitmap: every row is recorded as new. */
467 ovsdb_file_txn_add_row(&ftxn, NULL, row, NULL);
470 error = ovsdb_file_txn_commit(ftxn.json, comment, true, log);
481 ovsdb_log_close(log);
488 /* Saves a snapshot of 'db''s current contents as 'file_name'. If 'comment' is
489 * nonnull, then it is added along with the data contents and can be viewed
490 * with "ovsdb-tool show-log".
492 * 'locking' is passed along to ovsdb_log_open() untouched. */
494 ovsdb_file_save_copy(const char *file_name, int locking,
495 const char *comment, const struct ovsdb *db)
/* The trailing NULL is the 'logp' argument of ovsdb_file_save_copy__(). */
497 return ovsdb_file_save_copy__(file_name, locking, comment, db, NULL);
500 /* Opens database 'file_name', reads its schema, and closes it. On success,
501 * stores the schema into '*schemap' and returns NULL; the caller then owns the
502 * schema. On failure, returns an ovsdb_error (which the caller must destroy)
503 * and sets '*schemap' to NULL. */
505 ovsdb_file_read_schema(const char *file_name, struct ovsdb_schema **schemap)
/* ovsdb_file_open_log() requires at least one of its two output arguments;
 * since 'logp' is passed as NULL below, 'schemap' must be nonnull. */
507 assert(schemap != NULL);
508 return ovsdb_file_open_log(file_name, OVSDB_LOG_READ_ONLY, NULL, schemap);
511 /* Replica implementation. */
/* The members below are accessed through 'struct ovsdb_file *' pointers
 * elsewhere in this file.  (The opening line of the structure and some of its
 * members are not visible in this excerpt.) */
/* Embedded replica; ovsdb_file_cast() converts a pointer to it back into the
 * enclosing ovsdb_file. */
514 struct ovsdb_replica replica;
/* Log that committed transactions are written to. */
516 struct ovsdb_log *log;
/* 'oldest_commit' is used to report the database's age when compacting;
 * 'next_compact' is compared against time_msec() to decide when compaction
 * may next be attempted. */
518 long long int oldest_commit;
519 long long int next_compact;
/* Incremented by ovsdb_file_commit(), reset to 1 by a successful
 * ovsdb_file_compact(). */
520 unsigned int n_transactions;
/* Forward declaration, so that ovsdb_file_create() and ovsdb_file_cast() can
 * take the class's address before its definition further down. */
523 static const struct ovsdb_replica_class ovsdb_file_class;
/* Creates an ovsdb_file for 'db' and registers it as a replica of 'db'.
 *
 * The file's name is stored in absolute form.  If the absolute name cannot
 * be determined, an I/O error ("could not determine current working
 * directory") is returned.  'oldest_commit' and 'n_transactions' seed the
 * bookkeeping that later decides when to compact.
 *
 * NOTE(review): the stores of 'db' and 'log' into the new structure, the
 * store through 'filep' and the success return are on lines not visible in
 * this excerpt. */
525 static struct ovsdb_error *
526 ovsdb_file_create(struct ovsdb *db, struct ovsdb_log *log,
527 const char *file_name,
528 long long int oldest_commit,
529 unsigned int n_transactions,
530 struct ovsdb_file **filep)
532 long long int now = time_msec();
533 struct ovsdb_file *file;
536 /* Use the absolute name of the file because ovsdb-server opens its
537 * database before daemonize() chdirs to "/". */
538 abs_name = abs_file_name(NULL, file_name);
541 return ovsdb_io_error(0, "could not determine current "
542 "working directory");
545 file = xmalloc(sizeof *file);
546 ovsdb_replica_init(&file->replica, &ovsdb_file_class);
/* The structure takes ownership of 'abs_name'; ovsdb_file_destroy() frees
 * it. */
549 file->file_name = abs_name;
/* Clamp to 'now'.  ovsdb_file_open__() starts 'oldest_commit' at LLONG_MAX,
 * so presumably this keeps the addition below from overflowing when no dated
 * transaction was read. */
550 file->oldest_commit = MIN(oldest_commit, now);
551 file->next_compact = file->oldest_commit + COMPACT_MIN_MSEC;
552 file->n_transactions = n_transactions;
553 ovsdb_add_replica(db, &file->replica);
/* Returns the ovsdb_file that embeds 'replica', after asserting that
 * 'replica' really belongs to ovsdb_file_class. */
559 static struct ovsdb_file *
560 ovsdb_file_cast(struct ovsdb_replica *replica)
562 assert(replica->class == &ovsdb_file_class);
563 return CONTAINER_OF(replica, struct ovsdb_file, replica);
/* Callback that ovsdb_file_commit() passes to ovsdb_txn_for_each_change().
 * It forwards one row change ('old', 'new', 'changed') to
 * ovsdb_file_txn_add_row() on the ovsdb_file_txn received through 'ftxn_'.
 *
 * NOTE(review): the return type, the last parameter's declaration and the
 * return statement are on lines not visible in this excerpt. */
567 ovsdb_file_change_cb(const struct ovsdb_row *old,
568 const struct ovsdb_row *new,
569 const unsigned long int *changed,
572 struct ovsdb_file_txn *ftxn = ftxn_;
573 ovsdb_file_txn_add_row(ftxn, old, new, changed);
/* Writes transaction 'txn' to the log of the ovsdb_file behind 'replica'.
 *
 * Every row change in 'txn' is collected into a JSON transaction through
 * ovsdb_file_change_cb() and written together with the transaction's
 * comment.  The file's transaction count is then incremented and the
 * compaction conditions are checked; if they hold, compaction is attempted.
 * A compaction failure is not propagated: the error is turned into a
 * warning, destroyed, and a retry is scheduled COMPACT_RETRY_MSEC from now.
 *
 * NOTE(review): the remaining arguments of the ovsdb_file_txn_commit() call
 * (including how 'durable' is used), the error checks and the return
 * statements are on lines not visible in this excerpt.  Presumably this
 * function is installed as the commit callback of ovsdb_file_class. */
577 static struct ovsdb_error *
578 ovsdb_file_commit(struct ovsdb_replica *replica,
579 const struct ovsdb_txn *txn, bool durable)
581 struct ovsdb_file *file = ovsdb_file_cast(replica);
582 struct ovsdb_file_txn ftxn;
583 struct ovsdb_error *error;
585 ovsdb_file_txn_init(&ftxn);
586 ovsdb_txn_for_each_change(txn, ovsdb_file_change_cb, &ftxn);
588 /* Nothing to commit. */
592 error = ovsdb_file_txn_commit(ftxn.json, ovsdb_txn_get_comment(txn),
597 file->n_transactions++;
599 /* If it has been at least COMPACT_MIN_MSEC milliseconds since the last time
600 * we compacted (or at least COMPACT_RETRY_MSEC since the last time we
601 * tried), and if there are at least 100 transactions in the database, and
602 * if the database is at least 10 MB, then compact the database. */
603 if (time_msec() >= file->next_compact
604 && file->n_transactions >= 100
605 && ovsdb_log_get_offset(file->log) >= 10 * 1024 * 1024)
607 error = ovsdb_file_compact(file);
/* Report the failure as a warning instead of failing the commit. */
609 char *s = ovsdb_error_to_string(error);
610 ovsdb_error_destroy(error);
611 VLOG_WARN("%s: compacting database failed (%s), retrying in "
613 file->file_name, s, COMPACT_RETRY_MSEC / 1000);
616 file->next_compact = time_msec() + COMPACT_RETRY_MSEC;
/* Compacts 'file' by replacing it with a fresh snapshot of its database.
 *
 * Steps visible here: commit the current log; take a lock on the temporary
 * name "<file_name>.tmp"; remove any stale temporary file; save a snapshot of
 * the database under the temporary name, tagged with a comment that records
 * the old log's age, transaction count and size; rename the temporary file
 * over the original; and sync the parent directory.  Afterwards the old log
 * is closed and the compaction bookkeeping restarts from the current time.
 *
 * NOTE(review): the return type, the declaration of 'retval', the error
 * checks between steps, the installation of the new log into 'file', and the
 * cleanup labels are on lines not visible in this excerpt. */
624 ovsdb_file_compact(struct ovsdb_file *file)
626 struct ovsdb_log *new_log = NULL;
627 struct lockfile *tmp_lock = NULL;
628 struct ovsdb_error *error;
629 char *tmp_name = NULL;
630 char *comment = NULL;
/* This text is both logged and passed to the snapshot as its comment. */
633 comment = xasprintf("compacting database online "
634 "(%.3f seconds old, %u transactions, %llu bytes)",
635 (time_msec() - file->oldest_commit) / 1000.0,
636 file->n_transactions,
637 (unsigned long long) ovsdb_log_get_offset(file->log));
638 VLOG_INFO("%s: %s", file->file_name, comment);
640 /* Commit the old version, so that we can be assured that we'll eventually
641 * have either the old or the new version. */
642 error = ovsdb_log_commit(file->log);
647 /* Lock temporary file. */
648 tmp_name = xasprintf("%s.tmp", file->file_name);
649 retval = lockfile_lock(tmp_name, 0, &tmp_lock);
651 error = ovsdb_io_error(retval, "could not get lock on %s", tmp_name);
655 /* Remove temporary file. (It might not exist.) */
656 if (unlink(tmp_name) < 0 && errno != ENOENT) {
657 error = ovsdb_io_error(errno, "failed to remove %s", tmp_name);
/* Write the snapshot under the temporary name. */
662 error = ovsdb_file_save_copy__(tmp_name, false, comment, file->db,
668 /* Replace original by temporary. */
669 if (rename(tmp_name, file->file_name)) {
670 error = ovsdb_io_error(errno, "failed to rename \"%s\" to \"%s\"",
671 tmp_name, file->file_name);
674 fsync_parent_dir(file->file_name);
/* The snapshot is in place: drop the old log and restart the bookkeeping.
 * The count restarts at 1, presumably for the snapshot transaction itself. */
678 ovsdb_log_close(file->log);
680 file->oldest_commit = time_msec();
681 file->next_compact = file->oldest_commit + COMPACT_MIN_MSEC;
682 file->n_transactions = 1;
684 ovsdb_log_close(new_log);
690 lockfile_unlock(tmp_lock);
/* Releases the resources held by the ovsdb_file behind 'replica': closes its
 * log and frees its stored file name.
 *
 * NOTE(review): the return type and any further cleanup (for example freeing
 * the structure itself) are on lines not visible in this excerpt. */
698 ovsdb_file_destroy(struct ovsdb_replica *replica)
700 struct ovsdb_file *file = ovsdb_file_cast(replica);
702 ovsdb_log_close(file->log);
703 free(file->file_name);
/* Replica class for file-backed replicas.  Its address is what
 * ovsdb_file_cast() checks to recognize an ovsdb_file.  (The initializer's
 * members are not visible in this excerpt.) */
707 static const struct ovsdb_replica_class ovsdb_file_class = {
/* Resets 'ftxn' before rows are added with ovsdb_file_txn_add_row().
 *
 * NOTE(review): only the reset of 'table_json' is visible in this excerpt;
 * presumably the other members are cleared on the missing lines. */
713 ovsdb_file_txn_init(struct ovsdb_file_txn *ftxn)
716 ftxn->table_json = NULL;
/* Adds one row change to the JSON transaction being built in 'ftxn'.
 *
 * 'old' and 'new' are the row before and after the change; the visible code
 * handles either being null ('new ? new : old').  'changed' is a bitmap
 * indexed by column index.
 *
 * The JSON for the row is either a JSON null or an object mapping column
 * names to values.  When there is no 'old' row the object is created up
 * front; otherwise it is created only once a column has to be written.  The
 * UUID column and non-persistent columns are never written.
 *
 * The row JSON is stored under the row's UUID string in the JSON object for
 * the row's table, which is in turn stored under the table's name in the
 * transaction's top-level object.
 *
 * NOTE(review): several conditions are on lines not visible in this excerpt:
 * when JSON null is chosen (presumably when 'new' is null, i.e. a deletion),
 * which of the 'changed' test and the "not default" test applies, and when
 * the row is skipped altogether. */
721 ovsdb_file_txn_add_row(struct ovsdb_file_txn *ftxn,
722 const struct ovsdb_row *old,
723 const struct ovsdb_row *new,
724 const unsigned long int *changed)
729 row = json_null_create();
731 struct shash_node *node;
/* With an 'old' row, start with no JSON and create it on demand below. */
733 row = old ? NULL : json_object_create();
734 SHASH_FOR_EACH (node, &new->table->schema->columns) {
735 const struct ovsdb_column *column = node->data;
736 const struct ovsdb_type *type = &column->type;
737 unsigned int idx = column->index;
/* Never write the UUID column (the UUID is the member name instead) or
 * columns that are not persistent. */
739 if (idx != OVSDB_COL_UUID && column->persistent
741 ? bitmap_is_set(changed, idx)
742 : !ovsdb_datum_is_default(&new->fields[idx], type)))
745 row = json_object_create();
747 json_object_put(row, column->name,
748 ovsdb_datum_to_json(&new->fields[idx], type));
754 struct ovsdb_table *table = new ? new->table : old->table;
755 char uuid[UUID_LEN + 1];
/* A row from a different table than the previous one starts a new per-table
 * JSON object. */
757 if (table != ftxn->table) {
758 /* Create JSON object for transaction overall. */
760 ftxn->json = json_object_create();
763 /* Create JSON object for transaction on this table. */
764 ftxn->table_json = json_object_create();
766 json_object_put(ftxn->json, table->schema->name, ftxn->table_json);
769 /* Add row to transaction for this table. */
770 snprintf(uuid, sizeof uuid,
771 UUID_FMT, UUID_ARGS(ovsdb_row_get_uuid(new ? new : old)));
772 json_object_put(ftxn->table_json, uuid, row);
/* Writes the JSON transaction 'json' to 'log'.
 *
 * Before writing, a "_comment" member holding 'comment' and a "_date" member
 * holding the current wall-clock time are added to the transaction object.
 * A failed write is returned wrapped as "writing transaction failed"; a
 * failed log commit is returned wrapped as "committing transaction failed".
 *
 * NOTE(review): the conditions are on lines not visible in this excerpt.
 * Presumably an empty object is created only when 'json' is null, the
 * comment is added only when 'comment' is nonnull, and the log is committed
 * only when 'durable' is true.  Ownership of 'json' after the write is not
 * visible either.
 *
 * NOTE(review): the comment on ovsdb_file_txn_from_json() describes the date
 * as milliseconds since the epoch; confirm that time_wall() returns that
 * unit. */
776 static struct ovsdb_error *
777 ovsdb_file_txn_commit(struct json *json, const char *comment,
778 bool durable, struct ovsdb_log *log)
780 struct ovsdb_error *error;
783 json = json_object_create();
786 json_object_put_string(json, "_comment", comment);
788 json_object_put(json, "_date", json_integer_create(time_wall()));
790 error = ovsdb_log_write(log, json);
793 return ovsdb_wrap_error(error, "writing transaction failed");
797 error = ovsdb_log_commit(log);
799 return ovsdb_wrap_error(error, "committing transaction failed");