From: Justin Pettit Date: Fri, 5 Feb 2010 23:58:27 +0000 (-0800) Subject: Merge branch 'master' into next X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a4af00400a835eb87569ba40e21874c05e872c0f;p=openvswitch Merge branch 'master' into next Conflicts: COPYING datapath/datapath.h lib/automake.mk lib/dpif-provider.h lib/dpif.c lib/hmap.h lib/netdev-provider.h lib/netdev.c lib/stream-ssl.h ofproto/executer.c ofproto/ofproto.c ofproto/ofproto.h tests/automake.mk utilities/ovs-ofctl.c utilities/ovs-vsctl.in vswitchd/ovs-vswitchd.conf.5.in xenserver/etc_init.d_vswitch xenserver/etc_xensource_scripts_vif xenserver/opt_xensource_libexec_interface-reconfigure --- a4af00400a835eb87569ba40e21874c05e872c0f diff --cc COPYING index 3f0659d0,375efeca..39dd45c6 --- a/COPYING +++ b/COPYING @@@ -23,26 -23,6 +23,30 @@@ Files under the xenserver directory ar basis. Some files are under an uncertain license that may not be DFSG-compliant or GPL-compatible. Refer to each file for details. +The files under ovsdb/simplejson are covered by the following license: + + Copyright (c) 2006 Bob Ippolito + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. ++ + Files lib/sflow*.[ch] are licensed under the terms of the InMon sFlow + licence that is available at: + http://www.inmon.com/technology/sflowlicense.txt diff --cc datapath/datapath.c index 116fd989,ba363fb7..6a3b9ec7 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@@ -594,118 -593,37 +595,116 @@@ static int skb_pull_up_to(struct sk_buf int vswitch_skb_checksum_setup(struct sk_buff *skb) { - if (skb->proto_csum_blank) { - if (skb->protocol != htons(ETH_P_IP)) - goto out; - if (!skb_pull_up_to(skb, skb->nh.iph + 1)) - goto out; - skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl; - switch (skb->nh.iph->protocol) { - case IPPROTO_TCP: - skb->csum = offsetof(struct tcphdr, check); - break; - case IPPROTO_UDP: - skb->csum = offsetof(struct udphdr, check); - break; - default: - if (net_ratelimit()) - printk(KERN_ERR "Attempting to checksum a non-" - "TCP/UDP packet, dropping a protocol" - " %d packet", skb->nh.iph->protocol); - goto out; - } - if (!skb_pull_up_to(skb, skb->h.raw + skb->csum + 2)) - goto out; - skb->ip_summed = CHECKSUM_HW; - skb->proto_csum_blank = 0; + struct iphdr *iph; + unsigned char *th; + int err = -EPROTO; + __u16 csum_start, csum_offset; + + if (!skb->proto_csum_blank) + return 0; + + if (skb->protocol != htons(ETH_P_IP)) + goto out; + + if (!skb_pull_up_to(skb, skb_network_header(skb) + 1)) + goto out; + + iph = ip_hdr(skb); + th = skb_network_header(skb) + 4 * iph->ihl; + + csum_start = th - skb->head; + switch (iph->protocol) { + case IPPROTO_TCP: + csum_offset = offsetof(struct tcphdr, check); + break; + case IPPROTO_UDP: + csum_offset = offsetof(struct udphdr, check); + break; + default: + if (net_ratelimit()) + printk(KERN_ERR "Attempting to checksum a 
non-" + "TCP/UDP packet, dropping a protocol" + " %d packet", iph->protocol); + goto out; } - return 0; + + if (!skb_pull_up_to(skb, th + csum_offset + 2)) + goto out; + + skb->ip_summed = CHECKSUM_PARTIAL; + skb->proto_csum_blank = 0; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) + skb->csum_start = csum_start; + skb->csum_offset = csum_offset; +#else + skb_set_transport_header(skb, csum_start - skb_headroom(skb)); + skb->csum = csum_offset; +#endif + + err = 0; + out: - return -EPROTO; + return err; } - #else - int vswitch_skb_checksum_setup(struct sk_buff *skb) { return 0; } - #endif /* CONFIG_XEN && linux == 2.6.18 */ + #endif /* CONFIG_XEN && HAVE_PROTO_DATA_VALID */ + /* Types of checksums that we can receive (these all refer to L4 checksums): + * 1. CHECKSUM_NONE: Device that did not compute checksum, contains full + * (though not verified) checksum in packet but not in skb->csum. Packets + * from the bridge local port will also have this type. + * 2. CHECKSUM_COMPLETE (CHECKSUM_HW): Good device that computes checksums, + * also the GRE module. This is the same as CHECKSUM_NONE, except it has + * a valid skb->csum. Importantly, both contain a full checksum (not + * verified) in the packet itself. The only difference is that if the + * packet gets to L4 processing on this machine (not in DomU) we won't + * have to recompute the checksum to verify. Most hardware devices do not + * produce packets with this type, even if they support receive checksum + * offloading (they produce type #5). + * 3. CHECKSUM_PARTIAL (CHECKSUM_HW): Packet without full checksum and needs to + * be computed if it is sent off box. Unfortunately on earlier kernels, + * this case is impossible to distinguish from #2, despite having opposite + * meanings. Xen adds an extra field on earlier kernels (see #4) in order + * to distinguish the different states. The only real user of this type + * with bridging is Xen (on later kernels). + * 4. 
CHECKSUM_UNNECESSARY (with proto_csum_blank true): This packet was + * generated locally by a Xen DomU and has a partial checksum. If it is + * handled on this machine (Dom0 or DomU), then the checksum will not be + * computed. If it goes off box, the checksum in the packet needs to be + * completed. Calling skb_checksum_setup converts this to CHECKSUM_HW + * (CHECKSUM_PARTIAL) so that the checksum can be completed. In later + * kernels, this combination is replaced with CHECKSUM_PARTIAL. + * 5. CHECKSUM_UNNECESSARY (with proto_csum_blank false): Packet with a correct + * full checksum or using a protocol without a checksum. skb->csum is + * undefined. This is common from devices with receive checksum + * offloading. This is somewhat similar to CHECKSUM_NONE, except that + * nobody will try to verify the checksum with CHECKSUM_UNNECESSARY. + * + * Note that on earlier kernels, CHECKSUM_COMPLETE and CHECKSUM_PARTIAL are + * both defined as CHECKSUM_HW. Normally the meaning of CHECKSUM_HW is clear + * based on whether it is on the transmit or receive path. After the datapath + * it will be interpreted as CHECKSUM_PARTIAL. If the packet already has a + * checksum, we will panic. Since we can receive packets with checksums, we + * assume that all CHECKSUM_HW packets have checksums and map them to + * CHECKSUM_NONE, which has a similar meaning (it is only different if the + * packet is processed by the local IP stack, in which case it will need to + * be reverified). If we receive a packet with CHECKSUM_HW that really means + * CHECKSUM_PARTIAL, it will be sent with the wrong checksum. However, there + * shouldn't be any devices that do this with bridging. + * + * The bridge has similar behavior and this function closely resembles + * skb_forward_csum(). 
It is slightly different because we are only concerned + * with bridging and not other types of forwarding and can get away with + * slightly more optimal behavior.*/ +void +forward_ip_summed(struct sk_buff *skb) +{ +#ifdef CHECKSUM_HW + if (skb->ip_summed == CHECKSUM_HW) + skb->ip_summed = CHECKSUM_NONE; +#endif +} + /* Append each packet in 'skb' list to 'queue'. There will be only one packet * unless we broke up a GSO packet. */ static int diff --cc datapath/datapath.h index d6883db2,3b5a67b1..6732b59e --- a/datapath/datapath.h +++ b/datapath/datapath.h @@@ -169,7 -218,4 +219,6 @@@ static inline int vswitch_skb_checksum_ } #endif - int vswitch_skb_checksum_setup(struct sk_buff *skb); +void forward_ip_summed(struct sk_buff *skb); + #endif /* datapath.h */ diff --cc lib/automake.mk index 31cab8ab,67100fc1..51d3c11b --- a/lib/automake.mk +++ b/lib/automake.mk @@@ -113,18 -94,9 +113,19 @@@ lib_libopenvswitch_a_SOURCES = lib/signals.h \ lib/socket-util.c \ lib/socket-util.h \ + lib/sort.c \ + lib/sort.h \ lib/stp.c \ lib/stp.h \ + lib/stream-fd.c \ + lib/stream-fd.h \ + lib/stream-provider.h \ + lib/stream-ssl.h \ + lib/stream-tcp.c \ + lib/stream-unix.c \ + lib/stream.c \ + lib/stream.h \ + lib/string.h \ lib/svec.c \ lib/svec.h \ lib/tag.c \ diff --cc lib/dpif-provider.h index 699f724a,39c66e11..fddc8ea3 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@@ -27,21 -31,18 +31,21 @@@ extern "C" * * This structure should be treated as opaque by dpif implementations. 
*/ struct dpif { - const struct dpif_class *class; + const struct dpif_class *dpif_class; - char *name; + char *base_name; + char *full_name; uint8_t netflow_engine_type; uint8_t netflow_engine_id; }; void dpif_init(struct dpif *, const struct dpif_class *, const char *name, uint8_t netflow_engine_type, uint8_t netflow_engine_id); +void dpif_uninit(struct dpif *dpif, bool close); + static inline void dpif_assert_class(const struct dpif *dpif, - const struct dpif_class *class) + const struct dpif_class *dpif_class) { - assert(dpif->class == class); + assert(dpif->dpif_class == dpif_class); } /* Datapath interface class structure, to be defined by each implementation of diff --cc lib/dpif.c index f3d6713d,7edaf31b..315f11f9 --- a/lib/dpif.c +++ b/lib/dpif.c @@@ -317,14 -204,9 +317,15 @@@ voi dpif_close(struct dpif *dpif) { if (dpif) { - char *name = dpif->name; - dpif->dpif_class->close(dpif); - free(name); + struct registered_dpif_class *registered_class; + - registered_class = shash_find_data(&dpif_classes, dpif->class->type); ++ registered_class = shash_find_data(&dpif_classes, ++ dpif->dpif_class->type); + assert(registered_class); + assert(registered_class->refcount); + + registered_class->refcount--; + dpif_uninit(dpif, true); } } @@@ -1050,35 -960,15 +1088,36 @@@ dpif_get_netflow_ids(const struct dpif } void - dpif_init(struct dpif *dpif, const struct dpif_class *class, const char *name, + dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class, + const char *name, uint8_t netflow_engine_type, uint8_t netflow_engine_id) { - dpif->class = class; + dpif->dpif_class = dpif_class; - dpif->name = xstrdup(name); + dpif->base_name = xstrdup(name); - dpif->full_name = xasprintf("%s@%s", class->type, name); ++ dpif->full_name = xasprintf("%s@%s", dpif_class->type, name); dpif->netflow_engine_type = netflow_engine_type; dpif->netflow_engine_id = netflow_engine_id; } + +/* Undoes the results of initialization. 
+ * + * Normally this function only needs to be called from dpif_close(). + * However, it may be called by providers due to an error on opening + * that occurs after initialization. It this case dpif_close() would + * never be called. */ +void +dpif_uninit(struct dpif *dpif, bool close) +{ + char *base_name = dpif->base_name; + char *full_name = dpif->full_name; + + if (close) { - dpif->class->close(dpif); ++ dpif->dpif_class->close(dpif); + } + + free(base_name); + free(full_name); +} static void log_operation(const struct dpif *dpif, const char *operation, int error) diff --cc lib/hmap.h index abf380be,d770af88..2f4a302f --- a/lib/hmap.h +++ b/lib/hmap.h @@@ -66,7 -52,6 +70,7 @@@ struct hmap void hmap_init(struct hmap *); void hmap_destroy(struct hmap *); void hmap_swap(struct hmap *a, struct hmap *b); - void hmap_moved(struct hmap *); ++void hmap_moved(struct hmap *hmap); static inline size_t hmap_count(const struct hmap *); static inline bool hmap_is_empty(const struct hmap *); @@@ -80,31 -65,13 +84,32 @@@ static inline void hmap_insert_fast(str struct hmap_node *, size_t hash); static inline void hmap_insert(struct hmap *, struct hmap_node *, size_t hash); static inline void hmap_remove(struct hmap *, struct hmap_node *); -static inline void hmap_moved(struct hmap *, - struct hmap_node *, struct hmap_node *); -static inline void hmap_replace(struct hmap *, - const struct hmap_node *old_node, - struct hmap_node *new_node); + -/* Search. */ +void hmap_node_moved(struct hmap *, struct hmap_node *, struct hmap_node *); +static inline void hmap_replace(struct hmap *, const struct hmap_node *old, + struct hmap_node *new); + +/* Search. + * + * HMAP_FOR_EACH_WITH_HASH iterates NODE over all of the nodes in HMAP that + * have hash value equal to HASH. HMAP_FOR_EACH_IN_BUCKET iterates NODE over + * all of the nodes in HMAP that would fall in the same bucket as HASH. 
STRUCT + * and MEMBER must be the name of the struct that contains the 'struct + * hmap_node' and the name of the 'struct hmap_node' member, respectively. + * + * These macros may be used interchangeably to search for a particular value in + * an hmap, see, e.g. shash_find() for an example. Usually, using + * HMAP_FOR_EACH_WITH_HASH provides an optimization, because comparing a hash + * value is usually cheaper than comparing an entire hash map key. But for + * simple hash map keys, it makes sense to use HMAP_FOR_EACH_IN_BUCKET because + * it avoids doing two comparisons when a single simple comparison suffices. + * + * The loop should not change NODE to point to a different node or insert or + * delete nodes in HMAP (unless it "break"s out of the loop to terminate + * iteration). + * + * HASH is only evaluated once. + */ #define HMAP_FOR_EACH_WITH_HASH(NODE, STRUCT, MEMBER, HASH, HMAP) \ for ((NODE) = CONTAINER_OF(hmap_first_with_hash(HMAP, HASH), \ STRUCT, MEMBER); \ @@@ -207,24 -165,36 +212,24 @@@ hmap_remove(struct hmap *hmap, struct h hmap->n--; } - /* Puts 'new' in the position in 'hmap' currently occupied by 'old'. The 'new' - * node must hash to the same value as 'old'. The client is responsible for - * ensuring that the replacement does not violate any client-imposed - * invariants (e.g. uniqueness of keys within a map). -/* Adjusts 'hmap' to compensate for 'old_node' having moved position in memory - * to 'node' (e.g. due to realloc()). */ -static inline void -hmap_moved(struct hmap *hmap, - struct hmap_node *old_node, struct hmap_node *node) -{ - struct hmap_node **bucket = &hmap->buckets[node->hash & hmap->mask]; - while (*bucket != old_node) { - bucket = &(*bucket)->next; - } - *bucket = node; -} - + /* Puts 'new_node' in the position in 'hmap' currently occupied by 'old_node'. + * The 'new_node' must hash to the same value as 'old_node'. 
The client is + * responsible for ensuring that the replacement does not violate any + * client-imposed invariants (e.g. uniqueness of keys within a map). * - * Afterward, 'old' is not part of 'hmap', and the client is responsible for - * freeing it (if this is desirable). */ + * Afterward, 'old_node' is not part of 'hmap', and the client is responsible + * for freeing it (if this is desirable). */ static inline void hmap_replace(struct hmap *hmap, - const struct hmap_node *old, struct hmap_node *new) + const struct hmap_node *old_node, struct hmap_node *new_node) { - struct hmap_node **bucket = &hmap->buckets[old->hash & hmap->mask]; - while (*bucket != old) { + struct hmap_node **bucket = &hmap->buckets[old_node->hash & hmap->mask]; + while (*bucket != old_node) { bucket = &(*bucket)->next; } - *bucket = new; - new->hash = old->hash; - new->next = old->next; + *bucket = new_node; + new_node->hash = old_node->hash; ++ new_node->next = old_node->next; } static inline struct hmap_node * diff --cc lib/netdev-provider.h index 43a330c7,07141db7..1eb1b1e4 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@@ -25,40 -24,30 +25,45 @@@ #include "list.h" #include "shash.h" + #ifdef __cplusplus + extern "C" { + #endif + -/* A network device object that was created through the netdev_create() - * call. +struct arg { + char *key; + char *value; +}; + +/* A network device (e.g. an Ethernet device). * * This structure should be treated as opaque by network device * implementations. */ -struct netdev_obj { - const struct netdev_class *netdev_class; - int ref_cnt; - bool created; /* Was netdev_create() called? */ +struct netdev_dev { + char *name; /* Name of network device. */ - const struct netdev_class *class; /* Functions to control this device. */ ++ const struct netdev_class *netdev_class; /* Functions to control ++ this device. */ + int ref_cnt; /* Times this devices was opened. */ + struct shash_node *node; /* Pointer to element in global map. 
*/ + struct arg *args; /* Argument list from last config. */ + int n_args; /* Number of arguments in 'args'. */ }; -void netdev_obj_init(struct netdev_obj *, const char *name, - const struct netdev_class *, bool created); -static inline void netdev_obj_assert_class(const struct netdev_obj *netdev_obj, +void netdev_dev_init(struct netdev_dev *, const char *name, + const struct netdev_class *); +void netdev_dev_uninit(struct netdev_dev *, bool destroy); +const char *netdev_dev_get_type(const struct netdev_dev *); +const char *netdev_dev_get_name(const struct netdev_dev *); +struct netdev_dev *netdev_dev_from_name(const char *name); +void netdev_dev_get_devices(const struct netdev_class *, + struct shash *device_list); + +static inline void netdev_dev_assert_class(const struct netdev_dev *netdev_dev, - const struct netdev_class *class) + const struct netdev_class *class_) { - assert(netdev_dev->class == class); - assert(netdev_obj->netdev_class == class_); ++ assert(netdev_dev->netdev_class == class_); } -/* A network device (e.g. an Ethernet device). +/* A instance of an open network device. * * This structure should be treated as opaque by network device * implementations. */ @@@ -68,17 -57,17 +73,17 @@@ struct netdev enum netdev_flags save_flags; /* Initial device flags. */ enum netdev_flags changed_flags; /* Flags that we changed. */ - struct list node; /* Element in global list. 
*/ }; -void netdev_init(struct netdev *, const char *name, - const struct netdev_class *); +void netdev_init(struct netdev *, struct netdev_dev *); +void netdev_uninit(struct netdev *, bool close); +struct netdev_dev *netdev_get_dev(const struct netdev *); + static inline void netdev_assert_class(const struct netdev *netdev, - const struct netdev_class *class) + const struct netdev_class *netdev_class) { - netdev_dev_assert_class(netdev_get_dev(netdev), class); - assert(netdev->netdev_class == netdev_class); ++ netdev_dev_assert_class(netdev_get_dev(netdev), netdev_class); } -const char *netdev_get_type(const struct netdev *netdev); /* A network device notifier. * @@@ -356,6 -353,9 +361,10 @@@ struct netdev_class extern const struct netdev_class netdev_linux_class; extern const struct netdev_class netdev_tap_class; +extern const struct netdev_class netdev_gre_class; + #ifdef __cplusplus + } + #endif + #endif /* netdev.h */ diff --cc lib/netdev.c index 88ba0178,804050fc..ddd6e92c --- a/lib/netdev.c +++ b/lib/netdev.c @@@ -111,57 -129,32 +112,57 @@@ netdev_wait(void } } -/* Attempts to create a network device object of 'type' with 'name'. 'type' - * corresponds to the 'type' field used in the netdev_class * structure. - * Arguments for creation are provided in 'args', which may be empty or NULL - * if none are needed. */ +/* Initializes and registers a new netdev provider. After successful + * registration, new netdevs of that type can be opened using netdev_open(). 
*/ int -netdev_create(const char *name, const char *type, const struct shash *args) +netdev_register_provider(const struct netdev_class *new_class) { - struct shash empty_args = SHASH_INITIALIZER(&empty_args); - int i; + struct netdev_class *new_provider; - netdev_initialize(); + if (shash_find(&netdev_classes, new_class->type)) { + VLOG_WARN("attempted to register duplicate netdev provider: %s", + new_class->type); + return EEXIST; + } - if (!args) { - args = &empty_args; + if (new_class->init) { + int error = new_class->init(); + if (error) { + VLOG_ERR("failed to initialize %s network device class: %s", + new_class->type, strerror(error)); + return error; + } } - if (shash_find(&netdev_obj_shash, name)) { - VLOG_WARN("attempted to create a netdev object with bound name: %s", - name); - return EEXIST; + new_provider = xmalloc(sizeof *new_provider); + memcpy(new_provider, new_class, sizeof *new_provider); + + shash_add(&netdev_classes, new_class->type, new_provider); + + return 0; +} + +/* Unregisters a netdev provider. 'type' must have been previously + * registered and not currently be in use by any netdevs. After unregistration + * new netdevs of that type cannot be opened using netdev_open(). 
*/ +int +netdev_unregister_provider(const char *type) +{ + struct shash_node *del_node, *netdev_dev_node; + + del_node = shash_find(&netdev_classes, type); + if (!del_node) { + VLOG_WARN("attempted to unregister a netdev provider that is not " + "registered: %s", type); + return EAFNOSUPPORT; } - for (i = 0; i < n_netdev_classes; i++) { - const struct netdev_class *class = netdev_classes[i]; - if (!strcmp(type, class->type)) { - return class->create(name, type, args, true); + SHASH_FOR_EACH(netdev_dev_node, &netdev_dev_shash) { + struct netdev_dev *netdev_dev = netdev_dev_node->data; - if (!strcmp(netdev_dev->class->type, type)) { ++ if (!strcmp(netdev_dev->netdev_class->type, type)) { + VLOG_WARN("attempted to unregister in use netdev provider: %s", + type); + return EBUSY; } } @@@ -293,106 -222,48 +294,107 @@@ create_device(struct netdev_options *op * 'ethertype' may be a 16-bit Ethernet protocol value in host byte order to * capture frames of that type received on the device. It may also be one of * the 'enum netdev_pseudo_ethertype' values to receive frames in one of those - * categories. */ + * categories. + * + * If the 'may_create' flag is set then this is allowed to be the first time + * the device is opened (i.e. the refcount will be 1 after this call). It + * may be set to false if the device should have already been created. + * + * If the 'may_open' flag is set then the call will succeed even if another + * caller has already opened it. It may be to false if the device should not + * currently be open. 
*/ + int -netdev_open(const char *name, int ethertype, struct netdev **netdevp) +netdev_open(struct netdev_options *options, struct netdev **netdevp) { - struct netdev_obj *netdev_obj; - struct netdev *netdev = NULL; + struct shash empty_args = SHASH_INITIALIZER(&empty_args); + struct netdev_dev *netdev_dev; int error; - int i; + *netdevp = NULL; netdev_initialize(); - netdev_obj = shash_find_data(&netdev_obj_shash, name); - if (netdev_obj) { - error = netdev_obj->netdev_class->open(name, ethertype, &netdev); - } else { - /* Default to "system". */ - error = EAFNOSUPPORT; - for (i = 0; i < n_netdev_classes; i++) { - const struct netdev_class *class = netdev_classes[i]; - if (!strcmp(class->type, "system")) { - struct shash empty_args = SHASH_INITIALIZER(&empty_args); - - /* Dynamically create the netdev object, but indicate - * that it should be destroyed when the the last user - * closes its handle. */ - error = class->create(name, "system", &empty_args, false); - if (!error) { - error = class->open(name, ethertype, &netdev); - netdev_obj = shash_find_data(&netdev_obj_shash, name); - } - break; - } + if (!options->args) { + options->args = &empty_args; + } + + netdev_dev = shash_find_data(&netdev_dev_shash, options->name); + + if (!netdev_dev) { + error = create_device(options, &netdev_dev); + if (error) { + return error; } + update_device_args(netdev_dev, options->args); + + } else if (options->may_open) { + if (!shash_is_empty(options->args) && + !compare_device_args(netdev_dev, options->args)) { + + VLOG_WARN("%s: attempted to open already created netdev with " + "different arguments", options->name); + return EINVAL; + } + } else { + VLOG_WARN("%s: attempted to create a netdev device with bound name", + options->name); + return EEXIST; } + - error = netdev_dev->class->open(netdev_dev, options->ethertype, netdevp); ++ error = netdev_dev->netdev_class->open(netdev_dev, options->ethertype, ++ netdevp); + if (!error) { - netdev_obj->ref_cnt++; + 
netdev_dev->ref_cnt++; + } else { + if (!netdev_dev->ref_cnt) { + netdev_dev_uninit(netdev_dev, true); + } } - *netdevp = error ? NULL : netdev; return error; } +int +netdev_open_default(const char *name, struct netdev **netdevp) +{ + struct netdev_options options; + + memset(&options, 0, sizeof options); + + options.name = name; + options.ethertype = NETDEV_ETH_TYPE_NONE; + options.may_create = true; + options.may_open = true; + + return netdev_open(&options, netdevp); +} + +/* Reconfigures the device 'netdev' with 'args'. 'args' may be empty + * or NULL if none are needed. */ +int +netdev_reconfigure(struct netdev *netdev, const struct shash *args) +{ + struct shash empty_args = SHASH_INITIALIZER(&empty_args); + struct netdev_dev *netdev_dev = netdev_get_dev(netdev); + + if (!args) { + args = &empty_args; + } + - if (netdev_dev->class->reconfigure) { ++ if (netdev_dev->netdev_class->reconfigure) { + if (!compare_device_args(netdev_dev, args)) { + update_device_args(netdev_dev, args); - return netdev_dev->class->reconfigure(netdev_dev, args); ++ return netdev_dev->netdev_class->reconfigure(netdev_dev, args); + } + } else if (!shash_is_empty(args)) { + VLOG_WARN("%s: arguments provided to device that does not have a " + "reconfigure function", netdev_get_name(netdev)); + } + + return 0; +} + /* Closes and destroys 'netdev'. 
*/ void netdev_close(struct netdev *netdev) @@@ -479,8 -372,8 +481,8 @@@ netdev_recv(struct netdev *netdev, stru assert(buffer->size == 0); assert(ofpbuf_tailroom(buffer) >= ETH_TOTAL_MIN); - retval = netdev_get_dev(netdev)->class->recv(netdev, buffer->data, - retval = netdev->netdev_class->recv(netdev, - buffer->data, ofpbuf_tailroom(buffer)); ++ retval = netdev_get_dev(netdev)->netdev_class->recv(netdev, buffer->data, + ofpbuf_tailroom(buffer)); if (retval >= 0) { COVERAGE_INC(netdev_received); buffer->size += retval; @@@ -498,14 -391,14 +500,14 @@@ void netdev_recv_wait(struct netdev *netdev) { - netdev_get_dev(netdev)->class->recv_wait(netdev); - netdev->netdev_class->recv_wait(netdev); ++ netdev_get_dev(netdev)->netdev_class->recv_wait(netdev); } /* Discards all packets waiting to be received from 'netdev'. */ int netdev_drain(struct netdev *netdev) { - return netdev_get_dev(netdev)->class->drain(netdev); - return netdev->netdev_class->drain(netdev); ++ return netdev_get_dev(netdev)->netdev_class->drain(netdev); } /* Sends 'buffer' on 'netdev'. Returns 0 if successful, otherwise a positive @@@ -520,8 -413,7 +522,8 @@@ int netdev_send(struct netdev *netdev, const struct ofpbuf *buffer) { - int error = netdev_get_dev(netdev)->class->send(netdev, buffer->data, - buffer->size); - int error = netdev->netdev_class->send(netdev, buffer->data, buffer->size); ++ int error = netdev_get_dev(netdev)->netdev_class->send(netdev, ++ buffer->data, buffer->size); if (!error) { COVERAGE_INC(netdev_sent); } @@@ -538,7 -430,7 +540,7 @@@ void netdev_send_wait(struct netdev *netdev) { - return netdev_get_dev(netdev)->class->send_wait(netdev); - return netdev->netdev_class->send_wait(netdev); ++ return netdev_get_dev(netdev)->netdev_class->send_wait(netdev); } /* Attempts to set 'netdev''s MAC address to 'mac'. 
Returns 0 if successful, @@@ -546,7 -438,7 +548,7 @@@ int netdev_set_etheraddr(struct netdev *netdev, const uint8_t mac[ETH_ADDR_LEN]) { - return netdev_get_dev(netdev)->class->set_etheraddr(netdev, mac); - return netdev->netdev_class->set_etheraddr(netdev, mac); ++ return netdev_get_dev(netdev)->netdev_class->set_etheraddr(netdev, mac); } /* Retrieves 'netdev''s MAC address. If successful, returns 0 and copies the @@@ -555,7 -447,7 +557,7 @@@ int netdev_get_etheraddr(const struct netdev *netdev, uint8_t mac[ETH_ADDR_LEN]) { - return netdev_get_dev(netdev)->class->get_etheraddr(netdev, mac); - return netdev->netdev_class->get_etheraddr(netdev, mac); ++ return netdev_get_dev(netdev)->netdev_class->get_etheraddr(netdev, mac); } /* Returns the name of the network device that 'netdev' represents, @@@ -576,7 -468,7 +578,7 @@@ netdev_get_name(const struct netdev *ne int netdev_get_mtu(const struct netdev *netdev, int *mtup) { - int error = netdev_get_dev(netdev)->class->get_mtu(netdev, mtup); - int error = netdev->netdev_class->get_mtu(netdev, mtup); ++ int error = netdev_get_dev(netdev)->netdev_class->get_mtu(netdev, mtup); if (error) { VLOG_WARN_RL(&rl, "failed to retrieve MTU for network device %s: %s", netdev_get_name(netdev), strerror(error)); @@@ -597,7 -489,7 +599,7 @@@ int netdev_get_ifindex(const struct netdev *netdev) { - return netdev_get_dev(netdev)->class->get_ifindex(netdev); - return netdev->netdev_class->get_ifindex(netdev); ++ return netdev_get_dev(netdev)->netdev_class->get_ifindex(netdev); } /* Stores the features supported by 'netdev' into each of '*current', @@@ -626,9 -518,8 +628,8 @@@ netdev_get_features(struct netdev *netd peer = &dummy[3]; } - error = netdev_get_dev(netdev)->class->get_features(netdev, current, - advertised, supported, - peer); - error = netdev->netdev_class->get_features(netdev, current, advertised, - supported, peer); ++ error = netdev_get_dev(netdev)->netdev_class->get_features(netdev, current, ++ advertised, supported, peer); 
if (error) { *current = *advertised = *supported = *peer = 0; } @@@ -640,9 -560,8 +670,9 @@@ netdev_features_is_full_duplex(uint32_ int netdev_set_advertisements(struct netdev *netdev, uint32_t advertise) { - return (netdev_get_dev(netdev)->class->set_advertisements - ? netdev_get_dev(netdev)->class->set_advertisements(netdev, - advertise) - return (netdev->netdev_class->set_advertisements - ? netdev->netdev_class->set_advertisements(netdev, advertise) ++ return (netdev_get_dev(netdev)->netdev_class->set_advertisements ++ ? netdev_get_dev(netdev)->netdev_class->set_advertisements( ++ netdev, advertise) : EOPNOTSUPP); } @@@ -666,9 -585,8 +696,9 @@@ netdev_get_in4(const struct netdev *net struct in_addr netmask; int error; - error = (netdev_get_dev(netdev)->class->get_in4 - ? netdev_get_dev(netdev)->class->get_in4(netdev, &address, - &netmask) - error = (netdev->netdev_class->get_in4 - ? netdev->netdev_class->get_in4(netdev, &address, &netmask) ++ error = (netdev_get_dev(netdev)->netdev_class->get_in4 ++ ? netdev_get_dev(netdev)->netdev_class->get_in4(netdev, ++ &address, &netmask) : EOPNOTSUPP); if (address_) { address_->s_addr = error ? 0 : address.s_addr; @@@ -685,8 -603,8 +715,8 @@@ int netdev_set_in4(struct netdev *netdev, struct in_addr addr, struct in_addr mask) { - return (netdev_get_dev(netdev)->class->set_in4 - ? netdev_get_dev(netdev)->class->set_in4(netdev, addr, mask) - return (netdev->netdev_class->set_in4 - ? netdev->netdev_class->set_in4(netdev, addr, mask) ++ return (netdev_get_dev(netdev)->netdev_class->set_in4 ++ ? netdev_get_dev(netdev)->netdev_class->set_in4(netdev, addr, mask) : EOPNOTSUPP); } @@@ -696,8 -614,8 +726,8 @@@ in netdev_add_router(struct netdev *netdev, struct in_addr router) { COVERAGE_INC(netdev_add_router); - return (netdev_get_dev(netdev)->class->add_router - ? netdev_get_dev(netdev)->class->add_router(netdev, router) - return (netdev->netdev_class->add_router - ? 
netdev->netdev_class->add_router(netdev, router) ++ return (netdev_get_dev(netdev)->netdev_class->add_router ++ ? netdev_get_dev(netdev)->netdev_class->add_router(netdev, router) : EOPNOTSUPP); } @@@ -713,9 -631,9 +743,9 @@@ netdev_get_next_hop(const struct netde const struct in_addr *host, struct in_addr *next_hop, char **netdev_name) { - int error = (netdev_get_dev(netdev)->class->get_next_hop - ? netdev_get_dev(netdev)->class->get_next_hop(host, next_hop, - netdev_name) - int error = (netdev->netdev_class->get_next_hop - ? netdev->netdev_class->get_next_hop(host, next_hop, - netdev_name) ++ int error = (netdev_get_dev(netdev)->netdev_class->get_next_hop ++ ? netdev_get_dev(netdev)->netdev_class->get_next_hop( ++ host, next_hop, netdev_name) : EOPNOTSUPP); if (error) { next_hop->s_addr = 0; @@@ -741,9 -659,8 +771,9 @@@ netdev_get_in6(const struct netdev *net struct in6_addr dummy; int error; - error = (netdev_get_dev(netdev)->class->get_in6 - ? netdev_get_dev(netdev)->class->get_in6(netdev, in6 ? in6 - : &dummy) - error = (netdev->netdev_class->get_in6 - ? netdev->netdev_class->get_in6(netdev, in6 ? in6 : &dummy) ++ error = (netdev_get_dev(netdev)->netdev_class->get_in6 ++ ? netdev_get_dev(netdev)->netdev_class->get_in6(netdev, ++ in6 ? in6 : &dummy) : EOPNOTSUPP); if (error && in6) { memset(in6, 0, sizeof *in6); @@@ -763,8 -680,8 +793,8 @@@ do_update_flags(struct netdev *netdev, enum netdev_flags old_flags; int error; - error = netdev_get_dev(netdev)->class->update_flags(netdev, off & ~on, on, - &old_flags); - error = netdev->netdev_class->update_flags(netdev, off & ~on, - on, &old_flags); ++ error = netdev_get_dev(netdev)->netdev_class->update_flags(netdev, ++ off & ~on, on, &old_flags); if (error) { VLOG_WARN_RL(&rl, "failed to %s flags for network device %s: %s", off || on ? 
"set" : "get", netdev_get_name(netdev), @@@ -837,8 -754,8 +867,9 @@@ in netdev_arp_lookup(const struct netdev *netdev, uint32_t ip, uint8_t mac[ETH_ADDR_LEN]) { - int error = (netdev_get_dev(netdev)->class->arp_lookup - ? netdev_get_dev(netdev)->class->arp_lookup(netdev, ip, mac) - int error = (netdev->netdev_class->arp_lookup - ? netdev->netdev_class->arp_lookup(netdev, ip, mac) ++ int error = (netdev_get_dev(netdev)->netdev_class->arp_lookup ++ ? netdev_get_dev(netdev)->netdev_class->arp_lookup(netdev, ++ ip, mac) : EOPNOTSUPP); if (error) { memset(mac, 0, ETH_ADDR_LEN); @@@ -851,8 -768,8 +882,9 @@@ int netdev_get_carrier(const struct netdev *netdev, bool *carrier) { - int error = (netdev_get_dev(netdev)->class->get_carrier - ? netdev_get_dev(netdev)->class->get_carrier(netdev, carrier) - int error = (netdev->netdev_class->get_carrier - ? netdev->netdev_class->get_carrier(netdev, carrier) ++ int error = (netdev_get_dev(netdev)->netdev_class->get_carrier ++ ? netdev_get_dev(netdev)->netdev_class->get_carrier(netdev, ++ carrier) : EOPNOTSUPP); if (error) { *carrier = false; @@@ -867,8 -784,8 +899,8 @@@ netdev_get_stats(const struct netdev *n int error; COVERAGE_INC(netdev_get_stats); - error = (netdev_get_dev(netdev)->class->get_stats - ? netdev_get_dev(netdev)->class->get_stats(netdev, stats) - error = (netdev->netdev_class->get_stats - ? netdev->netdev_class->get_stats(netdev, stats) ++ error = (netdev_get_dev(netdev)->netdev_class->get_stats ++ ? netdev_get_dev(netdev)->netdev_class->get_stats(netdev, stats) : EOPNOTSUPP); if (error) { memset(stats, 0xff, sizeof *stats); @@@ -883,9 -800,9 +915,9 @@@ in netdev_set_policing(struct netdev *netdev, uint32_t kbits_rate, uint32_t kbits_burst) { - return (netdev_get_dev(netdev)->class->set_policing - ? netdev_get_dev(netdev)->class->set_policing(netdev, kbits_rate, - kbits_burst) - return (netdev->netdev_class->set_policing - ? 
netdev->netdev_class->set_policing(netdev, - kbits_rate, kbits_burst) ++ return (netdev_get_dev(netdev)->netdev_class->set_policing ++ ? netdev_get_dev(netdev)->netdev_class->set_policing(netdev, ++ kbits_rate, kbits_burst) : EOPNOTSUPP); } @@@ -897,8 -814,8 +929,9 @@@ int netdev_get_vlan_vid(const struct netdev *netdev, int *vlan_vid) { - int error = (netdev_get_dev(netdev)->class->get_vlan_vid - ? netdev_get_dev(netdev)->class->get_vlan_vid(netdev, vlan_vid) - int error = (netdev->netdev_class->get_vlan_vid - ? netdev->netdev_class->get_vlan_vid(netdev, vlan_vid) ++ int error = (netdev_get_dev(netdev)->netdev_class->get_vlan_vid ++ ? netdev_get_dev(netdev)->netdev_class->get_vlan_vid(netdev, ++ vlan_vid) : ENOENT); if (error) { *vlan_vid = 0; @@@ -934,93 -851,26 +967,93 @@@ exit return netdev; } -/* Initializes 'netdev_obj' as a netdev object named 'name' of the +/* Initializes 'netdev_dev' as a netdev device named 'name' of the - * specified 'class'. + * specified 'netdev_class'. * - * This function adds 'netdev_obj' to a netdev-owned shash, so it is - * very important that 'netdev_obj' only be freed after calling - * netdev_destroy(). */ + * This function adds 'netdev_dev' to a netdev-owned shash, so it is + * very important that 'netdev_dev' only be freed after calling + * the refcount drops to zero. 
*/ void -netdev_obj_init(struct netdev_obj *netdev_obj, const char *name, - const struct netdev_class *netdev_class, bool created) +netdev_dev_init(struct netdev_dev *netdev_dev, const char *name, - const struct netdev_class *class) ++ const struct netdev_class *class_) { - assert(!shash_find(&netdev_obj_shash, name)); + assert(!shash_find(&netdev_dev_shash, name)); - netdev_obj->netdev_class = netdev_class; - netdev_obj->ref_cnt = 0; - netdev_obj->created = created; - shash_add(&netdev_obj_shash, name, netdev_obj); + memset(netdev_dev, 0, sizeof *netdev_dev); - netdev_dev->class = class; ++ netdev_dev->netdev_class = class_; + netdev_dev->name = xstrdup(name); + netdev_dev->node = shash_add(&netdev_dev_shash, name, netdev_dev); } -/* Initializes 'netdev' as a netdev named 'name' of the specified - * 'netdev_class'. +/* Undoes the results of initialization. + * + * Normally this function does not need to be called as netdev_close has + * the same effect when the refcount drops to zero. + * However, it may be called by providers due to an error on creation + * that occurs after initialization. It this case netdev_close() would + * never be called. */ +void +netdev_dev_uninit(struct netdev_dev *netdev_dev, bool destroy) +{ + char *name = netdev_dev->name; + + assert(!netdev_dev->ref_cnt); + + shash_delete(&netdev_dev_shash, netdev_dev->node); + update_device_args(netdev_dev, NULL); + + if (destroy) { - netdev_dev->class->destroy(netdev_dev); ++ netdev_dev->netdev_class->destroy(netdev_dev); + } + free(name); +} + +/* Returns the class type of 'netdev_dev'. + * + * The caller must not free the returned value. */ +const char * +netdev_dev_get_type(const struct netdev_dev *netdev_dev) +{ - return netdev_dev->class->type; ++ return netdev_dev->netdev_class->type; +} + +/* Returns the name of 'netdev_dev'. + * + * The caller must not free the returned value. 
*/ +const char * +netdev_dev_get_name(const struct netdev_dev *netdev_dev) +{ + return netdev_dev->name; +} + +/* Returns the netdev_dev with 'name' or NULL if there is none. + * + * The caller must not free the returned value. */ +struct netdev_dev * +netdev_dev_from_name(const char *name) +{ + return shash_find_data(&netdev_dev_shash, name); +} + +/* Fills 'device_list' with devices that match 'class'. + * + * The caller is responsible for initializing and destroying 'device_list' + * but the contained netdev_devs must not be freed. */ +void - netdev_dev_get_devices(const struct netdev_class *class, ++netdev_dev_get_devices(const struct netdev_class *class_, + struct shash *device_list) +{ + struct shash_node *node; + SHASH_FOR_EACH (node, &netdev_dev_shash) { + struct netdev_dev *dev = node->data; + - if (dev->class == class) { ++ if (dev->netdev_class == class_) { + shash_add(device_list, node->name, node->data); + } + } +} + +/* Initializes 'netdev' as a instance of the netdev_dev. * * This function adds 'netdev' to a netdev-owned linked list, so it is very * important that 'netdev' only be freed after calling netdev_close(). */ @@@ -1032,42 -885,12 +1065,42 @@@ netdev_init(struct netdev *netdev, stru list_push_back(&netdev_list, &netdev->node); } +/* Undoes the results of initialization. + * + * Normally this function only needs to be called from netdev_close(). + * However, it may be called by providers due to an error on opening + * that occurs after initialization. It this case netdev_close() would + * never be called. */ +void +netdev_uninit(struct netdev *netdev, bool close) +{ + /* Restore flags that we changed, if any. 
*/ + int error = restore_flags(netdev); + list_remove(&netdev->node); + if (error) { + VLOG_WARN("failed to restore network device flags on %s: %s", + netdev_get_name(netdev), strerror(error)); + } + + if (close) { - netdev_get_dev(netdev)->class->close(netdev); ++ netdev_get_dev(netdev)->netdev_class->close(netdev); + } +} + + /* Returns the class type of 'netdev'. * * The caller must not free the returned value. */ -const char *netdev_get_type(const struct netdev *netdev) +const char * +netdev_get_type(const struct netdev *netdev) { - return netdev_get_dev(netdev)->class->type; - return netdev->netdev_class->type; ++ return netdev_get_dev(netdev)->netdev_class->type; +} + +struct netdev_dev * +netdev_get_dev(const struct netdev *netdev) +{ + return netdev->netdev_dev; } /* Initializes 'notifier' as a netdev notifier for 'netdev', for which @@@ -1107,7 -930,7 +1140,8 @@@ netdev_monitor_destroy(struct netdev_mo SHASH_FOR_EACH (node, &monitor->polled_netdevs) { struct netdev_notifier *notifier = node->data; - netdev_get_dev(notifier->netdev)->class->poll_remove(notifier); - notifier->netdev->netdev_class->poll_remove(notifier); ++ netdev_get_dev(notifier->netdev)->netdev_class->poll_remove( ++ notifier); } shash_destroy(&monitor->polled_netdevs); @@@ -1137,12 -960,11 +1171,11 @@@ netdev_monitor_add(struct netdev_monito const char *netdev_name = netdev_get_name(netdev); int error = 0; if (!shash_find(&monitor->polled_netdevs, netdev_name) - && netdev_get_dev(netdev)->class->poll_add) - && netdev->netdev_class->poll_add) ++ && netdev_get_dev(netdev)->netdev_class->poll_add) { struct netdev_notifier *notifier; - error = netdev_get_dev(netdev)->class->poll_add(netdev, - netdev_monitor_cb, - monitor, &notifier); - error = netdev->netdev_class->poll_add(netdev, netdev_monitor_cb, - monitor, &notifier); ++ error = netdev_get_dev(netdev)->netdev_class->poll_add(netdev, ++ netdev_monitor_cb, monitor, &notifier); if (!error) { assert(notifier->netdev == netdev);
shash_add(&monitor->polled_netdevs, netdev_name, notifier); @@@ -1164,7 -986,7 +1197,7 @@@ netdev_monitor_remove(struct netdev_mon if (node) { /* Cancel future notifications. */ struct netdev_notifier *notifier = node->data; - netdev_get_dev(netdev)->class->poll_remove(notifier); - netdev->netdev_class->poll_remove(notifier); ++ netdev_get_dev(netdev)->netdev_class->poll_remove(notifier); shash_delete(&monitor->polled_netdevs, node); /* Drop any pending notification. */ @@@ -1222,9 -1044,10 +1255,9 @@@ restore_flags(struct netdev *netdev if (netdev->changed_flags) { enum netdev_flags restore = netdev->save_flags & netdev->changed_flags; enum netdev_flags old_flags; - return netdev_get_dev(netdev)->class->update_flags(netdev, - return netdev->netdev_class->update_flags(netdev, - netdev->changed_flags - & ~restore, - restore, &old_flags); ++ return netdev_get_dev(netdev)->netdev_class->update_flags(netdev, + netdev->changed_flags & ~restore, + restore, &old_flags); } return 0; } diff --cc lib/stream-ssl.h index 3c2a8986,00000000..dd2a16ee mode 100644,000000..100644 --- a/lib/stream-ssl.h +++ b/lib/stream-ssl.h @@@ -1,54 -1,0 +1,59 @@@ +/* + * Copyright (c) 2008, 2009 Nicira Networks. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef STREAM_SSL_H +#define STREAM_SSL_H 1 + +#include <stdbool.h> + +#ifdef HAVE_OPENSSL +bool stream_ssl_is_configured(void); +void stream_ssl_set_private_key_file(const char *file_name); +void stream_ssl_set_certificate_file(const char *file_name); +void stream_ssl_set_ca_cert_file(const char *file_name, bool bootstrap); +void stream_ssl_set_peer_ca_cert_file(const char *file_name); + - #define STREAM_SSL_LONG_OPTIONS \ ++/* Define the long options for SSL support. ++ * ++ * Note that the definition includes a final comma, and therefore a comma ++ * must not be supplied when using the definition. This is done so that ++ * compilation succeeds whether or not HAVE_OPENSSL is defined. */ ++#define STREAM_SSL_LONG_OPTIONS \ + {"private-key", required_argument, 0, 'p'}, \ + {"certificate", required_argument, 0, 'c'}, \ + {"ca-cert", required_argument, 0, 'C'}, + +#define STREAM_SSL_OPTION_HANDLERS \ + case 'p': \ + stream_ssl_set_private_key_file(optarg); \ + break; \ + \ + case 'c': \ + stream_ssl_set_certificate_file(optarg); \ + break; \ + \ + case 'C': \ + stream_ssl_set_ca_cert_file(optarg, false); \ + break; +#else /* !HAVE_OPENSSL */ +static inline bool stream_ssl_is_configured(void) +{ + return false; +} +#define STREAM_SSL_LONG_OPTIONS +#define STREAM_SSL_OPTION_HANDLERS +#endif /* !HAVE_OPENSSL */ + +#endif /* stream-ssl.h */ diff --cc lib/vlog-modules.def index 8506516b,b791525e..f012e10c --- a/lib/vlog-modules.def +++ b/lib/vlog-modules.def @@@ -64,14 -60,9 +64,15 @@@ VLOG_MODULE(port_watcher VLOG_MODULE(proc_net_compat) VLOG_MODULE(process) VLOG_MODULE(rconn) +VLOG_MODULE(reconnect) VLOG_MODULE(rtnetlink) + VLOG_MODULE(sflow) VLOG_MODULE(stp) +VLOG_MODULE(stream_fd) +VLOG_MODULE(stream_ssl) +VLOG_MODULE(stream_tcp) +VLOG_MODULE(stream_unix) +VLOG_MODULE(stream) VLOG_MODULE(stats) VLOG_MODULE(status) VLOG_MODULE(svec) diff --cc ofproto/ofproto-sflow.c index 00000000,b37db42d..1b659d1a mode 000000,100644..100644 --- a/ofproto/ofproto-sflow.c +++
b/ofproto/ofproto-sflow.c @@@ -1,0 -1,607 +1,607 @@@ + /* + * Copyright (c) 2009, 2010 InMon Corp. + * Copyright (c) 2009 Nicira Networks. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + #include <config.h> + #include "ofproto-sflow.h" + #include <inttypes.h> + #include <stdlib.h> + #include "collectors.h" + #include "dpif.h" + #include "compiler.h" + #include "netdev.h" + #include "ofpbuf.h" + #include "ofproto.h" + #include "poll-loop.h" + #include "port-array.h" + #include "sflow_api.h" + #include "socket-util.h" + #include "timeval.h" + + #define THIS_MODULE VLM_sflow + #include "vlog.h" + + struct ofproto_sflow_port { + struct netdev *netdev; /* Underlying network device, for stats. */ + SFLDataSource_instance dsi; /* sFlow library's notion of port number. */ + }; + + struct ofproto_sflow { + struct ofproto *ofproto; + struct collectors *collectors; + SFLAgent *sflow_agent; + struct ofproto_sflow_options *options; + struct dpif *dpif; + time_t next_tick; + size_t n_flood, n_all; + struct port_array ports; /* Indexed by ODP port number. */ + }; + + #define RECEIVER_INDEX 1 + + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + + static bool + nullable_string_is_equal(const char *a, const char *b) + { + return a ?
b && !strcmp(a, b) : !b; + } + + static bool + ofproto_sflow_options_equal(const struct ofproto_sflow_options *a, + const struct ofproto_sflow_options *b) + { + return (svec_equal(&a->targets, &b->targets) + && a->sampling_rate == b->sampling_rate + && a->polling_interval == b->polling_interval + && a->header_len == b->header_len + && a->sub_id == b->sub_id + && nullable_string_is_equal(a->agent_device, b->agent_device) + && nullable_string_is_equal(a->control_ip, b->control_ip)); + } + + static struct ofproto_sflow_options * + ofproto_sflow_options_clone(const struct ofproto_sflow_options *old) + { + struct ofproto_sflow_options *new = xmemdup(old, sizeof *old); + svec_clone(&new->targets, &old->targets); + new->agent_device = old->agent_device ? xstrdup(old->agent_device) : NULL; + new->control_ip = old->control_ip ? xstrdup(old->control_ip) : NULL; + return new; + } + + static void + ofproto_sflow_options_destroy(struct ofproto_sflow_options *options) + { + if (options) { + svec_destroy(&options->targets); + free(options->agent_device); + free(options->control_ip); + free(options); + } + } + + /* sFlow library callback to allocate memory. */ + static void * + sflow_agent_alloc_cb(void *magic UNUSED, SFLAgent *agent UNUSED, size_t bytes) + { + return calloc(1, bytes); + } + + /* sFlow library callback to free memory. */ + static int + sflow_agent_free_cb(void *magic UNUSED, SFLAgent *agent UNUSED, void *obj) + { + free(obj); + return 0; + } + + /* sFlow library callback to report error. */ + static void + sflow_agent_error_cb(void *magic UNUSED, SFLAgent *agent UNUSED, char *msg) + { + VLOG_WARN("sFlow agent error: %s", msg); + } + + /* sFlow library callback to send datagram. 
*/ + static void + sflow_agent_send_packet_cb(void *os_, SFLAgent *agent UNUSED, + SFLReceiver *receiver UNUSED, u_char *pkt, + uint32_t pktLen) + { + struct ofproto_sflow *os = os_; + collectors_send(os->collectors, pkt, pktLen); + } + + static void + sflow_agent_get_counters(void *os_, SFLPoller *poller, + SFL_COUNTERS_SAMPLE_TYPE *cs) + { + struct ofproto_sflow *os = os_; + SFLCounters_sample_element elem; + struct ofproto_sflow_port *osp; + SFLIf_counters *counters; + struct netdev_stats stats; + enum netdev_flags flags; + uint32_t current; + + osp = port_array_get(&os->ports, poller->bridgePort); + if (!osp) { + return; + } + + elem.tag = SFLCOUNTERS_GENERIC; + counters = &elem.counterBlock.generic; + counters->ifIndex = SFL_DS_INDEX(poller->dsi); + counters->ifType = 6; + if (!netdev_get_features(osp->netdev, &current, NULL, NULL, NULL)) { + /* The values of ifDirection come from MAU MIB (RFC 2668): 0 = unknown, + 1 = full-duplex, 2 = half-duplex, 3 = in, 4=out */ + counters->ifSpeed = netdev_features_to_bps(current); + counters->ifDirection = (netdev_features_is_full_duplex(current) + ? 1 : 2); + } else { + counters->ifSpeed = 100000000; + counters->ifDirection = 0; + } + if (!netdev_get_flags(osp->netdev, &flags) && flags & NETDEV_UP) { + bool carrier; + + counters->ifStatus = 1; /* ifAdminStatus up. */ + if (!netdev_get_carrier(osp->netdev, &carrier) && carrier) { + counters->ifStatus |= 2; /* ifOperStatus us. */ + } + } else { + counters->ifStatus = 0; /* Down. */ + } + + /* XXX + 1. Is the multicast counter filled in? + 2. Does the multicast counter include broadcasts? + 3. Does the rx_packets counter include multicasts/broadcasts?
+ */ + netdev_get_stats(osp->netdev, &stats); + counters->ifInOctets = stats.rx_bytes; + counters->ifInUcastPkts = stats.rx_packets; + counters->ifInMulticastPkts = stats.multicast; + counters->ifInBroadcastPkts = -1; + counters->ifInDiscards = stats.rx_dropped; + counters->ifInErrors = stats.rx_errors; + counters->ifInUnknownProtos = -1; + counters->ifOutOctets = stats.tx_bytes; + counters->ifOutUcastPkts = stats.tx_packets; + counters->ifOutMulticastPkts = -1; + counters->ifOutBroadcastPkts = -1; + counters->ifOutDiscards = stats.tx_dropped; + counters->ifOutErrors = stats.tx_errors; + counters->ifPromiscuousMode = 0; + + SFLADD_ELEMENT(cs, &elem); + sfl_poller_writeCountersSample(poller, cs); + } + + /* Obtains an address to use for the local sFlow agent and stores it into + * '*agent_addr'. Returns true if successful, false on failure. + * + * The sFlow agent address should be a local IP address that is persistent and + * reachable over the network, if possible. The IP address associated with + * 'agent_device' is used if it has one, and otherwise 'control_ip', the IP + * address used to talk to the controller. 
*/ + static bool + sflow_choose_agent_address(const char *agent_device, const char *control_ip, + SFLAddress *agent_addr) + { + struct in_addr in4; + + memset(agent_addr, 0, sizeof *agent_addr); + agent_addr->type = SFLADDRESSTYPE_IP_V4; + + if (agent_device) { + struct netdev *netdev; + - if (!netdev_open(agent_device, NETDEV_ETH_TYPE_NONE, &netdev)) { ++ if (!netdev_open_default(agent_device, &netdev)) { + int error = netdev_get_in4(netdev, &in4, NULL); + netdev_close(netdev); + if (!error) { + goto success; + } + } + } + + if (control_ip && !lookup_ip(control_ip, &in4)) { + goto success; + } + + VLOG_ERR("could not determine IP address for sFlow agent"); + return false; + + success: + agent_addr->address.ip_v4.addr = in4.s_addr; + return true; + } + + void + ofproto_sflow_clear(struct ofproto_sflow *os) + { + struct ofproto_sflow_port *osp; + unsigned int odp_port; + + if (os->sflow_agent) { + sfl_agent_release(os->sflow_agent); + os->sflow_agent = NULL; + } + collectors_destroy(os->collectors); + os->collectors = NULL; + ofproto_sflow_options_destroy(os->options); + os->options = NULL; + + PORT_ARRAY_FOR_EACH (osp, &os->ports, odp_port) { + ofproto_sflow_del_port(os, odp_port); + } + port_array_clear(&os->ports); + + /* Turn off sampling to save CPU cycles. 
*/ + dpif_set_sflow_probability(os->dpif, 0); + } + + bool + ofproto_sflow_is_enabled(const struct ofproto_sflow *os) + { + return os->collectors != NULL; + } + + struct ofproto_sflow * + ofproto_sflow_create(struct dpif *dpif) + { + struct ofproto_sflow *os; + + os = xcalloc(1, sizeof *os); + os->dpif = dpif; + os->next_tick = time_now() + 1; + port_array_init(&os->ports); + return os; + } + + void + ofproto_sflow_destroy(struct ofproto_sflow *os) + { + if (os) { + ofproto_sflow_clear(os); + port_array_destroy(&os->ports); + free(os); + } + } + + static void + ofproto_sflow_add_poller(struct ofproto_sflow *os, + struct ofproto_sflow_port *osp, uint16_t odp_port) + { + SFLPoller *poller = sfl_agent_addPoller(os->sflow_agent, &osp->dsi, os, + sflow_agent_get_counters); + sfl_poller_set_sFlowCpInterval(poller, os->options->polling_interval); + sfl_poller_set_sFlowCpReceiver(poller, RECEIVER_INDEX); + sfl_poller_set_bridgePort(poller, odp_port); + } + + static void + ofproto_sflow_add_sampler(struct ofproto_sflow *os, + struct ofproto_sflow_port *osp, + u_int32_t sampling_rate, u_int32_t header_len) + { + SFLSampler *sampler = sfl_agent_addSampler(os->sflow_agent, &osp->dsi); + sfl_sampler_set_sFlowFsPacketSamplingRate(sampler, sampling_rate); + sfl_sampler_set_sFlowFsMaximumHeaderSize(sampler, header_len); + sfl_sampler_set_sFlowFsReceiver(sampler, RECEIVER_INDEX); + } + + void + ofproto_sflow_add_port(struct ofproto_sflow *os, uint16_t odp_port, + const char *netdev_name) + { + struct ofproto_sflow_port *osp; + struct netdev *netdev; + uint32_t ifindex; + int error; + + ofproto_sflow_del_port(os, odp_port); + + /* Open network device. */ - error = netdev_open(netdev_name, NETDEV_ETH_TYPE_NONE, &netdev); ++ error = netdev_open_default(netdev_name, &netdev); + if (error) { + VLOG_WARN_RL(&rl, "failed to open network device \"%s\": %s", + netdev_name, strerror(error)); + return; + } + + /* Add to table of ports. 
*/ + osp = xmalloc(sizeof *osp); + osp->netdev = netdev; + ifindex = netdev_get_ifindex(netdev); + if (ifindex <= 0) { + ifindex = (os->sflow_agent->subId << 16) + odp_port; + } + SFL_DS_SET(osp->dsi, 0, ifindex, 0); + port_array_set(&os->ports, odp_port, osp); + + /* Add poller. */ + if (os->sflow_agent) { + ofproto_sflow_add_poller(os, osp, odp_port); + } + } + + void + ofproto_sflow_del_port(struct ofproto_sflow *os, uint16_t odp_port) + { + struct ofproto_sflow_port *osp = port_array_get(&os->ports, odp_port); + if (osp) { + if (os->sflow_agent) { + sfl_agent_removePoller(os->sflow_agent, &osp->dsi); + sfl_agent_removeSampler(os->sflow_agent, &osp->dsi); + } + netdev_close(osp->netdev); + free(osp); + port_array_set(&os->ports, odp_port, NULL); + } + } + + void + ofproto_sflow_set_options(struct ofproto_sflow *os, + const struct ofproto_sflow_options *options) + { + struct ofproto_sflow_port *osp; + bool options_changed; + SFLReceiver *receiver; + unsigned int odp_port; + SFLAddress agentIP; + time_t now; + int error; + + if (!options->targets.n || !options->sampling_rate) { + /* No point in doing any work if there are no targets or nothing to + * sample. */ + ofproto_sflow_clear(os); + return; + } + + options_changed = (!os->options + || !ofproto_sflow_options_equal(options, os->options)); + + /* Configure collectors if options have changed or if we're shortchanged in + * collectors (which indicates that opening one or more of the configured + * collectors failed, so that we should retry). */ + if (options_changed + || collectors_count(os->collectors) < options->targets.n) { + collectors_destroy(os->collectors); + error = collectors_create(&options->targets, + SFL_DEFAULT_COLLECTOR_PORT, &os->collectors); + if (os->collectors == NULL) { + VLOG_WARN_RL(&rl, "no collectors could be initialized, " + "sFlow disabled"); + ofproto_sflow_clear(os); + return; + } + } + + /* Avoid reconfiguring if options didn't change. 
*/ + if (!options_changed) { + return; + } + ofproto_sflow_options_destroy(os->options); + os->options = ofproto_sflow_options_clone(options); + + /* Choose agent IP address. */ + if (!sflow_choose_agent_address(options->agent_device, + options->control_ip, &agentIP)) { + ofproto_sflow_clear(os); + return; + } + + /* Create agent. */ + VLOG_INFO("creating sFlow agent %d", options->sub_id); + if (os->sflow_agent) { + sfl_agent_release(os->sflow_agent); + } + os->sflow_agent = xcalloc(1, sizeof *os->sflow_agent); + now = time_now(); + sfl_agent_init(os->sflow_agent, + &agentIP, + options->sub_id, + now, /* Boot time. */ + now, /* Current time. */ + os, /* Pointer supplied to callbacks. */ + sflow_agent_alloc_cb, + sflow_agent_free_cb, + sflow_agent_error_cb, + sflow_agent_send_packet_cb); + + receiver = sfl_agent_addReceiver(os->sflow_agent); + sfl_receiver_set_sFlowRcvrOwner(receiver, "Open vSwitch sFlow"); + sfl_receiver_set_sFlowRcvrTimeout(receiver, 0xffffffff); + + /* Set the sampling_rate down in the datapath. */ + dpif_set_sflow_probability(os->dpif, + MAX(1, UINT32_MAX / options->sampling_rate)); + + /* Add samplers and pollers for the currently known ports. */ + PORT_ARRAY_FOR_EACH (osp, &os->ports, odp_port) { + ofproto_sflow_add_sampler(os, osp, + options->sampling_rate, options->header_len); + } + } + + static int + ofproto_sflow_odp_port_to_ifindex(const struct ofproto_sflow *os, + uint16_t odp_port) + { + struct ofproto_sflow_port *osp = port_array_get(&os->ports, odp_port); + return osp ? 
SFL_DS_INDEX(osp->dsi) : 0; + } + + void + ofproto_sflow_received(struct ofproto_sflow *os, struct odp_msg *msg) + { + SFL_FLOW_SAMPLE_TYPE fs; + SFLFlow_sample_element hdrElem; + SFLSampled_header *header; + SFLFlow_sample_element switchElem; + SFLSampler *sampler; + const struct odp_sflow_sample_header *hdr; + const union odp_action *actions; + struct ofpbuf payload; + size_t n_actions, n_outputs; + size_t min_size; + flow_t flow; + size_t i; + + /* Get odp_sflow_sample_header. */ + min_size = sizeof *msg + sizeof *hdr; + if (min_size > msg->length) { + VLOG_WARN_RL(&rl, "sFlow packet too small (%"PRIu32" < %zu)", + msg->length, min_size); + return; + } + hdr = (const struct odp_sflow_sample_header *) (msg + 1); + + /* Get actions. */ + n_actions = hdr->n_actions; + if (n_actions > 65536 / sizeof *actions) { + VLOG_WARN_RL(&rl, "too many actions in sFlow packet (%zu > %zu)", + 65536 / sizeof *actions, n_actions); + return; + } + min_size += n_actions * sizeof *actions; + if (min_size > msg->length) { + VLOG_WARN_RL(&rl, "sFlow packet with %zu actions too small " + "(%"PRIu32" < %zu)", + n_actions, msg->length, min_size); + return; + } + actions = (const union odp_action *) (hdr + 1); + + /* Get packet payload and extract flow. */ + payload.data = (union odp_action *) (actions + n_actions); + payload.size = msg->length - min_size; + flow_extract(&payload, msg->port, &flow); + + /* Build a flow sample */ + memset(&fs, 0, sizeof fs); + fs.input = ofproto_sflow_odp_port_to_ifindex(os, msg->port); + fs.output = 0; /* Filled in correctly below. */ + fs.sample_pool = hdr->sample_pool; + + /* We are going to give it to the sampler that represents this input port. + * By implementing "ingress-only" sampling like this we ensure that we + * never have to offer the same sample to more than one sampler. 
*/ + sampler = sfl_agent_getSamplerByIfIndex(os->sflow_agent, fs.input); + if (!sampler) { + VLOG_WARN_RL(&rl, "no sampler for input ifIndex (%"PRIu32")", + fs.input); + return; + } + + /* Sampled header. */ + memset(&hdrElem, 0, sizeof hdrElem); + hdrElem.tag = SFLFLOW_HEADER; + header = &hdrElem.flowType.header; + header->header_protocol = SFLHEADER_ETHERNET_ISO8023; + header->frame_length = payload.size; + header->stripped = 4; /* Ethernet FCS stripped off. */ + header->header_length = MIN(payload.size, + sampler->sFlowFsMaximumHeaderSize); + header->header_bytes = payload.data; + + /* Add extended switch element. */ + memset(&switchElem, 0, sizeof(switchElem)); + switchElem.tag = SFLFLOW_EX_SWITCH; + switchElem.flowType.sw.src_vlan = ntohs(flow.dl_vlan); + switchElem.flowType.sw.src_priority = -1; /* XXX */ + switchElem.flowType.sw.dst_vlan = -1; /* Filled in correctly below. */ + switchElem.flowType.sw.dst_priority = switchElem.flowType.sw.src_priority; + + /* Figure out the output ports. */ + n_outputs = 0; + for (i = 0; i < n_actions; i++) { + const union odp_action *a = &actions[i]; + + switch (a->type) { + case ODPAT_OUTPUT: + fs.output = ofproto_sflow_odp_port_to_ifindex(os, a->output.port); + n_outputs++; + break; + + case ODPAT_OUTPUT_GROUP: + n_outputs += (a->output_group.group == DP_GROUP_FLOOD ? os->n_flood + : a->output_group.group == DP_GROUP_ALL ? os->n_all + : 0); + break; + + case ODPAT_SET_VLAN_VID: + switchElem.flowType.sw.dst_vlan = ntohs(a->vlan_vid.vlan_vid); + break; + + case ODPAT_SET_VLAN_PCP: + switchElem.flowType.sw.dst_priority = a->vlan_pcp.vlan_pcp; + break; + + default: + break; + } + } + + /* Set output port, as defined by http://www.sflow.org/sflow_version_5.txt + (search for "Input/output port information"). */ + if (!n_outputs) { + /* This value indicates that the packet was dropped for an unknown + * reason. 
*/ + fs.output = 0x40000000 | 256; + } else if (n_outputs > 1 || !fs.output) { + /* Setting the high bit means "multiple output ports". */ + fs.output = 0x80000000 | n_outputs; + } + + /* Submit the flow sample to be encoded into the next datagram. */ + SFLADD_ELEMENT(&fs, &hdrElem); + SFLADD_ELEMENT(&fs, &switchElem); + sfl_sampler_writeFlowSample(sampler, &fs); + } + + void + ofproto_sflow_set_group_sizes(struct ofproto_sflow *os, + size_t n_flood, size_t n_all) + { + os->n_flood = n_flood; + os->n_all = n_all; + } + + void + ofproto_sflow_run(struct ofproto_sflow *os) + { + if (ofproto_sflow_is_enabled(os)) { + time_t now = time_now(); + if (now >= os->next_tick) { + sfl_agent_tick(os->sflow_agent, now); + os->next_tick = now + 1; + } + } + } + + void + ofproto_sflow_wait(struct ofproto_sflow *os) + { + if (ofproto_sflow_is_enabled(os)) { + poll_timer_wait(os->next_tick * 1000 - time_msec()); + } + } diff --cc ofproto/ofproto.c index 10b4796f,43054fa3..c44762c4 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@@ -204,7 -205,9 +202,8 @@@ struct ofproto struct discovery *discovery; struct fail_open *fail_open; struct pinsched *miss_sched, *action_sched; - struct executer *executer; struct netflow *netflow; + struct ofproto_sflow *sflow; /* Flow table. */ struct classifier cls; @@@ -311,7 -314,9 +311,8 @@@ ofproto_create(const char *datapath, co p->discovery = NULL; p->fail_open = NULL; p->miss_sched = p->action_sched = NULL; - p->executer = NULL; p->netflow = NULL; + p->sflow = NULL; /* Initialize flow table. 
*/ classifier_init(&p->cls); @@@ -675,7 -741,9 +700,8 @@@ ofproto_destroy(struct ofproto *p fail_open_destroy(p->fail_open); pinsched_destroy(p->miss_sched); pinsched_destroy(p->action_sched); - executer_destroy(p->executer); netflow_destroy(p->netflow); + ofproto_sflow_destroy(p->sflow); switch_status_unregister(p->ss_cat); @@@ -882,6 -952,12 +911,9 @@@ ofproto_wait(struct ofproto *p } pinsched_wait(p->miss_sched); pinsched_wait(p->action_sched); - if (p->executer) { - executer_wait(p->executer); - } + if (p->sflow) { + ofproto_sflow_wait(p->sflow); + } if (!tag_set_is_empty(&p->revalidate_set)) { poll_immediate_wake(); } diff --cc ofproto/ofproto.h index ddc34483,6377e51e..a94c8b59 --- a/ofproto/ofproto.h +++ b/ofproto/ofproto.h @@@ -36,8 -41,17 +41,18 @@@ struct ofexpired long long int used; /* Last-used time (0 if never used). */ }; + struct ofproto_sflow_options { + struct svec targets; + uint32_t sampling_rate; + uint32_t polling_interval; + uint32_t header_len; + uint32_t sub_id; + char *agent_device; + char *control_ip; + }; + -int ofproto_create(const char *datapath, const struct ofhooks *, void *aux, +int ofproto_create(const char *datapath, const char *datapath_type, + const struct ofhooks *, void *aux, struct ofproto **ofprotop); void ofproto_destroy(struct ofproto *); int ofproto_run(struct ofproto *); diff --cc tests/automake.mk index 502e8dbd,bd8a605f..f862c33b --- a/tests/automake.mk +++ b/tests/automake.mk @@@ -234,10 -66,9 +234,13 @@@ noinst_PROGRAMS += tests/test-sha tests_test_sha1_SOURCES = tests/test-sha1.c tests_test_sha1_LDADD = lib/libopenvswitch.a +noinst_PROGRAMS += tests/test-timeval +tests_test_timeval_SOURCES = tests/test-timeval.c +tests_test_timeval_LDADD = lib/libopenvswitch.a + + noinst_PROGRAMS += tests/test-strtok_r + tests_test_strtok_r_SOURCES = tests/test-strtok_r.c + noinst_PROGRAMS += tests/test-type-props tests_test_type_props_SOURCES = tests/test-type-props.c diff --cc tests/library.at index fa2c5f34,a9a5bea5..0e408f04 --- 
a/tests/library.at +++ b/tests/library.at @@@ -32,5 -31,13 +32,9 @@@ AT_CHECK([test-sha1], [0], [ignore] AT_CLEANUP AT_SETUP([test type properties]) -OVS_CHECK_LCOV([test-type-props], [0], [ignore]) -AT_CLEANUP - -AT_SETUP([test vconn library]) -OVS_CHECK_LCOV([test-vconn], [0], [ignore]) +AT_CHECK([test-type-props], [0], [ignore]) AT_CLEANUP + + AT_SETUP([test strtok_r bug fix]) + AT_CHECK([test-strtok_r], [0], [ignore]) + AT_CLEANUP diff --cc utilities/automake.mk index b3d78418,1a9d4925..5feb01cd --- a/utilities/automake.mk +++ b/utilities/automake.mk @@@ -72,12 -80,11 +72,13 @@@ utilities_ovs_ofctl_LDADD = lib/libopen utilities_ovs_openflowd_SOURCES = utilities/ovs-openflowd.c utilities_ovs_openflowd_LDADD = \ ofproto/libofproto.a \ + lib/libsflow.a \ lib/libopenvswitch.a \ - $(FAULT_LIBS) \ $(SSL_LIBS) +utilities_ovs_vsctl_SOURCES = utilities/ovs-vsctl.c vswitchd/vswitch-idl.c +utilities_ovs_vsctl_LDADD = lib/libopenvswitch.a $(SSL_LIBS) + utilities_ovs_wdt_SOURCES = utilities/ovs-wdt.c utilities_nlmon_SOURCES = utilities/nlmon.c diff --cc utilities/ovs-ofctl.c index 52ec0b6b,199bd43e..2447ba28 --- a/utilities/ovs-ofctl.c +++ b/utilities/ovs-ofctl.c @@@ -80,18 -120,19 +80,19 @@@ main(int argc, char *argv[] } static void -parse_options(int argc, char *argv[], struct settings *s) +parse_options(int argc, char *argv[]) { enum { - OPT_STRICT = UCHAR_MAX + 1 + OPT_STRICT = UCHAR_MAX + 1, + VLOG_OPTION_ENUMS }; static struct option long_options[] = { {"timeout", required_argument, 0, 't'}, {"strict", no_argument, 0, OPT_STRICT}, {"help", no_argument, 0, 'h'}, {"version", no_argument, 0, 'V'}, + VLOG_LONG_OPTIONS, - VCONN_SSL_LONG_OPTIONS + STREAM_SSL_LONG_OPTIONS {0, 0, 0, 0}, }; char *short_options = long_options_to_short_options(long_options); @@@ -123,15 -167,12 +124,12 @@@ OVS_PRINT_VERSION(OFP_VERSION, OFP_VERSION); exit(EXIT_SUCCESS); - case 'v': - vlog_set_verbosity(optarg); - break; - case OPT_STRICT: - s->strict = true; + strict = true; break; + 
VLOG_OPTION_HANDLERS - VCONN_SSL_OPTION_HANDLERS + STREAM_SSL_OPTION_HANDLERS case '?': exit(EXIT_FAILURE); diff --cc vswitchd/automake.mk index 9121169a,d810c830..c38add65 --- a/vswitchd/automake.mk +++ b/vswitchd/automake.mk @@@ -18,7 -21,9 +18,8 @@@ vswitchd_ovs_vswitchd_SOURCES = vswitchd/xenserver.h vswitchd_ovs_vswitchd_LDADD = \ ofproto/libofproto.a \ + lib/libsflow.a \ lib/libopenvswitch.a \ - $(FAULT_LIBS) \ $(SSL_LIBS) vswitchd_ovs_brcompatd_SOURCES = \ diff --cc vswitchd/bridge.c index 66f0884f,3b7ec51f..88f8db11 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@@ -58,9 -58,10 +58,10 @@@ #include "util.h" #include "unixctl.h" #include "vconn.h" -#include "vconn-ssl.h" +#include "vswitchd/vswitch-idl.h" #include "xenserver.h" #include "xtoxll.h" + #include "sflow_api.h" #define THIS_MODULE VLM_bridge #include "vlog.h" @@@ -529,13 -524,12 +530,14 @@@ iterate_and_prune_ifaces(struct bridge } void -bridge_reconfigure(void) +bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) { - struct svec old_br, new_br; + struct ovsdb_idl_txn *txn; + struct shash old_br, new_br; + struct shash_node *node; struct bridge *br, *next; size_t i; + int sflow_bridge_number; COVERAGE_INC(bridge_reconfigure); @@@ -621,54 -601,55 +623,55 @@@ LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { struct odp_port *dpif_ports; size_t n_dpif_ports; - struct svec cur_ifaces, want_ifaces, add_ifaces; + struct shash cur_ifaces, want_ifaces; + struct shash_node *node; + /* Get the set of interfaces currently in this datapath. 
*/ dpif_port_list(br->dpif, &dpif_ports, &n_dpif_ports); - svec_init(&cur_ifaces); + shash_init(&cur_ifaces); for (i = 0; i < n_dpif_ports; i++) { - svec_add(&cur_ifaces, dpif_ports[i].devname); + const char *name = dpif_ports[i].devname; + if (!shash_find(&cur_ifaces, name)) { + shash_add(&cur_ifaces, name, NULL); + } } free(dpif_ports); - svec_sort_unique(&cur_ifaces); - bridge_get_all_ifaces(br, &want_ifaces); - svec_diff(&want_ifaces, &cur_ifaces, &add_ifaces, NULL, NULL); - - for (i = 0; i < cur_ifaces.n; i++) { - const char *if_name = cur_ifaces.names[i]; - reconfigure_iface(if_name); - } - for (i = 0; i < add_ifaces.n; i++) { - const char *if_name = add_ifaces.names[i]; - bool internal; - int error; + /* Get the set of interfaces we want on this datapath. */ + bridge_get_all_ifaces(br, &want_ifaces); - /* Attempt to create the network interface in case it - * doesn't exist yet. */ - error = create_iface(if_name); - if (error) { - VLOG_WARN("could not create iface %s: %s\n", if_name, - strerror(error)); - continue; - } + SHASH_FOR_EACH (node, &want_ifaces) { + const char *if_name = node->name; + struct iface *iface = node->data; - /* Add to datapath. */ - internal = iface_is_internal(br, if_name); - error = dpif_port_add(br->dpif, if_name, - internal ? ODP_PORT_INTERNAL : 0, NULL); - if (error == EFBIG) { - VLOG_ERR("ran out of valid port numbers on %s", - dpif_name(br->dpif)); - break; - } else if (error) { - VLOG_ERR("failed to add %s interface to %s: %s", - if_name, dpif_name(br->dpif), strerror(error)); + if (shash_find(&cur_ifaces, if_name)) { + /* Already exists, just reconfigure it. */ + if (iface) { + reconfigure_iface(iface->cfg, iface); + } + } else { + /* Need to add to datapath. */ + bool internal; + int error; + + /* Add to datapath. */ + internal = iface_is_internal(br, if_name); + error = dpif_port_add(br->dpif, if_name, + internal ? 
ODP_PORT_INTERNAL : 0, NULL); + if (error == EFBIG) { + VLOG_ERR("ran out of valid port numbers on %s", + dpif_name(br->dpif)); + break; + } else if (error) { + VLOG_ERR("failed to add %s interface to %s: %s", + if_name, dpif_name(br->dpif), strerror(error)); + } } } - svec_destroy(&cur_ifaces); - svec_destroy(&want_ifaces); - svec_destroy(&add_ifaces); + shash_destroy(&cur_ifaces); + shash_destroy(&want_ifaces); } + sflow_bridge_number = 0; LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { uint8_t ea[8]; uint64_t dpid; @@@ -697,59 -678,84 +700,98 @@@ dpid = bridge_pick_datapath_id(br, ea, hw_addr_iface); ofproto_set_datapath_id(br->ofproto, dpid); + dpid_string = xasprintf("%012"PRIx64, dpid); + ovsrec_bridge_set_datapath_id(br->cfg, dpid_string); + free(dpid_string); + /* Set NetFlow configuration on this bridge. */ - memset(&nf_options, 0, sizeof nf_options); - dpif_get_netflow_ids(br->dpif, &nf_options.engine_type, - &nf_options.engine_id); - nf_options.active_timeout = -1; - - if (cfg_has("netflow.%s.engine-type", br->name)) { - nf_options.engine_type = cfg_get_int(0, "netflow.%s.engine-type", - br->name); - } - if (cfg_has("netflow.%s.engine-id", br->name)) { - nf_options.engine_id = cfg_get_int(0, "netflow.%s.engine-id", - br->name); - } - if (cfg_has("netflow.%s.active-timeout", br->name)) { - nf_options.active_timeout = cfg_get_int(0, - "netflow.%s.active-timeout", - br->name); - } - if (cfg_has("netflow.%s.add-id-to-iface", br->name)) { - nf_options.add_id_to_iface = cfg_get_bool(0, - "netflow.%s.add-id-to-iface", - br->name); - } - if (nf_options.add_id_to_iface && nf_options.engine_id > 0x7f) { - VLOG_WARN("bridge %s: netflow port mangling may conflict with " - "another vswitch, choose an engine id less than 128", - br->name); - } - if (nf_options.add_id_to_iface && br->n_ports > 508) { - VLOG_WARN("bridge %s: netflow port mangling will conflict with " - "another port when more than 508 ports are used", - br->name); - } - 
svec_init(&nf_options.collectors); - cfg_get_all_keys(&nf_options.collectors, "netflow.%s.host", br->name); - if (ofproto_set_netflow(br->ofproto, &nf_options)) { - VLOG_ERR("bridge %s: problem setting netflow collectors", - br->name); - } - svec_destroy(&nf_options.collectors); - - if (cfg_has("sflow.%s.host", br->name)) { + if (br->cfg->netflow) { + struct ovsrec_netflow *nf_cfg = br->cfg->netflow; + struct netflow_options opts; + + memset(&opts, 0, sizeof opts); + + dpif_get_netflow_ids(br->dpif, &opts.engine_type, &opts.engine_id); + if (nf_cfg->engine_type) { + opts.engine_type = *nf_cfg->engine_type; + } + if (nf_cfg->engine_id) { + opts.engine_id = *nf_cfg->engine_id; + } + + opts.active_timeout = nf_cfg->active_timeout; + if (!opts.active_timeout) { + opts.active_timeout = -1; + } else if (opts.active_timeout < 0) { + VLOG_WARN("bridge %s: active timeout interval set to negative " + "value, using default instead (%d seconds)", br->name, + NF_ACTIVE_TIMEOUT_DEFAULT); + opts.active_timeout = -1; + } + + opts.add_id_to_iface = nf_cfg->add_id_to_interface; + if (opts.add_id_to_iface) { + if (opts.engine_id > 0x7f) { + VLOG_WARN("bridge %s: netflow port mangling may conflict " + "with another vswitch, choose an engine id less " + "than 128", br->name); + } + if (br->n_ports > 508) { + VLOG_WARN("bridge %s: netflow port mangling will conflict " + "with another port when more than 508 ports are " + "used", br->name); + } + } + + opts.collectors.n = nf_cfg->n_targets; + opts.collectors.names = nf_cfg->targets; + if (ofproto_set_netflow(br->ofproto, &opts)) { + VLOG_ERR("bridge %s: problem setting netflow collectors", + br->name); + } + } else { + ofproto_set_netflow(br->ofproto, NULL); + } + ++ /* Set sFlow configuration on this bridge. 
*/ ++ if (br->cfg->sflow) { ++ struct ovsrec_sflow *sflow_cfg = br->cfg->sflow; + struct ofproto_sflow_options oso; + - svec_init(&oso.targets); - cfg_get_all_keys(&oso.targets, "sflow.%s.host", br->name); ++ memset(&oso, 0, sizeof oso); ++ ++ oso.targets.n = sflow_cfg->n_targets; ++ oso.targets.names = sflow_cfg->targets; + + oso.sampling_rate = SFL_DEFAULT_SAMPLING_RATE; - if (cfg_has("sflow.%s.sampling", br->name)) { - oso.sampling_rate = cfg_get_int(0, "sflow.%s.sampling", - br->name); ++ if (sflow_cfg->sampling) { ++ oso.sampling_rate = *sflow_cfg->sampling; + } + + oso.polling_interval = SFL_DEFAULT_POLLING_INTERVAL; - if (cfg_has("sflow.%s.polling", br->name)) { - oso.polling_interval = cfg_get_int(0, "sflow.%s.polling", - br->name); ++ if (sflow_cfg->polling) { ++ oso.polling_interval = *sflow_cfg->polling; + } + + oso.header_len = SFL_DEFAULT_HEADER_SIZE; - if (cfg_has("sflow.%s.header", br->name)) { - oso.header_len = cfg_get_int(0, "sflow.%s.header", br->name); ++ if (sflow_cfg->header) { ++ oso.header_len = *sflow_cfg->header; + } + + oso.sub_id = sflow_bridge_number++; - oso.agent_device = (char *) cfg_get_string(0, "sflow.%s.agent", - br->name); - oso.control_ip = (char *) cfg_get_string(0, - "bridge.%s.controller.ip", - br->name); ++ oso.agent_device = sflow_cfg->agent; ++ ++#if 0 /* xxx foo */ ++ ctrl = bridge_get_controller(ovs_cfg, br); ++ oso.control_ip = ctrl ? ctrl->local_ip : NULL; ++#endif + ofproto_set_sflow(br->ofproto, &oso); + + svec_destroy(&oso.targets); + } else { + ofproto_set_sflow(br->ofproto, NULL); + } + /* Update the controller and related settings. It would be more * straightforward to call this from bridge_reconfigure_one(), but we * can't do it there for two reasons. 
First, and most importantly, at diff --cc vswitchd/vswitch-idl.ann index ff5766ae,00000000..b8e457d5 mode 100644,000000..100644 --- a/vswitchd/vswitch-idl.ann +++ b/vswitchd/vswitch-idl.ann @@@ -1,20 -1,0 +1,21 @@@ +# -*- python -*- + +# This code, when invoked by "ovsdb-idlc annotate" (by the build +# process), annotates vswitch.ovsschema with additional data that give +# the ovsdb-idl engine information about the types involved, so that +# it can generate more programmer-friendly data structures. + +s["idlPrefix"] = "ovsrec_" +s["idlHeader"] = "\"vswitchd/vswitch-idl.h\"" +s["tables"]["Open_vSwitch"]["columns"]["bridges"]["type"]["keyRefTable"] = "Bridge" +s["tables"]["Open_vSwitch"]["columns"]["controller"]["type"]["keyRefTable"] = "Controller" +s["tables"]["Open_vSwitch"]["columns"]["ssl"]["type"]["keyRefTable"] = "SSL" +s["tables"]["Bridge"]["columns"]["ports"]["type"]["keyRefTable"] = "Port" +s["tables"]["Bridge"]["columns"]["mirrors"]["type"]["keyRefTable"] = "Mirror" +s["tables"]["Bridge"]["columns"]["netflow"]["type"]["keyRefTable"] = "NetFlow" ++s["tables"]["Bridge"]["columns"]["sflow"]["type"]["keyRefTable"] = "sFlow" +s["tables"]["Bridge"]["columns"]["controller"]["type"]["keyRefTable"] = "Controller" +s["tables"]["Port"]["columns"]["interfaces"]["type"]["keyRefTable"] = "Interface" +s["tables"]["Mirror"]["columns"]["select_src_port"]["type"]["keyRefTable"] = "Port" +s["tables"]["Mirror"]["columns"]["select_dst_port"]["type"]["keyRefTable"] = "Port" +s["tables"]["Mirror"]["columns"]["output_port"]["type"]["keyRefTable"] = "Port" diff --cc vswitchd/vswitch.ovsschema index 7b0a5398,00000000..022d65fd mode 100644,000000..100644 --- a/vswitchd/vswitch.ovsschema +++ b/vswitchd/vswitch.ovsschema @@@ -1,216 -1,0 +1,237 @@@ +{"name": "ovs_vswitchd_db", + "comment": "Configuration for one Open vSwitch daemon.", + "tables": { + "Open_vSwitch": { + "comment": "Configuration for an Open vSwitch daemon.", + "columns": { + "bridges": { + "comment": "Set of bridges 
managed by the daemon.", + "type": {"key": "uuid", "min": 0, "max": "unlimited"}}, + "controller": { + "comment": "Default Controller used by bridges.", + "type": {"key": "uuid", "min": 0, "max": 1}}, + "managers": { + "comment": "Remote database clients to which the Open vSwitch's database server should connect or to which it should listen.", + "type": {"key": "string", "min": 0, "max": "unlimited"}}, + "ssl": { + "comment": "SSL used globally by the daemon.", + "type": {"key": "uuid", "min": 0, "max": 1}}, + "next_cfg": { + "comment": "Sequence number for client to increment when it modifies the configuration and wishes to wait for Open vSwitch to finish applying the changes.", + "type": "integer"}, + "cur_cfg": { + "comment": "Sequence number that Open vSwitch sets to the current value of 'next_cfg' after it finishing applying a set of configuration changes.", + "type": "integer"}}}, + "Bridge": { + "comment": "Configuration for a bridge within an Open_vSwitch.", + "columns": { + "name": { + "comment": "Bridge identifier. Should be alphanumeric and no more than about 8 bytes long. Must be unique among the names of ports, interfaces, and bridges on a host.", + "type": "string"}, + "datapath_type": { + "comment": "Name of datapath provider. The kernel datapath has type \"system\". The userspace datapath has type \"netdev\".", + "type": "string"}, + "datapath_id": { + "comment": "Reports the OpenFlow datapath ID in use. 
Exactly 12 hex digits.", + "type": {"key": "string", "min": 0, "max": 1}, + "ephemeral": true}, + "ports": { + "comment": "Ports included in the bridge.", + "type": {"key": "uuid", "min": 0, "max": "unlimited"}}, + "mirrors": { + "comment": "Port mirroring configuration.", + "type": {"key": "uuid", "min": 0, "max": "unlimited"}}, + "netflow": { + "comment": "NetFlow configuration.", + "type": {"key": "uuid", "min": 0, "max": 1}}, ++ "sflow": { ++ "comment": "sFlow configuration.", ++ "type": {"key": "uuid", "min": 0, "max": 1}}, + "controller": { + "comment": "OpenFlow controller. If unset, defaults to that specified by the parent Open_vSwitch.", + "type": {"key": "uuid", "min": 0, "max": 1}}, + "other_config": { + "comment": "Key-value pairs for configuring rarely used bridge features. The currently defined key-value pairs are: \"datapath-id\", exactly 12 hex digits to set the OpenFlow datapath ID to a specific value; \"hwaddr\", exactly 12 hex digits in the form \"XX:XX:XX:XX:XX:XX\" to set the hardware address of the local port and influence the datapath ID.", + "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, + "external_ids": { + "comment": "Key-value pairs that identify this bridge's role in external systems. The currently defined key-value pairs are: \"xs-network-uuids\", a space-delimited set of the Citrix XenServer network UUIDs with which this bridge is associated; \"xs-network-names\", a semicolon-delimited set of Citrix XenServer network names with which this bridge is associated.", + "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, + "flood_vlans": { + "comment": "VLAN IDs of VLANs on which MAC address learning should be disabled, so that packets are flooded instead of being sent to specific ports that are believed to contain packets' destination MACs. This should ordinarily be used to disable MAC learning on VLANs used for mirroring (RSPAN VLANs). 
It may also be useful for debugging.", + "type": {"key": "integer", "min": 0, "max": 4096} +}}}, + "Port": { + "comment": "A port within a Bridge. May contain a single Interface or multiple (bonded) Interfaces.", + "columns": { + "name": { + "comment": "Port name. Should be alphanumeric and no more than about 8 bytes long. May be the same as the interface name, for non-bonded ports. Must otherwise be unique among the names of ports, interfaces, and bridges on a host.", + "type": "string"}, + "interfaces": { + "comment": "The Port's Interfaces. If there is more than one, this is a bonded Port.", + "type": {"key": "uuid", "min": 1, "max": "unlimited"}}, + "trunks": { + "comment": "The 802.1Q VLAN(s) that this port trunks. Should be empty if this port trunks all VLAN(s) or if this is not a trunk port.", + "type": {"key": "integer", "min": 0, "max": 4096}}, + "tag": { + "comment": "This port's implicitly tagged VLAN. Should be empty if this is a trunk port.", + "type": {"key": "integer", "min": 0, "max": 1}}, + "mac": { + "comment": "The MAC address to use for this port for the purpose of choosing the bridge's MAC address. This column does not necessarily reflect the port's actual MAC address, nor will setting it change the port's actual MAC address. Exactly 12 hex digits in the form XX:XX:XX:XX:XX:XX.", + "type": {"key": "string", "min": 0, "max": 1}}, + "bond_updelay": { + "comment": "For a bonded port, the number of milliseconds for which carrier must stay up on an interface before the interface is considered to be up. Ignored for non-bonded ports.", + "type": "integer"}, + "bond_downdelay": { + "comment": "For a bonded port, the number of milliseconds for which carrier must stay down on an interface before the interface is considered to be down. Ignored for non-bonded ports.", + "type": "integer"}, + "bond_fake_iface": { + "comment": "For a bonded port, whether to create a fake interface with the name of the port. 
Use only for compatibility with legacy software that requires this.", + "type": "boolean"}, + "fake_bridge": { + "comment": "Does this port represent a sub-bridge for its tagged VLAN within the Bridge? See ovs-vsctl(8) for more information.", + "type": "boolean"}, + "other_config": { + "comment": "Key-value pairs for configuring rarely used port features. The currently defined key-value pairs are: \"hwaddr\", exactly 12 hex digits in the form \"XX:XX:XX:XX:XX:XX\".", + "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, + "external_ids": { + "comment": "Key-value pairs that identify this port's role in external systems. No key-value pairs native to Port are currently defined. For fake bridges (see the \"fake-bridge\" column), external IDs for the fake bridge are defined here by prefixing their keys with \"fake-bridge\", e.g. \"fake-bridge-xs-network-uuids\".", + "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}}}, + "Interface": { + "comment": "An interface within a Port.", + "columns": { + "name": { + "comment": "Interface name. Should be alphanumeric and no more than about 8 bytes long. May be the same as the port name, for non-bonded ports. Must otherwise be unique among the names of ports, interfaces, and bridges on a host.", + "type": "string"}, + "type": { + "comment": "The interface type. Normal network devices, e.g. eth0, have type \"system\" or \"\" (which are synonyms). Internal ports have type \"internal\". TUN/TAP devices have type \"tap\". GRE devices have type \"gre\".", + "type": "string"}, + "options": { + "comment": "Configuration options whose interpretation varies based on \"type\".", + "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, + "ingress_policing_rate": { + "comment": "Maximum rate for data received on this interface, in kbps. 
Set to 0 to disable policing.", + "type": "integer"}, + "ingress_policing_burst": { + "comment": "Maximum burst size for data received on this interface, in kb. The default burst size if set to 0 is 1000 kb.", + "type": "integer"}, + "mac": { + "comment": "Ethernet address to set for this interface. If unset then the default MAC address is used. May not be supported on all interfaces. Exactly 12 hex digits in the form XX:XX:XX:XX:XX:XX.", + "type": {"key": "string", "min": 0, "max": 1}}, + "external_ids": { + "comment": "Key-value pairs that identify this interface's role in external systems. The currently defined key-value pairs are: \"xs-vif-uuid\", the UUID of the Citrix XenServer VIF associated with this interface; \"xs-network-uuid\", the UUID of the Citrix XenServer network to which this interface is attached; \"xs-vif-vm-uuid\", the UUID of the Citrix XenServer VM to which this interface belongs; \"xs-vif-mac\", the value of the \"MAC\" field in the Citrix XenServer VIF record for this interface.", + "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, + "ofport": { + "comment": "OpenFlow port number for this interface. This is populated when the port number becomes known. Before it is populated its value will be missing. 
If the interface cannot be added then this is indicated by a value of -1.", + "type": {"key": "integer", "min": 0, "max": 1}, + "ephemeral": true}}}, + "Mirror": { + "comment": "A port mirror within a Bridge.", + "columns": { + "name": { + "comment": "Arbitrary identifier for the Mirror.", + "type": "string"}, + "select_src_port": { + "comment": "Ports on which arriving packets are selected for mirroring.", + "type": {"key": "uuid", "min": 0, "max": "unlimited"}}, + "select_dst_port": { + "comment": "Ports on which departing packets are selected for mirroring.", + "type": {"key": "uuid", "min": 0, "max": "unlimited"}}, + "select_vlan": { + "comment": "VLANs on which packets are selected for mirroring.", + "type": {"key": "integer", "min": 0, "max": 4096}}, + "output_port": { + "comment": "Output port for selected packets. Mutually exclusive with output_vlan.", + "type": {"key": "uuid", "min": 0, "max": 1}}, + "output_vlan": { + "comment": "Output VLAN for selected packets. Mutually exclusive with output_port.", + "type": {"key": "integer", "min": 0, "max": 1}}}}, + "NetFlow": { + "comment": "A NetFlow target.", + "columns": { + "targets": { + "comment": "NetFlow targets in the form \"IP:PORT\".", + "type": {"key": "string", "min": 1, "max": "unlimited"}}, + "engine_type": { + "comment": "Engine type to use in NetFlow messages. Defaults to datapath index if not specified.", + "type": {"key": "integer", "min": 0, "max": 1}}, + "engine_id": { + "comment": "Engine ID to use in NetFlow messages. Defaults to datapath index if not specified.", + "type": {"key": "integer", "min": 0, "max": 1}}, + "add_id_to_interface": { + "comment": "Place least-significant 7 bits of engine ID into most significant bits of ingress and egress interface fields of NetFlow records?", + "type": "boolean"}, + "active_timeout": { + "comment": "Active timeout interval, in seconds. 
A value of 0 requests the default timeout; a negative value disables active timeouts.", + "type": "integer"}}}, ++ "sFlow": { ++ "comment": "A sFlow target.", ++ "columns": { ++ "targets": { ++ "comment": "sFlow targets in the form \"IP:PORT\".", ++ "type": {"key": "string", "min": 1, "max": "unlimited"}}, ++ "sampling": { ++ "comment": "Rate at which packets should be sampled and sent to the collector. If not specified, defaults to 400, which means one out of 400, on average, will be sent to the collector.", ++ "type": {"key": "integer", "min": 0, "max": 1}}, ++ "polling": { ++ "comment": "Polling rate in seconds to send port statistics to the collector. If not specified, defaults to 30 seconds.", ++ "type": {"key": "integer", "min": 0, "max": 1}}, ++ "header": { ++ "comment": "Number of bytes of a sampled packet to send to the collector. If not specified, defaults is 128 bytes.", ++ "type": {"key": "integer", "min": 0, "max": 1}}, ++ "agent": { ++ "comment": "IP address to report as \"agent address\" to collectors. If not specified, defaults to collector's \"local_ip\" value. If neither is specified, sFlow is disabled.", ++ "type": {"key": "string", "min": 0, "max": 1}}}}, + "Controller": { + "comment": "An OpenFlow controller.", + "columns": { + "target": { + "comment": "Connection method for controller, e.g. \"ssl:...\", \"tcp:...\". The special string \"discover\" enables controller discovery. The special string \"none\" disables the controller.", + "type": "string"}, + "max_backoff": { + "comment": "Maximum number of milliseconds to wait between connection attempts. Default is implementation-specific.", + "type": {"key": "integer", "min": 0, "max": 1}}, + "inactivity_probe": { + "comment": "Maximum number of milliseconds of idle time on connection to controller before sending an inactivity probe message. 
Default is implementation-specific.", + "type": {"key": "integer", "min": 0, "max": 1}}, + "fail_mode": { + "comment": "Either \"standalone\" or \"secure\", or empty to use the implementation's default.", + "type": {"key": "string", "min": 0, "max": 1}}, + "discover_accept_regex": { + "comment": "If \"target\" is \"discover\", a POSIX extended regular expression against which the discovered controller location is validated. If not specified, the default is implementation-specific.", + "type": {"key": "string", "min": 0, "max": 1}}, + "discover_update_resolv_conf": { + "comment": "If \"target\" is \"discover\", whether to update /etc/resolv.conf when the controller is discovered. If not specified, the default is implementation-specific.", + "type": {"key": "boolean", "min": 0, "max": 1}}, + "connection_mode": { + "comment": "Either \"in-band\" or \"out-of-band\". If not specified, the default is implementation-specific.", + "type": {"key": "string", "min": 0, "max": 1}}, + "local_ip": { + "comment": "If \"target\" is not \"discover\", the IP address to configure on the local port.", + "type": {"key": "string", "min": 0, "max": 1}}, + "local_netmask": { + "comment": "If \"target\" is not \"discover\", the IP netmask to configure on the local port.", + "type": {"key": "string", "min": 0, "max": 1}}, + "local_gateway": { + "comment": "If \"target\" is not \"discover\", the IP gateway to configure on the local port.", + "type": {"key": "string", "min": 0, "max": 1}}, + "controller_rate_limit": { + "comment": "The maximum rate at which packets will be forwarded to the OpenFlow controller, in packets per second. If not specified, the default is implementation-specific.", + "type": {"key": "integer", "min": 0, "max": 1}}, + "controller_burst_limit": { + "comment": "The maximum number of unused packet credits that the bridge will allow to accumulate, in packets. 
If not specified, the default is implementation-specific.", + "type": {"key": "integer", "min": 0, "max": 1}}}}, + "SSL": { + "comment": "SSL configuration for an Open_vSwitch.", + "columns": { + "private_key": { + "comment": "Name of a PEM file containing the private key used as the switch's identity for SSL connections to the controller.", + "type": "string"}, + "certificate": { + "comment": "Name of a PEM file containing a certificate, signed by the certificate authority (CA) used by the controller and manager, that certifies the switch's private key, identifying a trustworthy switch.", + "type": "string"}, + "ca_cert": { + "comment": "Name of a PEM file containing the CA certificate used to verify that the switch is connected to a trustworthy controller.", + "type": "string"}, + "bootstrap_ca_cert": { + "comment": "If set to true, then Open vSwitch will attempt to obtain the CA certificate from the controller on its first SSL connection and save it to the named PEM file. If it is successful, it will immediately drop the connection and reconnect, and from then on all SSL connections must be authenticated by a certificate signed by the CA certificate thus obtained. 
This option exposes the SSL connection to a man-in-the-middle attack obtaining the initial CA certificate, but it may be useful for bootstrapping.", + "type": "boolean"}}}}} diff --cc xenserver/etc_init.d_vswitch index 7bc826aa,c8172c81..95c6b80b --- a/xenserver/etc_init.d_vswitch +++ b/xenserver/etc_init.d_vswitch @@@ -386,12 -314,10 +386,12 @@@ case "$1" i strace -p $(cat "$BRCOMPATD_PIDFILE") "$@" ;; status) - status -p ovsdb-server.pid ovsdb-server - status -p ovs-vswitchd.pid ovs-vswitchd - status -p ovs-brcompatd.pid ovs-brcompatd ++ status -p "$OVSDB_SERVER_PIDFILE" ovsdb-server + status -p "$VSWITCHD_PIDFILE" ovs-vswitchd + status -p "$BRCOMPATD_PIDFILE" ovs-brcompatd ;; version) + /usr/sbin/ovsdb-server -V /usr/sbin/ovs-vswitchd -V /usr/sbin/ovs-brcompatd -V ;; diff --cc xenserver/etc_xensource_scripts_vif index 6905448f,91131598..4e24d83a --- a/xenserver/etc_xensource_scripts_vif +++ b/xenserver/etc_xensource_scripts_vif @@@ -21,20 -20,12 +20,11 @@@ # Keep other-config/ keys in sync with device.ml:vif_udev_keys + BRCTL="/usr/sbin/brctl" + IP="/sbin/ip" + -cfg_mod="/usr/bin/ovs-cfg-mod" vsctl="/usr/bin/ovs-vsctl" -service="/sbin/service" +dump_vif_details="/usr/share/vswitch/scripts/dump-vif-details" - service="/sbin/service" - - TYPE=`echo ${XENBUS_PATH} | cut -f 2 -d '/'` - DOMID=`echo ${XENBUS_PATH} | cut -f 3 -d '/'` - DEVID=`echo ${XENBUS_PATH} | cut -f 4 -d '/'` - - XAPI=/xapi/${DOMID}/hotplug/${TYPE}/${DEVID} - HOTPLUG=/xapi/${DOMID}/hotplug/${TYPE}/${DEVID} - PRIVATE=/xapi/${DOMID}/private/${TYPE}/${DEVID} - BRCTL=/usr/sbin/brctl - IP=/sbin/ip - handle_promiscuous() { @@@ -77,50 -104,131 +75,113 @@@ add_to_bridge( local bridge=$(xenstore-read "${PRIVATE}/bridge") if [ $? 
-ne 0 -o -z "${bridge}" ]; then logger -t scripts-vif "Failed to read ${PRIVATE}/bridge from xenstore" + exit 1 fi - logger -t scripts-vif "Adding ${vif} to ${bridge} with address ${address}" - - ${IP} link set "${vif}" down || logger -t scripts-vif "Failed to ip link set ${vif} down" - ${IP} link set "${vif}" arp off || logger -t scripts-vif "Failed to ip link set ${vif} arp off" - ${IP} link set "${vif}" multicast off || logger -t scripts-vif "Failed to ip link set ${vif} multicast off" - ${IP} link set "${vif}" address "${address}" || logger -t scripts-vif "Failed to ip link set ${vif} address ${address}" - ${IP} addr flush "${vif}" || logger -t scripts-vif "Failed to ip addr flush ${vif}" - - local vif_details=$($dump_vif_details $DOMID $DEVID) - if [ $? -ne 0 -o -z "${vif_details}" ]; then - logger -t scripts-vif "Failed to retrieve vif details for vswitch" - fi - - $vsctl add-port $bridge $vif $vif_details + logger -t scripts-vif "Adding ${dev} to ${bridge} with address ${address}" + + ${IP} link set "${dev}" down || logger -t scripts-vif "Failed to ip link set ${dev} down" + ${IP} link set "${dev}" arp off || logger -t scripts-vif "Failed to ip link set ${dev} arp off" + ${IP} link set "${dev}" multicast off || logger -t scripts-vif "Failed to ip link set ${dev} multicast off" + ${IP} link set "${dev}" address "${address}" || logger -t scripts-vif "Failed to ip link set ${dev} address ${address}" + ${IP} addr flush "${dev}" || logger -t scripts-vif "Failed to ip addr flush ${dev}" + + case $NETWORK_MODE in + bridge) + ${BRCTL} setfd "${bridge}" 0 || logger -t scripts-vif "Failed to brctl setfd ${bridge} 0" + ${BRCTL} addif "${bridge}" "${dev}" || logger -t scripts-vif "Failed to brctl addif ${bridge} ${dev}" + ;; + vswitch) - local VLAN_ID=$($vsctl br-to-vlan $bridge) - local vid= - if [ "$VLAN_ID" -ne 0 ] ; then - bridge=$($vsctl br-to-parent $bridge) - vid="--add=vlan.${dev}.tag=${VLAN_ID}" - fi - - if [ "$TYPE" = "vif" ] ; then - local 
vif_details=$(handle_vswitch_vif_details) - fi - - $cfg_mod -F /etc/ovs-vswitchd.conf \ - --del-match="bridge.*.port=${dev}" \ - --del-match="vlan.${dev}.trunks=*" \ - --del-match="vlan.${dev}.tag=*" \ - --del-match="port.${dev}.[!0-9]*" \ - --add="bridge.$bridge.port=${dev}" \ - $vid $vif_details -c - $service vswitch reload ++ local vif_details=$($dump_vif_details $DOMID $DEVID) ++ if [ $? -ne 0 -o -z "${vif_details}" ]; then ++ logger -t scripts-vif "Failed to retrieve vif details for vswitch" ++ fi ++ ++ $vsctl add-port $bridge $dev $vif_details + ;; + esac + + ${IP} link set "${dev}" up || logger -t scripts-vif "Failed to ip link set ${dev} up" + } - ${IP} link set "${vif}" up || logger -t scripts-vif "Failed to ip link set ${vif} up" + remove_from_bridge() + { + case $NETWORK_MODE in + bridge) + # Nothing to do + ;; + vswitch) - $cfg_mod -vANY:console:emer -F /etc/ovs-vswitchd.conf \ - --del-match="bridge.*.port=${dev}" \ - --del-match="vlan.${dev}.trunks=*" \ - --del-match="vlan.${dev}.tag=*" \ - --del-match="port.${dev}.[!0-9]*" -c - $service vswitch reload ++ $vsctl del-port $bridge $dev + ;; + esac } - echo Called as "$@" "$TYPE" "$DOMID" "$DEVID" | logger -t scripts-vif - case "$1" in - online) - handle_ethtool rx - handle_ethtool tx - handle_ethtool sg - handle_ethtool tso - handle_ethtool ufo - handle_ethtool gso + NETWORK_MODE=$(cat /etc/xensource/network.conf) + ACTION=$1 + TYPE=$2 - handle_mtu - add_to_bridge - handle_promiscuous + case $NETWORK_MODE in + bridge|vswitch) ;; + *) + logger -t scripts-vif "Unknown network mode $NETWORK_MODE" + exit 1 + ;; + esac - xenstore-write "${HOTPLUG}/vif" "${vif}" - xenstore-write "${HOTPLUG}/hotplug" "online" + case ${TYPE} in + vif) + DOMID=`echo ${XENBUS_PATH} | cut -f 3 -d '/'` + DEVID=`echo ${XENBUS_PATH} | cut -f 4 -d '/'` + dev=vif${DOMID}.${DEVID} + ;; + tap) + dev=$INTERFACE + DOMID=`echo ${dev#tap} | cut -f 1 -d '.'` + DEVID=`echo ${dev#tap} | cut -f 2 -d '.'` + ;; + *) + logger -t scripts-vif "unknown 
interface type ${TYPE}" + exit 1 + ;; + esac - # xs-xen.pq.hq:91e986b8e49f netback-wait-for-hotplug - xenstore-write "/local/domain/0/backend/vif/${DOMID}/${DEVID}/hotplug-status" "connected" + XAPI=/xapi/${DOMID}/hotplug/vif/${DEVID} + HOTPLUG=/xapi/${DOMID}/hotplug/vif/${DEVID} + PRIVATE=/xapi/${DOMID}/private/vif/${DEVID} + logger -t scripts-vif "Called as \"$@\" domid:$DOMID devid:$DEVID mode:$NETWORK_MODE" + case "${ACTION}" in + online) + if [ "${TYPE}" = "vif" ] ; then + handle_ethtool rx + handle_ethtool tx + handle_ethtool sg + handle_ethtool tso + handle_ethtool ufo + handle_ethtool gso + + handle_mtu + add_to_bridge + handle_promiscuous + + xenstore-write "${HOTPLUG}/vif" "${dev}" + xenstore-write "${HOTPLUG}/hotplug" "online" + + # xs-xen.pq.hq:91e986b8e49f netback-wait-for-hotplug + xenstore-write "/local/domain/0/backend/vif/${DOMID}/${DEVID}/hotplug-status" "connected" + fi ;; + + add) + if [ "${TYPE}" = "tap" ] ; then + add_to_bridge + fi + ;; + remove) - xenstore-rm "${HOTPLUG}/hotplug" - vif=vif${DOMID}.${DEVID} - logger -t scripts-vif "${vif} has been removed" - $vsctl del-port $bridge $vif + if [ "${TYPE}" = "vif" ] ;then + xenstore-rm "${HOTPLUG}/hotplug" + fi + logger -t scripts-vif "${dev} has been removed" + remove_from_bridge ;; esac diff --cc xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py index 00000000,6f093a4a..c0838591 mode 000000,100644..100644 --- a/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py +++ b/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py @@@ -1,0 -1,458 +1,447 @@@ + # Copyright (c) 2008,2009 Citrix Systems, Inc. + # Copyright (c) 2009 Nicira Networks. + # + # This program is free software; you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published + # by the Free Software Foundation; version 2.1 only. with the special + # exception on linking described in file LICENSE. 
+ # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of + # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + # GNU Lesser General Public License for more details. + # + from InterfaceReconfigure import * + + # + # Bare Network Devices -- network devices without IP configuration + # + + def netdev_down(netdev): + """Bring down a bare network device""" + if not netdev_exists(netdev): + log("netdev: down: device %s does not exist, ignoring" % netdev) + return + run_command(["/sbin/ifconfig", netdev, 'down']) + + def netdev_up(netdev, mtu=None): + """Bring up a bare network device""" + if not netdev_exists(netdev): + raise Error("netdev: up: device %s does not exist" % netdev) + + if mtu: + mtu = ["mtu", mtu] + else: + mtu = [] + + run_command(["/sbin/ifconfig", netdev, 'up'] + mtu) + + # + # Bridges + # + + def pif_bridge_name(pif): + """Return the bridge name of a pif. + + PIF must not be a VLAN and must be a bridged PIF.""" + + pifrec = db().get_pif_record(pif) + + if pif_is_vlan(pif): + raise Error("PIF %(uuid)s cannot be a bridge, VLAN is %(VLAN)s" % pifrec) + + nwrec = db().get_network_record(pifrec['network']) + + if nwrec['bridge']: + return nwrec['bridge'] + else: + raise Error("PIF %(uuid)s does not have a bridge name" % pifrec) + + # + # PIF miscellanea + # + + def pif_currently_in_use(pif): + """Determine if a PIF is currently in use. 
+ + A PIF is determined to be currently in use if + - PIF.currently-attached is true + - Any bond master is currently attached + - Any VLAN master is currently attached + """ + rec = db().get_pif_record(pif) + if rec['currently_attached']: + log("configure_datapath: %s is currently attached" % (pif_netdev_name(pif))) + return True + for b in pif_get_bond_masters(pif): + if pif_currently_in_use(b): + log("configure_datapath: %s is in use by BOND master %s" % (pif_netdev_name(pif),pif_netdev_name(b))) + return True + for v in pif_get_vlan_masters(pif): + if pif_currently_in_use(v): + log("configure_datapath: %s is in use by VLAN master %s" % (pif_netdev_name(pif),pif_netdev_name(v))) + return True + return False + + # + # Datapath Configuration + # + + def pif_datapath(pif): + """Return the datapath PIF associated with PIF. + For a non-VLAN PIF, the datapath name is the bridge name. + For a VLAN PIF, the datapath name is the bridge name for the PIF's VLAN slave. + """ + if pif_is_vlan(pif): + return pif_datapath(pif_get_vlan_slave(pif)) + + pifrec = db().get_pif_record(pif) + nwrec = db().get_network_record(pifrec['network']) + if not nwrec['bridge']: + return None + else: + return pif + + def datapath_get_physical_pifs(pif): + """Return the PIFs for the physical network device(s) associated with a datapath PIF. + For a bond master PIF, these are the bond slave PIFs. + For a non-VLAN, non-bond master PIF, the PIF is its own physical device PIF. + + A VLAN PIF cannot be a datapath PIF. + """ + if pif_is_vlan(pif): + # Seems like overkill... + raise Error("get-physical-pifs should not get passed a VLAN") + elif pif_is_bond(pif): + return pif_get_bond_slaves(pif) + else: + return [pif] + + def datapath_deconfigure_physical(netdev): - # The use of [!0-9] keeps an interface of 'eth0' from matching - # VLANs attached to eth0 (such as 'eth0.123'), which are distinct - # interfaces. 
- return ['--del-match=bridge.*.port=%s' % netdev, - '--del-match=port.%s.[!0-9]*' % netdev, - '--del-match=bonding.*.slave=%s' % netdev, - '--del-match=iface.%s.[!0-9]*' % netdev] ++ return ['--', '--if-exists', 'del-port', netdev] + + def datapath_configure_bond(pif,slaves): - pifrec = db().get_pif_record(pif) ++ bridge = pif_bridge_name(pif) ++ pifrec = db.get_pif_record(pif) + interface = pif_netdev_name(pif) + - argv = ['--del-match=bonding.%s.[!0-9]*' % interface] - argv += ["--add=bonding.%s.slave=%s" % (interface, pif_netdev_name(slave)) - for slave in slaves] - argv += ['--add=bonding.%s.fake-iface=true' % interface] ++ argv = ['--', '--fake-iface', 'add-bond', bridge, interface] ++ for slave in slaves: ++ argv += [pif_netdev_name(slave)] + - if pifrec['MAC'] != "": - argv += ['--add=port.%s.mac=%s' % (interface, pifrec['MAC'])] ++ # XXX need ovs-vsctl support ++ #if pifrec['MAC'] != "": ++ # argv += ['--add=port.%s.mac=%s' % (interface, pifrec['MAC'])] + + # Bonding options. + bond_options = { + "mode": "balance-slb", + "miimon": "100", + "downdelay": "200", + "updelay": "31000", + "use_carrier": "1", + } + # override defaults with values from other-config whose keys + # being with "bond-" + oc = pifrec['other_config'] + overrides = filter(lambda (key,val): + key.startswith("bond-"), oc.items()) + overrides = map(lambda (key,val): (key[5:], val), overrides) + bond_options.update(overrides) + for (name,val) in bond_options.items(): - argv += ["--add=bonding.%s.%s=%s" % (interface, name, val)] ++ # XXX need ovs-vsctl support for bond options ++ #argv += ["--add=bonding.%s.%s=%s" % (interface, name, val)] ++ pass + return argv + + def datapath_deconfigure_bond(netdev): - # The use of [!0-9] keeps an interface of 'eth0' from matching - # VLANs attached to eth0 (such as 'eth0.123'), which are distinct - # interfaces. 
- return ['--del-match=bonding.%s.[!0-9]*' % netdev, - '--del-match=port.%s.[!0-9]*' % netdev] ++ return ['--', '--if-exists', 'del-port', netdev] + + def datapath_deconfigure_ipdev(interface): - # The use of [!0-9] keeps an interface of 'eth0' from matching - # VLANs attached to eth0 (such as 'eth0.123'), which are distinct - # interfaces. - return ['--del-match=bridge.*.port=%s' % interface, - '--del-match=port.%s.[!0-9]*' % interface, - '--del-match=iface.%s.[!0-9]*' % interface, - '--del-match=vlan.%s.trunks=*' % interface, - '--del-match=vlan.%s.tag=*' % interface] ++ return ['--', '--if-exists', 'del-port', interface] + + def datapath_modify_config(commands): - #log("modifying configuration:") - #for c in commands: - # log(" %s" % c) - - rc = run_command(['/usr/bin/ovs-cfg-mod', '-vANY:console:emer', - '-F', '/etc/ovs-vswitchd.conf'] - + [c for c in commands if c[0] != '#'] + ['-c']) - if not rc: ++ if debug_mode(): ++ log("modifying configuration:") ++ for c in commands: ++ log(" %s" % c) ++ ++ rc = run_command(['/usr/bin/ovs-vsctl'] + ['--timeout=20'] ++ + [c for c in commands if not c.startswith('#')]) ++ if not rc: + raise Error("Failed to modify vswitch configuration") - run_command(['/sbin/service', 'vswitch', 'reload']) + return True + + # + # Toplevel Datapath Configuration. + # + + def configure_datapath(pif): + """Bring up the datapath configuration for PIF. + + Should be careful not to glitch existing users of the datapath, e.g. other VLANs etc. + + Should take care of tearing down other PIFs which encompass common physical devices. + + Returns a tuple containing - - A list containing the necessary cfgmod command line arguments ++ - A list containing the necessary vsctl command line arguments + - A list of additional devices which should be brought up after + the configuration is applied. 
+ """ + - cfgmod_argv = [] ++ vsctl_argv = [] + extra_up_ports = [] + + bridge = pif_bridge_name(pif) + + physical_devices = datapath_get_physical_pifs(pif) + + # Determine additional devices to deconfigure. + # + # Given all physical devices which are part of this PIF we need to + # consider: + # - any additional bond which a physical device is part of. + # - any additional physical devices which are part of an additional bond. + # + # Any of these which are not currently in use should be brought + # down and deconfigured. + extra_down_bonds = [] + extra_down_ports = [] + for p in physical_devices: + for bond in pif_get_bond_masters(p): + if bond == pif: + log("configure_datapath: leaving bond %s up" % pif_netdev_name(bond)) + continue + if bond in extra_down_bonds: + continue + if db().get_pif_record(bond)['currently_attached']: + log("configure_datapath: implicitly tearing down currently-attached bond %s" % pif_netdev_name(bond)) + + extra_down_bonds += [bond] + + for s in pif_get_bond_slaves(bond): + if s in physical_devices: + continue + if s in extra_down_ports: + continue + if pif_currently_in_use(s): + continue + extra_down_ports += [s] + + log("configure_datapath: bridge - %s" % bridge) + log("configure_datapath: physical - %s" % [pif_netdev_name(p) for p in physical_devices]) + log("configure_datapath: extra ports - %s" % [pif_netdev_name(p) for p in extra_down_ports]) + log("configure_datapath: extra bonds - %s" % [pif_netdev_name(p) for p in extra_down_bonds]) + + # Need to fully deconfigure any bridge which any of the: + # - physical devices + # - bond devices + # - sibling devices + # refers to + for brpif in physical_devices + extra_down_ports + extra_down_bonds: + if brpif == pif: + continue + b = pif_bridge_name(brpif) + #ifdown(b) + # XXX + netdev_down(b) - cfgmod_argv += ['# remove bridge %s' % b] - cfgmod_argv += ['--del-match=bridge.%s.*' % b] ++ vsctl_argv += ['# remove bridge %s' % b] ++ vsctl_argv += ['--', '--if-exists', 'del-br', b] + + 
for n in extra_down_ports: + dev = pif_netdev_name(n) - cfgmod_argv += ['# deconfigure sibling physical device %s' % dev] - cfgmod_argv += datapath_deconfigure_physical(dev) ++ vsctl_argv += ['# deconfigure sibling physical device %s' % dev] ++ vsctl_argv += datapath_deconfigure_physical(dev) + netdev_down(dev) + + for n in extra_down_bonds: + dev = pif_netdev_name(n) - cfgmod_argv += ['# deconfigure bond device %s' % dev] - cfgmod_argv += datapath_deconfigure_bond(dev) ++ vsctl_argv += ['# deconfigure bond device %s' % dev] ++ vsctl_argv += datapath_deconfigure_bond(dev) + netdev_down(dev) + + for p in physical_devices: + dev = pif_netdev_name(p) - cfgmod_argv += ['# deconfigure physical port %s' % dev] - cfgmod_argv += datapath_deconfigure_physical(dev) ++ vsctl_argv += ['# deconfigure physical port %s' % dev] ++ vsctl_argv += datapath_deconfigure_physical(dev) ++ + if len(physical_devices) > 1: - cfgmod_argv += ['# deconfigure bond %s' % pif_netdev_name(pif)] - cfgmod_argv += datapath_deconfigure_bond(pif_netdev_name(pif)) - cfgmod_argv += ['--del-entry=bridge.%s.port=%s' % (bridge,pif_netdev_name(pif))] - cfgmod_argv += ['# configure bond %s' % pif_netdev_name(pif)] - cfgmod_argv += datapath_configure_bond(pif, physical_devices) - cfgmod_argv += ['--add=bridge.%s.port=%s' % (bridge,pif_netdev_name(pif)) ] ++ vsctl_argv += ['# deconfigure bond %s' % pif_netdev_name(pif)] ++ vsctl_argv += datapath_deconfigure_bond(pif_netdev_name(pif)) ++ vsctl_argv += ['# configure bond %s' % pif_netdev_name(pif)] ++ vsctl_argv += datapath_configure_bond(pif, physical_devices) + extra_up_ports += [pif_netdev_name(pif)] - else: ++ else: + iface = pif_netdev_name(physical_devices[0]) - cfgmod_argv += ['# add physical device %s' % iface] - cfgmod_argv += ['--add=bridge.%s.port=%s' % (bridge,iface) ] ++ vsctl_argv += ['# add physical device %s' % iface] ++ vsctl_argv += ['--', 'add-port', bridge, iface] + - return cfgmod_argv,extra_up_ports ++ return vsctl_argv,extra_up_ports + + 
def deconfigure_datapath(pif): - cfgmod_argv = [] ++ vsctl_argv = [] + + bridge = pif_bridge_name(pif) + + physical_devices = datapath_get_physical_pifs(pif) + + log("deconfigure_datapath: bridge - %s" % bridge) + log("deconfigure_datapath: physical devices - %s" % [pif_netdev_name(p) for p in physical_devices]) + + for p in physical_devices: + dev = pif_netdev_name(p) - cfgmod_argv += ['# deconfigure physical port %s' % dev] - cfgmod_argv += datapath_deconfigure_physical(dev) ++ vsctl_argv += ['# deconfigure physical port %s' % dev] ++ vsctl_argv += datapath_deconfigure_physical(dev) + netdev_down(dev) + + if len(physical_devices) > 1: - cfgmod_argv += ['# deconfigure bond %s' % pif_netdev_name(pif)] - cfgmod_argv += datapath_deconfigure_bond(pif_netdev_name(pif)) ++ vsctl_argv += ['# deconfigure bond %s' % pif_netdev_name(pif)] ++ vsctl_argv += datapath_deconfigure_bond(pif_netdev_name(pif)) + - cfgmod_argv += ['# deconfigure bridge %s' % bridge] - cfgmod_argv += ['--del-match=bridge.%s.*' % bridge] ++ vsctl_argv += ['# deconfigure bridge %s' % bridge] ++ vsctl_argv += ['--', '--if-exists', 'del-br', bridge] + - return cfgmod_argv ++ return vsctl_argv + + # + # + # + + class DatapathVswitch(Datapath): + def __init__(self, pif): + Datapath.__init__(self, pif) + self._dp = pif_datapath(pif) + self._ipdev = pif_ipdev_name(pif) + + if pif_is_vlan(pif) and not self._dp: + raise Error("Unbridged VLAN devices not implemented yet") + + log("Configured for Vswitch datapath") + + def configure_ipdev(self, cfg): + cfg.write("TYPE=Ethernet\n") + + def preconfigure(self, parent): - cfgmod_argv = [] ++ vsctl_argv = [] + extra_ports = [] + + pifrec = db().get_pif_record(self._pif) + + ipdev = self._ipdev + bridge = pif_bridge_name(self._dp) + c,e = configure_datapath(self._dp) - cfgmod_argv += c ++ vsctl_argv += c + extra_ports += e + - cfgmod_argv += ['# configure xs-network-uuids'] - cfgmod_argv += ['--del-match=bridge.%s.xs-network-uuids=*' % bridge] ++ if pif_is_vlan(pif): 
++ datapath = pif_datapath(pif) ++ vsctl_argv += ['--', 'add-br', bridge, datapath, pifrec['VLAN']] ++ else: ++ vsctl_argv += ['--', 'add-br', bridge] + ++ xs_network_uuids = [] + for nwpif in db().get_pifs_by_device(db().get_pif_record(self._pif)['device']): + rec = db().get_pif_record(nwpif) + + # When state is read from dbcache PIF.currently_attached + # is always assumed to be false... Err on the side of + # listing even detached networks for the time being. + #if nwpif != pif and not rec['currently_attached']: + # log("Network PIF %s not currently attached (%s)" % (rec['uuid'],pifrec['uuid'])) + # continue + nwrec = db().get_network_record(rec['network']) - cfgmod_argv += ['--add=bridge.%s.xs-network-uuids=%s' % (bridge, nwrec['uuid'])] - - cfgmod_argv += ["# deconfigure ipdev %s" % ipdev] - cfgmod_argv += datapath_deconfigure_ipdev(ipdev) - cfgmod_argv += ["# reconfigure ipdev %s" % ipdev] - cfgmod_argv += ['--add=bridge.%s.port=%s' % (bridge, ipdev)] - if bridge == ipdev: - cfgmod_argv += ['--add=bridge.%s.mac=%s' % (bridge, pifrec['MAC'])] - else: - cfgmod_argv += ['--add=iface.%s.mac=%s' % (ipdev, pifrec['MAC'])] - - if pif_is_vlan(self._pif): - cfgmod_argv += ['--add=vlan.%s.tag=%s' % (ipdev, pifrec['VLAN'])] - cfgmod_argv += ['--add=iface.%s.internal=true' % (ipdev)] - cfgmod_argv += ['--add=iface.%s.fake-bridge=true' % (ipdev)] ++ xs_network_uuids += [nwrec['uuid']] ++ ++ vsctl_argv += ['# configure xs-network-uuids'] ++ vsctl_argv += ['--', 'br-set-external-id', bridge, ++ 'xs-network-uuids', ';'.join(xs_network_uuids)] ++ ++ vsctl_argv += ["# deconfigure ipdev %s" % ipdev] ++ vsctl_argv += datapath_deconfigure_ipdev(ipdev) ++ vsctl_argv += ["# reconfigure ipdev %s" % ipdev] ++ vsctl_argv += ['--', 'add-port', bridge, ipdev] ++ ++ # XXX Needs support in ovs-vsctl ++ #if bridge == ipdev: ++ # vsctl_argv += ['--add=bridge.%s.mac=%s' % (bridge, pifrec['MAC'])] ++ #else: ++ # vsctl_argv += ['--add=iface.%s.mac=%s' % (ipdev, pifrec['MAC'])] + - 
self._cfgmod_argv = cfgmod_argv ++ self._vsctl_argv = vsctl_argv + self._extra_ports = extra_ports + + def bring_down_existing(self): + pass + + def configure(self): + # Bring up physical devices. ovs-vswitchd initially enables or + # disables bond slaves based on whether carrier is detected + # when they are added, and a network device that is down + # always reports "no carrier". + physical_devices = datapath_get_physical_pifs(self._dp) + + for p in physical_devices: + oc = db().get_pif_record(p)['other_config'] + + dev = pif_netdev_name(p) + + mtu = mtu_setting(oc) + + netdev_up(dev, mtu) + + settings, offload = ethtool_settings(oc) + if len(settings): + run_command(['/sbin/ethtool', '-s', dev] + settings) + if len(offload): + run_command(['/sbin/ethtool', '-K', dev] + offload) + - datapath_modify_config(self._cfgmod_argv) ++ datapath_modify_config(self._vsctl_argv) + + def post(self): + for p in self._extra_ports: + log("action_up: bring up %s" % p) + netdev_up(p) + + def bring_down(self): - cfgmod_argv = [] ++ vsctl_argv = [] + + dp = self._dp + ipdev = self._ipdev + + bridge = pif_bridge_name(dp) + + #nw = db().get_pif_record(self._pif)['network'] + #nwrec = db().get_network_record(nw) - #cfgmod_argv += ['# deconfigure xs-network-uuids'] - #cfgmod_argv += ['--del-entry=bridge.%s.xs-network-uuids=%s' % (bridge,nwrec['uuid'])] ++ #vsctl_argv += ['# deconfigure xs-network-uuids'] ++ #vsctl_argv += ['--del-entry=bridge.%s.xs-network-uuids=%s' % (bridge,nwrec['uuid'])] + + log("deconfigure ipdev %s on %s" % (ipdev,bridge)) - cfgmod_argv += ["# deconfigure ipdev %s" % ipdev] - cfgmod_argv += datapath_deconfigure_ipdev(ipdev) ++ vsctl_argv += ["# deconfigure ipdev %s" % ipdev] ++ vsctl_argv += datapath_deconfigure_ipdev(ipdev) + + if pif_is_vlan(self._pif): + # If the VLAN's slave is attached, leave datapath setup. 
+ slave = pif_get_vlan_slave(self._pif) + if db().get_pif_record(slave)['currently_attached']: + log("action_down: vlan slave is currently attached") + dp = None + + # If the VLAN's slave has other VLANs that are attached, leave datapath setup. + for master in pif_get_vlan_masters(slave): + if master != self._pif and db().get_pif_record(master)['currently_attached']: + log("action_down: vlan slave has other master: %s" % pif_netdev_name(master)) + dp = None + + # Otherwise, take down the datapath too (fall through) + if dp: + log("action_down: no more masters, bring down slave %s" % bridge) + else: + # Stop here if this PIF has attached VLAN masters. + masters = [db().get_pif_record(m)['VLAN'] for m in pif_get_vlan_masters(self._pif) if db().get_pif_record(m)['currently_attached']] + if len(masters) > 0: + log("Leaving datapath %s up due to currently attached VLAN masters %s" % (bridge, masters)) + dp = None + + if dp: - cfgmod_argv += deconfigure_datapath(dp) - datapath_modify_config(cfgmod_argv) ++ vsctl_argv += deconfigure_datapath(dp) ++ datapath_modify_config(vsctl_argv) diff --cc xenserver/vswitch-xen.spec index f67a1bf9,2d8acd5d..41f709d1 --- a/xenserver/vswitch-xen.spec +++ b/xenserver/vswitch-xen.spec @@@ -67,10 -67,14 +67,16 @@@ install -d -m 755 $RPM_BUILD_ROOT/etc/x install -m 755 xenserver/etc_xapi.d_plugins_vswitch-cfg-update \ $RPM_BUILD_ROOT/etc/xapi.d/plugins/vswitch-cfg-update install -d -m 755 $RPM_BUILD_ROOT/usr/share/vswitch/scripts +install -m 644 vswitchd/vswitch.ovsschema \ + $RPM_BUILD_ROOT/usr/share/vswitch/vswitch.ovsschema install -m 755 xenserver/opt_xensource_libexec_interface-reconfigure \ $RPM_BUILD_ROOT/usr/share/vswitch/scripts/interface-reconfigure + install -m 755 xenserver/opt_xensource_libexec_InterfaceReconfigure.py \ + $RPM_BUILD_ROOT/usr/share/vswitch/scripts/InterfaceReconfigure.py + install -m 755 xenserver/opt_xensource_libexec_InterfaceReconfigureBridge.py \ + 
$RPM_BUILD_ROOT/usr/share/vswitch/scripts/InterfaceReconfigureBridge.py + install -m 755 xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py \ + $RPM_BUILD_ROOT/usr/share/vswitch/scripts/InterfaceReconfigureVswitch.py install -m 755 xenserver/etc_xensource_scripts_vif \ $RPM_BUILD_ROOT/usr/share/vswitch/scripts/vif install -m 755 xenserver/usr_share_vswitch_scripts_dump-vif-details \ @@@ -301,10 -296,13 +313,13 @@@ if [ "$1" = "0" ]; then # $1 = 1 fo done # Remove all configuration files - rm -f /etc/ovs-vswitchd.conf + rm -f /etc/ovs-vswitchd.conf.db rm -f /etc/sysconfig/vswitch rm -f /etc/ovs-vswitchd.cacert - rm -f /var/lib/openvswitch/dbcache + rm -f /var/xapi/network.dbcache + + # Configure system to use bridge + echo bridge > /etc/xensource/network.conf printf "\nYou MUST reboot the server now to complete the change to\n" printf "standard Xen networking. Attempts to modify networking on the\n"