From 660f6596ba31042a63bf31317851356726606fdd Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Mon, 29 Dec 2008 22:53:08 -0800 Subject: [PATCH] First cut at bridge compatibility for vswitchd. This set of changes allows the bridge ioctls to be used for adding and removing datapaths and interfaces. To enable, one must insmod the new "brcompat_mod.ko" kernel module. Then, vswitchd is run with the "--brcompat" flag. See the man page for vswitchd for more details. --- datapath/Modules.mk | 7 +- datapath/brcompat.c | 232 +++++++++ datapath/datapath.c | 119 ++++- datapath/datapath.h | 4 +- datapath/dp_dev.c | 23 +- datapath/dp_dev.h | 3 +- datapath/linux-2.4/.gitignore | 1 + .../compat-2.4/include/linux/netdevice.h | 15 +- datapath/linux-2.6/.gitignore | 1 + datapath/linux-2.6/compat-2.6/compat26.h | 4 +- include/openflow/brcompat-netlink.h | 62 +++ include/openflow/openflow-netlink.h | 3 +- lib/dpif.c | 77 ++- lib/dpif.h | 9 +- lib/vlog-modules.def | 1 + utilities/.gitignore | 2 + utilities/dpctl.8.in | 4 + utilities/dpctl.c | 22 +- vswitchd/.gitignore | 4 + vswitchd/automake.mk | 1 + vswitchd/brcompat.c | 449 ++++++++++++++++++ vswitchd/brcompat.h | 41 ++ vswitchd/bridge.c | 62 +-- vswitchd/bridge.h | 1 + vswitchd/cfg.c | 18 +- vswitchd/vswitchd.8.in | 9 + vswitchd/vswitchd.c | 15 + 27 files changed, 1077 insertions(+), 112 deletions(-) create mode 100644 datapath/brcompat.c create mode 100644 include/openflow/brcompat-netlink.h create mode 100644 vswitchd/.gitignore create mode 100644 vswitchd/brcompat.c create mode 100644 vswitchd/brcompat.h diff --git a/datapath/Modules.mk b/datapath/Modules.mk index cbb52df8..7818ab76 100644 --- a/datapath/Modules.mk +++ b/datapath/Modules.mk @@ -1,5 +1,5 @@ all_modules = $(dist_modules) -dist_modules = openflow +dist_modules = openflow brcompat openflow_sources = \ chain.c \ @@ -30,6 +30,11 @@ openflow_headers = \ nx_msg.h \ table.h +brcompat_sources = \ + brcompat.c + +brcompat_headers = + dist_sources = $(foreach 
module,$(dist_modules),$($(module)_sources)) dist_headers = $(foreach module,$(dist_modules),$($(module)_headers)) all_sources = $(foreach module,$(all_modules),$($(module)_sources)) diff --git a/datapath/brcompat.c b/datapath/brcompat.c new file mode 100644 index 00000000..5661240d --- /dev/null +++ b/datapath/brcompat.c @@ -0,0 +1,232 @@ +#include +#include +#include +#include +#include +#include + +#include "compat.h" +#include "openflow/brcompat-netlink.h" +#include "datapath.h" +#include "dp_dev.h" + +static struct genl_family brc_genl_family; +static struct genl_multicast_group brc_mc_group; + +int brc_send_dp_add_del(const char *dp_name, int add); +int brc_send_port_add_del(const char *dp_name, const char *port_name, int add); + + +int +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) +brc_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg) +#else +brc_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg) +#endif +{ + switch (cmd) { + case SIOCBRADDBR: + case SIOCBRDELBR: { + char dp_name[IFNAMSIZ]; + + if (copy_from_user(dp_name, uarg, IFNAMSIZ)) + return -EFAULT; + + dp_name[IFNAMSIZ-1] = 0; + if (cmd == SIOCBRADDBR) + return brc_send_dp_add_del(dp_name, 1); + + return brc_send_dp_add_del(dp_name, 0); + } + } + + return -EOPNOTSUPP; +} + +/* Called with the rtnl_lock. 
*/ +int +brc_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + int err; + struct net_device *port; + + port = dev_get_by_index(&init_net, rq->ifr_ifindex); + if (!port) + return -EINVAL; + + switch (cmd) { + case SIOCBRADDIF: + err = brc_send_port_add_del(dev->name, port->name, 1); + break; + case SIOCBRDELIF: + err = brc_send_port_add_del(dev->name, port->name, 0); + break; + default: + err = -EOPNOTSUPP; + break; + } + + dev_put(port); + return err; +} + + +static struct genl_family brc_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = BRC_GENL_FAMILY_NAME, + .version = 1, + .maxattr = BRC_GENL_A_MAX, +}; + +/* Attribute policy: what each attribute may contain. */ +static struct nla_policy brc_genl_policy[BRC_GENL_A_MAX + 1] = { + [BRC_GENL_A_DP_NAME] = { .type = NLA_STRING }, + [BRC_GENL_A_PORT_NAME] = { .type = NLA_STRING } +}; + +static int brc_genl_query(struct sk_buff *skb, struct genl_info *info) +{ + int err = -EINVAL; + struct sk_buff *ans_skb; + void *data; + + ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!ans_skb) + return -ENOMEM; + + data = genlmsg_put_reply(ans_skb, info, &brc_genl_family, + 0, BRC_GENL_C_QUERY_MC); + if (data == NULL) { + err = -ENOMEM; + goto err; + } + NLA_PUT_U32(ans_skb, BRC_GENL_A_MC_GROUP, brc_mc_group.id); + + genlmsg_end(ans_skb, data); + return genlmsg_reply(ans_skb, info); + +err: +nla_put_failure: + kfree_skb(ans_skb); + return err; +} + +static struct genl_ops brc_genl_ops_query_dp = { + .cmd = BRC_GENL_C_QUERY_MC, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. 
*/ + .policy = brc_genl_policy, + .doit = brc_genl_query, + .dumpit = NULL +}; + +int brc_send_dp_add_del(const char *dp_name, int add) +{ + struct sk_buff *skb; + void *data; + + skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + return -ENOMEM; + + if (add) + data = genlmsg_put(skb, 0, 0, &brc_genl_family, 0, + BRC_GENL_C_DP_ADD); + else + data = genlmsg_put(skb, 0, 0, &brc_genl_family, 0, + BRC_GENL_C_DP_DEL); + if (!data) + goto err; + + NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, dp_name); + + genlmsg_end(skb, data); + return genlmsg_multicast(skb, 0, brc_mc_group.id, GFP_ATOMIC); + +nla_put_failure: +err: + kfree_skb(skb); + return -EINVAL; +} + +int brc_send_port_add_del(const char *dp_name, const char *port_name, int add) +{ + struct sk_buff *skb; + void *data; + + skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + return -ENOMEM; + + if (add) + data = genlmsg_put(skb, 0, 0, &brc_genl_family, 0, + BRC_GENL_C_PORT_ADD); + else + data = genlmsg_put(skb, 0, 0, &brc_genl_family, 0, + BRC_GENL_C_PORT_DEL); + if (!data) + goto err; + + NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, dp_name); + NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port_name); + + genlmsg_end(skb, data); + return genlmsg_multicast(skb, 0, brc_mc_group.id, GFP_ATOMIC); + +nla_put_failure: +err: + kfree_skb(skb); + return -EINVAL; +} + +static int +__init brc_init(void) +{ + int err; + + printk("OpenFlow Bridge Compatiblity, built "__DATE__" "__TIME__"\n"); + + /* Set the bridge ioctl handler */ + brioctl_set(brc_ioctl_deviceless_stub); + + /* Set the OpenFlow device ioctl handler */ + dp_ioctl_hook = brc_dev_ioctl; + + /* Register generic netlink family to communicate changes to + * userspace. 
*/ + err = genl_register_family(&brc_genl_family); + if (err) + return err; + + err = genl_register_ops(&brc_genl_family, &brc_genl_ops_query_dp); + if (err != 0) + goto err_unregister; + + strcpy(brc_mc_group.name, "brcompat"); + err = genl_register_mc_group(&brc_genl_family, &brc_mc_group); + if (err < 0) + goto err_unregister; + + return 0; + +err_unregister: + genl_unregister_family(&brc_genl_family); + return err; +} + +static void +brc_cleanup(void) +{ + /* Check refcount for datapaths so hook doesn't disappear? */ + dp_ioctl_hook = NULL; + + brioctl_set(NULL); + genl_unregister_family(&brc_genl_family); +} + +module_init(brc_init); +module_exit(brc_cleanup); + +MODULE_DESCRIPTION("OpenFlow bridge compatibility"); +MODULE_AUTHOR("Copyright (c) 2008 The Board of Trustees of The Leland Stanford Junior University"); +MODULE_LICENSE("GPL"); diff --git a/datapath/datapath.c b/datapath/datapath.c index 352baf39..dbd0628d 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -66,6 +66,9 @@ MODULE_PARM(serial_num, "s"); #endif +int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd); +EXPORT_SYMBOL(dp_ioctl_hook); + /* Number of milliseconds between runs of the maintenance thread. */ #define MAINT_SLEEP_MSECS 1000 @@ -232,13 +235,32 @@ uint64_t get_datapath_id(struct net_device *dev) return id; } -/* Creates a new datapath numbered 'dp_idx'. Returns 0 for success or a - * negative error code. */ -static int new_dp(int dp_idx) +/* Find the first free datapath index. Return the index or -1 if a free + * index could not be found. 
*/ +int gen_dp_idx(void) +{ + int i; + + for (i=0; i= DP_MAX) return -EINVAL; @@ -256,12 +278,13 @@ static int new_dp(int dp_idx) if (dp == NULL) goto err_unlock; - /* Setup our "of" device */ - err = dp_dev_setup(dp); + dp->dp_idx = dp_idx; + + /* Setup our datapath device */ + err = dp_dev_setup(dp, dp_name); if (err) goto err_free_dp; - dp->dp_idx = dp_idx; dp->chain = chain_create(dp); if (dp->chain == NULL) goto err_destroy_dp_dev; @@ -1076,16 +1099,22 @@ static struct genl_family dp_genl_family = { /* Attribute policy: what each attribute may contain. */ static struct nla_policy dp_genl_policy[DP_GENL_A_MAX + 1] = { [DP_GENL_A_DP_IDX] = { .type = NLA_U32 }, + [DP_GENL_A_DP_NAME] = { .type = NLA_STRING }, [DP_GENL_A_MC_GROUP] = { .type = NLA_U32 }, [DP_GENL_A_PORTNAME] = { .type = NLA_STRING } }; static int dp_genl_add(struct sk_buff *skb, struct genl_info *info) { - if (!info->attrs[DP_GENL_A_DP_IDX]) + int dp_idx = info->attrs[DP_GENL_A_DP_IDX] ? + nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]) : -1; + const char *dp_name = info->attrs[DP_GENL_A_DP_NAME] ? + nla_data(info->attrs[DP_GENL_A_DP_NAME]) : NULL; + + if ((dp_idx == -1) && (!dp_name)) return -EINVAL; - return new_dp(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX])); + return new_dp(dp_idx, dp_name); } static struct genl_ops dp_genl_ops_add_dp = { @@ -1096,22 +1125,50 @@ static struct genl_ops dp_genl_ops_add_dp = { .dumpit = NULL, }; -struct datapath *dp_get(int dp_idx) +struct datapath *dp_get_by_idx(int dp_idx) { if (dp_idx < 0 || dp_idx > DP_MAX) return NULL; return rcu_dereference(dps[dp_idx]); } +struct datapath *dp_get_by_name(const char *dp_name) +{ + int i; + + if (!dp_name || (strlen(dp_name) >= sizeof(dps[0]->netdev->name))) + return NULL; + + for (i=0; inetdev->name, dp_name, IFNAMSIZ)) + return rcu_dereference(dps[i]); + } + return NULL; +} + static int dp_genl_del(struct sk_buff *skb, struct genl_info *info) { - struct datapath *dp; + int dp_idx = info->attrs[DP_GENL_A_DP_IDX] ? 
+ nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]) : -1; + const char *dp_name = info->attrs[DP_GENL_A_DP_NAME] ? + nla_data(info->attrs[DP_GENL_A_DP_NAME]) : NULL; + struct datapath *dp = NULL; int err; - if (!info->attrs[DP_GENL_A_DP_IDX]) + if ((dp_idx == -1) && (!dp_name)) return -EINVAL; - dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX])); + if (dp_idx != -1) + dp = dp_get_by_idx(dp_idx); + + if (dp_name) { + struct datapath *dp_n; + dp_n = dp_get_by_name(dp_name); + if (dp && dp != dp_n) + return -EINVAL; + dp = dp_n; + } + if (!dp) err = -ENOENT; else { @@ -1130,10 +1187,11 @@ static struct genl_ops dp_genl_ops_del_dp = { }; /* Queries a datapath for related information. Currently the only relevant - * information is the datapath's multicast group ID. Really we want one - * multicast group per datapath, but because of locking issues[*] we can't - * easily get one. Thus, every datapath will currently return the same - * global multicast group ID, but in the future it would be nice to fix that. + * information is the datapath's multicast group ID, datapath ID, and + * datapath device name. Really we want one multicast group per datapath, + * but because of locking issues[*] we can't easily get one. Thus, every + * datapath will currently return the same global multicast group ID, but + * in the future it would be nice to fix that. * * [*] dp_genl_add, to add a new datapath, is called under the genl_lock * mutex, and genl_register_mc_group, called to acquire a new multicast @@ -1143,15 +1201,21 @@ static int dp_genl_query(struct sk_buff *skb, struct genl_info *info) { struct datapath *dp; struct sk_buff *ans_skb = NULL; - int dp_idx; int err = -ENOMEM; + int dp_idx = info->attrs[DP_GENL_A_DP_IDX] ? + nla_get_u32(info->attrs[DP_GENL_A_DP_IDX]) : -1; + const char *dp_name = info->attrs[DP_GENL_A_DP_NAME] ? 
+ nla_data(info->attrs[DP_GENL_A_DP_NAME]) : NULL; - if (!info->attrs[DP_GENL_A_DP_IDX]) + if ((dp_idx == -1) && (!dp_name)) return -EINVAL; rcu_read_lock(); - dp_idx = nla_get_u32((info->attrs[DP_GENL_A_DP_IDX])); - dp = dp_get(dp_idx); + if (dp_idx == -1) + dp = dp_get_by_name(dp_name); + else + dp = dp_get_by_idx(dp_idx); + if (!dp) err = -ENOENT; else { @@ -1167,7 +1231,8 @@ static int dp_genl_query(struct sk_buff *skb, struct genl_info *info) err = -ENOMEM; goto err; } - NLA_PUT_U32(ans_skb, DP_GENL_A_DP_IDX, dp_idx); + NLA_PUT_U32(ans_skb, DP_GENL_A_DP_IDX, dp->dp_idx); + NLA_PUT_STRING(ans_skb, DP_GENL_A_DP_NAME, dp->netdev->name); NLA_PUT_U32(ans_skb, DP_GENL_A_MC_GROUP, mc_group.id); genlmsg_end(ans_skb, data); @@ -1199,7 +1264,7 @@ static int dp_genl_add_del_port(struct sk_buff *skb, struct genl_info *info) return -EINVAL; /* Get datapath. */ - dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX])); + dp = dp_get_by_idx(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX])); if (!dp) { err = -ENOENT; goto out; @@ -1257,7 +1322,7 @@ static int dp_genl_openflow(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[DP_GENL_A_DP_IDX] || !va) return -EINVAL; - dp = dp_get(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX])); + dp = dp_get_by_idx(nla_get_u32(info->attrs[DP_GENL_A_DP_IDX])); if (!dp) return -ENOENT; @@ -1642,7 +1707,7 @@ dp_genl_openflow_dumpit(struct sk_buff *skb, struct netlink_callback *cb) if (!attrs[DP_GENL_A_DP_IDX]) return -EINVAL; dp_idx = nla_get_u16(attrs[DP_GENL_A_DP_IDX]); - dp = dp_get(dp_idx); + dp = dp_get_by_idx(dp_idx); if (!dp) return -ENOENT; @@ -1690,7 +1755,7 @@ dp_genl_openflow_dumpit(struct sk_buff *skb, struct netlink_callback *cb) dp_idx = cb->args[1]; s = &stats[cb->args[2]]; - dp = dp_get(dp_idx); + dp = dp_get_by_idx(dp_idx); if (!dp) return -ENOENT; } else { @@ -1778,7 +1843,7 @@ static int dp_init_netlink(void) err_unregister: genl_unregister_family(&dp_genl_family); - return err; + return err; } static void 
dp_uninit_netlink(void) @@ -1832,6 +1897,8 @@ static int __init dp_init(void) if (err) goto error_unreg_notifier; + dp_ioctl_hook = NULL; + /* Check if better descriptions of the switch are available than the * defaults. */ set_desc(); diff --git a/datapath/datapath.h b/datapath/datapath.h index 48690227..6826cde6 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -84,6 +84,7 @@ struct net_bridge_port { extern struct mutex dp_mutex; extern struct notifier_block dp_device_notifier; +extern int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd); int dp_del_switch_port(struct net_bridge_port *); int dp_xmit_skb(struct sk_buff *skb); @@ -106,6 +107,7 @@ int dp_send_hello(struct datapath *, const struct sender *, const struct ofp_header *); /* Should hold at least RCU read lock when calling */ -struct datapath *dp_get(int dp_idx); +struct datapath *dp_get_by_idx(int dp_idx); +struct datapath *dp_get_by_name(const char *dp_name); #endif /* datapath.h */ diff --git a/datapath/dp_dev.c b/datapath/dp_dev.c index 7a726c39..f2d1acd1 100644 --- a/datapath/dp_dev.c +++ b/datapath/dp_dev.c @@ -165,6 +165,7 @@ do_setup(struct net_device *netdev) { ether_setup(netdev); + netdev->do_ioctl = dp_ioctl_hook; netdev->get_stats = dp_dev_get_stats; netdev->hard_start_xmit = dp_dev_xmit; netdev->open = dp_dev_open; @@ -185,16 +186,23 @@ do_setup(struct net_device *netdev) netdev->dev_addr[3] |= 0xc0; } - -int dp_dev_setup(struct datapath *dp) +/* Create a datapath device associated with 'dp'. If 'dp_name' is null, + * the device name will be of the form 'of<dp_idx>'. 
*/ +int dp_dev_setup(struct datapath *dp, const char *dp_name) { struct dp_dev *dp_dev; struct net_device *netdev; - char of_name[8]; + char dev_name[IFNAMSIZ]; int err; - snprintf(of_name, sizeof of_name, "of%d", dp->dp_idx); - netdev = alloc_netdev(sizeof(struct dp_dev), of_name, do_setup); + if (dp_name) { + if (strlen(dp_name) >= IFNAMSIZ) + return -EINVAL; + strncpy(dev_name, dp_name, sizeof(dev_name)); + } else + snprintf(dev_name, sizeof dev_name, "of%d", dp->dp_idx); + + netdev = alloc_netdev(sizeof(struct dp_dev), dev_name, do_setup); if (!netdev) return -ENOMEM; @@ -204,8 +212,9 @@ int dp_dev_setup(struct datapath *dp) return err; } - /* For "of0", we check the DMI UUID to see if a Nicira mac address - * is available to use instead of the random one just generated. */ + /* For the first datapath, we check the DMI UUID to see if a Nicira + * mac address is available to use instead of the random one just + * generated. */ if (dp->dp_idx == 0) set_uuid_mac(netdev); diff --git a/datapath/dp_dev.h b/datapath/dp_dev.h index 2d4453b5..1d1abac2 100644 --- a/datapath/dp_dev.h +++ b/datapath/dp_dev.h @@ -1,7 +1,8 @@ #ifndef DP_DEV_H #define DP_DEV_H 1 -int dp_dev_setup(struct datapath *); + +int dp_dev_setup(struct datapath *, const char *); void dp_dev_destroy(struct datapath *); int dp_dev_recv(struct net_device *, struct sk_buff *); int is_dp_dev(struct net_device *); diff --git a/datapath/linux-2.4/.gitignore b/datapath/linux-2.4/.gitignore index ad178b13..28483d44 100644 --- a/datapath/linux-2.4/.gitignore +++ b/datapath/linux-2.4/.gitignore @@ -3,6 +3,7 @@ /Makefile /Makefile.main /attr.c +/brcompat.c /chain.c /datapath.c /compat24.c diff --git a/datapath/linux-2.4/compat-2.4/include/linux/netdevice.h b/datapath/linux-2.4/compat-2.4/include/linux/netdevice.h index 60c81b0c..9decde28 100644 --- a/datapath/linux-2.4/compat-2.4/include/linux/netdevice.h +++ b/datapath/linux-2.4/compat-2.4/include/linux/netdevice.h @@ -8,8 +8,6 @@ 
/*---------------------------------------------------------------------------- * In 2.6.24, a namespace argument became required for dev_get_by_name. */ -#define net_init NULL - #ifdef dev_get_by_name #undef dev_get_by_name #define dev_get_by_name(net, name) \ @@ -23,6 +21,19 @@ static inline struct net_device *compat_dev_get_by_name(const char *name) dev_get_by_name((name)) #endif /* dev_get_by_name */ +#ifdef dev_get_by_index +#undef dev_get_by_index +#define dev_get_by_index(net, ifindex) \ + compat_dev_get_by_index((ifindex)) +static inline struct net_device *compat_dev_get_by_index(int ifindex) +{ + return (_set_ver(dev_get_by_index))(ifindex); +} +#else +#define dev_get_by_index(net, ifindex) \ + dev_get_by_index((ifindex)) +#endif /* dev_get_by_index */ + #if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) static inline void *netdev_priv(struct net_device *dev) { diff --git a/datapath/linux-2.6/.gitignore b/datapath/linux-2.6/.gitignore index 8e8a3b54..b172e746 100644 --- a/datapath/linux-2.6/.gitignore +++ b/datapath/linux-2.6/.gitignore @@ -1,6 +1,7 @@ /Kbuild /Makefile /Makefile.main +/brcompat.c /chain.c /crc32.c /crc_t.c diff --git a/datapath/linux-2.6/compat-2.6/compat26.h b/datapath/linux-2.6/compat-2.6/compat26.h index 04335f7d..37f6a4f8 100644 --- a/datapath/linux-2.6/compat-2.6/compat26.h +++ b/datapath/linux-2.6/compat-2.6/compat26.h @@ -10,11 +10,13 @@ #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) /*---------------------------------------------------------------------------- * In 2.6.24, a namespace argument became required for dev_get_by_name. 
*/ -#define net_init NULL #define dev_get_by_name(net, name) \ dev_get_by_name((name)) +#define dev_get_by_index(net, ifindex) \ + dev_get_by_index((ifindex)) + #endif /* linux kernel <= 2.6.23 */ diff --git a/include/openflow/brcompat-netlink.h b/include/openflow/brcompat-netlink.h new file mode 100644 index 00000000..1a276df4 --- /dev/null +++ b/include/openflow/brcompat-netlink.h @@ -0,0 +1,62 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +#ifndef OPENFLOW_BRCOMPAT_NETLINK_H +#define OPENFLOW_BRCOMPAT_NETLINK_H 1 + +#define BRC_GENL_FAMILY_NAME "brcompat" + +/* Attributes that can be attached to the datapath's netlink messages. */ +enum { + BRC_GENL_A_UNSPEC, + BRC_GENL_A_DP_NAME, /* Datapath name. */ + BRC_GENL_A_PORT_NAME, /* Interface name. */ + BRC_GENL_A_MC_GROUP, /* Generic netlink multicast group. */ + + __BRC_GENL_A_MAX, + BRC_GENL_A_MAX = __BRC_GENL_A_MAX - 1 +}; + +/* Commands that can be executed on the datapath's netlink interface. */ +enum brc_genl_command { + BRC_GENL_C_UNSPEC, + BRC_GENL_C_DP_ADD, /* Datapath created. */ + BRC_GENL_C_DP_DEL, /* Datapath destroyed. */ + BRC_GENL_C_PORT_ADD, /* Port added to datapath. */ + BRC_GENL_C_PORT_DEL, /* Port removed from datapath. */ + BRC_GENL_C_QUERY_MC, /* Get multicast group for brcompat. */ + + __BRC_GENL_C_MAX, + BRC_GENL_C_MAX = __BRC_GENL_C_MAX - 1 +}; +#endif /* openflow/brcompat-netlink.h */ diff --git a/include/openflow/openflow-netlink.h b/include/openflow/openflow-netlink.h index 23e26408..3193c691 100644 --- a/include/openflow/openflow-netlink.h +++ b/include/openflow/openflow-netlink.h @@ -39,7 +39,8 @@ /* Attributes that can be attached to the datapath's netlink messages. */ enum { DP_GENL_A_UNSPEC, - DP_GENL_A_DP_IDX, /* Datapath Ethernet device name. */ + DP_GENL_A_DP_IDX, /* Datapath device index. */ + DP_GENL_A_DP_NAME, /* Datapath device name. */ DP_GENL_A_PORTNAME, /* Device name for datapath port. */ DP_GENL_A_MC_GROUP, /* Generic netlink multicast group. */ DP_GENL_A_OPENFLOW, /* OpenFlow packet. 
*/ diff --git a/lib/dpif.c b/lib/dpif.c index 8d16a22a..0f995225 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -241,20 +241,21 @@ dpif_send_openflow(struct dpif *dp, int dp_idx, struct ofpbuf *buffer, return retval; } -/* Creates local datapath numbered 'dp_idx'. Returns 0 if successful, - * otherwise a positive errno value. */ +/* Creates local datapath numbered 'dp_idx' with the name 'dp_name'. A + * 'dp_idx' of -1 or null 'dp_name' will have the kernel module choose + * values. Returns 0 if successful, otherwise a positive errno value. */ int -dpif_add_dp(struct dpif *dp, int dp_idx) +dpif_add_dp(struct dpif *dp, int dp_idx, const char *dp_name) { - return send_mgmt_command(dp, dp_idx, DP_GENL_C_ADD_DP, NULL); + return send_mgmt_command(dp, dp_idx, DP_GENL_C_ADD_DP, dp_name); } /* Destroys local datapath numbered 'dp_idx'. Returns 0 if successful, * otherwise a positive errno value. */ int -dpif_del_dp(struct dpif *dp, int dp_idx) +dpif_del_dp(struct dpif *dp, int dp_idx, const char *dp_name) { - return send_mgmt_command(dp, dp_idx, DP_GENL_C_DEL_DP, NULL); + return send_mgmt_command(dp, dp_idx, DP_GENL_C_DEL_DP, dp_name); } /* Adds the Ethernet device named 'netdev' to the local datapath numbered @@ -275,14 +276,17 @@ dpif_del_port(struct dpif *dp, int dp_idx, const char *netdev) static const struct nl_policy openflow_multicast_policy[] = { [DP_GENL_A_DP_IDX] = { .type = NL_A_U32 }, + [DP_GENL_A_DP_NAME] = { .type = NL_A_STRING }, [DP_GENL_A_MC_GROUP] = { .type = NL_A_U32 }, }; -/* Looks up the Netlink multicast group used by datapath 'dp_idx'. If - * successful, stores the multicast group in '*multicast_group' and returns 0. - * Otherwise, returns a positve errno value. */ +/* Looks up the Netlink multicast group and datapath index of a datapath + * by either the datapath index or name. If 'dp_idx' points to a value + * of '-1', then 'dp_name' is used to lookup the datapath. 
If successful, + * stores the multicast group in '*multicast_group' and the index in + * '*dp_idx' and returns 0. Otherwise, returns a positive errno value. */ static int -lookup_openflow_multicast_group(int dp_idx, int *multicast_group) +query_datapath(int *dp_idx, int *multicast_group, const char *dp_name) { struct nl_sock *sock; struct ofpbuf request, *reply; @@ -296,7 +300,12 @@ lookup_openflow_multicast_group(int dp_idx, int *multicast_group) ofpbuf_init(&request, 0); nl_msg_put_genlmsghdr(&request, sock, 0, openflow_family, NLM_F_REQUEST, DP_GENL_C_QUERY_DP, 1); - nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, dp_idx); + if (*dp_idx != -1) { + nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, *dp_idx); + } + if (dp_name) { + nl_msg_put_string(&request, DP_GENL_A_DP_NAME, dp_name); + } retval = nl_sock_transact(sock, &request, &reply); ofpbuf_uninit(&request); if (retval) { @@ -309,6 +318,7 @@ lookup_openflow_multicast_group(int dp_idx, int *multicast_group) ofpbuf_delete(reply); return EPROTO; } + *dp_idx = nl_attr_get_u32(attrs[DP_GENL_A_DP_IDX]); *multicast_group = nl_attr_get_u32(attrs[DP_GENL_A_MC_GROUP]); nl_sock_destroy(sock); ofpbuf_delete(reply); @@ -316,13 +326,36 @@ lookup_openflow_multicast_group(int dp_idx, int *multicast_group) return 0; } +/* Looks up the Netlink multicast group used by datapath 'dp_idx'. If + * successful, stores the multicast group in '*multicast_group' and returns 0. + * Otherwise, returns a positive errno value. */ +static int +lookup_openflow_multicast_group(int dp_idx, int *multicast_group) +{ + return query_datapath(&dp_idx, multicast_group, NULL); +} + +/* Looks up the datapath index based on the name. Returns the index, or + * -1 on error. */ +int +dpif_get_idx(const char *name) +{ + int dp_idx = -1; + int mc_group = 0; + + if (query_datapath(&dp_idx, &mc_group, name)) { + return -1; + } + + return dp_idx; +} + /* Sends the given 'command' to datapath 'dp', related to the local datapath - * numbered 'dp_idx'. 
If 'netdev' is nonnull, adds it to the command as the - * port name attribute. Returns 0 if successful, otherwise a positive errno - * value. */ + * numbered 'dp_idx'. If 'arg' is nonnull, adds it to the command as the + * datapath or port name attribute depending on the requested operation. + * Returns 0 if successful, otherwise a positive errno value. */ static int -send_mgmt_command(struct dpif *dp, int dp_idx, - int command, const char *netdev) +send_mgmt_command(struct dpif *dp, int dp_idx, int command, const char *arg) { struct ofpbuf request, *reply; int retval; @@ -330,9 +363,15 @@ send_mgmt_command(struct dpif *dp, int dp_idx, ofpbuf_init(&request, 0); nl_msg_put_genlmsghdr(&request, dp->sock, 32, openflow_family, NLM_F_REQUEST | NLM_F_ACK, command, 1); - nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, dp_idx); - if (netdev) { - nl_msg_put_string(&request, DP_GENL_A_PORTNAME, netdev); + if (dp_idx != -1) { + nl_msg_put_u32(&request, DP_GENL_A_DP_IDX, dp_idx); + } + if (arg) { + if ((command == DP_GENL_C_ADD_DP) || (command == DP_GENL_C_DEL_DP)) { + nl_msg_put_string(&request, DP_GENL_A_DP_NAME, arg); + } else { + nl_msg_put_string(&request, DP_GENL_A_PORTNAME, arg); + } } retval = nl_sock_transact(dp->sock, &request, &reply); ofpbuf_uninit(&request); diff --git a/lib/dpif.h b/lib/dpif.h index 7a0cd081..519ba923 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -59,9 +59,10 @@ int dpif_recv_openflow(struct dpif *, struct ofpbuf **, bool wait); int dpif_send_openflow(struct dpif *, int dp_idx, struct ofpbuf *, bool wait); /* Management functions. 
*/ -int dpif_add_dp(struct dpif *, int dp_idx); -int dpif_del_dp(struct dpif *, int dp_idx); -int dpif_add_port(struct dpif *, int dp_idx, const char *netdev); -int dpif_del_port(struct dpif *, int dp_idx, const char *netdev); +int dpif_add_dp(struct dpif *, int, const char *); +int dpif_del_dp(struct dpif *, int, const char *); +int dpif_add_port(struct dpif *, int, const char *); +int dpif_del_port(struct dpif *, int, const char *); +int dpif_get_idx(const char *); #endif /* dpif.h */ diff --git a/lib/vlog-modules.def b/lib/vlog-modules.def index 3fcd61f0..2a787184 100644 --- a/lib/vlog-modules.def +++ b/lib/vlog-modules.def @@ -1,4 +1,5 @@ /* Modules that can emit log messages. */ +VLOG_MODULE(brcompat) VLOG_MODULE(bridge) VLOG_MODULE(chain) VLOG_MODULE(cfg) diff --git a/utilities/.gitignore b/utilities/.gitignore index 0be1ed06..c93169bf 100644 --- a/utilities/.gitignore +++ b/utilities/.gitignore @@ -1,6 +1,7 @@ /Makefile /Makefile.in /dpctl +/dpctl.8 /ofp-discover /ofp-discover.8 /ofp-kill @@ -9,3 +10,4 @@ /ofp-pki-cgi /ofp-pki.8 /vlogconf +/vlogconf.8 diff --git a/utilities/dpctl.8.in b/utilities/dpctl.8.in index da07d452..77646c8c 100644 --- a/utilities/dpctl.8.in +++ b/utilities/dpctl.8.in @@ -85,6 +85,10 @@ traffic and the network device appears silent to the rest of the system. Removes each \fInetdev\fR from the list of network devices datapath \fIdp_idx\fR monitors. +.TP +\fBget-idx \fIof_dev\fR +Prints the datapath index for OpenFlow device \fIof_dev\fR. + .PP The following commands can be apply to OpenFlow switches regardless of the connection method. diff --git a/utilities/dpctl.c b/utilities/dpctl.c index 1c23a24b..4b03bf26 100644 --- a/utilities/dpctl.c +++ b/utilities/dpctl.c @@ -211,6 +211,7 @@ usage(void) " deldp nl:DP_ID delete local datapath DP_ID\n" " addif nl:DP_ID IFACE... add each IFACE as a port on DP_ID\n" " delif nl:DP_ID IFACE... 
delete each IFACE from DP_ID\n" + " get-idx OF_DEV get datapath index for OF_DEV\n" #endif "\nFor local datapaths and remote switches:\n" " show SWITCH show basic information\n" @@ -287,6 +288,22 @@ static int if_up(const char *netdev_name) return retval; } +static void +do_get_idx(const struct settings *s, int argc UNUSED, char *argv[]) +{ + int dp_idx; + + struct dpif dpif; + run(dpif_open(-1, &dpif), "opening management socket"); + dp_idx = dpif_get_idx(argv[1]); + if (dp_idx == -1) { + dpif_close(&dpif); + ofp_fatal(0, "unknown OpenFlow device: %s", argv[1]); + } + printf("%d\n", dp_idx); + dpif_close(&dpif); +} + static int get_dp_idx(const char *name) { @@ -303,7 +320,7 @@ do_add_dp(const struct settings *s, int argc UNUSED, char *argv[]) { struct dpif dpif; run(dpif_open(-1, &dpif), "opening management socket"); - run(dpif_add_dp(&dpif, get_dp_idx(argv[1])), "add_dp"); + run(dpif_add_dp(&dpif, get_dp_idx(argv[1]), NULL), "add_dp"); dpif_close(&dpif); } @@ -312,7 +329,7 @@ do_del_dp(const struct settings *s, int argc UNUSED, char *argv[]) { struct dpif dpif; run(dpif_open(-1, &dpif), "opening management socket"); - run(dpif_del_dp(&dpif, get_dp_idx(argv[1])), "del_dp"); + run(dpif_del_dp(&dpif, get_dp_idx(argv[1]), NULL), "del_dp"); dpif_close(&dpif); } @@ -1384,6 +1401,7 @@ static struct command all_commands[] = { { "deldp", 1, 1, do_del_dp }, { "addif", 2, INT_MAX, do_add_port }, { "delif", 2, INT_MAX, do_del_port }, + { "get-idx", 1, 1, do_get_idx }, #endif { "show", 1, 1, do_show }, diff --git a/vswitchd/.gitignore b/vswitchd/.gitignore new file mode 100644 index 00000000..42dcf728 --- /dev/null +++ b/vswitchd/.gitignore @@ -0,0 +1,4 @@ +/Makefile +/Makefile.in +/vswitchd +/vswitchd.8 diff --git a/vswitchd/automake.mk b/vswitchd/automake.mk index 05bf98c8..884b4595 100644 --- a/vswitchd/automake.mk +++ b/vswitchd/automake.mk @@ -4,6 +4,7 @@ man_MANS += vswitchd/vswitchd.8 DISTCLEANFILES += vswitchd/vswitchd.8 vswitchd_vswitchd_SOURCES = \ + 
vswitchd/brcompat.c \ vswitchd/bridge.c \ vswitchd/bridge.h \ vswitchd/cfg.c \ diff --git a/vswitchd/brcompat.c b/vswitchd/brcompat.c new file mode 100644 index 00000000..b4da4fbf --- /dev/null +++ b/vswitchd/brcompat.c @@ -0,0 +1,449 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "openflow/brcompat-netlink.h" +#include "bridge.h" +#include "cfg.h" +#include "command-line.h" +#include "daemon.h" +#include "dpif.h" +#include "fault.h" +#include "netlink.h" +#include "netlink-protocol.h" +#include "ofpbuf.h" +#include "poll-loop.h" +#include "process.h" +#include "svec.h" +#include "timeval.h" +#include "util.h" +#include "vlog-socket.h" + +#include "vlog.h" +#define THIS_MODULE VLM_brcompat + +/* Name of temporary file used during a configuration update. */ +#define TMP_CONFIG_NAME ".brcompat.conf" + +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 60); + +/* Used for creating kernel datapaths */ +static struct dpif mgmt_dpif; + +/* Netlink socket to kernel datapath */ +struct nl_sock *nl_sock; + +/* Pointer to configuration file name */ +static const char *config_name; + +/* xxx Just hangs if datapath is rmmod/insmod. Learn to reconnect? */ + +/* The Generic Netlink family number used for bridge compatibility. 
*/
+static int brc_family;
+
+static const struct nl_policy brc_multicast_policy[] = {
+    [BRC_GENL_A_MC_GROUP] = {.type = NL_A_U32 }
+};
+
+/* Asks the brcompat Generic Netlink family, over a short-lived socket, for
+ * its multicast group and stores the answer in '*multicast_group'.  Returns
+ * 0 if successful, otherwise the nonzero error reported by the Netlink
+ * helpers, or EPROTO if the reply does not match 'brc_multicast_policy'. */
+static int
+lookup_brc_multicast_group(int *multicast_group)
+{
+    struct nlattr *attrs[ARRAY_SIZE(brc_multicast_policy)];
+    struct ofpbuf request, *reply;
+    struct nl_sock *sock;
+    int error;
+
+    error = nl_sock_create(NETLINK_GENERIC, 0, 0, 0, &sock);
+    if (error) {
+        return error;
+    }
+
+    ofpbuf_init(&request, 0);
+    nl_msg_put_genlmsghdr(&request, sock, 0, brc_family,
+                          NLM_F_REQUEST, BRC_GENL_C_QUERY_MC, 1);
+    error = nl_sock_transact(sock, &request, &reply);
+    ofpbuf_uninit(&request);
+    if (error) {
+        /* No reply was handed back, so only the socket needs releasing. */
+        nl_sock_destroy(sock);
+        return error;
+    }
+
+    /* 'error' is 0 here; it changes only if the reply fails to parse. */
+    if (nl_policy_parse(reply, brc_multicast_policy, attrs,
+                        ARRAY_SIZE(brc_multicast_policy))) {
+        *multicast_group = nl_attr_get_u32(attrs[BRC_GENL_A_MC_GROUP]);
+    } else {
+        error = EPROTO;
+    }
+    nl_sock_destroy(sock);
+    ofpbuf_delete(reply);
+
+    return error;
+}
+
+/* Opens a socket for brcompat notifications.  Returns 0 if successful,
+ * otherwise a positive errno value.
+ *
+ * The steps run in order and stop at the first failure: resolve the
+ * brcompat family number into 'brc_family', look up the family's multicast
+ * group, then create '*sock' subscribed to that group. */
+static int
+brc_open(struct nl_sock **sock)
+{
+    int mc_group = 0;
+    int error;
+
+    error = nl_lookup_genl_family(BRC_GENL_FAMILY_NAME, &brc_family);
+    if (!error) {
+        error = lookup_brc_multicast_group(&mc_group);
+    }
+    if (!error) {
+        error = nl_sock_create(NETLINK_GENERIC, mc_group, 0, 0, sock);
+    }
+
+    return error;
+}
+
+static const struct nl_policy brc_dp_policy[] = {
+    [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING },
+};
+
+/* Write 'new_cfg' into the configuration file.  Returns 0 if successful,
+ * otherwise a positive errno value. 
*/
+static int
+brc_write_config(struct svec *new_cfg)
+{
+    int error;
+    int fd;
+    size_t i;
+
+    /* Sort and de-duplicate so the generated file is stable and a key that
+     * was added twice appears only once. */
+    svec_sort(new_cfg);
+    svec_unique(new_cfg);
+
+    /* Write to a temporary file first; it is renamed over 'config_name'
+     * only after every entry has been written. */
+    fd = open(TMP_CONFIG_NAME, O_CREAT|O_WRONLY|O_TRUNC, S_IRUSR|S_IWUSR);
+    if (fd == -1) {
+        /* Capture errno before logging, which may overwrite it. */
+        error = errno;
+        VLOG_WARN_RL(&rl, "could not open temp config file for writing: %s",
+                strerror(error));
+        return error;
+    }
+
+    for (i = 0; i < new_cfg->n; i++) {
+        const char *entry = new_cfg->names[i];
+        size_t len = strlen(entry);
+        int retval = write(fd, entry, len);
+
+        if (retval < 0 || (size_t) retval != len) {
+            /* A short write does not set errno, so report it as EIO rather
+             * than returning a stale (possibly zero) errno value. */
+            error = retval < 0 ? errno : EIO;
+            VLOG_WARN_RL(&rl, "problem writing to temp config file %d: %s",
+                    retval, strerror(error));
+            /* Do not leak the descriptor on the failure path. */
+            close(fd);
+            return error;
+        }
+    }
+    close(fd);
+
+    if (rename(TMP_CONFIG_NAME, config_name) < 0) {
+        error = errno;
+        VLOG_WARN_RL(&rl, "could not rename temp config file: %s",
+                strerror(error));
+        return error;
+    }
+
+    return 0;
+}
+
+enum bmc_action {
+    BMC_ADD_DP,
+    BMC_DEL_DP,
+    BMC_ADD_PORT,
+    BMC_DEL_PORT
+};
+
+/* Modify the existing configuration according to 'act'.  The configuration
+ * file will be modified to reflect these changes.  The caller is
+ * responsible for causing vswitchd to actually re-read its configuration. */
+static void
+brc_modify_config(const char *dp_name, const char *port_name,
+        enum bmc_action act)
+{
+    struct svec new_cfg;
+    struct svec old_br;
+    struct svec old_keys;
+    char buf[1024];
+    size_t i, j;
+    int n_ports = -1;
+
+    svec_init(&new_cfg);
+    svec_init(&old_br);
+    svec_init(&old_keys);
+    cfg_get_subsections(&old_br, "bridge");
+    for (i = 0; i < old_br.n; i++) {
+        /* If we're deleting the datapath, skip over its current config. 
*/
+        if ((act == BMC_DEL_DP) && !strcmp(old_br.names[i], dp_name)) {
+            continue;
+        }
+
+        cfg_get_all_keys(&old_keys, "bridge.%s.port", old_br.names[i]);
+        for (j = 0; j < old_keys.n; j++) {
+            if ((act == BMC_DEL_PORT)
+                    && !strcmp(old_br.names[i], dp_name)
+                    && !strcmp(old_keys.names[j], port_name)) {
+                n_ports = old_keys.n - 1;
+                continue;
+            }
+            if (snprintf(buf, sizeof(buf), "bridge.%s.port = %s\n",
+                        old_br.names[i], old_keys.names[j]) >= sizeof(buf)) {
+                VLOG_WARN_RL(&rl, "config line too long, skipping");
+                continue;
+            }
+            svec_add(&new_cfg, buf);
+        }
+
+        cfg_get_all_keys(&old_keys, "bridge.%s.enabled", old_br.names[i]);
+        for (j = 0; j < old_keys.n; j++) {
+            snprintf(buf, sizeof(buf), "bridge.%s.enabled = true\n",
+                    old_br.names[i]);
+            svec_add(&new_cfg, buf);
+        }
+    }
+    svec_destroy(&old_br);
+    svec_destroy(&old_keys);
+
+    /* If the last interface was removed from the datapath, we'll add
+     * this dummy entry so that it's still created when vswitchd is
+     * restarted. */
+    if ((act == BMC_ADD_DP) || (n_ports == 0)) {
+        snprintf(buf, sizeof(buf), "bridge.%s.enabled = true\n", dp_name);
+        svec_add(&new_cfg, buf);
+    }
+
+    if (act == BMC_ADD_PORT) {
+        snprintf(buf, sizeof(buf), "bridge.%s.port = %s\n", dp_name, port_name);
+        svec_add(&new_cfg, buf);
+    }
+
+    brc_write_config(&new_cfg);
+    svec_destroy(&new_cfg);
+}
+
+/* Creates the bridge 'dp_name': rewrites the configuration file to include
+ * it, then re-reads the configuration and reconfigures the bridges.
+ * Returns 0 if the bridge exists afterwards, or EINVAL if it already
+ * existed beforehand or is still missing after reconfiguration. */
+static int
+brc_add_dp(const char *dp_name)
+{
+    if (bridge_exists(dp_name)) {
+        return EINVAL;
+    }
+
+    brc_modify_config(dp_name, NULL, BMC_ADD_DP);
+    cfg_read();
+    bridge_reconfigure();
+
+    return bridge_exists(dp_name) ? 0 : EINVAL;
+}
+
+/* Removes the bridge 'dp_name': rewrites the configuration file without
+ * it, then re-reads the configuration and reconfigures the bridges.
+ * Returns 0 if the bridge is gone afterwards, or EINVAL if it did not
+ * exist beforehand or is still present after reconfiguration. */
+static int
+brc_del_dp(const char *dp_name)
+{
+    if (!bridge_exists(dp_name)) {
+        return EINVAL;
+    }
+
+    brc_modify_config(dp_name, NULL, BMC_DEL_DP);
+    cfg_read();
+    bridge_reconfigure();
+
+    return bridge_exists(dp_name) ? EINVAL : 0;
+}
+
+int
+brc_handle_dp_cmd(struct ofpbuf *buffer, bool add)
+{
+    struct nlattr *attrs[ARRAY_SIZE(brc_dp_policy)];
+    const char 
*dp_name; + + if (!nl_policy_parse(buffer, brc_dp_policy, attrs, + ARRAY_SIZE(brc_dp_policy))) { + return EINVAL; + } + + dp_name = nl_attr_get(attrs[BRC_GENL_A_DP_NAME]); + + if (add) { + return brc_add_dp(dp_name); + } else { + return brc_del_dp(dp_name); + } +} + +static const struct nl_policy brc_port_policy[] = { + [BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING }, + [BRC_GENL_A_PORT_NAME] = { .type = NL_A_STRING }, +}; + +int +brc_handle_port_cmd(struct ofpbuf *buffer, bool add) +{ + struct nlattr *attrs[ARRAY_SIZE(brc_port_policy)]; + const char *dp_name, *port_name; + + if (!nl_policy_parse(buffer, brc_port_policy, attrs, + ARRAY_SIZE(brc_port_policy))) { + return EINVAL; + } + + dp_name = nl_attr_get(attrs[BRC_GENL_A_DP_NAME]); + port_name = nl_attr_get(attrs[BRC_GENL_A_PORT_NAME]); + + if (!bridge_exists(dp_name)) { + return EINVAL; + } + + if (add) { + brc_modify_config(dp_name, port_name, BMC_ADD_PORT); + } else { + brc_modify_config(dp_name, port_name, BMC_DEL_PORT); + } + + /* Force vswitchd to reconfigure itself. 
*/
+    cfg_read();
+    bridge_reconfigure();
+
+    return 0;
+}
+
+/* Receives one message from 'nl_sock' and dispatches it by Generic Netlink
+ * command to the datapath or port handler.  Returns 0 if a message was
+ * handled, otherwise a nonzero error: the value reported by nl_sock_recv()
+ * (EAGAIN is not logged), the handler's own error, or EPROTO for a message
+ * that is too short or carries an unknown command. */
+int
+brc_recv_update(void)
+{
+    int retval;
+    struct ofpbuf *buffer;
+    struct genlmsghdr *genlmsghdr;
+
+
+    /* Keep receiving while the socket reports ENOBUFS, or while the message
+     * received is a Netlink error or NLMSG_DONE message. */
+    buffer = NULL;
+    do {
+        ofpbuf_delete(buffer);
+        retval = nl_sock_recv(nl_sock, &buffer, false);
+    } while (retval == ENOBUFS
+            || (!retval
+                && (nl_msg_nlmsgerr(buffer, NULL)
+                    || nl_msg_nlmsghdr(buffer)->nlmsg_type == NLMSG_DONE)));
+    if (retval) {
+        if (retval != EAGAIN) {
+            VLOG_WARN_RL(&rl, "brc_recv_update: %s", strerror(retval));
+        }
+        return retval;
+    }
+
+    genlmsghdr = nl_msg_genlmsghdr(buffer);
+    if (!genlmsghdr) {
+        VLOG_WARN_RL(&rl, "received packet too short for generic NetLink");
+        /* 'retval' is 0 at this point; without an explicit error a message
+         * that was just warned about and dropped would be reported as
+         * handled successfully. */
+        retval = EPROTO;
+        goto error;
+    }
+
+    if (nl_msg_nlmsghdr(buffer)->nlmsg_type != brc_family) {
+        /* 'retval' is still 0 here: a message for another family is
+         * dropped without reporting an error. */
+        VLOG_DBG_RL(&rl, "received type (%"PRIu16") != brcompat family (%d)",
+                nl_msg_nlmsghdr(buffer)->nlmsg_type, brc_family);
+        goto error;
+    }
+
+    switch (genlmsghdr->cmd) {
+    case BRC_GENL_C_DP_ADD:
+        retval = brc_handle_dp_cmd(buffer, true);
+        break;
+
+    case BRC_GENL_C_DP_DEL:
+        retval = brc_handle_dp_cmd(buffer, false);
+        break;
+
+    case BRC_GENL_C_PORT_ADD:
+        retval = brc_handle_port_cmd(buffer, true);
+        break;
+
+    case BRC_GENL_C_PORT_DEL:
+        retval = brc_handle_port_cmd(buffer, false);
+        break;
+
+    default:
+        retval = EPROTO;
+    }
+
+error:
+    /* Common exit: the received buffer is released on every path. */
+    ofpbuf_delete(buffer);
+    return retval;
+}
+
+/* Handles at most one pending brcompat message; the result is ignored. */
+void
+brc_run(void)
+{
+    brc_recv_update();
+}
+
+/* Registers the brcompat socket's file descriptor with the poll loop so
+ * that the next poll wakes up when it becomes readable. */
+void
+brc_wait(void)
+{
+    poll_fd_wait(nl_sock_fd(nl_sock), POLLIN);
+}
+
+void
+brc_init(const char *file_name)
+{
+    if (brc_open(&nl_sock)) {
+        ofp_fatal(0, "could not open brcompat socket. 
Check " + "\"brcompat\" kernel module."); + } + + if (dpif_open(-1, &mgmt_dpif) != 0) { + ofp_fatal(0, "could not open datapath interface"); + } + + config_name = file_name; +} diff --git a/vswitchd/brcompat.h b/vswitchd/brcompat.h new file mode 100644 index 00000000..d1218344 --- /dev/null +++ b/vswitchd/brcompat.h @@ -0,0 +1,41 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +#ifndef VSWITCHD_BRCOMPAT_H +#define VSWITCHD_BRCOMPAT_H 1 + +void brc_init(const char *); +void brc_wait(void); +void brc_run(void); + +#endif /* brcompat.h */ diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index f2138324..fc6bc631 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -204,7 +204,7 @@ bridge_init(void) } for (i = 0; i < DP_MAX; i++) { - int retval = dpif_del_dp(&mgmt_dpif, i); + int retval = dpif_del_dp(&mgmt_dpif, i, NULL); if (retval && retval != ENOENT) { VLOG_ERR("failed to delete datapath nl:%d: %s", i, strerror(retval)); @@ -221,6 +221,7 @@ bridge_reconfigure(void) /* Collect old and new bridges. */ svec_init(&old_br); + svec_init(&new_br); LIST_FOR_EACH (br, struct bridge, node, &all_bridges) { svec_add(&old_br, br->name); } @@ -317,10 +318,6 @@ bridge_reconfigure(void) i++; } } - if (!br->n_ports) { - VLOG_ERR("%s bridge has no ports, dropping", br->name); - bridge_destroy(br); - } } } @@ -372,7 +369,6 @@ bridge_flush(struct bridge *br) /* Bridge reconfiguration functions. */ -static int allocate_dp_idx(void); static void sanitize_opp(struct ofp_phy_port *opp); static void run_secchan(struct bridge *); static void start_secchan(struct bridge *); @@ -385,7 +381,6 @@ bridge_create(const char *name) assert(!bridge_lookup(name)); br = xcalloc(1, sizeof *br); - list_push_back(&all_bridges, &br->node); br->name = xstrdup(name); br->txqlen = 0; br->ml = mac_learning_create(); @@ -407,33 +402,27 @@ bridge_create(const char *name) br->stats_mgr = stats_mgr_create(br->rconn); /* Create kernel datapath. */ - for (;;) { - /* Pick a datapath index. - * - * XXX we could make a bad choice if a user created a datapath manually - * with dpctl. Ideally the kernel module should provide a way to pick - * the datapath index for us. 
*/ - br->dp_idx = allocate_dp_idx(); - if (br->dp_idx < 0) { - VLOG_EMER("out of datapath indexes; cannot create switches"); - /* XXX free memory */ - return NULL; - } - - /* Create the kernel datapath. */ - retval = dpif_add_dp(&mgmt_dpif, br->dp_idx); - if (retval) { - VLOG_ERR("failed to create datapath nl:%d: %s", - br->dp_idx, strerror(retval)); - } - break; + retval = dpif_add_dp(&mgmt_dpif, -1, br->name); + if (retval) { + VLOG_ERR("failed to create datapath %s: %s", + br->name, strerror(retval)); + free(br); + return NULL; } - VLOG_WARN("created bridge %s on datapath nl:%d", br->name, br->dp_idx); + list_push_back(&all_bridges, &br->node); + + br->dp_idx = dpif_get_idx(br->name); + if (br->dp_idx == -1) { + VLOG_WARN("bad dp_idx for bridge %s", br->name); + } + + VLOG_WARN("created bridge %s with dp_idx %d", br->name, br->dp_idx); return br; } + static void log_secchan_died(enum vlog_level level, struct bridge *br, bool expected) { @@ -567,7 +556,7 @@ bridge_destroy(struct bridge *br) list_remove(&br->node); free(br->name); if (br->dp_idx >= 0) { - int retval = dpif_del_dp(&mgmt_dpif, br->dp_idx); + int retval = dpif_del_dp(&mgmt_dpif, br->dp_idx, NULL); if (!retval || retval == ENOENT) { assert(br->dp_idx < DP_MAX); in_use_dps[br->dp_idx] = false; @@ -607,17 +596,10 @@ bridge_lookup(const char *name) return NULL; } -static int -allocate_dp_idx(void) +bool +bridge_exists(const char *name) { - int i; - for (i = 0; i < DP_MAX; i++) { - if (!in_use_dps[i]) { - in_use_dps[i] = true; - return i; - } - } - return -1; + return bridge_lookup(name) ? true : false; } static int @@ -720,6 +702,7 @@ bridge_reconfigure_one(struct bridge *br) /* Collect old and new ports. */ svec_init(&old_ports); + svec_init(&new_ports); for (i = 0; i < br->n_ports; i++) { svec_add(&old_ports, br->ports[i]->name); } @@ -2007,6 +1990,7 @@ port_reconfigure(struct port *port) /* Collect old and new interfaces. 
*/ svec_init(&old_ifaces); + svec_init(&new_ifaces); for (i = 0; i < port->n_ifaces; i++) { svec_add(&old_ifaces, port->ifaces[i]->name); } diff --git a/vswitchd/bridge.h b/vswitchd/bridge.h index 684537ca..a3421405 100644 --- a/vswitchd/bridge.h +++ b/vswitchd/bridge.h @@ -41,5 +41,6 @@ void bridge_init(void); void bridge_reconfigure(void); void bridge_run(void); void bridge_wait(void); +bool bridge_exists(const char *); #endif /* bridge.h */ diff --git a/vswitchd/cfg.c b/vswitchd/cfg.c index 7996bde4..55f43199 100644 --- a/vswitchd/cfg.c +++ b/vswitchd/cfg.c @@ -209,10 +209,10 @@ cfg_count(const char *key_, ...) return retval; } -/* Initializes 'svec' to all of the immediate subsections of 'section'. For +/* Fills 'svec' with all of the immediate subsections of 'section'. For * example, if 'section' is "bridge" and keys bridge.a, bridge.b, bridge.b.c, * and bridge.c.x.y.z exist, then 'svec' would be initialized to a, b, and - * c. */ + * c. The caller must first initialize 'svec'. */ void cfg_get_subsections(struct svec *svec, const char *section_, ...) { @@ -226,7 +226,7 @@ cfg_get_subsections(struct svec *svec, const char *section_, ...) ds_put_char(§ion, '.'); va_end(args); - svec_init(svec); + svec_clear(svec); for (p = find_key_le(ds_cstr(§ion)); *p && !strncmp(section.string, *p, section.length); p++) { @@ -345,7 +345,8 @@ cfg_get_mac(int idx, const char *key_, ...) return eth_addr_to_uint64(mac); } -/* Initializes 'svec' with all of the string values of 'key'. */ +/* Fills 'svec' with all of the string values of 'key'. The caller must + * first initialize 'svec'. */ void cfg_get_all_strings(struct svec *svec, const char *key_, ...) { @@ -353,15 +354,16 @@ cfg_get_all_strings(struct svec *svec, const char *key_, ...) 
char *key; FORMAT_KEY(key_, key); - svec_init(svec); + svec_clear(svec); for (p = find_key_le(key), q = find_key_ge(key); p < q; p++) { svec_add(svec, extract_value(*p)); } free(key); } -/* Initializes 'svec' with all of the values of 'key' that are valid keys. - * Values of 'key' that are not valid keys are omitted. */ +/* Fills 'svec' with all of the values of 'key' that are valid keys. + * Values of 'key' that are not valid keys are omitted. The caller + * must first initialize 'svec'. */ void cfg_get_all_keys(struct svec *svec, const char *key_, ...) { @@ -369,7 +371,7 @@ cfg_get_all_keys(struct svec *svec, const char *key_, ...) char *key; FORMAT_KEY(key_, key); - svec_init(svec); + svec_clear(svec); for (p = find_key_le(key), q = find_key_ge(key); p < q; p++) { const char *value = extract_value(*p); if (is_key(value)) { diff --git a/vswitchd/vswitchd.8.in b/vswitchd/vswitchd.8.in index 55deb649..5238109f 100644 --- a/vswitchd/vswitchd.8.in +++ b/vswitchd/vswitchd.8.in @@ -77,6 +77,15 @@ For a description of \fBvswitchd\fR configuration syntax, see . .PP The rest of \fBvswitchd\fR's options are truly optional: +.TP +\fB-b \fIfile\fR, \fB--brcompat=\fIfile\fR +. +The \fB-b\fR or \fB--brcompat\fR option puts \fBvswitchd\fR into bridge +compatibility mode. This means it listens for bridge ioctl commands +(e.g., those generated by the \fBbrctl\fR program) to add or remove +datapaths and the interfaces that attach to them. The \fIfile\fR +argument should be the master list for Bridge Configuration and be one +of the files included in the \fB--config\fR option. . 
.so lib/daemon.man .so lib/vlog.man diff --git a/vswitchd/vswitchd.c b/vswitchd/vswitchd.c index 36925158..00a733f4 100644 --- a/vswitchd/vswitchd.c +++ b/vswitchd/vswitchd.c @@ -41,6 +41,7 @@ #include #include +#include "brcompat.h" #include "bridge.h" #include "cfg.h" #include "command-line.h" @@ -64,6 +65,8 @@ static void usage(void) NO_RETURN; static void reconfigure(void); +static bool brc_enabled = false; + int main(int argc, char *argv[]) { @@ -96,6 +99,11 @@ main(int argc, char *argv[]) } bridge_run(); + if (brc_enabled) { + brc_run(); + brc_wait(); + } + signal_wait(sighup); bridge_wait(); poll_block(); @@ -119,6 +127,7 @@ parse_options(int argc, char *argv[]) VLOG_OPTION_ENUMS }; static struct option long_options[] = { + {"brcompat", required_argument, 0, 'b'}, {"config", required_argument, 0, 'F'}, {"help", no_argument, 0, 'h'}, {"version", no_argument, 0, 'V'}, @@ -143,6 +152,11 @@ parse_options(int argc, char *argv[]) } switch (c) { + case 'b': + brc_enabled = true; + brc_init(optarg); + break; + case 'F': configured = true; error = cfg_add_file(optarg); @@ -194,6 +208,7 @@ usage(void) "usage: %s [OPTIONS]\n", program_name, program_name); printf("\nConfiguration options (must specify at least one):\n" + " -b, --brcompat=FILE run with bridge compatibility hooks\n" " -F, --config=FILE|DIR reads configuration from FILE or DIR\n"); daemon_usage(); vlog_usage(); -- 2.30.2