From f2459fe7d91c4c325dfaa3ed18f56200b63ae27e Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 12 Apr 2010 15:53:39 -0400 Subject: [PATCH] datapath: Add generic virtual port layer. Currently the datapath directly accesses devices through their Linux functions. Obviously this doesn't work for virtual devices that are not backed by an actual Linux device. This creates a new virtual port layer which handles all interaction with devices. The existing support for Linux devices was then implemented on top of this layer as two device types. It splits out and renames dp_dev to internal_dev. There were several places where datapath devices had to be handled in a special manner and this cleans that up by putting all the special casing in a single location. --- datapath/Modules.mk | 12 +- datapath/actions.c | 52 +- datapath/actions.h | 3 +- datapath/datapath.c | 466 ++++----- datapath/datapath.h | 35 +- datapath/dp_dev.c | 271 ------ datapath/dp_dev.h | 34 - datapath/dp_notify.c | 29 +- datapath/dp_sysfs.h | 8 +- datapath/dp_sysfs_dp.c | 85 +- datapath/dp_sysfs_if.c | 66 +- datapath/flow.c | 5 +- datapath/vport-internal_dev.c | 368 +++++++ datapath/vport-internal_dev.h | 20 + datapath/vport-netdev.c | 380 ++++++++ datapath/vport-netdev.h | 41 + datapath/vport.c | 1170 +++++++++++++++++++++++ datapath/vport.h | 235 +++++ include/openvswitch/datapath-protocol.h | 55 +- lib/dpif-linux.c | 4 +- 20 files changed, 2624 insertions(+), 715 deletions(-) delete mode 100644 datapath/dp_dev.c delete mode 100644 datapath/dp_dev.h create mode 100644 datapath/vport-internal_dev.c create mode 100644 datapath/vport-internal_dev.h create mode 100644 datapath/vport-netdev.c create mode 100644 datapath/vport-netdev.h create mode 100644 datapath/vport.c create mode 100644 datapath/vport.h diff --git a/datapath/Modules.mk b/datapath/Modules.mk index 211f96fc..1e8bc047 100644 --- a/datapath/Modules.mk +++ b/datapath/Modules.mk @@ -12,20 +12,24 @@ dist_modules = $(both_modules) # Modules to 
distribute openvswitch_sources = \ actions.c \ datapath.c \ - dp_dev.c \ dp_notify.c \ dp_sysfs_dp.c \ dp_sysfs_if.c \ flow.c \ - table.c + table.c \ + vport.c \ + vport-internal_dev.c \ + vport-netdev.c openvswitch_headers = \ actions.h \ compat.h \ datapath.h \ - dp_dev.h \ dp_sysfs.h \ - flow.h + flow.h \ + vport.h \ + vport-internal_dev.h \ + vport-netdev.h dist_sources = $(foreach module,$(dist_modules),$($(module)_sources)) dist_headers = $(foreach module,$(dist_modules),$($(module)_headers)) diff --git a/datapath/actions.c b/datapath/actions.c index 10324619..40204118 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -18,10 +18,11 @@ #include #include #include -#include "datapath.h" -#include "dp_dev.h" + #include "actions.h" +#include "datapath.h" #include "openvswitch/datapath-protocol.h" +#include "vport.h" static struct sk_buff * make_writable(struct sk_buff *skb, unsigned min_headroom, gfp_t gfp) @@ -360,42 +361,27 @@ static inline unsigned packet_length(const struct sk_buff *skb) return length; } -int dp_xmit_skb(struct sk_buff *skb) -{ - struct datapath *dp = skb->dev->br_port->dp; - int len = skb->len; - - if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) { - printk(KERN_WARNING "%s: dropped over-mtu packet: %d > %d\n", - dp_name(dp), packet_length(skb), skb->dev->mtu); - kfree_skb(skb); - return -E2BIG; - } - - forward_ip_summed(skb); - dev_queue_xmit(skb); - - return len; -} - static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port) { - struct net_bridge_port *p; - struct net_device *dev; + struct dp_port *p; + int mtu; if (!skb) goto error; - p = dp->ports[out_port]; + p = rcu_dereference(dp->ports[out_port]); if (!p) goto error; - dev = skb->dev = p->dev; - if (is_dp_dev(dev)) - dp_dev_recv(dev, skb); - else - dp_xmit_skb(skb); + mtu = vport_get_mtu(p->vport); + if (packet_length(skb) > mtu && !skb_is_gso(skb)) { + printk(KERN_WARNING "%s: dropped over-mtu packet: %d > %d\n", + dp_name(dp), 
packet_length(skb), mtu); + goto error; + } + + vport_send(p->vport, skb); return; error: @@ -414,8 +400,8 @@ static int output_group(struct datapath *dp, __u16 group, if (!g) return -1; for (i = 0; i < g->n_ports; i++) { - struct net_bridge_port *p = dp->ports[g->ports[i]]; - if (!p || skb->dev == p->dev) + struct dp_port *p = rcu_dereference(dp->ports[g->ports[i]]); + if (!p || OVS_CB(skb)->dp_port == p) continue; if (prev_port != -1) { struct sk_buff *clone = skb_clone(skb, gfp); @@ -441,7 +427,7 @@ output_control(struct datapath *dp, struct sk_buff *skb, u32 arg, gfp_t gfp) * information about what happened to it. */ static void sflow_sample(struct datapath *dp, struct sk_buff *skb, const union odp_action *a, int n_actions, - gfp_t gfp, struct net_bridge_port *nbp) + gfp_t gfp, struct dp_port *dp_port) { struct odp_sflow_sample_header *hdr; unsigned int actlen = n_actions * sizeof(union odp_action); @@ -455,7 +441,7 @@ static void sflow_sample(struct datapath *dp, struct sk_buff *skb, memcpy(__skb_push(nskb, actlen), a, actlen); hdr = (struct odp_sflow_sample_header*)__skb_push(nskb, hdrlen); hdr->n_actions = n_actions; - hdr->sample_pool = atomic_read(&nbp->sflow_pool); + hdr->sample_pool = atomic_read(&dp_port->sflow_pool); dp_output_control(dp, nskb, _ODPL_SFLOW_NR, 0); } @@ -473,7 +459,7 @@ int execute_actions(struct datapath *dp, struct sk_buff *skb, int err; if (dp->sflow_probability) { - struct net_bridge_port *p = skb->dev->br_port; + struct dp_port *p = OVS_CB(skb)->dp_port; if (p) { atomic_inc(&p->sflow_pool); if (dp->sflow_probability == UINT_MAX || diff --git a/datapath/actions.h b/datapath/actions.h index 065c0e40..04bc6544 100644 --- a/datapath/actions.h +++ b/datapath/actions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009 Nicira Networks. + * Copyright (c) 2009, 2010 Nicira Networks. * Distributed under the terms of the GNU GPL version 2. 
* * Significant portions of this file may be copied from parts of the Linux @@ -16,7 +16,6 @@ struct sk_buff; struct odp_flow_key; union odp_action; -int dp_xmit_skb(struct sk_buff *); int execute_actions(struct datapath *dp, struct sk_buff *skb, struct odp_flow_key *key, const union odp_action *, int n_actions, diff --git a/datapath/datapath.c b/datapath/datapath.c index 9dfd6042..a7b20f5f 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -21,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -42,13 +40,12 @@ #include #include #include -#include #include "openvswitch/datapath-protocol.h" #include "datapath.h" #include "actions.h" -#include "dp_dev.h" #include "flow.h" +#include "vport-internal_dev.h" #include "compat.h" @@ -62,7 +59,7 @@ EXPORT_SYMBOL(dp_ioctl_hook); * dp_mutex nests inside the RTNL lock: if you need both you must take the RTNL * lock first. * - * It is safe to access the datapath and net_bridge_port structures with just + * It is safe to access the datapath and dp_port structures with just * dp_mutex. */ static struct datapath *dps[ODP_MAX]; @@ -71,7 +68,7 @@ static DEFINE_MUTEX(dp_mutex); /* Number of milliseconds between runs of the maintenance thread. */ #define MAINT_SLEEP_MSECS 1000 -static int new_nbp(struct datapath *, struct net_device *, int port_no); +static int new_dp_port(struct datapath *, struct odp_port *, int port_no); /* Must be called with rcu_read_lock or dp_mutex. */ struct datapath *get_dp(int dp_idx) @@ -94,6 +91,12 @@ static struct datapath *get_dp_locked(int dp_idx) return dp; } +/* Must be called with rcu_read_lock or RTNL lock. 
*/ +const char *dp_name(const struct datapath *dp) +{ + return vport_get_name(dp->ports[ODPP_LOCAL]->vport); +} + static inline size_t br_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) @@ -106,14 +109,21 @@ static inline size_t br_nlmsg_size(void) } static int dp_fill_ifinfo(struct sk_buff *skb, - const struct net_bridge_port *port, + const struct dp_port *port, int event, unsigned int flags) { const struct datapath *dp = port->dp; - const struct net_device *dev = port->dev; + int ifindex = vport_get_ifindex(port->vport); + int iflink = vport_get_iflink(port->vport); struct ifinfomsg *hdr; struct nlmsghdr *nlh; + if (ifindex < 0) + return ifindex; + + if (iflink < 0) + return iflink; + nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags); if (nlh == NULL) return -EMSGSIZE; @@ -121,24 +131,26 @@ static int dp_fill_ifinfo(struct sk_buff *skb, hdr = nlmsg_data(nlh); hdr->ifi_family = AF_BRIDGE; hdr->__ifi_pad = 0; - hdr->ifi_type = dev->type; - hdr->ifi_index = dev->ifindex; - hdr->ifi_flags = dev_get_flags(dev); + hdr->ifi_type = ARPHRD_ETHER; + hdr->ifi_index = ifindex; + hdr->ifi_flags = vport_get_flags(port->vport); hdr->ifi_change = 0; - NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); - NLA_PUT_U32(skb, IFLA_MASTER, dp->ports[ODPP_LOCAL]->dev->ifindex); - NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); + NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port->vport)); + NLA_PUT_U32(skb, IFLA_MASTER, vport_get_ifindex(dp->ports[ODPP_LOCAL]->vport)); + NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port->vport)); #ifdef IFLA_OPERSTATE NLA_PUT_U8(skb, IFLA_OPERSTATE, - netif_running(dev) ? dev->operstate : IF_OPER_DOWN); + vport_is_running(port->vport) + ? 
vport_get_operstate(port->vport) + : IF_OPER_DOWN); #endif - if (dev->addr_len) - NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); + NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, + vport_get_addr(port->vport)); - if (dev->ifindex != dev->iflink) - NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); + if (ifindex != iflink) + NLA_PUT_U32(skb, IFLA_LINK,iflink); return nlmsg_end(skb, nlh); @@ -147,9 +159,8 @@ nla_put_failure: return -EMSGSIZE; } -static void dp_ifinfo_notify(int event, struct net_bridge_port *port) +static void dp_ifinfo_notify(int event, struct dp_port *port) { - struct net *net = dev_net(port->dev); struct sk_buff *skb; int err = -ENOBUFS; @@ -164,11 +175,11 @@ static void dp_ifinfo_notify(int event, struct net_bridge_port *port) kfree_skb(skb); goto errout; } - rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); + rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); return; errout: if (err < 0) - rtnl_set_sk_err(net, RTNLGRP_LINK, err); + rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err); } static void release_dp(struct kobject *kobj) @@ -183,7 +194,7 @@ static struct kobj_type dp_ktype = { static int create_dp(int dp_idx, const char __user *devnamep) { - struct net_device *dp_dev; + struct odp_port internal_dev_port; char devname[IFNAMSIZ]; struct datapath *dp; int err; @@ -234,14 +245,13 @@ static int create_dp(int dp_idx, const char __user *devnamep) goto err_free_dp; /* Set up our datapath device. 
*/ - dp_dev = dp_dev_create(dp, devname, ODPP_LOCAL); - err = PTR_ERR(dp_dev); - if (IS_ERR(dp_dev)) - goto err_destroy_table; - - err = new_nbp(dp, dp_dev, ODPP_LOCAL); + strncpy(internal_dev_port.devname, devname, IFNAMSIZ - 1); + internal_dev_port.flags = ODP_PORT_INTERNAL; + err = new_dp_port(dp, &internal_dev_port, ODPP_LOCAL); if (err) { - dp_dev_destroy(dp_dev); + if (err == -EBUSY) + err = -EEXIST; + goto err_destroy_table; } @@ -259,7 +269,7 @@ static int create_dp(int dp_idx, const char __user *devnamep) return 0; err_destroy_local_port: - dp_del_port(dp->ports[ODPP_LOCAL]); + dp_detach_port(dp->ports[ODPP_LOCAL], 1); err_destroy_table: dp_table_destroy(dp->table, 0); err_free_dp: @@ -275,18 +285,18 @@ err: static void do_destroy_dp(struct datapath *dp) { - struct net_bridge_port *p, *n; + struct dp_port *p, *n; int i; list_for_each_entry_safe (p, n, &dp->port_list, node) if (p->port_no != ODPP_LOCAL) - dp_del_port(p); + dp_detach_port(p, 1); dp_sysfs_del_dp(dp); rcu_assign_pointer(dps[dp->dp_idx], NULL); - dp_del_port(dp->ports[ODPP_LOCAL]); + dp_detach_port(dp->ports[ODPP_LOCAL], 1); dp_table_destroy(dp->table, 1); @@ -320,9 +330,9 @@ err_unlock: return err; } -static void release_nbp(struct kobject *kobj) +static void release_dp_port(struct kobject *kobj) { - struct net_bridge_port *p = container_of(kobj, struct net_bridge_port, kobj); + struct dp_port *p = container_of(kobj, struct dp_port, kobj); kfree(p); } @@ -330,36 +340,45 @@ static struct kobj_type brport_ktype = { #ifdef CONFIG_SYSFS .sysfs_ops = &brport_sysfs_ops, #endif - .release = release_nbp + .release = release_dp_port }; /* Called with RTNL lock and dp_mutex. 
*/ -static int new_nbp(struct datapath *dp, struct net_device *dev, int port_no) +static int new_dp_port(struct datapath *dp, struct odp_port *odp_port, int port_no) { - struct net_bridge_port *p; + struct vport *vport; + struct dp_port *p; + int err; + + vport = vport_locate(odp_port->devname); + if (!vport) { + vport_lock(); + + if (odp_port->flags & ODP_PORT_INTERNAL) + vport = __vport_add(odp_port->devname, "internal", NULL); + else + vport = __vport_add(odp_port->devname, "netdev", NULL); - if (dev->br_port != NULL) - return -EBUSY; + vport_unlock(); + + if (IS_ERR(vport)) + return PTR_ERR(vport); + } p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return -ENOMEM; - dev_set_promiscuity(dev, 1); - dev_hold(dev); p->port_no = port_no; p->dp = dp; - p->dev = dev; atomic_set(&p->sflow_pool, 0); - if (!is_dp_dev(dev)) - rcu_assign_pointer(dev->br_port, p); - else { - /* It would make sense to assign dev->br_port here too, but - * that causes packets received on internal ports to get caught - * in dp_frame_hook(). In turn dp_frame_hook() can reject them - * back to network stack, but that's a waste of time. 
*/ + + err = vport_attach(vport, p); + if (err) { + kfree(p); + return err; } - dev_disable_lro(dev); + rcu_assign_pointer(dp->ports[port_no], p); list_add_rcu(&p->node, &dp->port_list); dp->n_ports++; @@ -374,9 +393,8 @@ static int new_nbp(struct datapath *dp, struct net_device *dev, int port_no) return 0; } -static int add_port(int dp_idx, struct odp_port __user *portp) +static int attach_port(int dp_idx, struct odp_port __user *portp) { - struct net_device *dev; struct datapath *dp; struct odp_port port; int port_no; @@ -400,35 +418,16 @@ static int add_port(int dp_idx, struct odp_port __user *portp) goto out_unlock_dp; got_port_no: - if (!(port.flags & ODP_PORT_INTERNAL)) { - err = -ENODEV; - dev = dev_get_by_name(&init_net, port.devname); - if (!dev) - goto out_unlock_dp; - - err = -EINVAL; - if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER || - is_dp_dev(dev)) - goto out_put; - } else { - dev = dp_dev_create(dp, port.devname, port_no); - err = PTR_ERR(dev); - if (IS_ERR(dev)) - goto out_unlock_dp; - dev_hold(dev); - } - - err = new_nbp(dp, dev, port_no); + err = new_dp_port(dp, &port, port_no); if (err) - goto out_put; + goto out_unlock_dp; - set_dp_devs_mtu(dp, dev); + if (!(port.flags & ODP_PORT_INTERNAL)) + set_internal_devs_mtu(dp); dp_sysfs_add_if(dp->ports[port_no]); err = __put_user(port_no, &portp->port); -out_put: - dev_put(dev); out_unlock_dp: mutex_unlock(&dp->mutex); out_unlock_rtnl: @@ -437,45 +436,48 @@ out: return err; } -int dp_del_port(struct net_bridge_port *p) +int dp_detach_port(struct dp_port *p, int may_delete) { + struct vport *vport = p->vport; + int err; + ASSERT_RTNL(); if (p->port_no != ODPP_LOCAL) dp_sysfs_del_if(p); dp_ifinfo_notify(RTM_DELLINK, p); - p->dp->n_ports--; - - if (is_dp_dev(p->dev)) { - /* Make sure that no packets arrive from now on, since - * dp_dev_xmit() will try to find itself through - * p->dp->ports[], and we're about to set that to null. 
*/ - netif_tx_disable(p->dev); - } - /* First drop references to device. */ - dev_set_promiscuity(p->dev, -1); + p->dp->n_ports--; list_del_rcu(&p->node); rcu_assign_pointer(p->dp->ports[p->port_no], NULL); - rcu_assign_pointer(p->dev->br_port, NULL); + + err = vport_detach(vport); + if (err) + return err; /* Then wait until no one is still using it, and destroy it. */ synchronize_rcu(); - if (is_dp_dev(p->dev)) - dp_dev_destroy(p->dev); - dev_put(p->dev); + if (may_delete) { + const char *port_type = vport_get_type(vport); + + if (!strcmp(port_type, "netdev") || !strcmp(port_type, "internal")) { + vport_lock(); + __vport_del(vport); + vport_unlock(); + } + } + kobject_put(&p->kobj); return 0; } -static int del_port(int dp_idx, int port_no) +static int detach_port(int dp_idx, int port_no) { - struct net_bridge_port *p; + struct dp_port *p; struct datapath *dp; - LIST_HEAD(dp_devs); int err; err = -EINVAL; @@ -493,7 +495,7 @@ static int del_port(int dp_idx, int port_no) if (!p) goto out_unlock_dp; - err = dp_del_port(p); + err = dp_detach_port(p, 1); out_unlock_dp: mutex_unlock(&dp->mutex); @@ -503,31 +505,8 @@ out: return err; } -/* Must be called with rcu_read_lock. */ -static void -do_port_input(struct net_bridge_port *p, struct sk_buff *skb) -{ - /* LRO isn't suitable for bridging. We turn it off but make sure - * that it wasn't reactivated. */ - if (skb_warn_if_lro(skb)) - return; - - /* Make our own copy of the packet. Otherwise we will mangle the - * packet for anyone who came before us (e.g. tcpdump via AF_PACKET). - * (No one comes after us, since we tell handle_bridge() that we took - * the packet.) */ - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - return; - - /* Push the Ethernet header back on. */ - skb_push(skb, ETH_HLEN); - skb_reset_mac_header(skb); - dp_process_received_packet(skb, p); -} - /* Must be called with rcu_read_lock and with bottom-halves disabled. 
*/ -void dp_process_received_packet(struct sk_buff *skb, struct net_bridge_port *p) +void dp_process_received_packet(struct dp_port *p, struct sk_buff *skb) { struct datapath *dp = p->dp; struct dp_stats_percpu *stats; @@ -535,9 +514,10 @@ void dp_process_received_packet(struct sk_buff *skb, struct net_bridge_port *p) struct sw_flow *flow; WARN_ON_ONCE(skb_shared(skb)); + skb_warn_if_lro(skb); + OVS_CB(skb)->dp_port = p; compute_ip_summed(skb, false); - OVS_CB(skb)->tun_id = 0; /* BHs are off so we don't have to use get_cpu()/put_cpu() here. */ stats = percpu_ptr(dp->stats_percpu, smp_processor_id()); @@ -563,29 +543,6 @@ void dp_process_received_packet(struct sk_buff *skb, struct net_bridge_port *p) } } -/* - * Used as br_handle_frame_hook. (Cannot run bridge at the same time, even on - * different set of devices!) - */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) -/* Called with rcu_read_lock and bottom-halves disabled. */ -static struct sk_buff *dp_frame_hook(struct net_bridge_port *p, - struct sk_buff *skb) -{ - do_port_input(p, skb); - return NULL; -} -#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) -/* Called with rcu_read_lock and bottom-halves disabled. 
*/ -static int dp_frame_hook(struct net_bridge_port *p, struct sk_buff **pskb) -{ - do_port_input(p, *pskb); - return 1; -} -#else -#error -#endif - #if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID) /* This code is based on a skb_checksum_setup from net/dev/core.c from a * combination of Lenny's 2.6.26 Xen kernel and Xen's @@ -779,13 +736,10 @@ queue_control_packets(struct sk_buff *skb, struct sk_buff_head *queue, int port_no; int err; - port_no = ODPP_LOCAL; - if (skb->dev) { - if (skb->dev->br_port) - port_no = skb->dev->br_port->port_no; - else if (is_dp_dev(skb->dev)) - port_no = dp_dev_priv(skb->dev)->port_no; - } + if (OVS_CB(skb)->dp_port) + port_no = OVS_CB(skb)->dp_port->port_no; + else + port_no = ODPP_LOCAL; do { struct odp_msg *header; @@ -1304,11 +1258,10 @@ static int do_execute(struct datapath *dp, const struct odp_execute *executep) if (!skb) goto error_free_actions; - if (execute.in_port < DP_MAX_PORTS) { - struct net_bridge_port *p = dp->ports[execute.in_port]; - if (p) - skb->dev = p->dev; - } + if (execute.in_port < DP_MAX_PORTS) + OVS_CB(skb)->dp_port = dp->ports[execute.in_port]; + else + OVS_CB(skb)->dp_port = NULL; err = -EFAULT; if (copy_from_user(skb_put(skb, execute.length), execute.data, @@ -1368,57 +1321,58 @@ static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp) /* MTU of the dp pseudo-device: ETH_DATA_LEN or the minimum of the ports */ int dp_min_mtu(const struct datapath *dp) { - struct net_bridge_port *p; + struct dp_port *p; int mtu = 0; ASSERT_RTNL(); list_for_each_entry_rcu (p, &dp->port_list, node) { - struct net_device *dev = p->dev; + int dev_mtu; /* Skip any internal ports, since that's what we're trying to * set. */ - if (is_dp_dev(dev)) + if (is_internal_vport(p->vport)) continue; - if (!mtu || dev->mtu < mtu) - mtu = dev->mtu; + dev_mtu = vport_get_mtu(p->vport); + if (!mtu || dev_mtu < mtu) + mtu = dev_mtu; } return mtu ? 
mtu : ETH_DATA_LEN; } -/* Sets the MTU of all datapath devices to the minimum of the ports. 'dev' - * is the device whose MTU may have changed. Must be called with RTNL lock - * and dp_mutex. */ -void set_dp_devs_mtu(const struct datapath *dp, struct net_device *dev) +/* Sets the MTU of all datapath devices to the minimum of the ports. Must + * be called with RTNL lock and dp_mutex. */ +void set_internal_devs_mtu(const struct datapath *dp) { - struct net_bridge_port *p; + struct dp_port *p; int mtu; ASSERT_RTNL(); - if (is_dp_dev(dev)) - return; - mtu = dp_min_mtu(dp); list_for_each_entry_rcu (p, &dp->port_list, node) { - struct net_device *br_dev = p->dev; - - if (is_dp_dev(br_dev)) - dev_set_mtu(br_dev, mtu); + if (is_internal_vport(p->vport)) + vport_set_mtu(p->vport, mtu); } } static int -put_port(const struct net_bridge_port *p, struct odp_port __user *uop) +put_port(const struct dp_port *p, struct odp_port __user *uop) { struct odp_port op; + memset(&op, 0, sizeof op); - strncpy(op.devname, p->dev->name, sizeof op.devname); + + rcu_read_lock(); + strncpy(op.devname, vport_get_name(p->vport), sizeof op.devname); + rcu_read_unlock(); + op.port = p->port_no; - op.flags = is_dp_dev(p->dev) ? ODP_PORT_INTERNAL : 0; + op.flags = is_internal_vport(p->vport) ? ODP_PORT_INTERNAL : 0; + return copy_to_user(uop, &op, sizeof op) ? 
-EFAULT : 0; } @@ -1429,41 +1383,52 @@ query_port(struct datapath *dp, struct odp_port __user *uport) if (copy_from_user(&port, uport, sizeof port)) return -EFAULT; + if (port.devname[0]) { - struct net_bridge_port *p; - struct net_device *dev; - int err; + struct vport *vport; + struct dp_port *dp_port; + int err = 0; port.devname[IFNAMSIZ - 1] = '\0'; - dev = dev_get_by_name(&init_net, port.devname); - if (!dev) - return -ENODEV; + vport_lock(); + rcu_read_lock(); - p = dev->br_port; - if (!p && is_dp_dev(dev)) { - struct dp_dev *dp_dev = dp_dev_priv(dev); - if (dp_dev->dp == dp) - p = dp->ports[dp_dev->port_no]; + vport = vport_locate(port.devname); + if (!vport) { + err = -ENODEV; + goto error_unlock; } - err = p && p->dp == dp ? put_port(p, uport) : -ENOENT; - dev_put(dev); - return err; + dp_port = vport_get_dp_port(vport); + if (!dp_port || dp_port->dp != dp) { + err = -ENOENT; + goto error_unlock; + } + + port.port = dp_port->port_no; + +error_unlock: + rcu_read_unlock(); + vport_unlock(); + + if (err) + return err; } else { if (port.port >= DP_MAX_PORTS) return -EINVAL; if (!dp->ports[port.port]) return -ENOENT; - return put_port(dp->ports[port.port], uport); } + + return put_port(dp->ports[port.port], uport); } static int list_ports(struct datapath *dp, struct odp_portvec __user *pvp) { struct odp_portvec pv; - struct net_bridge_port *p; + struct dp_port *p; int idx; if (copy_from_user(&pv, pvp, sizeof pv)) @@ -1580,14 +1545,46 @@ static long openvswitch_ioctl(struct file *f, unsigned int cmd, err = destroy_dp(dp_idx); goto exit; - case ODP_PORT_ADD: - err = add_port(dp_idx, (struct odp_port __user *)argp); + case ODP_PORT_ATTACH: + err = attach_port(dp_idx, (struct odp_port __user *)argp); goto exit; - case ODP_PORT_DEL: + case ODP_PORT_DETACH: err = get_user(port_no, (int __user *)argp); if (!err) - err = del_port(dp_idx, port_no); + err = detach_port(dp_idx, port_no); + goto exit; + + case ODP_VPORT_ADD: + err = vport_add((struct odp_vport_add __user 
*)argp); + goto exit; + + case ODP_VPORT_MOD: + err = vport_mod((struct odp_vport_mod __user *)argp); + goto exit; + + case ODP_VPORT_DEL: + err = vport_del((char __user *)argp); + goto exit; + + case ODP_VPORT_STATS_GET: + err = vport_stats_get((struct odp_vport_stats_req __user *)argp); + goto exit; + + case ODP_VPORT_ETHER_GET: + err = vport_ether_get((struct odp_vport_ether __user *)argp); + goto exit; + + case ODP_VPORT_ETHER_SET: + err = vport_ether_set((struct odp_vport_ether __user *)argp); + goto exit; + + case ODP_VPORT_MTU_GET: + err = vport_mtu_get((struct odp_vport_mtu __user *)argp); + goto exit; + + case ODP_VPORT_MTU_SET: + err = vport_mtu_set((struct odp_vport_mtu __user *)argp); goto exit; } @@ -1784,89 +1781,37 @@ struct file_operations openvswitch_fops = { static int major; -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) -static struct llc_sap *dp_stp_sap; - -static int dp_stp_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - /* We don't really care about STP packets, we just listen for them for - * mutual exclusion with the bridge module, so this just discards - * them. */ - kfree_skb(skb); - return 0; -} - -static int dp_avoid_bridge_init(void) -{ - /* Register to receive STP packets because the bridge module also - * attempts to do so. Since there can only be a single listener for a - * given protocol, this provides mutual exclusion against the bridge - * module, preventing both of them from being loaded at the same - * time. */ - dp_stp_sap = llc_sap_open(LLC_SAP_BSPAN, dp_stp_rcv); - if (!dp_stp_sap) { - printk(KERN_ERR "openvswitch: can't register sap for STP (probably the bridge module is loaded)\n"); - return -EADDRINUSE; - } - return 0; -} - -static void dp_avoid_bridge_exit(void) -{ - llc_sap_put(dp_stp_sap); -} -#else /* Linux 2.6.27 or later. 
*/ -static int dp_avoid_bridge_init(void) -{ - /* Linux 2.6.27 introduces a way for multiple clients to register for - * STP packets, which interferes with what we try to do above. - * Instead, just check whether there's a bridge hook defined. This is - * not as safe--the bridge module is willing to load over the top of - * us--but it provides a little bit of protection. */ - if (br_handle_frame_hook) { - printk(KERN_ERR "openvswitch: bridge module is loaded, cannot load over it\n"); - return -EADDRINUSE; - } - return 0; -} - -static void dp_avoid_bridge_exit(void) -{ - /* Nothing to do. */ -} -#endif /* Linux 2.6.27 or later */ - static int __init dp_init(void) { + struct sk_buff *dummy_skb; int err; - printk("Open vSwitch %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR); + BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb)); - err = dp_avoid_bridge_init(); - if (err) - return err; + printk("Open vSwitch %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR); err = flow_init(); if (err) goto error; - err = register_netdevice_notifier(&dp_device_notifier); + err = vport_init(); if (err) goto error_flow_exit; + err = register_netdevice_notifier(&dp_device_notifier); + if (err) + goto error_vport_exit; + major = register_chrdev(0, "openvswitch", &openvswitch_fops); if (err < 0) goto error_unreg_notifier; - /* Hook into callback used by the bridge to intercept packets. - * Parasites we are. 
*/ - br_handle_frame_hook = dp_frame_hook; - return 0; error_unreg_notifier: unregister_netdevice_notifier(&dp_device_notifier); +error_vport_exit: + vport_exit(); error_flow_exit: flow_exit(); error: @@ -1878,9 +1823,8 @@ static void dp_cleanup(void) rcu_barrier(); unregister_chrdev(major, "openvswitch"); unregister_netdevice_notifier(&dp_device_notifier); + vport_exit(); flow_exit(); - br_handle_frame_hook = NULL; - dp_avoid_bridge_exit(); } module_init(dp_init); diff --git a/datapath/datapath.h b/datapath/datapath.h index faf24704..553e19fd 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -21,6 +21,9 @@ #include "flow.h" #include "dp_sysfs.h" +struct vport; +struct dp_port; + /* Mask for the priority bits in a vlan header. If we ever merge upstream * then this should go into include/linux/if_vlan.h. */ #define VLAN_PCP_MASK 0xe000 @@ -120,7 +123,7 @@ struct dp_port_group { * @table: Current flow table (RCU protected). * @groups: Port groups, used by ODPAT_OUTPUT_GROUP action (RCU protected). * @n_ports: Number of ports currently in @ports. - * @ports: Map from port number to &struct net_bridge_port. %ODPP_LOCAL port + * @ports: Map from port number to &struct dp_port. %ODPP_LOCAL port * always exists, other ports may be %NULL. * @port_list: List of all ports in @ports in arbitrary order. * @stats_percpu: Per-CPU datapath statistics. @@ -148,7 +151,7 @@ struct datapath { /* Switch ports. */ unsigned int n_ports; - struct net_bridge_port *ports[DP_MAX_PORTS]; + struct dp_port *ports[DP_MAX_PORTS]; struct list_head port_list; /* Stats. */ @@ -159,23 +162,23 @@ struct datapath { }; /** - * struct net_bridge_port - one port within a datapath + * struct dp_port - one port within a datapath * @port_no: Index into @dp's @ports array. * @dp: Datapath to which this port belongs. - * @dev: The network device attached to this port. The @br_port member in @dev - * points back to this &struct net_bridge_port. + * @vport: The network device attached to this port. 
The contents depend on + * the device and should be accessed only through the vport_* functions. * @kobj: Represents /sys/class/net//brport. * @linkname: The name of the link from /sys/class/net//brif to this - * &struct net_bridge_port. (We keep this around so that we can delete it - * if @dev gets renamed.) Set to the null string when no link exists. + * &struct dp_port. (We keep this around so that we can delete it if the + * device gets renamed.) Set to the null string when no link exists. * @node: Element in @dp's @port_list. * @sflow_pool: Number of packets that were candidates for sFlow sampling, * regardless of whether they were actually chosen and sent down to userspace. */ -struct net_bridge_port { +struct dp_port { u16 port_no; struct datapath *dp; - struct net_device *dev; + struct vport *vport; struct kobject kobj; char linkname[IFNAMSIZ]; struct list_head node; @@ -191,10 +194,12 @@ enum csum_type { /** * struct ovs_skb_cb - OVS data in skb CB + * @dp_port: The datapath port on which the skb entered the switch. * @ip_summed: Consistently stores L4 checksumming status across different * kernel versions. 
*/ struct ovs_skb_cb { + struct dp_port *dp_port; enum csum_type ip_summed; __be32 tun_id; }; @@ -215,18 +220,14 @@ int dp_table_foreach(struct dp_table *table, int (*callback)(struct sw_flow *flow, void *aux), void *aux); -void dp_process_received_packet(struct sk_buff *, struct net_bridge_port *); -int dp_del_port(struct net_bridge_port *); +void dp_process_received_packet(struct dp_port *, struct sk_buff *); +int dp_detach_port(struct dp_port *, int may_delete); int dp_output_control(struct datapath *, struct sk_buff *, int, u32 arg); int dp_min_mtu(const struct datapath *dp); -void set_dp_devs_mtu(const struct datapath *dp, struct net_device *dev); +void set_internal_devs_mtu(const struct datapath *dp); struct datapath *get_dp(int dp_idx); - -static inline const char *dp_name(const struct datapath *dp) -{ - return dp->ports[ODPP_LOCAL]->dev->name; -} +const char *dp_name(const struct datapath *dp); #if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID) int vswitch_skb_checksum_setup(struct sk_buff *skb); diff --git a/datapath/dp_dev.c b/datapath/dp_dev.c deleted file mode 100644 index 2bbd6fec..00000000 --- a/datapath/dp_dev.c +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Copyright (c) 2009, 2010 Nicira Networks. - * Distributed under the terms of the GNU GPL version 2. - * - * Significant portions of this file may be copied from parts of the Linux - * kernel, by Linus Torvalds and others. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "datapath.h" -#include "dp_dev.h" - -struct pcpu_lstats { - unsigned long rx_packets; - unsigned long rx_bytes; - unsigned long tx_packets; - unsigned long tx_bytes; -}; - -struct datapath *dp_dev_get_dp(struct net_device *netdev) -{ - return dp_dev_priv(netdev)->dp; -} - -static struct net_device_stats *dp_dev_get_stats(struct net_device *netdev) -{ - struct dp_dev *dp_dev = dp_dev_priv(netdev); - struct net_device_stats *stats; - int i; - - stats = &dp_dev->stats; - memset(stats, 0, sizeof *stats); - for_each_possible_cpu(i) { - const struct pcpu_lstats *lb_stats; - - lb_stats = per_cpu_ptr(dp_dev->lstats, i); - stats->rx_bytes += lb_stats->rx_bytes; - stats->rx_packets += lb_stats->rx_packets; - stats->tx_bytes += lb_stats->tx_bytes; - stats->tx_packets += lb_stats->tx_packets; - } - return stats; -} - -int dp_dev_recv(struct net_device *netdev, struct sk_buff *skb) -{ - struct dp_dev *dp_dev = dp_dev_priv(netdev); - struct pcpu_lstats *lb_stats; - int len; - len = skb->len; - skb->pkt_type = PACKET_HOST; - skb->protocol = eth_type_trans(skb, netdev); - if (in_interrupt()) - netif_rx(skb); - else - netif_rx_ni(skb); - netdev->last_rx = jiffies; - - preempt_disable(); - lb_stats = per_cpu_ptr(dp_dev->lstats, smp_processor_id()); - lb_stats->rx_packets++; - lb_stats->rx_bytes += len; - preempt_enable(); - - return len; -} - -static int dp_dev_mac_addr(struct net_device *dev, void *p) -{ - struct sockaddr *addr = p; - - if (!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); - return 0; -} - -/* Not reentrant (because it is called with BHs disabled), but may be called - * simultaneously on different CPUs. 
*/ -static int dp_dev_xmit(struct sk_buff *skb, struct net_device *netdev) -{ - struct dp_dev *dp_dev = dp_dev_priv(netdev); - struct pcpu_lstats *lb_stats; - - /* dp_process_received_packet() needs its own clone. */ - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - return 0; - - lb_stats = per_cpu_ptr(dp_dev->lstats, smp_processor_id()); - lb_stats->tx_packets++; - lb_stats->tx_bytes += skb->len; - - skb_reset_mac_header(skb); - rcu_read_lock_bh(); - dp_process_received_packet(skb, dp_dev->dp->ports[dp_dev->port_no]); - rcu_read_unlock_bh(); - - return 0; -} - -static int dp_dev_open(struct net_device *netdev) -{ - netif_start_queue(netdev); - return 0; -} - -static int dp_dev_stop(struct net_device *netdev) -{ - netif_stop_queue(netdev); - return 0; -} - -static void dp_getinfo(struct net_device *netdev, struct ethtool_drvinfo *info) -{ - struct dp_dev *dp_dev = dp_dev_priv(netdev); - strcpy(info->driver, "openvswitch"); - sprintf(info->bus_info, "%d.%d", dp_dev->dp->dp_idx, dp_dev->port_no); -} - -static struct ethtool_ops dp_ethtool_ops = { - .get_drvinfo = dp_getinfo, - .get_link = ethtool_op_get_link, - .get_sg = ethtool_op_get_sg, - .get_tx_csum = ethtool_op_get_tx_csum, - .get_tso = ethtool_op_get_tso, -}; - -static int dp_dev_change_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu < 68 || new_mtu > dp_min_mtu(dp_dev_get_dp(dev))) - return -EINVAL; - - dev->mtu = new_mtu; - return 0; -} - -static int dp_dev_init(struct net_device *netdev) -{ - struct dp_dev *dp_dev = dp_dev_priv(netdev); - - dp_dev->lstats = alloc_percpu(struct pcpu_lstats); - if (!dp_dev->lstats) - return -ENOMEM; - - return 0; -} - -static void dp_dev_free(struct net_device *netdev) -{ - struct dp_dev *dp_dev = dp_dev_priv(netdev); - - free_percpu(dp_dev->lstats); - free_netdev(netdev); -} - -static int dp_dev_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - if (dp_ioctl_hook) - return dp_ioctl_hook(dev, ifr, cmd); - return -EOPNOTSUPP; -} - -#ifdef 
HAVE_NET_DEVICE_OPS -static const struct net_device_ops dp_dev_netdev_ops = { - .ndo_init = dp_dev_init, - .ndo_open = dp_dev_open, - .ndo_stop = dp_dev_stop, - .ndo_start_xmit = dp_dev_xmit, - .ndo_set_mac_address = dp_dev_mac_addr, - .ndo_do_ioctl = dp_dev_do_ioctl, - .ndo_change_mtu = dp_dev_change_mtu, - .ndo_get_stats = dp_dev_get_stats, -}; -#endif - -static void -do_setup(struct net_device *netdev) -{ - ether_setup(netdev); - -#ifdef HAVE_NET_DEVICE_OPS - netdev->netdev_ops = &dp_dev_netdev_ops; -#else - netdev->do_ioctl = dp_dev_do_ioctl; - netdev->get_stats = dp_dev_get_stats; - netdev->hard_start_xmit = dp_dev_xmit; - netdev->open = dp_dev_open; - netdev->stop = dp_dev_stop; - netdev->set_mac_address = dp_dev_mac_addr; - netdev->change_mtu = dp_dev_change_mtu; - netdev->init = dp_dev_init; -#endif - - netdev->destructor = dp_dev_free; - SET_ETHTOOL_OPS(netdev, &dp_ethtool_ops); - netdev->tx_queue_len = 0; - - netdev->flags = IFF_BROADCAST | IFF_MULTICAST; - netdev->features = NETIF_F_LLTX; /* XXX other features? */ - - random_ether_addr(netdev->dev_addr); - - /* Set the OUI to the Nicira one. */ - netdev->dev_addr[0] = 0x00; - netdev->dev_addr[1] = 0x23; - netdev->dev_addr[2] = 0x20; - - /* Set the top bit to indicate random Nicira address. */ - netdev->dev_addr[3] |= 0x80; -} - -/* Create a datapath device associated with 'dp'. If 'dp_name' is null, - * the device name will be of the form 'of'. Returns the new device or - * an error code. - * - * Called with RTNL lock and dp_mutex. 
*/ -struct net_device *dp_dev_create(struct datapath *dp, const char *dp_name, int port_no) -{ - struct dp_dev *dp_dev; - struct net_device *netdev; - char dev_name[IFNAMSIZ]; - int err; - - if (dp_name) { - if (strlen(dp_name) >= IFNAMSIZ) - return ERR_PTR(-EINVAL); - strncpy(dev_name, dp_name, sizeof(dev_name)); - } else - snprintf(dev_name, sizeof dev_name, "of%d", dp->dp_idx); - - netdev = alloc_netdev(sizeof(struct dp_dev), dev_name, do_setup); - if (!netdev) - return ERR_PTR(-ENOMEM); - - dp_dev = dp_dev_priv(netdev); - dp_dev->dp = dp; - dp_dev->port_no = port_no; - dp_dev->dev = netdev; - - err = register_netdevice(netdev); - if (err) { - free_netdev(netdev); - return ERR_PTR(err); - } - - return netdev; -} - -/* Called with RTNL lock and dp_mutex.*/ -void dp_dev_destroy(struct net_device *netdev) -{ - unregister_netdevice(netdev); -} - -int is_dp_dev(struct net_device *netdev) -{ -#ifdef HAVE_NET_DEVICE_OPS - return netdev->netdev_ops == &dp_dev_netdev_ops; -#else - return netdev->open == dp_dev_open; -#endif -} diff --git a/datapath/dp_dev.h b/datapath/dp_dev.h deleted file mode 100644 index 1fb4394f..00000000 --- a/datapath/dp_dev.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2009 Nicira Networks. - * Distributed under the terms of the GNU GPL version 2. - * - * Significant portions of this file may be copied from parts of the Linux - * kernel, by Linus Torvalds and others. 
- */ - -#ifndef DP_DEV_H -#define DP_DEV_H 1 - -#include - -struct dp_dev { - struct datapath *dp; - int port_no; - - struct net_device *dev; - struct net_device_stats stats; - struct pcpu_lstats *lstats; -}; - -static inline struct dp_dev *dp_dev_priv(struct net_device *netdev) -{ - return netdev_priv(netdev); -} - -struct net_device *dp_dev_create(struct datapath *, const char *, int port_no); -void dp_dev_destroy(struct net_device *); -int dp_dev_recv(struct net_device *, struct sk_buff *); -int is_dp_dev(struct net_device *); -struct datapath *dp_dev_get_dp(struct net_device *); - -#endif /* dp_dev.h */ diff --git a/datapath/dp_notify.c b/datapath/dp_notify.c index 0278988d..4a16a93f 100644 --- a/datapath/dp_notify.c +++ b/datapath/dp_notify.c @@ -1,6 +1,6 @@ /* * Distributed under the terms of the GNU GPL version 2. - * Copyright (c) 2007, 2008, 2009 Nicira Networks. + * Copyright (c) 2007, 2008, 2009, 2010 Nicira Networks. * * Significant portions of this file may be copied from parts of the Linux * kernel, by Linus Torvalds and others. 
@@ -11,21 +11,28 @@ #include #include "datapath.h" -#include "dp_dev.h" +#include "vport-internal_dev.h" +#include "vport-netdev.h" static int dp_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct net_bridge_port *p; + struct vport *vport; + struct dp_port *p; struct datapath *dp; - if (is_dp_dev(dev)) { - struct dp_dev *dp_dev = dp_dev_priv(dev); - p = dp_dev->dp->ports[dp_dev->port_no]; - } else { - p = dev->br_port; + if (is_internal_dev(dev)) + vport = internal_dev_get_vport(dev); + else { + vport = netdev_get_vport(dev); + + if (!vport) + return NOTIFY_DONE; } + + p = vport_get_dp_port(vport); + if (!p) return NOTIFY_DONE; dp = p->dp; @@ -33,7 +40,7 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event, switch (event) { case NETDEV_UNREGISTER: mutex_lock(&dp->mutex); - dp_del_port(p); + dp_detach_port(p, 1); mutex_unlock(&dp->mutex); break; @@ -47,9 +54,9 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event, break; case NETDEV_CHANGEMTU: - if (!is_dp_dev(dev)) { + if (!is_internal_dev(dev)) { mutex_lock(&dp->mutex); - set_dp_devs_mtu(dp, dev); + set_internal_devs_mtu(dp); mutex_unlock(&dp->mutex); } break; diff --git a/datapath/dp_sysfs.h b/datapath/dp_sysfs.h index be044eaf..2d688ac7 100644 --- a/datapath/dp_sysfs.h +++ b/datapath/dp_sysfs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009 Nicira Networks. + * Copyright (c) 2009, 2010 Nicira Networks. * Distributed under the terms of the GNU GPL version 2. 
* * Significant portions of this file may be copied from parts of the Linux @@ -10,15 +10,15 @@ #define DP_SYSFS_H 1 struct datapath; -struct net_bridge_port; +struct dp_port; /* dp_sysfs_dp.c */ int dp_sysfs_add_dp(struct datapath *dp); int dp_sysfs_del_dp(struct datapath *dp); /* dp_sysfs_if.c */ -int dp_sysfs_add_if(struct net_bridge_port *p); -int dp_sysfs_del_if(struct net_bridge_port *p); +int dp_sysfs_add_if(struct dp_port *p); +int dp_sysfs_del_if(struct dp_port *p); #ifdef CONFIG_SYSFS extern struct sysfs_ops brport_sysfs_ops; diff --git a/datapath/dp_sysfs_dp.c b/datapath/dp_sysfs_dp.c index 3cd6d1af..91dd56f8 100644 --- a/datapath/dp_sysfs_dp.c +++ b/datapath/dp_sysfs_dp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009 Nicira Networks. + * Copyright (c) 2009, 2010 Nicira Networks. * Distributed under the terms of the GNU GPL version 2. * * Significant portions of this file may be copied from parts of the Linux @@ -26,24 +26,29 @@ #include "dp_sysfs.h" #include "datapath.h" -#include "dp_dev.h" +#include "vport-internal_dev.h" #ifdef CONFIG_SYSFS #define to_dev(obj) container_of(obj, struct device, kobj) /* Hack to attempt to build on more platforms. */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) -#define DP_DEVICE_ATTR CLASS_DEVICE_ATTR +#define INTERNAL_DEVICE_ATTR CLASS_DEVICE_ATTR #define DEVICE_PARAMS struct class_device *d #define DEVICE_ARGS d #define DEV_ATTR(NAME) class_device_attr_##NAME #else -#define DP_DEVICE_ATTR DEVICE_ATTR +#define INTERNAL_DEVICE_ATTR DEVICE_ATTR #define DEVICE_PARAMS struct device *d, struct device_attribute *attr #define DEVICE_ARGS d, attr #define DEV_ATTR(NAME) dev_attr_##NAME #endif +struct datapath *sysfs_get_dp(struct net_device *netdev) +{ + return vport_get_dp_port(internal_dev_get_vport(netdev))->dp; +} + /* * Common code for storing bridge parameters. 
*/ @@ -51,7 +56,7 @@ static ssize_t store_bridge_parm(DEVICE_PARAMS, const char *buf, size_t len, void (*set)(struct datapath *, unsigned long)) { - struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + struct datapath *dp = sysfs_get_dp(to_net_dev(d)); char *endp; unsigned long val; @@ -83,7 +88,7 @@ static ssize_t store_bridge_parm(DEVICE_PARAMS, static ssize_t show_forward_delay(DEVICE_PARAMS, char *buf) { #if 0 - struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + struct datapath *dp = sysfs_get_dp(to_net_dev(d)); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay)); #else return sprintf(buf, "%d\n", 0); @@ -107,7 +112,7 @@ static ssize_t store_forward_delay(DEVICE_PARAMS, { return store_bridge_parm(DEVICE_ARGS, buf, len, set_forward_delay); } -static DP_DEVICE_ATTR(forward_delay, S_IRUGO | S_IWUSR, +static INTERNAL_DEVICE_ATTR(forward_delay, S_IRUGO | S_IWUSR, show_forward_delay, store_forward_delay); static ssize_t show_hello_time(DEVICE_PARAMS, char *buf) @@ -138,7 +143,7 @@ static ssize_t store_hello_time(DEVICE_PARAMS, { return store_bridge_parm(DEVICE_ARGS, buf, len, set_hello_time); } -static DP_DEVICE_ATTR(hello_time, S_IRUGO | S_IWUSR, show_hello_time, +static INTERNAL_DEVICE_ATTR(hello_time, S_IRUGO | S_IWUSR, show_hello_time, store_hello_time); static ssize_t show_max_age(DEVICE_PARAMS, char *buf) @@ -168,12 +173,12 @@ static ssize_t store_max_age(DEVICE_PARAMS, { return store_bridge_parm(DEVICE_ARGS, buf, len, set_max_age); } -static DP_DEVICE_ATTR(max_age, S_IRUGO | S_IWUSR, show_max_age, store_max_age); +static INTERNAL_DEVICE_ATTR(max_age, S_IRUGO | S_IWUSR, show_max_age, store_max_age); static ssize_t show_ageing_time(DEVICE_PARAMS, char *buf) { #if 0 - struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + struct datapath *dp = sysfs_get_dp(to_net_dev(d)); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->ageing_time)); #else return sprintf(buf, "%d\n", 0); @@ -194,13 +199,13 @@ static ssize_t store_ageing_time(DEVICE_PARAMS, 
{ return store_bridge_parm(DEVICE_ARGS, buf, len, set_ageing_time); } -static DP_DEVICE_ATTR(ageing_time, S_IRUGO | S_IWUSR, show_ageing_time, +static INTERNAL_DEVICE_ATTR(ageing_time, S_IRUGO | S_IWUSR, show_ageing_time, store_ageing_time); static ssize_t show_stp_state(DEVICE_PARAMS, char *buf) { #if 0 - struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + struct datapath *dp = sysfs_get_dp(to_net_dev(d)); return sprintf(buf, "%d\n", br->stp_enabled); #else return sprintf(buf, "%d\n", 0); @@ -212,7 +217,7 @@ static ssize_t store_stp_state(DEVICE_PARAMS, const char *buf, size_t len) { - struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + struct datapath *dp = sysfs_get_dp(to_net_dev(d)); #if 0 char *endp; unsigned long val; @@ -233,13 +238,13 @@ static ssize_t store_stp_state(DEVICE_PARAMS, return len; } -static DP_DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state, +static INTERNAL_DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state, store_stp_state); static ssize_t show_priority(DEVICE_PARAMS, char *buf) { #if 0 - struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + struct datapath *dp = sysfs_get_dp(to_net_dev(d)); return sprintf(buf, "%d\n", (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]); #else @@ -261,7 +266,7 @@ static ssize_t store_priority(DEVICE_PARAMS, { return store_bridge_parm(DEVICE_ARGS, buf, len, set_priority); } -static DP_DEVICE_ATTR(priority, S_IRUGO | S_IWUSR, show_priority, store_priority); +static INTERNAL_DEVICE_ATTR(priority, S_IRUGO | S_IWUSR, show_priority, store_priority); static ssize_t show_root_id(DEVICE_PARAMS, char *buf) { @@ -271,18 +276,18 @@ static ssize_t show_root_id(DEVICE_PARAMS, char *buf) return sprintf(buf, "0000.010203040506\n"); #endif } -static DP_DEVICE_ATTR(root_id, S_IRUGO, show_root_id, NULL); +static INTERNAL_DEVICE_ATTR(root_id, S_IRUGO, show_root_id, NULL); static ssize_t show_bridge_id(DEVICE_PARAMS, char *buf) { - struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); - const unsigned char 
*addr = dp->ports[ODPP_LOCAL]->dev->dev_addr; + struct datapath *dp = sysfs_get_dp(to_net_dev(d)); + const unsigned char *addr = vport_get_addr(dp->ports[ODPP_LOCAL]->vport); /* xxx Do we need a lock of some sort? */ return sprintf(buf, "%.2x%.2x.%.2x%.2x%.2x%.2x%.2x%.2x\n", 0, 0, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); } -static DP_DEVICE_ATTR(bridge_id, S_IRUGO, show_bridge_id, NULL); +static INTERNAL_DEVICE_ATTR(bridge_id, S_IRUGO, show_bridge_id, NULL); static ssize_t show_root_port(DEVICE_PARAMS, char *buf) { @@ -292,7 +297,7 @@ static ssize_t show_root_port(DEVICE_PARAMS, char *buf) return sprintf(buf, "%d\n", 0); #endif } -static DP_DEVICE_ATTR(root_port, S_IRUGO, show_root_port, NULL); +static INTERNAL_DEVICE_ATTR(root_port, S_IRUGO, show_root_port, NULL); static ssize_t show_root_path_cost(DEVICE_PARAMS, char *buf) { @@ -302,7 +307,7 @@ static ssize_t show_root_path_cost(DEVICE_PARAMS, char *buf) return sprintf(buf, "%d\n", 0); #endif } -static DP_DEVICE_ATTR(root_path_cost, S_IRUGO, show_root_path_cost, NULL); +static INTERNAL_DEVICE_ATTR(root_path_cost, S_IRUGO, show_root_path_cost, NULL); static ssize_t show_topology_change(DEVICE_PARAMS, char *buf) { @@ -312,69 +317,69 @@ static ssize_t show_topology_change(DEVICE_PARAMS, char *buf) return sprintf(buf, "%d\n", 0); #endif } -static DP_DEVICE_ATTR(topology_change, S_IRUGO, show_topology_change, NULL); +static INTERNAL_DEVICE_ATTR(topology_change, S_IRUGO, show_topology_change, NULL); static ssize_t show_topology_change_detected(DEVICE_PARAMS, char *buf) { #if 0 - struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + struct datapath *dp = sysfs_get_dp(to_net_dev(d)); return sprintf(buf, "%d\n", br->topology_change_detected); #else return sprintf(buf, "%d\n", 0); #endif } -static DP_DEVICE_ATTR(topology_change_detected, S_IRUGO, +static INTERNAL_DEVICE_ATTR(topology_change_detected, S_IRUGO, show_topology_change_detected, NULL); static ssize_t show_hello_timer(DEVICE_PARAMS, char *buf) { #if 
0 - struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + struct datapath *dp = sysfs_get_dp(to_net_dev(d)); return sprintf(buf, "%ld\n", br_timer_value(&br->hello_timer)); #else return sprintf(buf, "%d\n", 0); #endif } -static DP_DEVICE_ATTR(hello_timer, S_IRUGO, show_hello_timer, NULL); +static INTERNAL_DEVICE_ATTR(hello_timer, S_IRUGO, show_hello_timer, NULL); static ssize_t show_tcn_timer(DEVICE_PARAMS, char *buf) { #if 0 - struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + struct datapath *dp = sysfs_get_dp(to_net_dev(d)); return sprintf(buf, "%ld\n", br_timer_value(&br->tcn_timer)); #else return sprintf(buf, "%d\n", 0); #endif } -static DP_DEVICE_ATTR(tcn_timer, S_IRUGO, show_tcn_timer, NULL); +static INTERNAL_DEVICE_ATTR(tcn_timer, S_IRUGO, show_tcn_timer, NULL); static ssize_t show_topology_change_timer(DEVICE_PARAMS, char *buf) { #if 0 - struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + struct datapath *dp = sysfs_get_dp(to_net_dev(d)); return sprintf(buf, "%ld\n", br_timer_value(&br->topology_change_timer)); #else return sprintf(buf, "%d\n", 0); #endif } -static DP_DEVICE_ATTR(topology_change_timer, S_IRUGO, show_topology_change_timer, +static INTERNAL_DEVICE_ATTR(topology_change_timer, S_IRUGO, show_topology_change_timer, NULL); static ssize_t show_gc_timer(DEVICE_PARAMS, char *buf) { #if 0 - struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + struct datapath *dp = sysfs_get_dp(to_net_dev(d)); return sprintf(buf, "%ld\n", br_timer_value(&br->gc_timer)); #else return sprintf(buf, "%d\n", 0); #endif } -static DP_DEVICE_ATTR(gc_timer, S_IRUGO, show_gc_timer, NULL); +static INTERNAL_DEVICE_ATTR(gc_timer, S_IRUGO, show_gc_timer, NULL); static ssize_t show_group_addr(DEVICE_PARAMS, char *buf) { #if 0 - struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + struct datapath *dp = sysfs_get_dp(to_net_dev(d)); return sprintf(buf, "%x:%x:%x:%x:%x:%x\n", br->group_addr[0], br->group_addr[1], br->group_addr[2], br->group_addr[3], @@ -387,7 +392,7 @@ static 
ssize_t show_group_addr(DEVICE_PARAMS, char *buf) static ssize_t store_group_addr(DEVICE_PARAMS, const char *buf, size_t len) { - struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); + struct datapath *dp = sysfs_get_dp(to_net_dev(d)); #if 0 unsigned new_addr[6]; int i; @@ -423,7 +428,7 @@ static ssize_t store_group_addr(DEVICE_PARAMS, return len; } -static DP_DEVICE_ATTR(group_addr, S_IRUGO | S_IWUSR, +static INTERNAL_DEVICE_ATTR(group_addr, S_IRUGO | S_IWUSR, show_group_addr, store_group_addr); static struct attribute *bridge_attrs[] = { @@ -464,7 +469,7 @@ static struct attribute_group bridge_group = { */ int dp_sysfs_add_dp(struct datapath *dp) { - struct kobject *kobj = &dp->ports[ODPP_LOCAL]->dev->NETDEV_DEV_MEMBER.kobj; + struct kobject *kobj = vport_get_kobj(dp->ports[ODPP_LOCAL]->vport); int err; /* Create /sys/class/net//bridge directory. */ @@ -493,7 +498,7 @@ int dp_sysfs_add_dp(struct datapath *dp) int dp_sysfs_del_dp(struct datapath *dp) { - struct kobject *kobj = &dp->ports[ODPP_LOCAL]->dev->NETDEV_DEV_MEMBER.kobj; + struct kobject *kobj = vport_get_kobj(dp->ports[ODPP_LOCAL]->vport); kobject_del(&dp->ifobj); sysfs_remove_group(kobj, &bridge_group); @@ -503,6 +508,6 @@ int dp_sysfs_del_dp(struct datapath *dp) #else /* !CONFIG_SYSFS */ int dp_sysfs_add_dp(struct datapath *dp) { return 0; } int dp_sysfs_del_dp(struct datapath *dp) { return 0; } -int dp_sysfs_add_if(struct net_bridge_port *p) { return 0; } -int dp_sysfs_del_if(struct net_bridge_port *p) { return 0; } +int dp_sysfs_add_if(struct dp_port *p) { return 0; } +int dp_sysfs_del_if(struct dp_port *p) { return 0; } #endif /* !CONFIG_SYSFS */ diff --git a/datapath/dp_sysfs_if.c b/datapath/dp_sysfs_if.c index 95c26dc4..e06037cb 100644 --- a/datapath/dp_sysfs_if.c +++ b/datapath/dp_sysfs_if.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009 Nicira Networks. + * Copyright (c) 2009, 2010 Nicira Networks. * Distributed under the terms of the GNU GPL version 2. 
* * Significant portions of this file may be copied from parts of the Linux @@ -18,15 +18,17 @@ #include #include #include -#include "dp_sysfs.h" + #include "datapath.h" +#include "dp_sysfs.h" +#include "vport.h" #ifdef CONFIG_SYSFS struct brport_attribute { struct attribute attr; - ssize_t (*show)(struct net_bridge_port *, char *); - ssize_t (*store)(struct net_bridge_port *, unsigned long); + ssize_t (*show)(struct dp_port *, char *); + ssize_t (*store)(struct dp_port *, unsigned long); }; #define BRPORT_ATTR(_name,_mode,_show,_store) \ @@ -38,7 +40,7 @@ struct brport_attribute brport_attr_##_name = { \ .store = _store, \ }; -static ssize_t show_path_cost(struct net_bridge_port *p, char *buf) +static ssize_t show_path_cost(struct dp_port *p, char *buf) { #if 0 return sprintf(buf, "%d\n", p->path_cost); @@ -46,7 +48,7 @@ static ssize_t show_path_cost(struct net_bridge_port *p, char *buf) return sprintf(buf, "%d\n", 0); #endif } -static ssize_t store_path_cost(struct net_bridge_port *p, unsigned long v) +static ssize_t store_path_cost(struct dp_port *p, unsigned long v) { #if 0 br_stp_set_path_cost(p, v); @@ -56,7 +58,7 @@ static ssize_t store_path_cost(struct net_bridge_port *p, unsigned long v) static BRPORT_ATTR(path_cost, S_IRUGO | S_IWUSR, show_path_cost, store_path_cost); -static ssize_t show_priority(struct net_bridge_port *p, char *buf) +static ssize_t show_priority(struct dp_port *p, char *buf) { #if 0 return sprintf(buf, "%d\n", p->priority); @@ -64,7 +66,7 @@ static ssize_t show_priority(struct net_bridge_port *p, char *buf) return sprintf(buf, "%d\n", 0); #endif } -static ssize_t store_priority(struct net_bridge_port *p, unsigned long v) +static ssize_t store_priority(struct dp_port *p, unsigned long v) { #if 0 if (v >= (1<<(16-BR_PORT_BITS))) @@ -76,7 +78,7 @@ static ssize_t store_priority(struct net_bridge_port *p, unsigned long v) static BRPORT_ATTR(priority, S_IRUGO | S_IWUSR, show_priority, store_priority); -static ssize_t 
show_designated_root(struct net_bridge_port *p, char *buf) +static ssize_t show_designated_root(struct dp_port *p, char *buf) { #if 0 return br_show_bridge_id(buf, &p->designated_root); @@ -86,7 +88,7 @@ static ssize_t show_designated_root(struct net_bridge_port *p, char *buf) } static BRPORT_ATTR(designated_root, S_IRUGO, show_designated_root, NULL); -static ssize_t show_designated_bridge(struct net_bridge_port *p, char *buf) +static ssize_t show_designated_bridge(struct dp_port *p, char *buf) { #if 0 return br_show_bridge_id(buf, &p->designated_bridge); @@ -96,7 +98,7 @@ static ssize_t show_designated_bridge(struct net_bridge_port *p, char *buf) } static BRPORT_ATTR(designated_bridge, S_IRUGO, show_designated_bridge, NULL); -static ssize_t show_designated_port(struct net_bridge_port *p, char *buf) +static ssize_t show_designated_port(struct dp_port *p, char *buf) { #if 0 return sprintf(buf, "%d\n", p->designated_port); @@ -106,7 +108,7 @@ static ssize_t show_designated_port(struct net_bridge_port *p, char *buf) } static BRPORT_ATTR(designated_port, S_IRUGO, show_designated_port, NULL); -static ssize_t show_designated_cost(struct net_bridge_port *p, char *buf) +static ssize_t show_designated_cost(struct dp_port *p, char *buf) { #if 0 return sprintf(buf, "%d\n", p->designated_cost); @@ -116,7 +118,7 @@ static ssize_t show_designated_cost(struct net_bridge_port *p, char *buf) } static BRPORT_ATTR(designated_cost, S_IRUGO, show_designated_cost, NULL); -static ssize_t show_port_id(struct net_bridge_port *p, char *buf) +static ssize_t show_port_id(struct dp_port *p, char *buf) { #if 0 return sprintf(buf, "0x%x\n", p->port_id); @@ -126,14 +128,14 @@ static ssize_t show_port_id(struct net_bridge_port *p, char *buf) } static BRPORT_ATTR(port_id, S_IRUGO, show_port_id, NULL); -static ssize_t show_port_no(struct net_bridge_port *p, char *buf) +static ssize_t show_port_no(struct dp_port *p, char *buf) { return sprintf(buf, "0x%x\n", p->port_no); } static BRPORT_ATTR(port_no, 
S_IRUGO, show_port_no, NULL); -static ssize_t show_change_ack(struct net_bridge_port *p, char *buf) +static ssize_t show_change_ack(struct dp_port *p, char *buf) { #if 0 return sprintf(buf, "%d\n", p->topology_change_ack); @@ -143,7 +145,7 @@ static ssize_t show_change_ack(struct net_bridge_port *p, char *buf) } static BRPORT_ATTR(change_ack, S_IRUGO, show_change_ack, NULL); -static ssize_t show_config_pending(struct net_bridge_port *p, char *buf) +static ssize_t show_config_pending(struct dp_port *p, char *buf) { #if 0 return sprintf(buf, "%d\n", p->config_pending); @@ -153,7 +155,7 @@ static ssize_t show_config_pending(struct net_bridge_port *p, char *buf) } static BRPORT_ATTR(config_pending, S_IRUGO, show_config_pending, NULL); -static ssize_t show_port_state(struct net_bridge_port *p, char *buf) +static ssize_t show_port_state(struct dp_port *p, char *buf) { #if 0 return sprintf(buf, "%d\n", p->state); @@ -163,7 +165,7 @@ static ssize_t show_port_state(struct net_bridge_port *p, char *buf) } static BRPORT_ATTR(state, S_IRUGO, show_port_state, NULL); -static ssize_t show_message_age_timer(struct net_bridge_port *p, +static ssize_t show_message_age_timer(struct dp_port *p, char *buf) { #if 0 @@ -174,7 +176,7 @@ static ssize_t show_message_age_timer(struct net_bridge_port *p, } static BRPORT_ATTR(message_age_timer, S_IRUGO, show_message_age_timer, NULL); -static ssize_t show_forward_delay_timer(struct net_bridge_port *p, +static ssize_t show_forward_delay_timer(struct dp_port *p, char *buf) { #if 0 @@ -185,7 +187,7 @@ static ssize_t show_forward_delay_timer(struct net_bridge_port *p, } static BRPORT_ATTR(forward_delay_timer, S_IRUGO, show_forward_delay_timer, NULL); -static ssize_t show_hold_timer(struct net_bridge_port *p, +static ssize_t show_hold_timer(struct dp_port *p, char *buf) { #if 0 @@ -215,13 +217,13 @@ static struct brport_attribute *brport_attrs[] = { }; #define to_brport_attr(_at) container_of(_at, struct brport_attribute, attr) -#define 
to_brport(obj) container_of(obj, struct net_bridge_port, kobj) +#define to_brport(obj) container_of(obj, struct dp_port, kobj) static ssize_t brport_show(struct kobject * kobj, struct attribute * attr, char * buf) { struct brport_attribute * brport_attr = to_brport_attr(attr); - struct net_bridge_port * p = to_brport(kobj); + struct dp_port * p = to_brport(kobj); return brport_attr->show(p, buf); } @@ -230,7 +232,7 @@ static ssize_t brport_store(struct kobject * kobj, struct attribute * attr, const char * buf, size_t count) { - struct net_bridge_port * p = to_brport(kobj); + struct dp_port * p = to_brport(kobj); #if 0 struct brport_attribute * brport_attr = to_brport_attr(attr); char *endp; @@ -271,22 +273,24 @@ struct sysfs_ops brport_sysfs_ops = { * Creates a brport subdirectory with bridge attributes. * Puts symlink in bridge's brport subdirectory */ -int dp_sysfs_add_if(struct net_bridge_port *p) +int dp_sysfs_add_if(struct dp_port *p) { + struct kobject *kobj = vport_get_kobj(p->vport); struct datapath *dp = p->dp; struct brport_attribute **a; int err; /* Create /sys/class/net//brport directory. */ - err = kobject_add(&p->kobj, &p->dev->NETDEV_DEV_MEMBER.kobj, - SYSFS_BRIDGE_PORT_ATTR); + if (!kobj) + return -ENOENT; + + err = kobject_add(&p->kobj, kobj, SYSFS_BRIDGE_PORT_ATTR); if (err) goto err; /* Create symlink from /sys/class/net//brport/bridge to * /sys/class/net/. */ - err = sysfs_create_link(&p->kobj, - &dp->ports[ODPP_LOCAL]->dev->NETDEV_DEV_MEMBER.kobj, + err = sysfs_create_link(&p->kobj, vport_get_kobj(dp->ports[ODPP_LOCAL]->vport), SYSFS_BRIDGE_PORT_LINK); /* "bridge" */ if (err) goto err_del; @@ -300,10 +304,10 @@ int dp_sysfs_add_if(struct net_bridge_port *p) /* Create symlink from /sys/class/net//brif/ to * /sys/class/net//brport. 
*/ - err = sysfs_create_link(&dp->ifobj, &p->kobj, p->dev->name); + err = sysfs_create_link(&dp->ifobj, &p->kobj, vport_get_name(p->vport)); if (err) goto err_del; - strcpy(p->linkname, p->dev->name); + strcpy(p->linkname, vport_get_name(p->vport)); kobject_uevent(&p->kobj, KOBJ_ADD); @@ -316,7 +320,7 @@ err: return err; } -int dp_sysfs_del_if(struct net_bridge_port *p) +int dp_sysfs_del_if(struct dp_port *p) { if (p->linkname[0]) { sysfs_remove_link(&p->dp->ifobj, p->linkname); diff --git a/datapath/flow.c b/datapath/flow.c index 094a2c8d..8228da2e 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -97,7 +97,6 @@ static inline struct ovs_tcphdr *ovs_tcp_hdr(const struct sk_buff *skb) void flow_used(struct sw_flow *flow, struct sk_buff *skb) { - unsigned long flags; u8 tcp_flags = 0; if (flow->key.dl_type == htons(ETH_P_IP) && iphdr_ok(skb)) { @@ -109,12 +108,12 @@ void flow_used(struct sw_flow *flow, struct sk_buff *skb) } } - spin_lock_irqsave(&flow->lock, flags); + spin_lock_bh(&flow->lock); getnstimeofday(&flow->used); flow->packet_count++; flow->byte_count += skb->len; flow->tcp_flags |= tcp_flags; - spin_unlock_irqrestore(&flow->lock, flags); + spin_unlock_bh(&flow->lock); } struct sw_flow_actions *flow_actions_alloc(size_t n_actions) diff --git a/datapath/vport-internal_dev.c b/datapath/vport-internal_dev.c new file mode 100644 index 00000000..6d52db0f --- /dev/null +++ b/datapath/vport-internal_dev.c @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2009, 2010 Nicira Networks. + * Distributed under the terms of the GNU GPL version 2. + * + * Significant portions of this file may be copied from parts of the Linux + * kernel, by Linus Torvalds and others. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "datapath.h" +#include "vport-internal_dev.h" +#include "vport-netdev.h" + +struct pcpu_lstats { + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long tx_packets; + unsigned long tx_bytes; +}; + +struct internal_dev { + struct vport *vport; + + struct net_device_stats stats; + struct pcpu_lstats *lstats; +}; + +struct vport_ops internal_vport_ops; + +static inline struct internal_dev *internal_dev_priv(struct net_device *netdev) +{ + return netdev_priv(netdev); +} + +static struct net_device_stats *internal_dev_get_stats(struct net_device *netdev) +{ + struct internal_dev *internal_dev = internal_dev_priv(netdev); + struct net_device_stats *stats; + int i; + + stats = &internal_dev->stats; + memset(stats, 0, sizeof(struct net_device_stats)); + for_each_possible_cpu(i) { + const struct pcpu_lstats *lb_stats; + + lb_stats = per_cpu_ptr(internal_dev->lstats, i); + stats->rx_bytes += lb_stats->rx_bytes; + stats->rx_packets += lb_stats->rx_packets; + stats->tx_bytes += lb_stats->tx_bytes; + stats->tx_packets += lb_stats->tx_packets; + } + return stats; +} + +static int internal_dev_mac_addr(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + return 0; +} + +/* Not reentrant (because it is called with BHs disabled), but may be called + * simultaneously on different CPUs. */ +static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct internal_dev *internal_dev = internal_dev_priv(netdev); + struct vport *vport = internal_dev_get_vport(netdev); + struct pcpu_lstats *lb_stats; + + /* We need our own clone. 
*/ + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return 0; + + lb_stats = per_cpu_ptr(internal_dev->lstats, smp_processor_id()); + lb_stats->tx_packets++; + lb_stats->tx_bytes += skb->len; + + skb_reset_mac_header(skb); + rcu_read_lock_bh(); + vport_receive(vport, skb); + rcu_read_unlock_bh(); + + return 0; +} + +static int internal_dev_open(struct net_device *netdev) +{ + netif_start_queue(netdev); + return 0; +} + +static int internal_dev_stop(struct net_device *netdev) +{ + netif_stop_queue(netdev); + return 0; +} + +static void internal_dev_getinfo(struct net_device *netdev, + struct ethtool_drvinfo *info) +{ + struct dp_port *dp_port = vport_get_dp_port(internal_dev_get_vport(netdev)); + + strcpy(info->driver, "openvswitch"); + if (dp_port) + sprintf(info->bus_info, "%d.%d", dp_port->dp->dp_idx, dp_port->port_no); +} + +static struct ethtool_ops internal_dev_ethtool_ops = { + .get_drvinfo = internal_dev_getinfo, + .get_link = ethtool_op_get_link, + .get_sg = ethtool_op_get_sg, + .get_tx_csum = ethtool_op_get_tx_csum, + .get_tso = ethtool_op_get_tso, +}; + +static int internal_dev_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct dp_port *dp_port = vport_get_dp_port(internal_dev_get_vport(netdev)); + + if (new_mtu < 68) + return -EINVAL; + + if (dp_port) { + int min_mtu; + + mutex_lock(&dp_port->dp->mutex); + min_mtu = dp_min_mtu(dp_port->dp); + mutex_unlock(&dp_port->dp->mutex); + + if (new_mtu > min_mtu) + return -EINVAL; + } + + netdev->mtu = new_mtu; + return 0; +} + +static int internal_dev_init(struct net_device *netdev) +{ + struct internal_dev *internal_dev = internal_dev_priv(netdev); + + internal_dev->lstats = alloc_percpu(struct pcpu_lstats); + if (!internal_dev->lstats) + return -ENOMEM; + + return 0; +} + +static void internal_dev_free(struct net_device *netdev) +{ + struct internal_dev *internal_dev = internal_dev_priv(netdev); + + free_percpu(internal_dev->lstats); + free_netdev(netdev); +} + +static int 
internal_dev_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + if (dp_ioctl_hook) + return dp_ioctl_hook(dev, ifr, cmd); + return -EOPNOTSUPP; +} + +#ifdef HAVE_NET_DEVICE_OPS +static const struct net_device_ops internal_dev_netdev_ops = { + .ndo_init = internal_dev_init, + .ndo_open = internal_dev_open, + .ndo_stop = internal_dev_stop, + .ndo_start_xmit = internal_dev_xmit, + .ndo_set_mac_address = internal_dev_mac_addr, + .ndo_do_ioctl = internal_dev_do_ioctl, + .ndo_change_mtu = internal_dev_change_mtu, + .ndo_get_stats = internal_dev_get_stats, +}; +#endif + +static void +do_setup(struct net_device *netdev) +{ + ether_setup(netdev); + +#ifdef HAVE_NET_DEVICE_OPS + netdev->netdev_ops = &internal_dev_netdev_ops; +#else + netdev->do_ioctl = internal_dev_do_ioctl; + netdev->get_stats = internal_dev_get_stats; + netdev->hard_start_xmit = internal_dev_xmit; + netdev->open = internal_dev_open; + netdev->stop = internal_dev_stop; + netdev->set_mac_address = internal_dev_mac_addr; + netdev->change_mtu = internal_dev_change_mtu; + netdev->init = internal_dev_init; +#endif + + netdev->destructor = internal_dev_free; + SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops); + netdev->tx_queue_len = 0; + + netdev->flags = IFF_BROADCAST | IFF_MULTICAST; + netdev->features = NETIF_F_LLTX; /* XXX other features? 
*/ + + vport_gen_ether_addr(netdev->dev_addr); +} + +static struct vport * +internal_dev_create(const char *name, const void __user *config) +{ + struct vport *vport; + struct netdev_vport *netdev_vport; + struct internal_dev *internal_dev; + int err; + + vport = vport_alloc(sizeof(struct netdev_vport), &internal_vport_ops); + if (IS_ERR(vport)) { + err = PTR_ERR(vport); + goto error; + } + + netdev_vport = netdev_vport_priv(vport); + + netdev_vport->dev = alloc_netdev(sizeof(struct internal_dev), name, do_setup); + if (!netdev_vport->dev) { + err = -ENOMEM; + goto error_free_vport; + } + + internal_dev = internal_dev_priv(netdev_vport->dev); + internal_dev->vport = vport; + + err = register_netdevice(netdev_vport->dev); + if (err) + goto error_free_netdev; + + return vport; + +error_free_netdev: + free_netdev(netdev_vport->dev); +error_free_vport: + vport_free(vport); +error: + return ERR_PTR(err); +} + +static int +internal_dev_destroy(struct vport *vport) +{ + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + + unregister_netdevice(netdev_vport->dev); + vport_free(vport); + + return 0; +} + +static int +internal_dev_attach(struct vport *vport) +{ + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + + dev_set_promiscuity(netdev_vport->dev, 1); + + /* It would make sense to assign dev->br_port here too, but + * that causes packets received on internal ports to get caught + * in netdev_frame_hook(). In turn netdev_frame_hook() can reject them + * back to the network stack, but that's a waste of time. */ + + return 0; +} + +static int +internal_dev_detach(struct vport *vport) +{ + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + + dev_set_promiscuity(netdev_vport->dev, -1); + + /* Make sure that no packets arrive from now on, since + * internal_dev_xmit() will try to find itself through + * p->dp->ports[], and we're about to set that to null. 
*/ + netif_tx_disable(netdev_vport->dev); + + return 0; +} + +static int +internal_dev_recv(struct vport *vport, struct sk_buff *skb) +{ + struct net_device *netdev = netdev_vport_priv(vport)->dev; + struct internal_dev *internal_dev = internal_dev_priv(netdev); + struct pcpu_lstats *lb_stats; + int len; + + skb->dev = netdev; + len = skb->len; + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, netdev); + + if (in_interrupt()) + netif_rx(skb); + else + netif_rx_ni(skb); + netdev->last_rx = jiffies; + + preempt_disable(); + lb_stats = per_cpu_ptr(internal_dev->lstats, smp_processor_id()); + lb_stats->rx_packets++; + lb_stats->rx_bytes += len; + preempt_enable(); + + return len; +} + +struct vport_ops internal_vport_ops = { + .type = "internal", + .flags = VPORT_F_REQUIRED, + .create = internal_dev_create, + .destroy = internal_dev_destroy, + .attach = internal_dev_attach, + .detach = internal_dev_detach, + .set_mtu = netdev_set_mtu, + .set_addr = netdev_set_addr, + .get_name = netdev_get_name, + .get_addr = netdev_get_addr, + .get_kobj = netdev_get_kobj, + .get_stats = netdev_get_stats, + .get_dev_flags = netdev_get_dev_flags, + .is_running = netdev_is_running, + .get_operstate = netdev_get_operstate, + .get_ifindex = netdev_get_ifindex, + .get_iflink = netdev_get_iflink, + .get_mtu = netdev_get_mtu, + .send = internal_dev_recv, +}; + +int is_internal_dev(const struct net_device *netdev) +{ +#ifdef HAVE_NET_DEVICE_OPS + return netdev->netdev_ops == &internal_dev_netdev_ops; +#else + return netdev->open == internal_dev_open; +#endif +} + +int +is_internal_vport(const struct vport *vport) +{ + return vport->ops == &internal_vport_ops; +} + +struct vport * +internal_dev_get_vport(struct net_device *netdev) +{ + struct internal_dev *internal_dev = internal_dev_priv(netdev); + return rcu_dereference(internal_dev->vport); +} diff --git a/datapath/vport-internal_dev.h b/datapath/vport-internal_dev.h new file mode 100644 index 00000000..7c765209 --- 
/dev/null +++ b/datapath/vport-internal_dev.h @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2009, 2010 Nicira Networks. + * Distributed under the terms of the GNU GPL version 2. + * + * Significant portions of this file may be copied from parts of the Linux + * kernel, by Linus Torvalds and others. + */ + +#ifndef VPORT_INTERNAL_DEV_H +#define VPORT_INTERNAL_DEV_H 1 + +#include "datapath.h" +#include "vport.h" + +int is_internal_vport(const struct vport *); + +int is_internal_dev(const struct net_device *); +struct vport *internal_dev_get_vport(struct net_device *); + +#endif /* vport-internal_dev.h */ diff --git a/datapath/vport-netdev.c b/datapath/vport-netdev.c new file mode 100644 index 00000000..980df01f --- /dev/null +++ b/datapath/vport-netdev.c @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2010 Nicira Networks. + * Distributed under the terms of the GNU GPL version 2. + * + * Significant portions of this file may be copied from parts of the Linux + * kernel, by Linus Torvalds and others. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "datapath.h" +#include "vport-internal_dev.h" +#include "vport-netdev.h" + +#include "compat.h" + +struct vport_ops netdev_vport_ops; + +static void netdev_port_receive(struct net_bridge_port *, struct sk_buff *); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) +static struct llc_sap *netdev_stp_sap; + +static int +netdev_stp_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + /* We don't really care about STP packets, we just listen for them for + * mutual exclusion with the bridge module, so this just discards + * them. */ + kfree_skb(skb); + return 0; +} + +static int +netdev_avoid_bridge_init(void) +{ + /* Register to receive STP packets because the bridge module also + * attempts to do so. 
Since there can only be a single listener for a + * given protocol, this provides mutual exclusion against the bridge + * module, preventing both of them from being loaded at the same + * time. */ + netdev_stp_sap = llc_sap_open(LLC_SAP_BSPAN, netdev_stp_rcv); + if (!netdev_stp_sap) { + printk(KERN_ERR "openvswitch: can't register sap for STP (probably the bridge module is loaded)\n"); + return -EADDRINUSE; + } + return 0; +} + +static void +netdev_avoid_bridge_exit(void) +{ + llc_sap_put(netdev_stp_sap); +} +#else /* Linux 2.6.27 or later. */ +static int +netdev_avoid_bridge_init(void) +{ + /* Linux 2.6.27 introduces a way for multiple clients to register for + * STP packets, which interferes with what we try to do above. + * Instead, just check whether there's a bridge hook defined. This is + * not as safe--the bridge module is willing to load over the top of + * us--but it provides a little bit of protection. */ + if (br_handle_frame_hook) { + printk(KERN_ERR "openvswitch: bridge module is loaded, cannot load over it\n"); + return -EADDRINUSE; + } + return 0; +} + +static void +netdev_avoid_bridge_exit(void) +{ + /* Nothing to do. */ +} +#endif /* Linux 2.6.27 or later */ + +/* + * Used as br_handle_frame_hook. (Cannot run bridge at the same time, even on + * different set of devices!) + */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) +/* Called with rcu_read_lock and bottom-halves disabled. */ +static struct sk_buff * +netdev_frame_hook(struct net_bridge_port *p, struct sk_buff *skb) +{ + netdev_port_receive(p, skb); + return NULL; +} +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) +/* Called with rcu_read_lock and bottom-halves disabled. 
*/ +static int +netdev_frame_hook(struct net_bridge_port *p, struct sk_buff **pskb) +{ + netdev_port_receive(p, *pskb); + return 1; +} +#else +#error +#endif + +static int +netdev_init(void) +{ + int err; + + err = netdev_avoid_bridge_init(); + if (err) + return err; + + /* Hook into callback used by the bridge to intercept packets. + * Parasites we are. */ + br_handle_frame_hook = netdev_frame_hook; + + return 0; +} + +static void +netdev_exit(void) +{ + br_handle_frame_hook = NULL; + netdev_avoid_bridge_exit(); +} + +static struct vport * +netdev_create(const char *name, const void __user *config) +{ + struct vport *vport; + struct netdev_vport *netdev_vport; + int err; + + vport = vport_alloc(sizeof(struct netdev_vport), &netdev_vport_ops); + if (IS_ERR(vport)) { + err = PTR_ERR(vport); + goto error; + } + + netdev_vport = netdev_vport_priv(vport); + + netdev_vport->dev = dev_get_by_name(&init_net, name); + if (!netdev_vport->dev) { + err = -ENODEV; + goto error_free_vport; + } + + if (netdev_vport->dev->flags & IFF_LOOPBACK || + netdev_vport->dev->type != ARPHRD_ETHER || + is_internal_dev(netdev_vport->dev)) { + err = -EINVAL; + goto error_put; + } + + if (netdev_vport->dev->br_port) { + err = -EBUSY; + goto error_put; + } + + return vport; + +error_put: + dev_put(netdev_vport->dev); +error_free_vport: + vport_free(vport); +error: + return ERR_PTR(err); +} + +static int +netdev_destroy(struct vport *vport) +{ + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + + dev_put(netdev_vport->dev); + vport_free(vport); + + return 0; +} + +static int +netdev_attach(struct vport *vport) +{ + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + + dev_set_promiscuity(netdev_vport->dev, 1); + dev_disable_lro(netdev_vport->dev); + rcu_assign_pointer(netdev_vport->dev->br_port, (struct net_bridge_port *)vport); + + return 0; +} + +static int +netdev_detach(struct vport *vport) +{ + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + + 
rcu_assign_pointer(netdev_vport->dev->br_port, NULL); + dev_set_promiscuity(netdev_vport->dev, -1); + + return 0; +} + +int +netdev_set_mtu(struct vport *vport, int mtu) +{ + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return dev_set_mtu(netdev_vport->dev, mtu); +} + +int +netdev_set_addr(struct vport *vport, const unsigned char *addr) +{ + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + struct sockaddr sa; + + sa.sa_family = ARPHRD_ETHER; + memcpy(sa.sa_data, addr, ETH_ALEN); + + return dev_set_mac_address(netdev_vport->dev, &sa); +} + +const char * +netdev_get_name(const struct vport *vport) +{ + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return netdev_vport->dev->name; +} + +const unsigned char * +netdev_get_addr(const struct vport *vport) +{ + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return netdev_vport->dev->dev_addr; +} + +struct kobject * +netdev_get_kobj(const struct vport *vport) +{ + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return &netdev_vport->dev->NETDEV_DEV_MEMBER.kobj; +} + +int +netdev_get_stats(const struct vport *vport, struct odp_vport_stats *stats) +{ + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + const struct net_device_stats *netdev_stats; + + netdev_stats = dev_get_stats(netdev_vport->dev); + + stats->rx_bytes = netdev_stats->rx_bytes; + stats->rx_packets = netdev_stats->rx_packets; + stats->tx_bytes = netdev_stats->tx_bytes; + stats->tx_packets = netdev_stats->tx_packets; + stats->rx_dropped = netdev_stats->rx_dropped; + stats->rx_errors = netdev_stats->rx_errors; + stats->rx_frame_err = netdev_stats->rx_frame_errors; + stats->rx_over_err = netdev_stats->rx_over_errors; + stats->rx_crc_err = netdev_stats->rx_crc_errors; + stats->tx_dropped = netdev_stats->tx_dropped; + stats->tx_errors = netdev_stats->tx_errors; + stats->collisions = netdev_stats->collisions; + + return 0; +} + +unsigned 
+netdev_get_dev_flags(const struct vport *vport) +{ + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return dev_get_flags(netdev_vport->dev); +} + +int +netdev_is_running(const struct vport *vport) +{ + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return netif_running(netdev_vport->dev); +} + +unsigned char +netdev_get_operstate(const struct vport *vport) +{ + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return netdev_vport->dev->operstate; +} + +int +netdev_get_ifindex(const struct vport *vport) +{ + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return netdev_vport->dev->ifindex; +} + +int +netdev_get_iflink(const struct vport *vport) +{ + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return netdev_vport->dev->iflink; +} + +int +netdev_get_mtu(const struct vport *vport) +{ + const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + return netdev_vport->dev->mtu; +} + +/* Must be called with rcu_read_lock. */ +static void +netdev_port_receive(struct net_bridge_port *p, struct sk_buff *skb) +{ + struct vport *vport = (struct vport *)p; + + /* Make our own copy of the packet. Otherwise we will mangle the + * packet for anyone who came before us (e.g. tcpdump via AF_PACKET). + * (No one comes after us, since we tell handle_bridge() that we took + * the packet.) */ + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return; + + /* Push the Ethernet header back on. */ + skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + + vport_receive(vport, skb); +} + +static int +netdev_send(struct vport *vport, struct sk_buff *skb) +{ + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + int len = skb->len; + + skb->dev = netdev_vport->dev; + forward_ip_summed(skb); + dev_queue_xmit(skb); + + return len; +} + +/* Returns null if this device is not attached to a datapath. 
*/ +struct vport * +netdev_get_vport(struct net_device *dev) +{ + return (struct vport *)dev->br_port; +} + +struct vport_ops netdev_vport_ops = { + .type = "netdev", + .flags = VPORT_F_REQUIRED, + .init = netdev_init, + .exit = netdev_exit, + .create = netdev_create, + .destroy = netdev_destroy, + .attach = netdev_attach, + .detach = netdev_detach, + .set_mtu = netdev_set_mtu, + .set_addr = netdev_set_addr, + .get_name = netdev_get_name, + .get_addr = netdev_get_addr, + .get_kobj = netdev_get_kobj, + .get_stats = netdev_get_stats, + .get_dev_flags = netdev_get_dev_flags, + .is_running = netdev_is_running, + .get_operstate = netdev_get_operstate, + .get_ifindex = netdev_get_ifindex, + .get_iflink = netdev_get_iflink, + .get_mtu = netdev_get_mtu, + .send = netdev_send, +}; diff --git a/datapath/vport-netdev.h b/datapath/vport-netdev.h new file mode 100644 index 00000000..19f176cd --- /dev/null +++ b/datapath/vport-netdev.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2010 Nicira Networks. + * Distributed under the terms of the GNU GPL version 2. + * + * Significant portions of this file may be copied from parts of the Linux + * kernel, by Linus Torvalds and others. 
+ */ + +#ifndef VPORT_NETDEV_H +#define VPORT_NETDEV_H 1 + +#include + +#include "vport.h" + +struct vport *netdev_get_vport(struct net_device *dev); + +struct netdev_vport { + struct net_device *dev; +}; + +static inline struct netdev_vport * +netdev_vport_priv(const struct vport *vport) +{ + return vport_priv(vport); +} + +int netdev_set_mtu(struct vport *, int mtu); +int netdev_set_addr(struct vport *, const unsigned char *addr); +const char *netdev_get_name(const struct vport *); +const unsigned char *netdev_get_addr(const struct vport *); +struct kobject *netdev_get_kobj(const struct vport *); +int netdev_get_stats(const struct vport *, struct odp_vport_stats *); +unsigned netdev_get_dev_flags(const struct vport *); +int netdev_is_running(const struct vport *); +unsigned char netdev_get_operstate(const struct vport *); +int netdev_get_ifindex(const struct vport *); +int netdev_get_iflink(const struct vport *); +int netdev_get_mtu(const struct vport *); + +#endif /* vport_netdev.h */ diff --git a/datapath/vport.c b/datapath/vport.c new file mode 100644 index 00000000..a166ef90 --- /dev/null +++ b/datapath/vport.c @@ -0,0 +1,1170 @@ +/* + * Copyright (c) 2010 Nicira Networks. + * Distributed under the terms of the GNU GPL version 2. + * + * Significant portions of this file may be copied from parts of the Linux + * kernel, by Linus Torvalds and others. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vport.h" + +extern struct vport_ops netdev_vport_ops; +extern struct vport_ops internal_vport_ops; +extern struct vport_ops gre_vport_ops; + +static struct vport_ops *base_vport_ops_list[] = { + &netdev_vport_ops, + &internal_vport_ops, +}; + +static const struct vport_ops **vport_ops_list; +static int n_vport_types; + +static struct hlist_head *dev_table; +#define VPORT_HASH_BUCKETS 1024 + +/* Both RTNL lock and vport_mutex need to be held when updating dev_table. 
+ * + * If you use vport_locate and then perform some operations, you need to hold + * one of these locks if you don't want the vport to be deleted out from under + * you. + * + * If you get a reference to a vport through a dp_port, it is protected + * by RCU and you need to hold rcu_read_lock instead when reading. + * + * If multiple locks are taken, the hierarchy is: + * 1. RTNL + * 2. DP + * 3. vport + */ +static DEFINE_MUTEX(vport_mutex); + +/** + * vport_lock - acquire vport lock + * + * Acquire global vport lock. See above comment about locking requirements + * and specific function definitions. May sleep. + */ +void +vport_lock(void) +{ + mutex_lock(&vport_mutex); +} + +/** + * vport_unlock - release vport lock + * + * Release lock acquired with vport_lock. + */ +void +vport_unlock(void) +{ + mutex_unlock(&vport_mutex); +} + +#define ASSERT_VPORT() do { \ + if (unlikely(!mutex_is_locked(&vport_mutex))) { \ + printk(KERN_ERR "openvswitch: vport lock not held at %s (%d)\n", \ + __FILE__, __LINE__); \ + dump_stack(); \ + } \ +} while(0) + +/** + * vport_init - initialize vport subsystem + * + * Called at module load time to initialize the vport subsystem and any + * compiled in vport types. 
+ */ +int +vport_init(void) +{ + int err; + int i; + + dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head), + GFP_KERNEL); + if (!dev_table) { + err = -ENOMEM; + goto error; + } + + vport_ops_list = kmalloc(ARRAY_SIZE(base_vport_ops_list) * + sizeof(struct vport_ops *), GFP_KERNEL); + if (!vport_ops_list) { + err = -ENOMEM; + goto error_dev_table; + } + + for (i = 0; i < ARRAY_SIZE(base_vport_ops_list); i++) { + struct vport_ops *new_ops = base_vport_ops_list[i]; + + if (new_ops->get_stats && new_ops->flags & VPORT_F_GEN_STATS) { + printk(KERN_INFO "openvswitch: both get_stats() and VPORT_F_GEN_STATS defined on vport %s, dropping VPORT_F_GEN_STATS\n", new_ops->type); + new_ops->flags &= ~VPORT_F_GEN_STATS; + } + + if (new_ops->init) + err = new_ops->init(); + else + err = 0; + + if (!err) + vport_ops_list[n_vport_types++] = new_ops; + else if (new_ops->flags & VPORT_F_REQUIRED) { + vport_exit(); + goto error; + } + } + + return 0; + +error_dev_table: + kfree(dev_table); +error: + return err; +} + +static void +vport_del_all(void) +{ + int i; + + rtnl_lock(); + vport_lock(); + + for (i = 0; i < VPORT_HASH_BUCKETS; i++) { + struct hlist_head *bucket = &dev_table[i]; + struct vport *vport; + struct hlist_node *node, *next; + + hlist_for_each_entry_safe(vport, node, next, bucket, hash_node) + __vport_del(vport); + } + + vport_unlock(); + rtnl_unlock(); +} + +/** + * vport_exit - shutdown vport subsystem + * + * Called at module exit time to shutdown the vport subsystem and any + * initialized vport types. + */ +void +vport_exit(void) +{ + int i; + + vport_del_all(); + + for (i = 0; i < n_vport_types; i++) { + if (vport_ops_list[i]->exit) + vport_ops_list[i]->exit(); + } + + kfree(vport_ops_list); + kfree(dev_table); +} + +/** + * vport_add - add vport device (for userspace callers) + * + * @uvport_config: New port configuration. + * + * Creates a new vport with the specified configuration (which is dependent + * on device type). 
This function is for userspace callers and assumes no + * locks are held. + */ +int +vport_add(const struct odp_vport_add __user *uvport_config) +{ + struct odp_vport_add vport_config; + struct vport *vport; + int err = 0; + + if (copy_from_user(&vport_config, uvport_config, sizeof(struct odp_vport_add))) + return -EFAULT; + + vport_config.port_type[VPORT_TYPE_SIZE - 1] = '\0'; + vport_config.devname[IFNAMSIZ - 1] = '\0'; + + rtnl_lock(); + + vport = vport_locate(vport_config.devname); + if (vport) { + err = -EEXIST; + goto out; + } + + vport_lock(); + vport = __vport_add(vport_config.devname, vport_config.port_type, + vport_config.config); + vport_unlock(); + + if (IS_ERR(vport)) + err = PTR_ERR(vport); + +out: + rtnl_unlock(); + return err; +} + +/** + * vport_mod - modify existing vport device (for userspace callers) + * + * @uvport_config: New configuration for vport + * + * Modifies an existing device with the specified configuration (which is + * dependent on device type). This function is for userspace callers and + * assumes no locks are held. + */ +int +vport_mod(const struct odp_vport_mod __user *uvport_config) +{ + struct odp_vport_mod vport_config; + struct vport *vport; + int err; + + if (copy_from_user(&vport_config, uvport_config, sizeof(struct odp_vport_mod))) + return -EFAULT; + + vport_config.devname[IFNAMSIZ - 1] = '\0'; + + rtnl_lock(); + + vport = vport_locate(vport_config.devname); + if (!vport) { + err = -ENODEV; + goto out; + } + + vport_lock(); + err = __vport_mod(vport, vport_config.config); + vport_unlock(); + +out: + rtnl_unlock(); + return err; +} + +/** + * vport_del - delete existing vport device (for userspace callers) + * + * @udevname: Name of device to delete + * + * Deletes the specified device. Detaches the device from a datapath first + * if it is attached. Deleting the device will fail if it does not exist or it + * is the datapath local port. It is also possible to fail for less obvious + * reasons, such as lack of memory. 
This function is for userspace callers and + * assumes no locks are held. + */ +int +vport_del(const char __user *udevname) +{ + char devname[IFNAMSIZ]; + struct vport *vport; + struct dp_port *dp_port; + int err = 0; + + if (strncpy_from_user(devname, udevname, IFNAMSIZ - 1) < 0) + return -EFAULT; + devname[IFNAMSIZ - 1] = '\0'; + + rtnl_lock(); + + vport = vport_locate(devname); + if (!vport) { + err = -ENODEV; + goto out; + } + + dp_port = vport_get_dp_port(vport); + if (dp_port) { + struct datapath *dp = dp_port->dp; + + mutex_lock(&dp->mutex); + + if (!strcmp(dp_name(dp), devname)) { + err = -EINVAL; + goto dp_port_out; + } + + err = dp_detach_port(dp_port, 0); + +dp_port_out: + mutex_unlock(&dp->mutex); + + if (err) + goto out; + } + + vport_lock(); + err = __vport_del(vport); + vport_unlock(); + +out: + rtnl_unlock(); + return err; +} + +/** + * vport_stats_get - retrieve device stats (for userspace callers) + * + * @ustats_req: Stats request parameters. + * + * Retrieves transmit, receive, and error stats for the given device. This + * function is for userspace callers and assumes no locks are held. 
+ */ +int +vport_stats_get(struct odp_vport_stats_req __user *ustats_req) +{ + struct odp_vport_stats_req stats_req; + struct vport *vport; + int err; + + if (copy_from_user(&stats_req, ustats_req, sizeof(struct odp_vport_stats_req))) + return -EFAULT; + + stats_req.devname[IFNAMSIZ - 1] = '\0'; + + vport_lock(); + + vport = vport_locate(stats_req.devname); + if (!vport) { + err = -ENODEV; + goto out; + } + + if (vport->ops->get_stats) + err = vport->ops->get_stats(vport, &stats_req.stats); + else if (vport->ops->flags & VPORT_F_GEN_STATS) { + int i; + + memset(&stats_req.stats, 0, sizeof(struct odp_vport_stats)); + + for_each_possible_cpu(i) { + const struct vport_percpu_stats *percpu_stats; + + percpu_stats = per_cpu_ptr(vport->percpu_stats, i); + stats_req.stats.rx_bytes += percpu_stats->rx_bytes; + stats_req.stats.rx_packets += percpu_stats->rx_packets; + stats_req.stats.tx_bytes += percpu_stats->tx_bytes; + stats_req.stats.tx_packets += percpu_stats->tx_packets; + } + + spin_lock_bh(&vport->err_stats.lock); + + stats_req.stats.rx_dropped = vport->err_stats.rx_dropped; + stats_req.stats.rx_errors = vport->err_stats.rx_errors + + vport->err_stats.rx_frame_err + + vport->err_stats.rx_over_err + + vport->err_stats.rx_crc_err; + stats_req.stats.rx_frame_err = vport->err_stats.rx_frame_err; + stats_req.stats.rx_over_err = vport->err_stats.rx_over_err; + stats_req.stats.rx_crc_err = vport->err_stats.rx_crc_err; + stats_req.stats.tx_dropped = vport->err_stats.tx_dropped; + stats_req.stats.tx_errors = vport->err_stats.tx_errors; + stats_req.stats.collisions = vport->err_stats.collisions; + + spin_unlock_bh(&vport->err_stats.lock); + + err = 0; + } else + err = -EOPNOTSUPP; + +out: + vport_unlock(); + + if (!err) + if (copy_to_user(ustats_req, &stats_req, sizeof(struct odp_vport_stats_req))) + err = -EFAULT; + + return err; +} + +/** + * vport_ether_get - retrieve device Ethernet address (for userspace callers) + * + * @uvport_ether: Ethernet address request parameters. 
+ * + * Retrieves the Ethernet address of the given device. This function is for + * userspace callers and assumes no locks are held. + */ +int +vport_ether_get(struct odp_vport_ether __user *uvport_ether) +{ + struct odp_vport_ether vport_ether; + struct vport *vport; + int err = 0; + + if (copy_from_user(&vport_ether, uvport_ether, sizeof(struct odp_vport_ether))) + return -EFAULT; + + vport_ether.devname[IFNAMSIZ - 1] = '\0'; + + vport_lock(); + + vport = vport_locate(vport_ether.devname); + if (!vport) { + err = -ENODEV; + goto out; + } + + memcpy(vport_ether.ether_addr, vport_get_addr(vport), ETH_ALEN); + +out: + vport_unlock(); + + if (!err) + if (copy_to_user(uvport_ether, &vport_ether, sizeof(struct odp_vport_ether))) + err = -EFAULT; + + return err; +} + +/** + * vport_ether_set - set device Ethernet address (for userspace callers) + * + * @uvport_ether: Ethernet address request parameters. + * + * Sets the Ethernet address of the given device. Some devices may not support + * setting the Ethernet address, in which case the result will always be + * -EOPNOTSUPP. This function is for userspace callers and assumes no locks + * are held. + */ +int +vport_ether_set(struct odp_vport_ether __user *uvport_ether) +{ + struct odp_vport_ether vport_ether; + struct vport *vport; + int err; + + if (copy_from_user(&vport_ether, uvport_ether, sizeof(struct odp_vport_ether))) + return -EFAULT; + + vport_ether.devname[IFNAMSIZ - 1] = '\0'; + + rtnl_lock(); + vport_lock(); + + vport = vport_locate(vport_ether.devname); + if (!vport) { + err = -ENODEV; + goto out; + } + + err = vport_set_addr(vport, vport_ether.ether_addr); + +out: + vport_unlock(); + rtnl_unlock(); + return err; +} + +/** + * vport_mtu_get - retrieve device MTU (for userspace callers) + * + * @uvport_mtu: MTU request parameters. + * + * Retrieves the MTU of the given device. This function is for userspace + * callers and assumes no locks are held.
+ */ +int +vport_mtu_get(struct odp_vport_mtu __user *uvport_mtu) +{ + struct odp_vport_mtu vport_mtu; + struct vport *vport; + int err = 0; + + if (copy_from_user(&vport_mtu, uvport_mtu, sizeof(struct odp_vport_mtu))) + return -EFAULT; + + vport_mtu.devname[IFNAMSIZ - 1] = '\0'; + + vport_lock(); + + vport = vport_locate(vport_mtu.devname); + if (!vport) { + err = -ENODEV; + goto out; + } + + vport_mtu.mtu = vport_get_mtu(vport); + +out: + vport_unlock(); + + if (!err) + if (copy_to_user(uvport_mtu, &vport_mtu, sizeof(struct odp_vport_mtu))) + err = -EFAULT; + + return err; +} + +/** + * vport_mtu_set - set device MTU (for userspace callers) + * + * @uvport_mtu: MTU request parameters. + * + * Sets the MTU of the given device. Some devices may not support setting the + * MTU, in which case the result will always be -EOPNOTSUPP. This function is + * for userspace callers and assumes no locks are held. + */ +int +vport_mtu_set(struct odp_vport_mtu __user *uvport_mtu) +{ + struct odp_vport_mtu vport_mtu; + struct vport *vport; + int err; + + if (copy_from_user(&vport_mtu, uvport_mtu, sizeof(struct odp_vport_mtu))) + return -EFAULT; + + vport_mtu.devname[IFNAMSIZ - 1] = '\0'; + + rtnl_lock(); + vport_lock(); + + vport = vport_locate(vport_mtu.devname); + if (!vport) { + err = -ENODEV; + goto out; + } + + err = vport_set_mtu(vport, vport_mtu.mtu); + +out: + vport_unlock(); + rtnl_unlock(); + return err; +} + +static struct hlist_head * +hash_bucket(const char *name) +{ + unsigned int hash = full_name_hash(name, strlen(name)); + return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)]; +} + +/** + * vport_locate - find a port that has already been created + * + * @name: name of port to find + * + * Either RTNL or vport lock must be acquired before calling this function + * and held while using the found port. See the locking comments at the + * top of the file. 
+ */ +struct vport * +vport_locate(const char *name) +{ + struct hlist_head *bucket = hash_bucket(name); + struct vport *vport; + struct hlist_node *node; + + if (unlikely(!mutex_is_locked(&vport_mutex) && !rtnl_is_locked())) { + printk(KERN_ERR "openvswitch: neither RTNL nor vport lock held in vport_locate\n"); + dump_stack(); + } + + hlist_for_each_entry(vport, node, bucket, hash_node) + if (!strcmp(name, vport_get_name(vport))) + return vport; + + return NULL; +} + +static void +register_vport(struct vport *vport) +{ + hlist_add_head(&vport->hash_node, hash_bucket(vport_get_name(vport))); +} + +static void +unregister_vport(struct vport *vport) +{ + hlist_del(&vport->hash_node); +} + +/** + * vport_alloc - allocate and initialize new vport + * + * @priv_size: Size of private data area to allocate. + * @ops: vport device ops + * + * Allocate and initialize a new vport defined by @ops. The vport will contain + * a private data area of size @priv_size that can be accessed using + * vport_priv(). vports that are no longer needed should be released with + * vport_free(). Returns an ERR_PTR() on allocation failure. + */ +struct vport * +vport_alloc(int priv_size, const struct vport_ops *ops) +{ + struct vport *vport; + size_t alloc_size; + + alloc_size = sizeof(struct vport); + if (priv_size) { + alloc_size = ALIGN(alloc_size, VPORT_ALIGN); + alloc_size += priv_size; + } + + vport = kzalloc(alloc_size, GFP_KERNEL); + if (!vport) + return ERR_PTR(-ENOMEM); + + vport->ops = ops; + if (vport->ops->flags & VPORT_F_GEN_STATS) { + vport->percpu_stats = alloc_percpu(struct vport_percpu_stats); + if (!vport->percpu_stats) { + kfree(vport); /* don't leak the vport on failure */ + return ERR_PTR(-ENOMEM); + } + spin_lock_init(&vport->err_stats.lock); + } + + return vport; +} + +/** + * vport_free - uninitialize and free vport + * + * @vport: vport to free + * + * Frees a vport allocated with vport_alloc() when it is no longer needed.
+ */ +void +vport_free(struct vport *vport) +{ + if (vport->ops->flags & VPORT_F_GEN_STATS) + free_percpu(vport->percpu_stats); + + kfree(vport); +} + +/** + * __vport_add - add vport device (for kernel callers) + * + * @name: Name of new device. + * @type: Type of new device (to be matched against types in registered vport + * ops). + * @config: Device type specific configuration. Userspace pointer. + * + * Creates a new vport with the specified configuration (which is dependent + * on device type). Both RTNL and vport locks must be held. + */ +struct vport * +__vport_add(const char *name, const char *type, const void __user *config) +{ + struct vport *vport; + int err = 0; + int i; + + ASSERT_RTNL(); + ASSERT_VPORT(); + + for (i = 0; i < n_vport_types; i++) { + if (!strcmp(vport_ops_list[i]->type, type)) { + vport = vport_ops_list[i]->create(name, config); + if (IS_ERR(vport)) { + err = PTR_ERR(vport); + goto out; + } + + register_vport(vport); + return vport; + } + } + + err = -EAFNOSUPPORT; + +out: + return ERR_PTR(err); +} + +/** + * __vport_mod - modify existing vport device (for kernel callers) + * + * @vport: vport to modify. + * @config: Device type specific configuration. Userspace pointer. + * + * Modifies an existing device with the specified configuration (which is + * dependent on device type). Both RTNL and vport locks must be held. + */ +int +__vport_mod(struct vport *vport, const void __user *config) +{ + ASSERT_RTNL(); + ASSERT_VPORT(); + + if (vport->ops->modify) + return vport->ops->modify(vport, config); + else + return -EOPNOTSUPP; +} + +/** + * __vport_del - delete existing vport device (for kernel callers) + * + * @vport: vport to delete. + * + * Deletes the specified device. The device must not be currently attached to + * a datapath. It is possible to fail for reasons such as lack of memory. + * Both RTNL and vport locks must be held. 
+ */
+int
+__vport_del(struct vport *vport)
+{
+	ASSERT_RTNL();
+	ASSERT_VPORT();
+
+	/* Deleting a vport that is still attached is a caller bug. */
+	BUG_ON(vport_get_dp_port(vport));
+
+	unregister_vport(vport);
+
+	return vport->ops->destroy(vport);
+}
+
+/**
+ * vport_attach - attach a vport to a datapath
+ *
+ * @vport: vport to attach.
+ * @dp_port: Datapath port to attach the vport to.
+ *
+ * Links @vport and @dp_port to each other so that packets may be exchanged.
+ * Returns -EBUSY if either side is already attached, the error from the
+ * device type's attach callback if it fails, and 0 otherwise. @dp_port must
+ * be successfully attached to a vport before it is connected to a datapath
+ * and must not be modified while connected. RTNL lock and the appropriate DP
+ * mutex must be held.
+ */
+int
+vport_attach(struct vport *vport, struct dp_port *dp_port)
+{
+	ASSERT_RTNL();
+
+	if (dp_port->vport || vport_get_dp_port(vport))
+		return -EBUSY;
+
+	if (vport->ops->attach) {
+		int err = vport->ops->attach(vport);
+
+		if (err)
+			return err;
+	}
+
+	/* Fill in the back pointer first, then publish the datapath port
+	 * through the RCU-read pointer. */
+	dp_port->vport = vport;
+	rcu_assign_pointer(vport->dp_port, dp_port);
+
+	return 0;
+}
+
+/**
+ * vport_detach - detach a vport from a datapath
+ *
+ * @vport: vport to detach.
+ *
+ * Breaks the link between @vport and its datapath port and then runs the
+ * device type's detach callback, if any. Returns -EINVAL if the vport is not
+ * attached, otherwise the callback's result (0 without a callback). May fail
+ * for a variety of reasons, including lack of memory. RTNL lock and the
+ * appropriate DP mutex must be held.
+ */
+int
+vport_detach(struct vport *vport)
+{
+	struct dp_port *attached;
+
+	ASSERT_RTNL();
+
+	attached = vport_get_dp_port(vport);
+	if (!attached)
+		return -EINVAL;
+
+	attached->vport = NULL;
+	rcu_assign_pointer(vport->dp_port, NULL);
+
+	return vport->ops->detach ? vport->ops->detach(vport) : 0;
+}
+
+/**
+ * vport_set_mtu - set device MTU (for kernel callers)
+ *
+ * @vport: vport on which to set MTU.
+ * @mtu: New MTU.
+ *
+ * Sets the MTU of the given device. Some devices may not support setting the
+ * MTU, in which case the result will always be -EOPNOTSUPP. RTNL lock must
+ * be held.
+ */
+int
+vport_set_mtu(struct vport *vport, int mtu)
+{
+	ASSERT_RTNL();
+
+	/* Anything below 68 is rejected before the device is consulted. */
+	if (mtu < 68)
+		return -EINVAL;
+
+	if (!vport->ops->set_mtu)
+		return -EOPNOTSUPP;
+
+	return vport->ops->set_mtu(vport, mtu);
+}
+
+/**
+ * vport_set_addr - set device Ethernet address (for kernel callers)
+ *
+ * @vport: vport on which to set Ethernet address.
+ * @addr: New address.
+ *
+ * Validates @addr and hands it to the device type's set_addr callback.
+ * Returns -EADDRNOTAVAIL for an address rejected by is_valid_ether_addr()
+ * and -EOPNOTSUPP for device types without the callback. RTNL lock must be
+ * held.
+ */
+int
+vport_set_addr(struct vport *vport, const unsigned char *addr)
+{
+	ASSERT_RTNL();
+
+	if (!is_valid_ether_addr(addr))
+		return -EADDRNOTAVAIL;
+
+	if (!vport->ops->set_addr)
+		return -EOPNOTSUPP;
+
+	return vport->ops->set_addr(vport, addr);
+}
+
+/**
+ * vport_get_name - retrieve device name
+ *
+ * @vport: vport from which to retrieve the name.
+ *
+ * Returns the name reported by the device type's get_name callback. Either
+ * RTNL lock or rcu_read_lock must be held for the entire duration that the
+ * name is in use.
+ */
+const char *
+vport_get_name(const struct vport *vport)
+{
+	return vport->ops->get_name(vport);
+}
+
+/**
+ * vport_get_type - retrieve device type
+ *
+ * @vport: vport from which to retrieve the type.
+ *
+ * Returns the type string stored in the vport's ops. Either RTNL lock or
+ * rcu_read_lock must be held for the entire duration that the type is in use.
+ */
+const char *
+vport_get_type(const struct vport *vport)
+{
+	return vport->ops->type;
+}
+
+/**
+ * vport_get_addr - retrieve device Ethernet address (for kernel callers)
+ *
+ * @vport: vport from which to retrieve the Ethernet address.
+ *
+ * Retrieves the Ethernet address of the given device. Either RTNL lock or
+ * rcu_read_lock must be held for the entire duration that the Ethernet address
+ * is in use.
+ */
+const unsigned char *
+vport_get_addr(const struct vport *vport)
+{
+	return vport->ops->get_addr(vport);
+}
+
+/**
+ * vport_get_dp_port - retrieve attached datapath port
+ *
+ * @vport: vport from which to retrieve the datapath port.
+ *
+ * Retrieves the attached datapath port or null if not attached. Either RTNL
+ * lock or rcu_read_lock must be held for the entire duration that the datapath
+ * port is being accessed.
+ */
+struct dp_port *
+vport_get_dp_port(const struct vport *vport)
+{
+	return rcu_dereference(vport->dp_port);
+}
+
+/**
+ * vport_get_kobj - retrieve associated kobj
+ *
+ * @vport: vport from which to retrieve the associated kobj
+ *
+ * Retrieves the associated kobj or null if no kobj. The returned kobj is
+ * valid for as long as the vport exists.
+ */
+struct kobject *
+vport_get_kobj(const struct vport *vport)
+{
+	if (vport->ops->get_kobj)
+		return vport->ops->get_kobj(vport);
+	else
+		return NULL;
+}
+
+/**
+ * vport_get_flags - retrieve device flags
+ *
+ * @vport: vport from which to retrieve the flags
+ *
+ * Retrieves the flags of the given device. Either RTNL lock or rcu_read_lock
+ * must be held.
+ */
+unsigned
+vport_get_flags(const struct vport *vport)
+{
+	return vport->ops->get_dev_flags(vport);
+}
+
+/**
+ * vport_is_running - check whether device is running
+ *
+ * @vport: vport on which to check status.
+ *
+ * Checks whether the given device is running. Either RTNL lock or
+ * rcu_read_lock must be held.
+ */
+int
+vport_is_running(const struct vport *vport)
+{
+	return vport->ops->is_running(vport);
+}
+
+/**
+ * vport_get_operstate - retrieve device operating state
+ *
+ * @vport: vport from which to check status
+ *
+ * Retrieves the RFC2863 operstate of the given device. Either RTNL lock or
+ * rcu_read_lock must be held.
+ */
+unsigned char
+vport_get_operstate(const struct vport *vport)
+{
+	return vport->ops->get_operstate(vport);
+}
+
+/**
+ * vport_get_ifindex - retrieve device system interface index
+ *
+ * @vport: vport from which to retrieve index
+ *
+ * Retrieves the system interface index of the given device. Not all devices
+ * will have system indexes, in which case the index of the datapath local
+ * port is returned. Returns a negative index on error: -EAGAIN if the vport
+ * is not attached to a datapath or the datapath's local port cannot be
+ * resolved. Either RTNL lock or rcu_read_lock must be held.
+ */
+int
+vport_get_ifindex(const struct vport *vport)
+{
+	const struct dp_port *dp_port;
+	const struct dp_port *local_port;
+
+	if (vport->ops->get_ifindex)
+		return vport->ops->get_ifindex(vport);
+
+	/* If we don't actually have an ifindex, use the local port's.
+	 * Userspace doesn't check it anyways. */
+	dp_port = vport_get_dp_port(vport);
+	if (!dp_port)
+		return -EAGAIN;
+
+	/* This function may run under rcu_read_lock alone, so fetch the
+	 * ports[] slot with rcu_dereference() as do_output() in actions.c
+	 * does, and do not follow a slot that is empty or not yet bound. */
+	local_port = rcu_dereference(dp_port->dp->ports[ODPP_LOCAL]);
+	if (!local_port || !local_port->vport)
+		return -EAGAIN;
+
+	return vport_get_ifindex(local_port->vport);
+}
+
+/**
+ * vport_get_iflink - retrieve device system link index
+ *
+ * @vport: vport from which to retrieve index
+ *
+ * Retrieves the system link index of the given device. The link is the index
+ * of the interface on which the packet will actually be sent. In most cases
+ * this is the same as the ifindex but may be different for tunnel devices.
+ * Returns a negative index on error. Either RTNL lock or rcu_read_lock must
+ * be held.
+ */
+int
+vport_get_iflink(const struct vport *vport)
+{
+	if (vport->ops->get_iflink)
+		return vport->ops->get_iflink(vport);
+
+	/* If we don't have an iflink, use the ifindex. In most cases they
+	 * are the same. */
+	return vport_get_ifindex(vport);
+}
+
+/**
+ * vport_get_mtu - retrieve device MTU (for kernel callers)
+ *
+ * @vport: vport from which to retrieve MTU
+ *
+ * Retrieves the MTU of the given device. Either RTNL lock or rcu_read_lock
+ * must be held.
+ */
+int
+vport_get_mtu(const struct vport *vport)
+{
+	return vport->ops->get_mtu(vport);
+}
+
+/**
+ * vport_receive - pass up received packet to the datapath for processing
+ *
+ * @vport: vport that received the packet
+ * @skb: skb that was received
+ *
+ * Must be called with rcu_read_lock and bottom halves disabled. The packet
+ * cannot be shared and skb->data should point to the Ethernet header.
+ *
+ * If @vport is not attached to a datapath the packet is freed here and
+ * reported through vport_record_error() as VPORT_E_RX_DROPPED. Otherwise it
+ * is handed to dp_process_received_packet().
+ */
+void
+vport_receive(struct vport *vport, struct sk_buff *skb)
+{
+	struct dp_port *dp_port = vport_get_dp_port(vport);
+
+	if (!dp_port) {
+		/* This function returns void, so the caller cannot tell that
+		 * the packet was not passed on; free it here rather than
+		 * leaking it. */
+		vport_record_error(vport, VPORT_E_RX_DROPPED);
+		kfree_skb(skb);
+		return;
+	}
+
+	if (vport->ops->flags & VPORT_F_GEN_STATS) {
+		struct vport_percpu_stats *stats;
+
+		local_bh_disable();
+
+		stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
+		stats->rx_packets++;
+		stats->rx_bytes += skb->len;
+
+		local_bh_enable();
+	}
+
+	/* Port types that do not supply a tunnel ID get it cleared here. */
+	if (!(vport->ops->flags & VPORT_F_TUN_ID))
+		OVS_CB(skb)->tun_id = 0;
+
+	dp_process_received_packet(dp_port, skb);
+}
+
+/**
+ * vport_send - send a packet on a device
+ *
+ * @vport: vport on which to send the packet
+ * @skb: skb to send
+ *
+ * Sends the given packet and returns the length of data sent. When the
+ * generic stats layer is in use, a positive return from the device type's
+ * send callback is added to the per-CPU transmit counters. Either RTNL
+ * lock or rcu_read_lock must be held.
+ */
+int
+vport_send(struct vport *vport, struct sk_buff *skb)
+{
+	int sent;
+
+	sent = vport->ops->send(vport, skb);
+
+	if (vport->ops->flags & VPORT_F_GEN_STATS && sent > 0) {
+		struct vport_percpu_stats *stats;
+
+		local_bh_disable();
+
+		stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
+		stats->tx_packets++;
+		stats->tx_bytes += sent;
+
+		local_bh_enable();
+	}
+
+	return sent;
+}
+
+/**
+ * vport_record_error - indicate device error to generic stats layer
+ *
+ * @vport: vport that encountered the error
+ * @err_type: one of enum vport_err_type types to indicate the error type
+ *
+ * If using the vport generic stats layer indicate that an error of the given
+ * type has occurred.
+ */
+void
+vport_record_error(struct vport *vport, enum vport_err_type err_type)
+{
+	struct vport_err_stats *errs = &vport->err_stats;
+
+	/* Error counters are only kept for types using the generic stats. */
+	if (!(vport->ops->flags & VPORT_F_GEN_STATS))
+		return;
+
+	spin_lock_bh(&errs->lock);
+
+	switch (err_type) {
+	case VPORT_E_RX_DROPPED:
+		errs->rx_dropped++;
+		break;
+
+	case VPORT_E_RX_ERROR:
+		errs->rx_errors++;
+		break;
+
+	case VPORT_E_RX_FRAME:
+		errs->rx_frame_err++;
+		break;
+
+	case VPORT_E_RX_OVER:
+		errs->rx_over_err++;
+		break;
+
+	case VPORT_E_RX_CRC:
+		errs->rx_crc_err++;
+		break;
+
+	case VPORT_E_TX_DROPPED:
+		errs->tx_dropped++;
+		break;
+
+	case VPORT_E_TX_ERROR:
+		errs->tx_errors++;
+		break;
+
+	case VPORT_E_COLLISION:
+		errs->collisions++;
+		break;
+	}
+
+	spin_unlock_bh(&errs->lock);
+}
+
+/**
+ * vport_gen_ether_addr - generate an Ethernet address
+ *
+ * @addr: location to store generated address
+ *
+ * Fills @addr with a random Ethernet address and then overwrites the first
+ * three bytes with the Nicira OUI, for use when creating a device that has
+ * no natural address.
+ */
+void
+vport_gen_ether_addr(u8 *addr)
+{
+	random_ether_addr(addr);
+
+	/* Replace the OUI with the Nicira one. */
+	addr[0] = 0x00;
+	addr[1] = 0x23;
+	addr[2] = 0x20;
+
+	/* The top bit of the fourth byte marks the address as random. */
+	addr[3] |= 0x80;
+}
diff --git a/datapath/vport.h b/datapath/vport.h
new file mode 100644
index 00000000..7b71226c
--- /dev/null
+++ b/datapath/vport.h
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2010 Nicira Networks.
+ * Distributed under the terms of the GNU GPL version 2.
+ *
+ * Significant portions of this file may be copied from parts of the Linux
+ * kernel, by Linus Torvalds and others.
+ */
+
+#ifndef VPORT_H
+#define VPORT_H 1
+
+#include
+#include
+#include
+
+#include "datapath.h"
+#include "openvswitch/datapath-protocol.h"
+
+struct vport;
+struct dp_port;
+
+/* The following definitions are for users of the vport subsystem: */
+
+void vport_lock(void);
+void vport_unlock(void);
+
+int vport_init(void);
+void vport_exit(void);
+
+int vport_add(const struct odp_vport_add __user *);
+int vport_mod(const struct odp_vport_mod __user *);
+int vport_del(const char __user *udevname);
+
+int vport_stats_get(struct odp_vport_stats_req __user *);
+int vport_ether_get(struct odp_vport_ether __user *);
+int vport_ether_set(struct odp_vport_ether __user *);
+int vport_mtu_get(struct odp_vport_mtu __user *);
+int vport_mtu_set(struct odp_vport_mtu __user *);
+
+struct vport *__vport_add(const char *name, const char *type, const void __user *config);
+int __vport_mod(struct vport *, const void __user *config);
+int __vport_del(struct vport *);
+
+struct vport *vport_locate(const char *name);
+
+int vport_attach(struct vport *, struct dp_port *);
+int vport_detach(struct vport *);
+
+int vport_set_mtu(struct vport *, int mtu);
+int vport_set_addr(struct vport *, const unsigned char *);
+
+const char *vport_get_name(const struct vport *);
+const char *vport_get_type(const struct vport *);
+const unsigned char *vport_get_addr(const struct vport *);
+
+struct dp_port *vport_get_dp_port(const struct vport *);
+
+struct kobject *vport_get_kobj(const struct vport *);
+
+unsigned vport_get_flags(const struct vport *);
+int vport_is_running(const struct vport *);
+unsigned char vport_get_operstate(const struct vport *);
+
+int vport_get_ifindex(const struct vport *);
+int vport_get_iflink(const struct vport *);
+
+int vport_get_mtu(const struct vport *);
+
+int vport_send(struct vport *, struct sk_buff *);
+
+/* The following definitions are for implementers of vport devices: */
+
+struct vport_percpu_stats {
+	u64 rx_bytes;		/* Sum of skb->len seen by vport_receive(). */
+	u64 rx_packets;		/* Packets counted by vport_receive(). */
+	u64 tx_bytes;		/* Sum of positive returns from ops->send(). */
+	u64 tx_packets;		/* Packets counted by vport_send(). */
+};
+
+struct vport_err_stats {
+	spinlock_t lock;	/* Taken by vport_record_error() around updates. */
+
+	u64 rx_dropped;
+	u64 rx_errors;
+	u64 rx_frame_err;
+	u64 rx_over_err;
+	u64 rx_crc_err;
+	u64 tx_dropped;
+	u64 tx_errors;
+	u64 collisions;
+};
+
+struct vport {
+	struct hlist_node hash_node;	/* Link in the by-name hash table. */
+	const struct vport_ops *ops;	/* Type-specific callbacks and flags. */
+	struct dp_port *dp_port;	/* Attached port, NULL if detached; read via RCU. */
+
+	struct vport_percpu_stats *percpu_stats;	/* Only with VPORT_F_GEN_STATS. */
+	struct vport_err_stats err_stats;
+};
+
+#define VPORT_F_REQUIRED	(1 << 0) /* If init fails, module loading fails. */
+#define VPORT_F_GEN_STATS	(1 << 1) /* Track stats at the generic layer. */
+#define VPORT_F_TUN_ID		(1 << 2) /* Sets OVS_CB(skb)->tun_id. */
+
+/**
+ * struct vport_ops - definition of a type of virtual port
+ *
+ * @type: Name of port type, such as "netdev" or "internal" to be matched
+ * against the device type when a new port needs to be created.
+ * @flags: Flags of type VPORT_F_* that influence how the generic vport layer
+ * handles this vport.
+ * @init: Called at module initialization. If VPORT_F_REQUIRED is set then the
+ * failure of this function will cause the module to not load. If the flag is
+ * not set and initialization fails then no vports of this type can be created.
+ * @exit: Called at module unload.
+ * @create: Create a new vport called 'name' with vport type specific
+ * configuration 'config' (which must be copied from userspace before use). On
+ * success must allocate a new vport using vport_alloc().
+ * @modify: Modify the configuration of an existing vport. May be null if
+ * modification is not supported.
+ * @destroy: Destroy and free a vport using vport_free(). Prior to destruction
+ * @detach will be called followed by synchronize_rcu().
+ * @attach: Attach a previously created vport to a datapath. After attachment
+ * packets may be sent and received. Prior to attachment any packets may be
+ * silently discarded. May be null if not needed.
+ * @detach: Detach a vport from a datapath. May be null if not needed.
+ * @set_mtu: Set the device's MTU. May be null if not supported.
+ * @set_addr: Set the device's MAC address. May be null if not supported.
+ * @get_name: Get the device's name.
+ * @get_addr: Get the device's MAC address.
+ * @get_kobj: Get the kobj associated with the device (may return null).
+ * @get_stats: Fill in the transmit/receive stats. May be null if stats are
+ * not supported or if generic stats are in use. If defined overrides
+ * VPORT_F_GEN_STATS.
+ * @get_dev_flags: Get the device's flags.
+ * @is_running: Checks whether the device is running.
+ * @get_operstate: Get the device's operating state.
+ * @get_ifindex: Get the system interface index associated with the device.
+ * May be null if the device does not have an ifindex.
+ * @get_iflink: Get the system interface index associated with the device that
+ * will be used to send packets (may be different than ifindex for tunnels).
+ * May be null if the device does not have an iflink.
+ * @get_mtu: Get the device's MTU.
+ * @send: Send a packet on the device. Returns the length of the packet sent.
+ */
+struct vport_ops {
+	const char *type;
+	u32 flags;
+
+	/* Called at module init and exit respectively. */
+	int (*init)(void);
+	void (*exit)(void);
+
+	/* Called with RTNL lock. */
+	struct vport *(*create)(const char *name, const void __user *config);
+	int (*modify)(struct vport *, const void __user *config);
+	int (*destroy)(struct vport *);
+
+	int (*attach)(struct vport *);
+	int (*detach)(struct vport *);
+
+	int (*set_mtu)(struct vport *, int mtu);
+	int (*set_addr)(struct vport *, const unsigned char *);
+
+	/* Called with rcu_read_lock or RTNL lock. */
+	const char *(*get_name)(const struct vport *);
+	const unsigned char *(*get_addr)(const struct vport *);
+	struct kobject *(*get_kobj)(const struct vport *);
+	int (*get_stats)(const struct vport *, struct odp_vport_stats *);
+
+	unsigned (*get_dev_flags)(const struct vport *);
+	int (*is_running)(const struct vport *);
+	unsigned char (*get_operstate)(const struct vport *);
+
+	int (*get_ifindex)(const struct vport *);
+	int (*get_iflink)(const struct vport *);
+
+	int (*get_mtu)(const struct vport *);
+
+	int (*send)(struct vport *, struct sk_buff *);
+};
+
+enum vport_err_type {
+	VPORT_E_RX_DROPPED,
+	VPORT_E_RX_ERROR,
+	VPORT_E_RX_FRAME,
+	VPORT_E_RX_OVER,
+	VPORT_E_RX_CRC,
+	VPORT_E_TX_DROPPED,
+	VPORT_E_TX_ERROR,
+	VPORT_E_COLLISION,
+};
+
+struct vport *vport_alloc(int priv_size, const struct vport_ops *);
+void vport_free(struct vport *);
+
+#define VPORT_ALIGN 8	/* Alignment of the private area that follows struct vport. */
+
+/**
+ * vport_priv - access private data area of vport
+ *
+ * @vport: vport to access
+ *
+ * If a nonzero size was passed in priv_size of vport_alloc() a private data
+ * area was allocated on creation. This allows that area to be accessed and
+ * used for any purpose needed by the vport implementer.
+ */
+static inline void *
+vport_priv(const struct vport *vport)
+{
+	return (u8 *)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN);
+}
+
+/**
+ * vport_from_priv - lookup vport from private data pointer
+ *
+ * @priv: Start of private data area.
+ *
+ * It is sometimes useful to translate from a pointer to the private data
+ * area to the vport, such as in the case where the private data pointer is
+ * the result of a hash table lookup. @priv must point to the start of the
+ * private data area.
+ */
+static inline struct vport *
+vport_from_priv(const void *priv)
+{
+	/* Exact inverse of vport_priv(): step back in bytes, not via void * arithmetic. */
+	return (struct vport *)((u8 *)priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
+}
+
+void vport_receive(struct vport *, struct sk_buff *);
+void vport_record_error(struct vport *, enum vport_err_type err_type);
+void vport_gen_ether_addr(u8 *addr);
+
+#endif /* vport.h */
diff --git a/include/openvswitch/datapath-protocol.h b/include/openvswitch/datapath-protocol.h index 6ddff014..94723ef0 100644 --- a/include/openvswitch/datapath-protocol.h +++ b/include/openvswitch/datapath-protocol.h @@ -61,8 +61,8 @@ #define ODP_GET_LISTEN_MASK _IOW('O', 5, int) #define ODP_SET_LISTEN_MASK _IOR('O', 6, int) -#define ODP_PORT_ADD _IOR('O', 7, struct odp_port) -#define ODP_PORT_DEL _IOR('O', 8, int) +#define ODP_PORT_ATTACH _IOR('O', 7, struct odp_port) +#define ODP_PORT_DETACH _IOR('O', 8, int) #define ODP_PORT_QUERY _IOWR('O', 9, struct odp_port) #define ODP_PORT_LIST _IOWR('O', 10, struct odp_portvec) @@ -80,6 +80,15 @@ #define ODP_SET_SFLOW_PROBABILITY _IOR('O', 19, int) #define ODP_GET_SFLOW_PROBABILITY _IOW('O', 20, int) +#define ODP_VPORT_ADD _IOR('O', 21, struct odp_vport_add) +#define ODP_VPORT_MOD _IOR('O', 22, struct odp_vport_mod) +#define ODP_VPORT_DEL _IO('O', 23) +#define ODP_VPORT_STATS_GET _IOWR('O', 24, struct odp_vport_stats_req) +#define ODP_VPORT_ETHER_GET _IOWR('O', 25, struct odp_vport_ether) +#define ODP_VPORT_ETHER_SET _IOW('O', 26, struct odp_vport_ether) +#define ODP_VPORT_MTU_GET _IOWR('O', 27, struct odp_vport_mtu) +#define ODP_VPORT_MTU_SET _IOW('O', 28, struct odp_vport_mtu) + struct odp_stats { /* Flows. */ __u32 n_flows; /* Number of flows in flow table.
*/ @@ -356,6 +365,48 @@ struct odp_execute { __u32 length; };
+#define VPORT_TYPE_SIZE 16
+struct odp_vport_add {
+	char port_type[VPORT_TYPE_SIZE];	/* Port type name - presumably matched against vport_ops.type; confirm in vport_add(). */
+	char devname[16];		/* Device name; 16 == IFNAMSIZ. */
+	void *config;			/* Type-specific configuration - presumably the userspace pointer passed to __vport_add(); confirm. */
+};
+
+struct odp_vport_mod {
+	char devname[16];		/* Device name; 16 == IFNAMSIZ. */
+	void *config;			/* Type-specific configuration - presumably the userspace pointer passed to __vport_mod(); confirm. */
+};
+
+struct odp_vport_stats {
+	__u64 rx_packets;
+	__u64 tx_packets;
+	__u64 rx_bytes;
+	__u64 tx_bytes;
+	__u64 rx_dropped;
+	__u64 tx_dropped;
+	__u64 rx_errors;
+	__u64 tx_errors;
+	__u64 rx_frame_err;
+	__u64 rx_over_err;
+	__u64 rx_crc_err;
+	__u64 collisions;
+};
+
+struct odp_vport_stats_req {
+	char devname[16];		/* Device name; 16 == IFNAMSIZ. */
+	struct odp_vport_stats stats;
+};
+
+struct odp_vport_ether {
+	char devname[16];		/* Device name; 16 == IFNAMSIZ. */
+	unsigned char ether_addr[ETH_ALEN];
+};
+
+struct odp_vport_mtu {
+	char devname[16];		/* Device name; 16 == IFNAMSIZ. */
+	__u16 mtu;
+};
+
 /* Values below this cutoff are 802.3 packets and the two bytes * following MAC addresses are used as a frame length. Otherwise, the * two bytes are used as the Ethernet type. diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index 1eaba744..593127f4 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -230,7 +230,7 @@ dpif_linux_port_add(struct dpif *dpif_, const char *devname, uint16_t flags, memset(&port, 0, sizeof port); strncpy(port.devname, devname, sizeof port.devname); port.flags = flags; - error = do_ioctl(dpif_, ODP_PORT_ADD, &port); + error = do_ioctl(dpif_, ODP_PORT_ATTACH, &port); if (!error) { *port_no = port.port; } @@ -241,7 +241,7 @@ static int dpif_linux_port_del(struct dpif *dpif_, uint16_t port_no) { int tmp = port_no; - return do_ioctl(dpif_, ODP_PORT_DEL, &tmp); + return do_ioctl(dpif_, ODP_PORT_DETACH, &tmp); } static int -- 2.30.2