From: Ben Pfaff Date: Mon, 11 May 2009 21:26:44 +0000 (-0700) Subject: datapath: Add support for "internal" ports similar to the local port. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3c8085b05c4fb2226ccc986c1d24ca573b9539d3;p=openvswitch datapath: Add support for "internal" ports similar to the local port. The datapath has supported a simulated "local port" for a long time, but it has never been possible to create additional ports with the same characteristics. One way to do this is using the veth driver, but this is somewhat awkward, since there is no desire to create a pair of devices; one suffices. The immediate purpose for this feature is to allow an IP address to be put on both a physical interface and a tagged VLAN attached to that interface on Xen. --- diff --git a/datapath/actions.c b/datapath/actions.c index 9215b11c..a065e7e8 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -329,7 +329,7 @@ int dp_xmit_skb(struct sk_buff *skb) if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) { printk(KERN_WARNING "%s: dropped over-mtu packet: %d > %d\n", - dp->netdev->name, packet_length(skb), skb->dev->mtu); + dp_name(dp), packet_length(skb), skb->dev->mtu); kfree_skb(skb); return -E2BIG; } @@ -344,27 +344,20 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port) { struct net_bridge_port *p; + struct net_device *dev; if (!skb) goto error; - if (out_port == ODPP_LOCAL) { - struct net_device *dev = dp->netdev; - if (!dev) - goto error; -#ifdef SUPPORT_SNAT - snat_local_in(skb); -#endif - dp_dev_recv(dev, skb); - return; - } - p = dp->ports[out_port]; if (!p) goto error; - skb->dev = p->dev; - dp_xmit_skb(skb); + dev = skb->dev = p->dev; + if (is_dp_dev(dev)) + dp_dev_recv(dev, skb); + else + dp_xmit_skb(skb); return; error: diff --git a/datapath/brc_sysfs_dp.c b/datapath/brc_sysfs_dp.c index b5ac3b9c..6d12b04e 100644 --- a/datapath/brc_sysfs_dp.c +++ b/datapath/brc_sysfs_dp.c @@ -32,7 +32,6 @@ 
#define BRC_DEVICE_ATTR DEVICE_ATTR #endif - /* * Common code for storing bridge parameters. */ @@ -62,7 +61,7 @@ static ssize_t store_bridge_parm(struct class_device *d, */ if (val != 0) { printk("%s: xxx writing dp parms not supported yet!\n", - dp->netdev->name); + dp_name(dp)); } #endif return len; @@ -88,7 +87,7 @@ static void set_forward_delay(struct datapath *dp, unsigned long val) if (br_is_root_bridge(br)) br->bridge_forward_delay = delay; #else - printk("%s: xxx attempt to set_forward_delay()\n", dp->netdev->name); + printk("%s: xxx attempt to set_forward_delay()\n", dp_name(dp)); #endif } @@ -118,7 +117,7 @@ static void set_hello_time(struct datapath *dp, unsigned long val) if (br_is_root_bridge(br)) br->bridge_hello_time = t; #else - printk("%s: xxx attempt to set_hello_time()\n", dp->netdev->name); + printk("%s: xxx attempt to set_hello_time()\n", dp_name(dp)); #endif } @@ -150,7 +149,7 @@ static void set_max_age(struct datapath *dp, unsigned long val) if (br_is_root_bridge(br)) br->bridge_max_age = t; #else - printk("%s: xxx attempt to set_max_age()\n", dp->netdev->name); + printk("%s: xxx attempt to set_max_age()\n", dp_name(dp)); #endif } @@ -177,7 +176,7 @@ static void set_ageing_time(struct datapath *dp, unsigned long val) #if 0 br->ageing_time = clock_t_to_jiffies(val); #else - printk("%s: xxx attempt to set_ageing_time()\n", dp->netdev->name); + printk("%s: xxx attempt to set_ageing_time()\n", dp_name(dp)); #endif } @@ -221,7 +220,7 @@ static ssize_t store_stp_state(struct class_device *d, br_stp_set_enabled(br, val); rtnl_unlock(); #else - printk("%s: xxx attempt to set_stp_state()\n", dp->netdev->name); + printk("%s: xxx attempt to set_stp_state()\n", dp_name(dp)); #endif return len; @@ -246,7 +245,7 @@ static void set_priority(struct datapath *dp, unsigned long val) #if 0 br_stp_set_bridge_priority(br, (u16) val); #else - printk("%s: xxx attempt to set_priority()\n", dp->netdev->name); + printk("%s: xxx attempt to set_priority()\n", 
dp_name(dp)); #endif } @@ -272,7 +271,7 @@ static ssize_t show_bridge_id(struct class_device *d, char *buf) { struct datapath *dp = dp_dev_get_dp(to_net_dev(d)); - const unsigned char *addr = dp->netdev->dev_addr; + const unsigned char *addr = dp->ports[ODPP_LOCAL]->dev->dev_addr; /* xxx Do we need a lock of some sort? */ return sprintf(buf, "%.2x%.2x.%.2x%.2x%.2x%.2x%.2x%.2x\n", @@ -423,7 +422,7 @@ static ssize_t store_group_addr(struct class_device *d, br->group_addr[i] = new_addr[i]; spin_unlock_bh(&br->lock); #else - printk("%s: xxx attempt to store_group_addr()\n", dp->netdev->name); + printk("%s: xxx attempt to store_group_addr()\n", dp_name(dp)); #endif return len; } @@ -469,14 +468,13 @@ static struct attribute_group bridge_group = { */ int brc_sysfs_add_dp(struct datapath *dp) { - struct net_device *dev = dp->netdev; - struct kobject *kobj = to_kobj(dev); + struct kobject *kobj = to_kobj(dp->ports[ODPP_LOCAL]->dev); int err; err = sysfs_create_group(kobj, &bridge_group); if (err) { pr_info("%s: can't create group %s/%s\n", - __func__, dev->name, bridge_group.name); + __func__, dp_name(dp), bridge_group.name); goto out1; } @@ -489,29 +487,28 @@ int brc_sysfs_add_dp(struct datapath *dp) err = kobject_register(&dp->ifobj); if (err) { pr_info("%s: can't add kobject (directory) %s/%s\n", - __FUNCTION__, dev->name, dp->ifobj.name); + __FUNCTION__, dp_name(dp), dp->ifobj.name); goto out2; } #else br->ifobj = kobject_create_and_add(SYSFS_BRIDGE_PORT_SUBDIR, kobj); if (!br->ifobj) { pr_info("%s: can't add kobject (directory) %s/%s\n", - __func__, dev->name, SYSFS_BRIDGE_PORT_SUBDIR); + __func__, dp_name(dp), SYSFS_BRIDGE_PORT_SUBDIR); goto out2; } #endif return 0; out2: - sysfs_remove_group(to_kobj(dev), &bridge_group); + sysfs_remove_group(kobj, &bridge_group); out1: return err; } int brc_sysfs_del_dp(struct datapath *dp) { - struct net_device *dev = dp->netdev; - struct kobject *kobj = to_kobj(dev); + struct kobject *kobj = to_kobj(dp->ports[ODPP_LOCAL]->dev); 
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) kobject_unregister(&dp->ifobj); diff --git a/datapath/brc_sysfs_if.c b/datapath/brc_sysfs_if.c index 12fa3778..245cd7f6 100644 --- a/datapath/brc_sysfs_if.c +++ b/datapath/brc_sysfs_if.c @@ -248,7 +248,7 @@ static ssize_t brport_store(struct kobject * kobj, } #else printk("%s: xxx writing port parms not supported yet!\n", - p->dp->netdev->name); + dp_name(p->dp)); #endif return ret; } @@ -291,7 +291,8 @@ int brc_sysfs_add_if(struct net_bridge_port *p) if (err) goto err_put; - err = sysfs_create_link(&p->kobj, &dp->netdev->class_dev.kobj, + err = sysfs_create_link(&p->kobj, + &dp->ports[ODPP_LOCAL]->dev->class_dev.kobj, SYSFS_BRIDGE_PORT_LINK); if (err) goto err_del; diff --git a/datapath/brcompat.c b/datapath/brcompat.c index c9aa8904..4de694f4 100644 --- a/datapath/brcompat.c +++ b/datapath/brcompat.c @@ -46,7 +46,7 @@ get_dp_ifindices(int *indices, int num) struct datapath *dp = get_dp(i); if (!dp) continue; - indices[index++] = dp->netdev->ifindex; + indices[index++] = dp->ports[ODPP_LOCAL]->dev->ifindex; } rcu_read_unlock(); diff --git a/datapath/datapath.c b/datapath/datapath.c index d3dd6909..ac7219a0 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -141,7 +141,7 @@ static int dp_fill_ifinfo(struct sk_buff *skb, hdr->ifi_change = 0; NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); - NLA_PUT_U32(skb, IFLA_MASTER, dp->netdev->ifindex); + NLA_PUT_U32(skb, IFLA_MASTER, dp->ports[ODPP_LOCAL]->dev->ifindex); NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); NLA_PUT_U8(skb, IFLA_OPERSTATE, operstate); @@ -183,6 +183,7 @@ errout: static int create_dp(int dp_idx, const char __user *devnamep) { + struct net_device *dp_dev; char devname[IFNAMSIZ]; struct datapath *dp; int err; @@ -222,8 +223,9 @@ static int create_dp(int dp_idx, const char __user *devnamep) init_waitqueue_head(&dp->waitqueue); /* Setup our datapath device */ - err = dp_dev_setup(dp, devname); - if (err) + dp_dev = dp_dev_create(dp, devname, ODPP_LOCAL); + err 
= PTR_ERR(dp_dev); + if (IS_ERR(dp_dev)) goto err_free_dp; err = -ENOMEM; @@ -232,7 +234,7 @@ static int create_dp(int dp_idx, const char __user *devnamep) goto err_destroy_dp_dev; INIT_LIST_HEAD(&dp->port_list); - err = new_nbp(dp, dp->netdev, ODPP_LOCAL); + err = new_nbp(dp, dp_dev, ODPP_LOCAL); if (err) goto err_destroy_table; @@ -261,11 +263,11 @@ err_destroy_stats_percpu: free_percpu(dp->stats_percpu); #endif err_destroy_local_port: - dp_del_port(dp->ports[ODPP_LOCAL]); + dp_del_port(dp->ports[ODPP_LOCAL], NULL); err_destroy_table: dp_table_destroy(dp->table, 0); err_destroy_dp_dev: - dp_dev_destroy(dp); + dp_dev_destroy(dp_dev); err_free_dp: kfree(dp); err_put_module: @@ -277,7 +279,7 @@ err: return err; } -static void do_destroy_dp(struct datapath *dp) +static void do_destroy_dp(struct datapath *dp, struct list_head *dp_devs) { struct net_bridge_port *p, *n; int i; @@ -287,20 +289,16 @@ static void do_destroy_dp(struct datapath *dp) kthread_stop(dp->dp_task); #endif - /* Drop references to DP. */ - list_for_each_entry_safe (p, n, &dp->port_list, node) - dp_del_port(p); - if (dp_del_dp_hook) dp_del_dp_hook(dp); + /* Drop references to DP. */ + list_for_each_entry_safe (p, n, &dp->port_list, node) + dp_del_port(p, dp_devs); + rcu_assign_pointer(dps[dp->dp_idx], NULL); synchronize_rcu(); - /* Destroy dp->netdev. (Must follow deleting switch ports since the - * ODPP_LOCAL port has a reference to it.) */ - dp_dev_destroy(dp); - /* Wait until no longer in use, then destroy it. 
*/ synchronize_rcu(); dp_table_destroy(dp->table, 1); @@ -315,8 +313,9 @@ static void do_destroy_dp(struct datapath *dp) static int destroy_dp(int dp_idx) { - struct net_device *dev = NULL; + struct dp_dev *dp_dev, *next; struct datapath *dp; + LIST_HEAD(dp_devs); int err; rtnl_lock(); @@ -326,15 +325,14 @@ static int destroy_dp(int dp_idx) if (!dp) goto err_unlock; - dev = dp->netdev; - do_destroy_dp(dp); + do_destroy_dp(dp, &dp_devs); err = 0; err_unlock: mutex_unlock(&dp_mutex); rtnl_unlock(); - if (dev) - free_netdev(dev); + list_for_each_entry_safe (dp_dev, next, &dp_devs, list) + free_netdev(dp_dev->dev); return err; } @@ -358,17 +356,13 @@ static int new_nbp(struct datapath *dp, struct net_device *dev, int port_no) #endif p->dp = dp; p->dev = dev; - if (port_no != ODPP_LOCAL) + if (!is_dp_dev(dev)) rcu_assign_pointer(dev->br_port, p); else { - /* For consistency it would make sense to assign dev->br_port - * here too, but we can't because that would cause packets - * received on the local port to get caught in - * dp_frame_hook(). On modern Linux kernels that's not a big - * deal--we would just return an appropriate value--but on - * Linux 2.4 there's no way for the frame hook to pass along - * the skbuff to the rest of the network stack. So we're - * stuck with the status quo. */ + /* It would make sense to assign dev->br_port here too, but + * that causes packets received on internal ports to get caught + * in dp_frame_hook(). In turn dp_frame_hook() can reject them + * back to network stack, but that's a waste of time. 
*/ } rcu_assign_pointer(dp->ports[port_no], p); list_add_rcu(&p->node, &dp->port_list); @@ -403,19 +397,27 @@ static int add_port(int dp_idx, struct odp_port __user *portp) if (!dp) goto out_unlock_rtnl; - err = -ENODEV; - dev = dev_get_by_name(&init_net, port.devname); - if (!dev) + err = -EEXIST; + if (dp->ports[port_no]) goto out_unlock_dp; - err = -EINVAL; - if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER || - is_dp_dev(dev)) - goto out_put; + if (!(port.flags & ODP_PORT_INTERNAL)) { + err = -ENODEV; + dev = dev_get_by_name(&init_net, port.devname); + if (!dev) + goto out_unlock_dp; - err = -EEXIST; - if (dp->ports[port_no]) - goto out_put; + err = -EINVAL; + if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER || + is_dp_dev(dev)) + goto out_put; + } else { + dev = dp_dev_create(dp, port.devname, port_no); + err = PTR_ERR(dev); + if (IS_ERR(dev)) + goto out_unlock_dp; + dev_hold(dev); + } err = new_nbp(dp, dev, port_no); if (err) @@ -445,12 +447,12 @@ static void free_snat(struct net_bridge_port *p) #endif /* !SUPPORT_SNAT */ } -int dp_del_port(struct net_bridge_port *p) +int dp_del_port(struct net_bridge_port *p, struct list_head *dp_devs) { ASSERT_RTNL(); #ifdef SUPPORT_SYSFS - if ((p->port_no != ODPP_LOCAL) && dp_del_if_hook) + if (!is_dp_dev(p->dev) && dp_del_if_hook) sysfs_remove_link(&p->dp->ifobj, p->dev->name); #endif dp_ifinfo_notify(RTM_DELLINK, p); @@ -467,10 +469,17 @@ int dp_del_port(struct net_bridge_port *p) synchronize_rcu(); free_snat(p); - if ((p->port_no != ODPP_LOCAL) && dp_del_if_hook) { + if (!is_dp_dev(p->dev) && dp_del_if_hook) { dp_del_if_hook(p); } else { dev_put(p->dev); + if (is_dp_dev(p->dev)) { + dp_dev_destroy(p->dev); + if (dp_devs) { + struct dp_dev *dp_dev = dp_dev_priv(p->dev); + list_add(&dp_dev->list, dp_devs); + } + } kfree(p); } @@ -479,8 +488,10 @@ int dp_del_port(struct net_bridge_port *p) static int del_port(int dp_idx, int port_no) { + struct dp_dev *dp_dev, *next; struct net_bridge_port *p; struct 
datapath *dp; + LIST_HEAD(dp_devs); int err; err = -EINVAL; @@ -498,13 +509,15 @@ static int del_port(int dp_idx, int port_no) if (!p) goto out_unlock_dp; - err = dp_del_port(p); + err = dp_del_port(p, &dp_devs); out_unlock_dp: mutex_unlock(&dp->mutex); out_unlock_rtnl: rtnl_unlock(); out: + list_for_each_entry_safe (dp_dev, next, &dp_devs, list) + free_netdev(dp_dev->dev); return err; } @@ -669,7 +682,7 @@ dp_output_control(struct datapath *dp, struct sk_buff *skb, int queue_no, { struct dp_stats_percpu *stats; struct sk_buff_head *queue; - struct odp_msg *header; + int port_no; int err; WARN_ON_ONCE(skb_shared(skb)); @@ -731,9 +744,19 @@ dp_output_control(struct datapath *dp, struct sk_buff *skb, int queue_no, } } + /* Figure out port number. */ + port_no = ODPP_LOCAL; + if (skb->dev) { + if (skb->dev->br_port) + port_no = skb->dev->br_port->port_no; + else if (is_dp_dev(skb->dev)) + port_no = dp_dev_priv(skb->dev)->port_no; + } + /* Append each packet to queue. There will be only one packet unless * we broke up a GSO packet above. */ do { + struct odp_msg *header; struct sk_buff *nskb = skb->next; skb->next = NULL; @@ -750,9 +773,7 @@ dp_output_control(struct datapath *dp, struct sk_buff *skb, int queue_no, header = (struct odp_msg*)__skb_push(skb, sizeof *header); header->type = queue_no; header->length = skb->len; - header->port = (skb->dev && skb->dev->br_port - ? skb->dev->br_port->port_no - : ODPP_LOCAL); + header->port = port_no; header->reserved = 0; header->arg = arg; skb_queue_tail(queue, skb); @@ -1241,6 +1262,7 @@ put_port(const struct net_bridge_port *p, struct odp_port __user *uop) memset(&op, 0, sizeof op); strncpy(op.devname, p->dev->name, sizeof op.devname); op.port = p->port_no; + op.flags = is_dp_dev(p->dev) ? ODP_PORT_INTERNAL : 0; return copy_to_user(uop, &op, sizeof op) ? 
-EFAULT : 0; } @@ -1252,6 +1274,7 @@ query_port(struct datapath *dp, struct odp_port __user *uport) if (copy_from_user(&port, uport, sizeof port)) return -EFAULT; if (port.devname[0]) { + struct net_bridge_port *p; struct net_device *dev; int err; @@ -1261,12 +1284,13 @@ query_port(struct datapath *dp, struct odp_port __user *uport) if (!dev) return -ENODEV; - if (dev == dp->ports[ODPP_LOCAL]->dev) { - err = put_port(dp->ports[ODPP_LOCAL], uport); - } else { - struct net_bridge_port *p = dev->br_port; - err = p && p->dp == dp ? put_port(p, uport) : -ENOENT; + p = dev->br_port; + if (!p && is_dp_dev(dev)) { + struct dp_dev *dp_dev = dp_dev_priv(dev); + if (dp_dev->dp == dp) + p = dp->ports[dp_dev->port_no]; } + err = p && p->dp == dp ? put_port(p, uport) : -ENOENT; dev_put(dev); return err; diff --git a/datapath/datapath.h b/datapath/datapath.h index 1458d084..66bb01cf 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -62,8 +62,6 @@ struct datapath { struct task_struct *dp_task; /* Kernel thread for maintenance. */ #endif - struct net_device *netdev; /* ofX network device. 
*/ - #ifdef SUPPORT_SYSFS struct kobject ifobj; #endif @@ -124,15 +122,19 @@ int dp_table_foreach(struct dp_table *table, void *aux); void dp_process_received_packet(struct sk_buff *, struct net_bridge_port *); -int dp_del_port(struct net_bridge_port *); +int dp_del_port(struct net_bridge_port *, struct list_head *); int dp_output_port(struct datapath *, struct sk_buff *, int out_port, int ignore_no_fwd); int dp_output_control(struct datapath *, struct sk_buff *, int, u32 arg); void dp_set_origin(struct datapath *, u16, struct sk_buff *); -/* Should hold at least RCU read lock when calling */ struct datapath *get_dp(int dp_idx); +static inline const char *dp_name(const struct datapath *dp) +{ + return dp->ports[ODPP_LOCAL]->dev->name; +} + #ifdef CONFIG_XEN int skb_checksum_setup(struct sk_buff *skb); #else diff --git a/datapath/dp_dev.c b/datapath/dp_dev.c index 711ffa40..d0bf9494 100644 --- a/datapath/dp_dev.c +++ b/datapath/dp_dev.c @@ -8,12 +8,7 @@ #include "datapath.h" #include "dp_dev.h" - - -static struct dp_dev *dp_dev_priv(struct net_device *netdev) -{ - return netdev_priv(netdev); -} +#include "snat.h" struct datapath *dp_dev_get_dp(struct net_device *netdev) { @@ -29,9 +24,12 @@ static struct net_device_stats *dp_dev_get_stats(struct net_device *netdev) int dp_dev_recv(struct net_device *netdev, struct sk_buff *skb) { - int len = skb->len; struct dp_dev *dp_dev = dp_dev_priv(netdev); - skb->dev = netdev; + int len; +#ifdef SUPPORT_SNAT + snat_internal_in(skb); +#endif + len = skb->len; skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, netdev); if (in_interrupt()) @@ -57,7 +55,6 @@ static int dp_dev_mac_addr(struct net_device *dev, void *p) static int dp_dev_xmit(struct sk_buff *skb, struct net_device *netdev) { struct dp_dev *dp_dev = dp_dev_priv(netdev); - struct datapath *dp = dp_dev->dp; /* By orphaning 'skb' we will screw up socket accounting slightly, but * the effect is limited to the device queue length. 
If we don't @@ -76,16 +73,16 @@ static int dp_dev_xmit(struct sk_buff *skb, struct net_device *netdev) dp_dev->stats.tx_packets++; dp_dev->stats.tx_bytes += skb->len; - if (skb_queue_len(&dp_dev->xmit_queue) >= dp->netdev->tx_queue_len) { + if (skb_queue_len(&dp_dev->xmit_queue) >= netdev->tx_queue_len) { /* Queue overflow. Stop transmitter. */ - netif_stop_queue(dp->netdev); + netif_stop_queue(netdev); /* We won't see all dropped packets individually, so overrun * error is appropriate. */ dp_dev->stats.tx_fifo_errors++; } skb_queue_tail(&dp_dev->xmit_queue, skb); - dp->netdev->trans_start = jiffies; + netdev->trans_start = jiffies; schedule_work(&dp_dev->xmit_work); @@ -101,10 +98,10 @@ static void dp_dev_do_xmit(struct work_struct *work) while ((skb = skb_dequeue(&dp_dev->xmit_queue)) != NULL) { skb_reset_mac_header(skb); rcu_read_lock_bh(); - dp_process_received_packet(skb, dp->ports[ODPP_LOCAL]); + dp_process_received_packet(skb, dp->ports[dp_dev->port_no]); rcu_read_unlock_bh(); } - netif_wake_queue(dp->netdev); + netif_wake_queue(dp_dev->dev); } static int dp_dev_open(struct net_device *netdev) @@ -162,10 +159,11 @@ do_setup(struct net_device *netdev) } /* Create a datapath device associated with 'dp'. If 'dp_name' is null, - * the device name will be of the form 'ofN' (N is the datapath index). + * the device name will be of the form 'ofN' (N is the datapath index). Returns the new device or + * an error code. * - * Called with RTNL lock and dp_mutex.*/ -int dp_dev_setup(struct datapath *dp, const char *dp_name) + * Called with RTNL lock and dp_mutex. 
*/ +struct net_device *dp_dev_create(struct datapath *dp, const char *dp_name, int port_no) { struct dp_dev *dp_dev; struct net_device *netdev; @@ -174,38 +172,39 @@ int dp_dev_setup(struct datapath *dp, const char *dp_name) if (dp_name) { if (strlen(dp_name) >= IFNAMSIZ) - return -EINVAL; + return ERR_PTR(-EINVAL); strncpy(dev_name, dp_name, sizeof(dev_name)); } else snprintf(dev_name, sizeof dev_name, "of%d", dp->dp_idx); netdev = alloc_netdev(sizeof(struct dp_dev), dev_name, do_setup); if (!netdev) - return -ENOMEM; + return ERR_PTR(-ENOMEM); err = register_netdevice(netdev); if (err) { free_netdev(netdev); - return err; + return ERR_PTR(err); } dp_dev = dp_dev_priv(netdev); dp_dev->dp = dp; + dp_dev->port_no = port_no; + dp_dev->dev = netdev; skb_queue_head_init(&dp_dev->xmit_queue); INIT_WORK(&dp_dev->xmit_work, dp_dev_do_xmit); - dp->netdev = netdev; - return 0; + return netdev; } /* Called with RTNL lock and dp_mutex.*/ -void dp_dev_destroy(struct datapath *dp) +void dp_dev_destroy(struct net_device *netdev) { - struct dp_dev *dp_dev = dp_dev_priv(dp->netdev); + struct dp_dev *dp_dev = dp_dev_priv(netdev); - netif_tx_disable(dp->netdev); + netif_tx_disable(netdev); synchronize_net(); skb_queue_purge(&dp_dev->xmit_queue); - unregister_netdevice(dp->netdev); + unregister_netdevice(netdev); } int is_dp_dev(struct net_device *netdev) diff --git a/datapath/dp_dev.h b/datapath/dp_dev.h index 26f72485..84874390 100644 --- a/datapath/dp_dev.h +++ b/datapath/dp_dev.h @@ -2,14 +2,24 @@ #define DP_DEV_H 1 struct dp_dev { - struct net_device_stats stats; struct datapath *dp; + int port_no; + + struct net_device *dev; + struct net_device_stats stats; struct sk_buff_head xmit_queue; struct work_struct xmit_work; + + struct list_head list; }; -int dp_dev_setup(struct datapath *, const char *); -void dp_dev_destroy(struct datapath *); +static inline struct dp_dev *dp_dev_priv(struct net_device *netdev) +{ + return netdev_priv(netdev); +} + +struct net_device 
*dp_dev_create(struct datapath *, const char *, int port_no); +void dp_dev_destroy(struct net_device *); int dp_dev_recv(struct net_device *, struct sk_buff *); int is_dp_dev(struct net_device *); struct datapath *dp_dev_get_dp(struct net_device *); diff --git a/datapath/dp_notify.c b/datapath/dp_notify.c index 425c0146..2150974d 100644 --- a/datapath/dp_notify.c +++ b/datapath/dp_notify.c @@ -19,7 +19,7 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event, if (event == NETDEV_UNREGISTER && p) { struct datapath *dp = p->dp; mutex_lock(&dp->mutex); - dp_del_port(p); + dp_del_port(p, NULL); mutex_unlock(&dp->mutex); } return NOTIFY_DONE; diff --git a/datapath/snat.c b/datapath/snat.c index 48a01de3..6b9be469 100644 --- a/datapath/snat.c +++ b/datapath/snat.c @@ -157,10 +157,10 @@ done: spin_unlock_irqrestore(&p->lock, flags); } -/* When the packet is bound for a local interface, strip off the fake +/* When the packet is bound for an internal interface, strip off the fake * routing table. 
*/ -void snat_local_in(struct sk_buff *skb) +void snat_internal_in(struct sk_buff *skb) { if (skb->dst == (struct dst_entry *)&__fake_rtable) { dst_release(skb->dst); @@ -299,7 +299,7 @@ handle_arp_snat(struct sk_buff *skb) return 0; } if (s_nbp == nat_nbp) - memcpy(mac_addr, s_nbp->dp->netdev->dev_addr, sizeof(mac_addr)); + memcpy(mac_addr, s_nbp->dp->ports[ODPP_LOCAL]->dev->dev_addr, sizeof(mac_addr)); else if (!is_zero_ether_addr(nat_nbp->snat->mac_addr)) memcpy(mac_addr, nat_nbp->snat->mac_addr, sizeof(mac_addr)); else { @@ -525,7 +525,7 @@ snat_skb(struct datapath *dp, const struct sk_buff *skb, int out_port, } /* Set the source MAC to the OF interface */ - memcpy(eth_hdr(nskb)->h_source, dp->netdev->dev_addr, ETH_ALEN); + memcpy(eth_hdr(nskb)->h_source, dp->ports[ODPP_LOCAL]->dev->dev_addr, ETH_ALEN); update_mapping(p, skb); diff --git a/datapath/snat.h b/datapath/snat.h index 0d0dc6d3..ca7f3525 100644 --- a/datapath/snat.h +++ b/datapath/snat.h @@ -8,7 +8,7 @@ #include "datapath.h" -void snat_local_in(struct sk_buff *skb); +void snat_internal_in(struct sk_buff *skb); int snat_pre_route(struct sk_buff *skb); void snat_skb(struct datapath *dp, const struct sk_buff *skb, int out_port, gfp_t gfp); diff --git a/include/openvswitch/datapath-protocol.h b/include/openvswitch/datapath-protocol.h index e2bbaa9f..3f2f71bd 100644 --- a/include/openvswitch/datapath-protocol.h +++ b/include/openvswitch/datapath-protocol.h @@ -120,10 +120,11 @@ struct odp_msg { /* Followed by packet data. */ }; +#define ODP_PORT_INTERNAL (1 << 0) /* This port is simulated. 
*/ struct odp_port { char devname[16]; /* IFNAMSIZ */ __u16 port; - __u16 reserved1; + __u16 flags; __u32 reserved2; }; diff --git a/lib/dpif.c b/lib/dpif.c index 35b03d9b..f002f6ea 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -275,7 +275,8 @@ dpif_purge(struct dpif *dpif) } int -dpif_port_add(struct dpif *dpif, const char *devname, uint16_t port_no) +dpif_port_add(struct dpif *dpif, const char *devname, uint16_t port_no, + uint16_t flags) { struct odp_port port; @@ -283,6 +284,7 @@ dpif_port_add(struct dpif *dpif, const char *devname, uint16_t port_no) memset(&port, 0, sizeof port); strncpy(port.devname, devname, sizeof port.devname); port.port = port_no; + port.flags = flags; if (!ioctl(dpif->fd, ODP_PORT_ADD, &port)) { VLOG_DBG_RL(&dpmsg_rl, "dp%u: added %s as port %"PRIu16, dpif->minor, devname, port_no); diff --git a/lib/dpif.h b/lib/dpif.h index a540deff..8a4c6662 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -69,7 +69,8 @@ int dpif_get_listen_mask(const struct dpif *, int *listen_mask); int dpif_set_listen_mask(struct dpif *, int listen_mask); int dpif_purge(struct dpif *); -int dpif_port_add(struct dpif *, const char *devname, uint16_t port_no); +int dpif_port_add(struct dpif *, const char *devname, uint16_t port_no, + uint16_t flags); int dpif_port_del(struct dpif *, uint16_t port_no); int dpif_port_query_by_number(const struct dpif *, uint16_t port_no, struct odp_port *); diff --git a/utilities/dpctl.8.in b/utilities/dpctl.8.in index 4c6b6882..34f42812 100644 --- a/utilities/dpctl.8.in +++ b/utilities/dpctl.8.in @@ -68,13 +68,28 @@ Deletes datapath \fIdp\fR. If \fIdp\fR is associated with any network devices, they are automatically removed. .TP -\fBaddif \fIdp netdev\fR... +\fBaddif \fIdp netdev\fR[\fIoption\fR...]... Adds each \fInetdev\fR to the set of network devices datapath \fIdp\fR monitors, where \fIdp\fR is the name of an existing datapath, and \fInetdev\fR is the name of one of the host's network devices, e.g. \fBeth0\fR. 
Once a network device has been added to a datapath, the datapath has complete ownership of the network device's -traffic and the network device appears silent to the rest of the system. +traffic and the network device appears silent to the rest of the +system. + +A \fInetdev\fR may be followed by a comma-separated list of options. +The following options are currently supported: + +.RS +.IP "\fBport=\fIportno\fR" +Specifies \fIportno\fR (a number between 1 and 255) as the port number +at which \fInetdev\fR will be attached. By default, \fBaddif\fR +automatically selects the lowest available port number. + +.IP "\fBinternal\fR" +Instead of attaching an existing \fInetdev\fR, creates an internal +port (analogous to the local port) with that name. +.RE .TP \fBdelif \fIdp netdev\fR... diff --git a/utilities/dpctl.c b/utilities/dpctl.c index 6247af71..15ea4883 100644 --- a/utilities/dpctl.c +++ b/utilities/dpctl.c @@ -382,29 +382,60 @@ do_add_port(const struct settings *s UNUSED, int argc UNUSED, char *argv[]) run(dpif_open(argv[1], &dpif), "opening datapath"); for (i = 2; i < argc; i++) { char *save_ptr = NULL; - char *devname, *port_s; - uint16_t port; + char *devname, *suboptions; + int port = -1; + int flags = 0; int error; - devname = strtok_r(argv[i], "@@", &save_ptr); + devname = strtok_r(argv[i], ",,", &save_ptr); if (!devname) { ovs_error(0, "%s is not a valid network device name", argv[i]); continue; } - if (if_up(devname)) { - failure = true; - continue; - } + suboptions = strtok_r(NULL, "", &save_ptr); + if (suboptions) { + enum { + AP_PORT, + AP_INTERNAL + }; + static char *options[] = { + "port", + "internal" + }; + + while (*suboptions != '\0') { + char *value; + + switch (getsubopt(&suboptions, options, &value)) { + case AP_PORT: + if (!value) { + ovs_error(0, "'port' suboption requires a value"); + } + port = atoi(value); + break; - port_s = strtok_r(NULL, "", &save_ptr); - port = port_s ? 
atoi(port_s) : get_free_port(&dpif); + case AP_INTERNAL: + flags |= ODP_PORT_INTERNAL; + break; - error = dpif_port_add(&dpif, devname, port); + default: + ovs_error(0, "unknown suboption '%s'", value); + break; + } + } + } + if (port < 0) { + port = get_free_port(&dpif); + } + + error = dpif_port_add(&dpif, devname, port, flags); if (error) { ovs_error(error, "adding %s as port %"PRIu16" of %s failed", devname, port, argv[1]); failure = true; + } else if (if_up(devname)) { + failure = true; } } dpif_close(&dpif); @@ -489,7 +520,11 @@ show_dpif(struct dpif *dpif) } query_ports(dpif, &ports, &n_ports); for (i = 0; i < n_ports; i++) { - printf("\tport %u: %s\n", ports[i].port, ports[i].devname); + printf("\tport %u: %s", ports[i].port, ports[i].devname); + if (ports[i].flags & ODP_PORT_INTERNAL) { + printf(" (internal)"); + } + printf("\n"); } free(ports); dpif_close(dpif); diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 24c74769..2a907ebd 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -444,7 +444,9 @@ bridge_reconfigure(void) for (i = 0; i < add_ifaces.n; i++) { const char *if_name = add_ifaces.names[i]; for (;;) { - int error = dpif_port_add(&br->dpif, if_name, next_port_no++); + int internal = cfg_get_bool(0, "iface.%s.internal", if_name); + int error = dpif_port_add(&br->dpif, if_name, next_port_no++, + internal ? 
ODP_PORT_INTERNAL : 0); if (error != EEXIST) { if (next_port_no >= 256) { VLOG_ERR("ran out of valid port numbers on dp%u", @@ -575,7 +577,8 @@ bridge_pick_local_hw_addr(struct bridge *br, uint8_t ea[ETH_ADDR_LEN], for (j = 0; j < port->n_ifaces; j++) { struct iface *iface = port->ifaces[j]; uint8_t iface_ea[ETH_ADDR_LEN]; - if (iface->dp_ifidx == ODPP_LOCAL) { + if (iface->dp_ifidx == ODPP_LOCAL + || cfg_get_bool(0, "iface.%s.internal", iface->name)) { continue; } error = netdev_nodev_get_etheraddr(iface->name, iface_ea); diff --git a/vswitchd/vswitchd.conf.5.in b/vswitchd/vswitchd.conf.5.in index dc6462f0..bc3a7f56 100644 --- a/vswitchd/vswitchd.conf.5.in +++ b/vswitchd/vswitchd.conf.5.in @@ -52,12 +52,25 @@ the names of its network devices as values for key \fBbridge.\fIname\fB.port\fR. (The specified \fIname\fR may not begin with \fBdp\fR or \fBnl:\fR followed by a digit.) .PP -A bridge with a given \fIname\fR always has an associated network -device with the same \fIname\fR. This network device may be included +The names given on \fBbridge.\fIname\fB.port\fR must be the names of +existing network devices, except for ``internal ports.'' An internal +port is a simulated network device that receives traffic only +through the virtual switch and switches any traffic sent to it through +the virtual switch. An internal port may be configured with an IP address, +etc. using the usual system tools (e.g. \fBifconfig\fR, \fBip\fR). To +designate network device \fInetdev\fR as an internal port, add +\fBiface.\fInetdev\fB.internal=true\fR to the configuration file. +\fBvswitchd\fR will honor this configuration setting by automatically +creating the named internal port. +.PP +A bridge with a given \fIname\fR always has an internal port with the +same \fIname\fR, called the ``local port.'' This network device may +be included in the bridge, by specifying it as one of the values for key \fBbridge.\fIname\fB.port\fR, or it may be omitted. 
If it is included, then its MAC address is by default the lowest-numbered MAC -address among the other bridge ports, ignoring bridge ports that are +address among the other bridge ports, ignoring other internal ports +and bridge ports that are used as port mirroring destinations (see \fBPort Mirroring\fR, below). To use a specific MAC address instead, set \fBbridge.\fIname\fB.mac\fR to a MAC address in the format