X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=inline;f=datapath%2Fdatapath.c;h=e927b2469515938fe9f5e08f5c1bfa5debf32076;hb=cb5087caddd187e71d5f32118d8d807c9c131725;hp=de607e8bec6443fc8925e2ef4058de991f8eae22;hpb=8fef8c7121222233075a03d57db7e0b48d5f6be5;p=openvswitch diff --git a/datapath/datapath.c b/datapath/datapath.c index de607e8b..e927b246 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -56,9 +56,7 @@ int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd); EXPORT_SYMBOL(dp_ioctl_hook); /* Datapaths. Protected on the read side by rcu_read_lock, on the write side - * by dp_mutex. dp_mutex is almost completely redundant with genl_mutex - * maintained by the Generic Netlink code, but the timeout path needs mutual - * exclusion too. + * by dp_mutex. * * dp_mutex nests inside the RTNL lock: if you need both you must take the RTNL * lock first. @@ -224,7 +222,7 @@ static int create_dp(int dp_idx, const char __user *devnamep) init_waitqueue_head(&dp->waitqueue); /* Initialize kobject for bridge. This will be added as - * /sys/class/net//bridge later, if sysfs is enabled. */ + * /sys/class/net//brif later, if sysfs is enabled. */ dp->ifobj.kset = NULL; kobject_init(&dp->ifobj, &dp_ktype); @@ -395,7 +393,7 @@ static int add_port(int dp_idx, struct odp_port __user *portp) for (port_no = 1; port_no < DP_MAX_PORTS; port_no++) if (!dp->ports[port_no]) goto got_port_no; - err = -EXFULL; + err = -EFBIG; goto out_unlock_dp; got_port_no: @@ -627,13 +625,93 @@ out: int vswitch_skb_checksum_setup(struct sk_buff *skb) { return 0; } #endif /* CONFIG_XEN && linux == 2.6.18 */ +/* Append each packet in 'skb' list to 'queue'. There will be only one packet + * unless we broke up a GSO packet. */ +static int +queue_control_packets(struct sk_buff *skb, struct sk_buff_head *queue, + int queue_no, u32 arg) +{ + struct sk_buff *nskb; + int port_no; + int err; + + port_no = ODPP_LOCAL; + if (skb->dev) { + if (skb->dev->br_port) + port_no = skb->dev->br_port->port_no; + else if (is_dp_dev(skb->dev)) + port_no = dp_dev_priv(skb->dev)->port_no; + } + + do { + struct odp_msg *header; + + nskb = skb->next; + skb->next = NULL; + + /* If a checksum-deferred packet is forwarded to the + * controller, correct the pointers and checksum. This happens + * on a regular basis only on Xen, on which VMs can pass up + * packets that do not have their checksum computed. + */ + err = vswitch_skb_checksum_setup(skb); + if (err) + goto err_kfree_skbs; +#ifndef CHECKSUM_HW + if (skb->ip_summed == CHECKSUM_PARTIAL) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) + /* Until 2.6.22, the start of the transport header was + * also the start of data to be checksummed. Linux + * 2.6.22 introduced the csum_start field for this + * purpose, but we should point the transport header to + * it anyway for backward compatibility, as + * dev_queue_xmit() does even in 2.6.28. */ + skb_set_transport_header(skb, skb->csum_start - + skb_headroom(skb)); +#endif + err = skb_checksum_help(skb); + if (err) + goto err_kfree_skbs; + } +#else + if (skb->ip_summed == CHECKSUM_HW) { + err = skb_checksum_help(skb, 0); + if (err) + goto err_kfree_skbs; + } +#endif + + err = skb_cow(skb, sizeof *header); + if (err) + goto err_kfree_skbs; + + header = (struct odp_msg*)__skb_push(skb, sizeof *header); + header->type = queue_no; + header->length = skb->len; + header->port = port_no; + header->reserved = 0; + header->arg = arg; + skb_queue_tail(queue, skb); + + skb = nskb; + } while (skb); + return 0; + +err_kfree_skbs: + kfree_skb(skb); + while ((skb = nskb) != NULL) { + nskb = skb->next; + kfree_skb(skb); + } + return err; +} + int dp_output_control(struct datapath *dp, struct sk_buff *skb, int queue_no, u32 arg) { struct dp_stats_percpu *stats; struct sk_buff_head *queue; - int port_no; int err; WARN_ON_ONCE(skb_shared(skb)); @@ -644,40 +722,6 @@ dp_output_control(struct datapath *dp, struct sk_buff *skb, int queue_no, if (skb_queue_len(queue) >= DP_MAX_QUEUE_LEN) goto err_kfree_skb; - /* If a checksum-deferred packet is forwarded to the controller, - * correct the pointers and checksum. This happens on a regular basis - * only on Xen (the CHECKSUM_HW case), on which VMs can pass up packets - * that do not have their checksum computed. We also implement it for - * the non-Xen case, but it is difficult to trigger or test this case - * there, hence the WARN_ON_ONCE(). - */ - err = vswitch_skb_checksum_setup(skb); - if (err) - goto err_kfree_skb; -#ifndef CHECKSUM_HW - if (skb->ip_summed == CHECKSUM_PARTIAL) { - WARN_ON_ONCE(1); -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) - /* Until 2.6.22, the start of the transport header was also the - * start of data to be checksummed. Linux 2.6.22 introduced - * the csum_start field for this purpose, but we should point - * the transport header to it anyway for backward - * compatibility, as dev_queue_xmit() does even in 2.6.28. */ - skb_set_transport_header(skb, skb->csum_start - - skb_headroom(skb)); -#endif - err = skb_checksum_help(skb); - if (err) - goto err_kfree_skb; - } -#else - if (skb->ip_summed == CHECKSUM_HW) { - err = skb_checksum_help(skb, 0); - if (err) - goto err_kfree_skb; - } -#endif - /* Break apart GSO packets into their component pieces. Otherwise * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */ if (skb_is_gso(skb)) { @@ -695,45 +739,9 @@ dp_output_control(struct datapath *dp, struct sk_buff *skb, int queue_no, } } - /* Figure out port number. */ - port_no = ODPP_LOCAL; - if (skb->dev) { - if (skb->dev->br_port) - port_no = skb->dev->br_port->port_no; - else if (is_dp_dev(skb->dev)) - port_no = dp_dev_priv(skb->dev)->port_no; - } - - /* Append each packet to queue. There will be only one packet unless - * we broke up a GSO packet above. */ - do { - struct odp_msg *header; - struct sk_buff *nskb = skb->next; - skb->next = NULL; - - err = skb_cow(skb, sizeof *header); - if (err) { - while (nskb) { - kfree_skb(skb); - skb = nskb; - nskb = skb->next; - } - goto err_kfree_skb; - } - - header = (struct odp_msg*)__skb_push(skb, sizeof *header); - header->type = queue_no; - header->length = skb->len; - header->port = port_no; - header->reserved = 0; - header->arg = arg; - skb_queue_tail(queue, skb); - - skb = nskb; - } while (skb); - + err = queue_control_packets(skb, queue, queue_no, arg); wake_up_interruptible(&dp->waitqueue); - return 0; + return err; err_kfree_skb: kfree_skb(skb); @@ -842,7 +850,7 @@ static void clear_stats(struct sw_flow *flow) static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp) { struct odp_flow_put uf; - struct sw_flow *flow, **bucket; + struct sw_flow *flow; struct dp_table *table; struct odp_flow_stats stats; int error; @@ -852,15 +860,10 @@ static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp) goto error; uf.flow.key.reserved = 0; -retry: table = rcu_dereference(dp->table); - bucket = dp_table_lookup_for_insert(table, &uf.flow.key); - if (!bucket) { - /* No such flow, and the slots where it could go are full. */ - error = uf.flags & ODPPF_CREATE ? -EXFULL : -ENOENT; - goto error; - } else if (!*bucket) { - /* No such flow, but we found an available slot for it. */ + flow = dp_table_lookup(table, &uf.flow.key); + if (!flow) { + /* No such flow. */ struct sw_flow_actions *acts; error = -ENOENT; @@ -868,14 +871,15 @@ retry: goto error; /* Expand table, if necessary, to make room. */ - if (dp->n_flows * 4 >= table->n_buckets && - table->n_buckets < DP_MAX_BUCKETS) { + if (dp->n_flows >= table->n_buckets) { + error = -ENOSPC; + if (table->n_buckets >= DP_MAX_BUCKETS) + goto error; + error = dp_table_expand(dp); if (error) goto error; - - /* The bucket's location has changed. Try again. */ - goto retry; + table = rcu_dereference(dp->table); } /* Allocate flow. */ @@ -895,12 +899,13 @@ retry: rcu_assign_pointer(flow->sf_acts, acts); /* Put flow in bucket. */ - rcu_assign_pointer(*bucket, flow); + error = dp_table_insert(table, flow); + if (error) + goto error_free_flow_acts; dp->n_flows++; memset(&stats, 0, sizeof(struct odp_flow_stats)); } else { /* We found a matching flow. */ - struct sw_flow *flow = *rcu_dereference(bucket); struct sw_flow_actions *old_acts, *new_acts; unsigned long int flags; @@ -938,6 +943,8 @@ retry: return -EFAULT; return 0; +error_free_flow_acts: + kfree(flow->sf_acts); error_free_flow: kmem_cache_free(flow_cache, flow); error: @@ -1111,6 +1118,7 @@ static int do_execute(struct datapath *dp, const struct odp_execute *executep) struct odp_flow_key key; struct sk_buff *skb; struct sw_flow_actions *actions; + struct ethhdr *eth; int err; err = -EFAULT; @@ -1150,6 +1158,17 @@ static int do_execute(struct datapath *dp, const struct odp_execute *executep) execute.length)) goto error_free_skb; + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + + /* Normally, setting the skb 'protocol' field would be handled by a + * call to eth_type_trans(), but it assumes there's a sending + * device, which we may not have. */ + if (ntohs(eth->h_proto) >= 1536) + skb->protocol = eth->h_proto; + else + skb->protocol = htons(ETH_P_802_2); + flow_extract(skb, execute.in_port, &key); err = execute_actions(dp, skb, &key, actions->actions, actions->n_actions, GFP_KERNEL); @@ -1170,8 +1189,8 @@ static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp) int i; stats.n_flows = dp->n_flows; - stats.cur_capacity = rcu_dereference(dp->table)->n_buckets * 2; - stats.max_capacity = DP_MAX_BUCKETS * 2; + stats.cur_capacity = rcu_dereference(dp->table)->n_buckets; + stats.max_capacity = DP_MAX_BUCKETS; stats.n_ports = dp->n_ports; stats.max_ports = DP_MAX_PORTS; stats.max_groups = DP_MAX_GROUPS;