X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=datapath%2Fdatapath.c;h=dcff05f2986dbc23749c01063eb47ce3f10b120c;hb=76f1c218bd84e44666c5a0ae54d543dbce42c376;hp=dee1b0f88faf6d9eb466685daab264ff6ea53509;hpb=254f2dc8e3eb18debf4a8f238b9c87cf4d4dbd3f;p=openvswitch diff --git a/datapath/datapath.c b/datapath/datapath.c index dee1b0f8..dcff05f2 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -51,6 +51,7 @@ #include "flow.h" #include "loop_counter.h" #include "table.h" +#include "vlan.h" #include "vport-internal_dev.h" int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd); @@ -79,6 +80,8 @@ EXPORT_SYMBOL(dp_ioctl_hook); static LIST_HEAD(dps); static struct vport *new_vport(const struct vport_parms *); +static int queue_control_packets(struct datapath *, struct sk_buff *, + const struct dp_upcall_info *); /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */ struct datapath *get_dp(int dp_ifindex) @@ -365,13 +368,94 @@ static void copy_and_csum_skb(struct sk_buff *skb, void *to) *(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum); } -static struct genl_family dp_packet_genl_family; +static struct genl_family dp_packet_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = sizeof(struct odp_header), + .name = ODP_PACKET_FAMILY, + .version = 1, + .maxattr = ODP_PACKET_ATTR_MAX +}; + +/* Generic Netlink multicast groups for upcalls. + * + * We really want three unique multicast groups per datapath, but we can't even + * get one, because genl_register_mc_group() takes genl_lock, which is also + * held during Generic Netlink message processing, so trying to acquire + * multicast groups during ODP_DP_NEW processing deadlocks. Instead, we + * preallocate a few groups and use them round-robin for datapaths. Collision + * isn't fatal--multicast listeners should check that the family is the one + * that they want and discard others--but it wastes time and memory to receive + * unwanted messages. + */ #define PACKET_N_MC_GROUPS 16 +static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS]; -static int packet_mc_group(struct datapath *dp, u8 cmd) +static u32 packet_mc_group(struct datapath *dp, u8 cmd) { + u32 idx; BUILD_BUG_ON_NOT_POWER_OF_2(PACKET_N_MC_GROUPS); - return jhash_2words(dp->dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1); + + idx = jhash_2words(dp->dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1); + return packet_mc_groups[idx].id; +} + +static int packet_register_mc_groups(void) +{ + int i; + + for (i = 0; i < PACKET_N_MC_GROUPS; i++) { + struct genl_multicast_group *group = &packet_mc_groups[i]; + int error; + + sprintf(group->name, "packet%d", i); + error = genl_register_mc_group(&dp_packet_genl_family, group); + if (error) + return error; + } + return 0; +} + +int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) +{ + struct dp_stats_percpu *stats; + int err; + + WARN_ON_ONCE(skb_shared(skb)); + + forward_ip_summed(skb); + + err = vswitch_skb_checksum_setup(skb); + if (err) + goto err_kfree_skb; + + /* Break apart GSO packets into their component pieces. Otherwise + * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */ + if (skb_is_gso(skb)) { + struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM); + + kfree_skb(skb); + skb = nskb; + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + goto err; + } + } + + return queue_control_packets(dp, skb, upcall_info); + +err_kfree_skb: + kfree_skb(skb); +err: + local_bh_disable(); + stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id()); + + write_seqcount_begin(&stats->seqlock); + stats->n_lost++; + write_seqcount_end(&stats->seqlock); + + local_bh_enable(); + + return err; } /* Send each packet in the 'skb' list to userspace for 'dp' as directed by @@ -400,8 +484,11 @@ static int queue_control_packets(struct datapath *dp, struct sk_buff *skb, nskb = skb->next; skb->next = NULL; + err = vlan_deaccel_tag(skb); + if (unlikely(err)) + goto err_kfree_skbs; + len = sizeof(struct odp_header); - len += nla_total_size(4); /* ODP_PACKET_ATTR_TYPE. */ len += nla_total_size(skb->len); len += nla_total_size(FLOW_BUFSIZE); if (upcall_info->userdata) @@ -461,86 +548,6 @@ err_kfree_skbs: return err; } -/* Generic Netlink multicast groups for upcalls. - * - * We really want three unique multicast groups per datapath, but we can't even - * get one, because genl_register_mc_group() takes genl_lock, which is also - * held during Generic Netlink message processing, so trying to acquire - * multicast groups during ODP_DP_NEW processing deadlocks. Instead, we - * preallocate a few groups and use them round-robin for datapaths. Collision - * isn't fatal--multicast listeners should check that the family is the one - * that they want and discard others--but it wastes time and memory to receive - * unwanted messages. - */ -static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS]; - -static struct genl_family dp_packet_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = sizeof(struct odp_header), - .name = ODP_PACKET_FAMILY, - .version = 1, - .maxattr = ODP_PACKET_ATTR_MAX -}; - -static int packet_register_mc_groups(void) -{ - int i; - - for (i = 0; i < PACKET_N_MC_GROUPS; i++) { - struct genl_multicast_group *group = &packet_mc_groups[i]; - int error; - - sprintf(group->name, "packet%d", i); - error = genl_register_mc_group(&dp_packet_genl_family, group); - if (error) - return error; - } - return 0; -} - -int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) -{ - struct dp_stats_percpu *stats; - int err; - - WARN_ON_ONCE(skb_shared(skb)); - - forward_ip_summed(skb); - - err = vswitch_skb_checksum_setup(skb); - if (err) - goto err_kfree_skb; - - /* Break apart GSO packets into their component pieces. Otherwise - * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */ - if (skb_is_gso(skb)) { - struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM); - - kfree_skb(skb); - skb = nskb; - if (IS_ERR(skb)) { - err = PTR_ERR(skb); - goto err; - } - } - - return queue_control_packets(dp, skb, upcall_info); - -err_kfree_skb: - kfree_skb(skb); -err: - local_bh_disable(); - stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id()); - - write_seqcount_begin(&stats->seqlock); - stats->n_lost++; - write_seqcount_end(&stats->seqlock); - - local_bh_enable(); - - return err; -} - /* Called with genl_mutex. */ static int flush_flows(int dp_ifindex) { @@ -570,58 +577,58 @@ static int validate_actions(const struct nlattr *attr) int rem; nla_for_each_nested(a, attr, rem) { - static const u32 action_lens[ODPAT_MAX + 1] = { - [ODPAT_OUTPUT] = 4, - [ODPAT_CONTROLLER] = 8, - [ODPAT_SET_DL_TCI] = 2, - [ODPAT_STRIP_VLAN] = 0, - [ODPAT_SET_DL_SRC] = ETH_ALEN, - [ODPAT_SET_DL_DST] = ETH_ALEN, - [ODPAT_SET_NW_SRC] = 4, - [ODPAT_SET_NW_DST] = 4, - [ODPAT_SET_NW_TOS] = 1, - [ODPAT_SET_TP_SRC] = 2, - [ODPAT_SET_TP_DST] = 2, - [ODPAT_SET_TUNNEL] = 8, - [ODPAT_SET_PRIORITY] = 4, - [ODPAT_POP_PRIORITY] = 0, - [ODPAT_DROP_SPOOFED_ARP] = 0, + static const u32 action_lens[ODP_ACTION_ATTR_MAX + 1] = { + [ODP_ACTION_ATTR_OUTPUT] = 4, + [ODP_ACTION_ATTR_CONTROLLER] = 8, + [ODP_ACTION_ATTR_SET_DL_TCI] = 2, + [ODP_ACTION_ATTR_STRIP_VLAN] = 0, + [ODP_ACTION_ATTR_SET_DL_SRC] = ETH_ALEN, + [ODP_ACTION_ATTR_SET_DL_DST] = ETH_ALEN, + [ODP_ACTION_ATTR_SET_NW_SRC] = 4, + [ODP_ACTION_ATTR_SET_NW_DST] = 4, + [ODP_ACTION_ATTR_SET_NW_TOS] = 1, + [ODP_ACTION_ATTR_SET_TP_SRC] = 2, + [ODP_ACTION_ATTR_SET_TP_DST] = 2, + [ODP_ACTION_ATTR_SET_TUNNEL] = 8, + [ODP_ACTION_ATTR_SET_PRIORITY] = 4, + [ODP_ACTION_ATTR_POP_PRIORITY] = 0, + [ODP_ACTION_ATTR_DROP_SPOOFED_ARP] = 0, }; int type = nla_type(a); - if (type > ODPAT_MAX || nla_len(a) != action_lens[type]) + if (type > ODP_ACTION_ATTR_MAX || nla_len(a) != action_lens[type]) return -EINVAL; switch (type) { - case ODPAT_UNSPEC: + case ODP_ACTION_ATTR_UNSPEC: return -EINVAL; - case ODPAT_CONTROLLER: - case ODPAT_STRIP_VLAN: - case ODPAT_SET_DL_SRC: - case ODPAT_SET_DL_DST: - case ODPAT_SET_NW_SRC: - case ODPAT_SET_NW_DST: - case ODPAT_SET_TP_SRC: - case ODPAT_SET_TP_DST: - case ODPAT_SET_TUNNEL: - case ODPAT_SET_PRIORITY: - case ODPAT_POP_PRIORITY: - case ODPAT_DROP_SPOOFED_ARP: + case ODP_ACTION_ATTR_CONTROLLER: + case ODP_ACTION_ATTR_STRIP_VLAN: + case ODP_ACTION_ATTR_SET_DL_SRC: + case ODP_ACTION_ATTR_SET_DL_DST: + case ODP_ACTION_ATTR_SET_NW_SRC: + case ODP_ACTION_ATTR_SET_NW_DST: + case ODP_ACTION_ATTR_SET_TP_SRC: + case ODP_ACTION_ATTR_SET_TP_DST: + case ODP_ACTION_ATTR_SET_TUNNEL: + case ODP_ACTION_ATTR_SET_PRIORITY: + case ODP_ACTION_ATTR_POP_PRIORITY: + case ODP_ACTION_ATTR_DROP_SPOOFED_ARP: /* No validation needed. */ break; - case ODPAT_OUTPUT: + case ODP_ACTION_ATTR_OUTPUT: if (nla_get_u32(a) >= DP_MAX_PORTS) return -EINVAL; break; - case ODPAT_SET_DL_TCI: + case ODP_ACTION_ATTR_SET_DL_TCI: if (nla_get_be16(a) & htons(VLAN_CFI_MASK)) return -EINVAL; break; - case ODPAT_SET_NW_TOS: + case ODP_ACTION_ATTR_SET_NW_TOS: if (nla_get_u8(a) & INET_ECN_MASK) return -EINVAL; break; @@ -702,6 +709,15 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (err) goto exit; + /* Initialize OVS_CB (it came from Netlink so might not be zeroed). */ + OVS_CB(packet)->vport = NULL; + OVS_CB(packet)->flow = NULL; + /* execute_actions() will reset tun_id to 0 anyhow. */ +#ifdef NEED_CSUM_NORMALIZE + OVS_CB(packet)->ip_summed = OVS_CSUM_NONE; +#endif + vlan_copy_skb_tci(packet); + rcu_read_lock(); dp = get_dp(odp_header->dp_ifindex); err = -ENODEV; @@ -771,6 +787,8 @@ int dp_min_mtu(const struct datapath *dp) continue; dev_mtu = vport_get_mtu(p); + if (!dev_mtu) + continue; if (!mtu || dev_mtu < mtu) mtu = dev_mtu; } @@ -825,7 +843,6 @@ static int odp_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, struct nlattr *nla; unsigned long used; u8 tcp_flags; - int nla_len; int err; sf_acts = rcu_dereference_protected(flow->sf_acts, @@ -853,7 +870,7 @@ static int odp_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, spin_unlock_bh(&flow->lock); if (used) - NLA_PUT_MSECS(skb, ODP_FLOW_ATTR_USED, used); + NLA_PUT_U64(skb, ODP_FLOW_ATTR_USED, flow_used_time(used)); if (stats.n_packets) NLA_PUT(skb, ODP_FLOW_ATTR_STATS, sizeof(struct odp_flow_stats), &stats); @@ -861,23 +878,20 @@ static int odp_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, if (tcp_flags) NLA_PUT_U8(skb, ODP_FLOW_ATTR_TCP_FLAGS, tcp_flags); - /* If ODP_FLOW_ATTR_ACTIONS doesn't fit, and this is the first flow to - * be dumped into 'skb', then expand the skb. This is unusual for - * Netlink but individual action lists can be longer than a page and - * thus entirely undumpable if we didn't do this. */ - nla_len = nla_total_size(sf_acts->actions_len); - if (nla_len > skb_tailroom(skb) && !skb_orig_len) { - int hdr_off = (unsigned char *)odp_header - skb->data; - - err = pskb_expand_head(skb, 0, nla_len - skb_tailroom(skb), GFP_KERNEL); - if (err) - goto error; - - odp_header = (struct odp_header *)(skb->data + hdr_off); - } - nla = nla_nest_start(skb, ODP_FLOW_ATTR_ACTIONS); - memcpy(__skb_put(skb, sf_acts->actions_len), sf_acts->actions, sf_acts->actions_len); - nla_nest_end(skb, nla); + /* If ODP_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if + * this is the first flow to be dumped into 'skb'. This is unusual for + * Netlink but individual action lists can be longer than + * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this. + * The userspace caller can always fetch the actions separately if it + * really wants them. (Most userspace callers in fact don't care.) + * + * This can only fail for dump operations because the skb is always + * properly sized for single flows. + */ + err = nla_put(skb, ODP_FLOW_ATTR_ACTIONS, sf_acts->actions_len, + sf_acts->actions); + if (err < 0 && skb_orig_len) + goto error; return genlmsg_end(skb, odp_header); @@ -1584,6 +1598,7 @@ static int odp_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, struct odp_header *odp_header; struct nlattr *nla; int ifindex, iflink; + int mtu; int err; odp_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family, @@ -1605,7 +1620,9 @@ static int odp_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, NLA_PUT(skb, ODP_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport)); - NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, vport_get_mtu(vport)); + mtu = vport_get_mtu(vport); + if (mtu) + NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, mtu); err = vport_get_options(vport, skb); if (err == -EMSGSIZE)