From: Ben Pfaff Date: Fri, 28 Jan 2011 22:00:51 +0000 (-0800) Subject: datapath: Convert ODP_FLOW_* commands to use AF_NETLINK socket layer. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=37a1300c3ca05a97160d44dc34298c447b50ed9d;p=openvswitch datapath: Convert ODP_FLOW_* commands to use AF_NETLINK socket layer. This completes the transition to the Generic Netlink interface, and so this commit restores support for Linux 2.6.18 and later. Signed-off-by: Ben Pfaff Acked-by: Jesse Gross --- diff --git a/datapath/datapath.c b/datapath/datapath.c index 4caa496a..8931456a 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -12,7 +12,6 @@ #include #include -#include #include #include #include @@ -44,7 +43,6 @@ #include #include #include -#include #include "openvswitch/datapath-protocol.h" #include "checksum.h" @@ -557,12 +555,12 @@ static int flush_flows(int dp_idx) return 0; } -static int validate_actions(const struct nlattr *actions, u32 actions_len) +static int validate_actions(const struct nlattr *attr) { const struct nlattr *a; int rem; - nla_for_each_attr(a, actions, actions_len, rem) { + nla_for_each_nested(a, attr, rem) { static const u32 action_lens[ODPAT_MAX + 1] = { [ODPAT_OUTPUT] = 4, [ODPAT_CONTROLLER] = 8, @@ -629,28 +627,6 @@ static int validate_actions(const struct nlattr *actions, u32 actions_len) return 0; } - -struct dp_flowcmd { - u32 nlmsg_flags; - u32 dp_idx; - u32 total_len; - struct sw_flow_key key; - const struct nlattr *actions; - u32 actions_len; - bool clear; - u64 state; -}; - -static struct sw_flow_actions *get_actions(const struct dp_flowcmd *flowcmd) -{ - struct sw_flow_actions *actions; - - actions = flow_actions_alloc(flowcmd->actions_len); - if (!IS_ERR(actions) && flowcmd->actions_len) - memcpy(actions->actions, flowcmd->actions, flowcmd->actions_len); - return actions; -} - static void clear_stats(struct sw_flow *flow) { flow->used = 0; @@ -680,8 +656,6 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) struct odp_header *odp_header = info->userhdr; struct nlattr **a = info->attrs; struct sk_buff *packet; - unsigned int actions_len; - struct nlattr *actions; struct sw_flow_key key; struct datapath *dp; struct ethhdr *eth; @@ -693,9 +667,7 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) nla_len(a[ODP_PACKET_ATTR_PACKET]) < ETH_HLEN) goto exit; - actions = nla_data(a[ODP_PACKET_ATTR_ACTIONS]); - actions_len = nla_len(a[ODP_PACKET_ATTR_ACTIONS]); - err = validate_actions(actions, actions_len); + err = validate_actions(a[ODP_PACKET_ATTR_ACTIONS]); if (err) goto exit; @@ -725,7 +697,9 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) dp = get_dp(odp_header->dp_idx); err = -ENODEV; if (dp) - err = execute_actions(dp, packet, &key, actions, actions_len); + err = execute_actions(dp, packet, &key, + nla_data(a[ODP_PACKET_ATTR_ACTIONS]), + nla_len(a[ODP_PACKET_ATTR_ACTIONS])); rcu_read_unlock(); exit: @@ -817,46 +791,49 @@ static const struct nla_policy flow_policy[ODP_FLOW_ATTR_MAX + 1] = { [ODP_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, [ODP_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, [ODP_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, - [ODP_FLOW_ATTR_STATE] = { .type = NLA_U64 }, }; +static struct genl_family dp_flow_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = sizeof(struct odp_header), + .name = ODP_FLOW_FAMILY, + .version = 1, + .maxattr = ODP_FLOW_ATTR_MAX +}; -static int copy_flow_to_user(struct odp_flow __user *dst, struct datapath *dp, - struct sw_flow *flow, u32 total_len, u64 state) +static struct genl_multicast_group dp_flow_multicast_group = { + .name = ODP_FLOW_MCGROUP +}; + +/* Called with genl_lock. */ +static int odp_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, + struct sk_buff *skb, u32 pid, u32 seq, u32 flags, u8 cmd) { + const int skb_orig_len = skb->len; const struct sw_flow_actions *sf_acts; struct odp_flow_stats stats; - struct odp_flow *odp_flow; - struct sk_buff *skb; + struct odp_header *odp_header; struct nlattr *nla; unsigned long used; u8 tcp_flags; + int nla_len; int err; sf_acts = rcu_dereference_protected(flow->sf_acts, lockdep_genl_is_held()); - skb = alloc_skb(128 + FLOW_BUFSIZE + sf_acts->actions_len, GFP_KERNEL); - err = -ENOMEM; - if (!skb) - goto exit; + odp_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd); + if (!odp_header) + return -EMSGSIZE; - odp_flow = (struct odp_flow*)__skb_put(skb, sizeof(struct odp_flow)); - odp_flow->dp_idx = dp->dp_idx; - odp_flow->total_len = total_len; + odp_header->dp_idx = dp->dp_idx; nla = nla_nest_start(skb, ODP_FLOW_ATTR_KEY); if (!nla) goto nla_put_failure; err = flow_to_nlattrs(&flow->key, skb); if (err) - goto exit_free; - nla_nest_end(skb, nla); - - nla = nla_nest_start(skb, ODP_FLOW_ATTR_ACTIONS); - if (!nla || skb_tailroom(skb) < sf_acts->actions_len) - goto nla_put_failure; - memcpy(__skb_put(skb, sf_acts->actions_len), sf_acts->actions, sf_acts->actions_len); + goto error; nla_nest_end(skb, nla); spin_lock_bh(&flow->lock); @@ -875,130 +852,116 @@ static int copy_flow_to_user(struct odp_flow __user *dst, struct datapath *dp, if (tcp_flags) NLA_PUT_U8(skb, ODP_FLOW_ATTR_TCP_FLAGS, tcp_flags); - if (state) - NLA_PUT_U64(skb, ODP_FLOW_ATTR_STATE, state); + /* If ODP_FLOW_ATTR_ACTIONS doesn't fit, and this is the first flow to + * be dumped into 'skb', then expand the skb. This is unusual for + * Netlink but individual action lists can be longer than a page and + * thus entirely undumpable if we didn't do this. */ + nla_len = nla_total_size(sf_acts->actions_len); + if (nla_len > skb_tailroom(skb) && !skb_orig_len) { + int hdr_off = (unsigned char *)odp_header - skb->data; - if (skb->len > total_len) - goto nla_put_failure; + err = pskb_expand_head(skb, 0, nla_len - skb_tailroom(skb), GFP_KERNEL); + if (err) + goto error; - odp_flow->len = skb->len; - err = copy_to_user(dst, skb->data, skb->len) ? -EFAULT : 0; - goto exit_free; + odp_header = (struct odp_header *)(skb->data + hdr_off); + } + nla = nla_nest_start(skb, ODP_FLOW_ATTR_ACTIONS); + memcpy(__skb_put(skb, sf_acts->actions_len), sf_acts->actions, sf_acts->actions_len); + nla_nest_end(skb, nla); + + return genlmsg_end(skb, odp_header); nla_put_failure: err = -EMSGSIZE; -exit_free: - kfree_skb(skb); -exit: +error: + genlmsg_cancel(skb, odp_header); return err; } -/* Called with genl_mutex. */ -static struct sk_buff *copy_flow_from_user(struct odp_flow __user *uodp_flow, - struct dp_flowcmd *flowcmd) +static struct sk_buff *odp_flow_cmd_alloc_info(struct sw_flow *flow) { - struct nlattr *a[ODP_FLOW_ATTR_MAX + 1]; - struct odp_flow *odp_flow; - struct sk_buff *skb; - u32 len; - int err; - - if (get_user(len, &uodp_flow->len)) - return ERR_PTR(-EFAULT); - if (len < sizeof(struct odp_flow)) - return ERR_PTR(-EINVAL); - - skb = alloc_skb(len, GFP_KERNEL); - if (!skb) - return ERR_PTR(-ENOMEM); - - err = -EFAULT; - if (copy_from_user(__skb_put(skb, len), uodp_flow, len)) - goto error_free_skb; - - odp_flow = (struct odp_flow *)skb->data; - err = -EINVAL; - if (odp_flow->len != len) - goto error_free_skb; - - flowcmd->nlmsg_flags = odp_flow->nlmsg_flags; - flowcmd->dp_idx = odp_flow->dp_idx; - flowcmd->total_len = odp_flow->total_len; - - err = nla_parse(a, ODP_FLOW_ATTR_MAX, - (struct nlattr *)(skb->data + sizeof(struct odp_flow)), - skb->len - sizeof(struct odp_flow), flow_policy); - if (err) - goto error_free_skb; + const struct sw_flow_actions *sf_acts; + int len; - /* ODP_FLOW_ATTR_KEY. */ - if (a[ODP_FLOW_ATTR_KEY]) { - err = flow_from_nlattrs(&flowcmd->key, a[ODP_FLOW_ATTR_KEY]); - if (err) - goto error_free_skb; - } else - memset(&flowcmd->key, 0, sizeof(struct sw_flow_key)); + sf_acts = rcu_dereference_protected(flow->sf_acts, + lockdep_genl_is_held()); - /* ODP_FLOW_ATTR_ACTIONS. */ - if (a[ODP_FLOW_ATTR_ACTIONS]) { - flowcmd->actions = nla_data(a[ODP_FLOW_ATTR_ACTIONS]); - flowcmd->actions_len = nla_len(a[ODP_FLOW_ATTR_ACTIONS]); - err = validate_actions(flowcmd->actions, flowcmd->actions_len); - if (err) - goto error_free_skb; - } else { - flowcmd->actions = NULL; - flowcmd->actions_len = 0; - } + len = nla_total_size(FLOW_BUFSIZE); /* ODP_FLOW_ATTR_KEY */ + len += nla_total_size(sf_acts->actions_len); /* ODP_FLOW_ATTR_ACTIONS */ + len += nla_total_size(sizeof(struct odp_flow_stats)); /* ODP_FLOW_ATTR_STATS */ + len += nla_total_size(1); /* ODP_FLOW_ATTR_TCP_FLAGS */ + len += nla_total_size(8); /* ODP_FLOW_ATTR_USED */ + return genlmsg_new(NLMSG_ALIGN(sizeof(struct odp_header)) + len, GFP_KERNEL); +} - flowcmd->clear = a[ODP_FLOW_ATTR_CLEAR] != NULL; +static struct sk_buff *odp_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp, + u32 pid, u32 seq, u8 cmd) +{ + struct sk_buff *skb; + int retval; - flowcmd->state = a[ODP_FLOW_ATTR_STATE] ? nla_get_u64(a[ODP_FLOW_ATTR_STATE]) : 0; + skb = odp_flow_cmd_alloc_info(flow); + if (!skb) + return ERR_PTR(-ENOMEM); + retval = odp_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd); + BUG_ON(retval < 0); return skb; - -error_free_skb: - kfree_skb(skb); - return ERR_PTR(err); } -static int new_flow(unsigned int cmd, struct odp_flow __user *uodp_flow) +static int odp_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) { + struct nlattr **a = info->attrs; + struct odp_header *odp_header = info->userhdr; struct tbl_node *flow_node; - struct dp_flowcmd flowcmd; + struct sw_flow_key key; struct sw_flow *flow; - struct sk_buff *skb; + struct sk_buff *reply; struct datapath *dp; struct tbl *table; u32 hash; int error; - skb = copy_flow_from_user(uodp_flow, &flowcmd); - error = PTR_ERR(skb); - if (IS_ERR(skb)) - goto exit; + /* Extract key. */ + error = -EINVAL; + if (!a[ODP_FLOW_ATTR_KEY]) + goto error; + error = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]); + if (error) + goto error; - dp = get_dp(flowcmd.dp_idx); + /* Validate actions. */ + if (a[ODP_FLOW_ATTR_ACTIONS]) { + error = validate_actions(a[ODP_FLOW_ATTR_ACTIONS]); + if (error) + goto error; + } else if (info->genlhdr->cmd == ODP_FLOW_CMD_NEW) { + error = -EINVAL; + goto error; + } + + dp = get_dp(odp_header->dp_idx); error = -ENODEV; if (!dp) - goto exit; + goto error; - hash = flow_hash(&flowcmd.key); + hash = flow_hash(&key); table = get_table_protected(dp); - flow_node = tbl_lookup(table, &flowcmd.key, hash, flow_cmp); + flow_node = tbl_lookup(table, &key, hash, flow_cmp); if (!flow_node) { struct sw_flow_actions *acts; /* Bail out if we're not allowed to create a new flow. */ error = -ENOENT; - if (cmd == ODP_FLOW_SET) - goto exit; + if (info->genlhdr->cmd == ODP_FLOW_CMD_SET) + goto error; /* Expand table, if necessary, to make room. */ if (tbl_count(table) >= tbl_n_buckets(table)) { error = expand_table(dp); if (error) - goto exit; + goto error; table = get_table_protected(dp); } @@ -1006,26 +969,25 @@ static int new_flow(unsigned int cmd, struct odp_flow __user *uodp_flow) flow = flow_alloc(); if (IS_ERR(flow)) { error = PTR_ERR(flow); - goto exit; + goto error; } - flow->key = flowcmd.key; + flow->key = key; clear_stats(flow); /* Obtain actions. */ - acts = get_actions(&flowcmd); + acts = flow_actions_alloc(a[ODP_FLOW_ATTR_ACTIONS]); error = PTR_ERR(acts); if (IS_ERR(acts)) goto error_free_flow; rcu_assign_pointer(flow->sf_acts, acts); - error = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len, 0); - if (error) - goto error_free_flow; - /* Put flow in bucket. */ error = tbl_insert(table, &flow->tbl_node, hash); if (error) goto error_free_flow; + + reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid, + info->snd_seq, ODP_FLOW_CMD_NEW); } else { /* We found a matching flow. */ struct sw_flow_actions *old_acts; @@ -1037,124 +999,194 @@ static int new_flow(unsigned int cmd, struct odp_flow __user *uodp_flow) * gets fixed. */ error = -EEXIST; - if (flowcmd.nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) - goto error_kfree_skb; + if (info->genlhdr->cmd == ODP_FLOW_CMD_NEW && + info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) + goto error; /* Update actions. */ flow = flow_cast(flow_node); old_acts = rcu_dereference_protected(flow->sf_acts, lockdep_genl_is_held()); - if (flowcmd.actions && - (old_acts->actions_len != flowcmd.actions_len || - memcmp(old_acts->actions, flowcmd.actions, - flowcmd.actions_len))) { + if (a[ODP_FLOW_ATTR_ACTIONS] && + (old_acts->actions_len != nla_len(a[ODP_FLOW_ATTR_ACTIONS]) || + memcmp(old_acts->actions, nla_data(a[ODP_FLOW_ATTR_ACTIONS]), + old_acts->actions_len))) { struct sw_flow_actions *new_acts; - new_acts = get_actions(&flowcmd); + new_acts = flow_actions_alloc(a[ODP_FLOW_ATTR_ACTIONS]); error = PTR_ERR(new_acts); if (IS_ERR(new_acts)) - goto error_kfree_skb; + goto error; rcu_assign_pointer(flow->sf_acts, new_acts); flow_deferred_free_acts(old_acts); } - error = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len, 0); - if (error) - goto error_kfree_skb; + reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid, + info->snd_seq, ODP_FLOW_CMD_NEW); /* Clear stats. */ - if (flowcmd.clear) { + if (a[ODP_FLOW_ATTR_CLEAR]) { spin_lock_bh(&flow->lock); clear_stats(flow); spin_unlock_bh(&flow->lock); } } - kfree_skb(skb); + + if (!IS_ERR(reply)) + genl_notify(reply, genl_info_net(info), info->snd_pid, + dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); + else + netlink_set_err(INIT_NET_GENL_SOCK, 0, + dp_flow_multicast_group.id, PTR_ERR(reply)); return 0; error_free_flow: flow_put(flow); -error_kfree_skb: - kfree_skb(skb); -exit: +error: return error; } -static int get_or_del_flow(unsigned int cmd, struct odp_flow __user *uodp_flow) +static int odp_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) { + struct nlattr **a = info->attrs; + struct odp_header *odp_header = info->userhdr; + struct sw_flow_key key; struct tbl_node *flow_node; - struct dp_flowcmd flowcmd; + struct sk_buff *reply; struct sw_flow *flow; - struct sk_buff *skb; struct datapath *dp; struct tbl *table; int err; - skb = copy_flow_from_user(uodp_flow, &flowcmd); - if (IS_ERR(skb)) - return PTR_ERR(skb); + if (!a[ODP_FLOW_ATTR_KEY]) + return -EINVAL; + err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]); + if (err) + return err; - dp = get_dp(flowcmd.dp_idx); + dp = get_dp(odp_header->dp_idx); if (!dp) return -ENODEV; table = get_table_protected(dp); - flow_node = tbl_lookup(table, &flowcmd.key, flow_hash(&flowcmd.key), flow_cmp); + flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp); if (!flow_node) return -ENOENT; - if (cmd == ODP_FLOW_DEL) { - err = tbl_remove(table, flow_node); - if (err) - return err; - } - flow = flow_cast(flow_node); - err = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len, 0); - if (!err && cmd == ODP_FLOW_DEL) - flow_deferred_free(flow); + reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid, info->snd_seq, ODP_FLOW_CMD_NEW); + if (IS_ERR(reply)) + return PTR_ERR(reply); - return err; + return genlmsg_reply(reply, info); } -static int dump_flow(struct odp_flow __user *uodp_flow) +static int odp_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) { + struct nlattr **a = info->attrs; + struct odp_header *odp_header = info->userhdr; + struct sw_flow_key key; struct tbl_node *flow_node; - struct dp_flowcmd flowcmd; + struct sk_buff *reply; struct sw_flow *flow; - struct sk_buff *skb; struct datapath *dp; - u32 bucket, obj; + struct tbl *table; int err; - skb = copy_flow_from_user(uodp_flow, &flowcmd); - err = PTR_ERR(skb); - if (IS_ERR(skb)) - goto exit; + if (!a[ODP_FLOW_ATTR_KEY]) + return flush_flows(odp_header->dp_idx); + err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]); + if (err) + return err; - dp = get_dp(flowcmd.dp_idx); - err = -ENODEV; + dp = get_dp(odp_header->dp_idx); if (!dp) - goto exit_kfree_skb; + return -ENODEV; - bucket = flowcmd.state >> 32; - obj = flowcmd.state; - flow_node = tbl_next(get_table_protected(dp), &bucket, &obj); - err = -ENODEV; + table = get_table_protected(dp); + flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp); if (!flow_node) - goto exit_kfree_skb; - + return -ENOENT; flow = flow_cast(flow_node); - err = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len, - ((u64)bucket << 32) | obj); -exit_kfree_skb: - kfree_skb(skb); -exit: - return err; + reply = odp_flow_cmd_alloc_info(flow); + if (!reply) + return -ENOMEM; + + err = tbl_remove(table, flow_node); + if (err) { + kfree_skb(reply); + return err; + } + + err = odp_flow_cmd_fill_info(flow, dp, reply, info->snd_pid, + info->snd_seq, 0, ODP_FLOW_CMD_DEL); + BUG_ON(err < 0); + + flow_deferred_free(flow); + + genl_notify(reply, genl_info_net(info), info->snd_pid, + dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); + return 0; +} + +static int odp_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct odp_header *odp_header = genlmsg_data(nlmsg_data(cb->nlh)); + struct datapath *dp; + + dp = get_dp(odp_header->dp_idx); + if (!dp) + return -ENODEV; + + for (;;) { + struct tbl_node *flow_node; + struct sw_flow *flow; + u32 bucket, obj; + + bucket = cb->args[0]; + obj = cb->args[1]; + flow_node = tbl_next(get_table_protected(dp), &bucket, &obj); + if (!flow_node) + break; + + flow = flow_cast(flow_node); + if (odp_flow_cmd_fill_info(flow, dp, skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + ODP_FLOW_CMD_NEW) < 0) + break; + + cb->args[0] = bucket; + cb->args[1] = obj; + } + return skb->len; } +static struct genl_ops dp_flow_genl_ops[] = { + { .cmd = ODP_FLOW_CMD_NEW, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = flow_policy, + .doit = odp_flow_cmd_new_or_set + }, + { .cmd = ODP_FLOW_CMD_DEL, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = flow_policy, + .doit = odp_flow_cmd_del + }, + { .cmd = ODP_FLOW_CMD_GET, + .flags = 0, /* OK for unprivileged users. */ + .policy = flow_policy, + .doit = odp_flow_cmd_get, + .dumpit = odp_flow_cmd_dump + }, + { .cmd = ODP_FLOW_CMD_SET, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = flow_policy, + .doit = odp_flow_cmd_new_or_set, + }, +}; + static const struct nla_policy datapath_policy[ODP_DP_ATTR_MAX + 1] = { #ifdef HAVE_NLA_NUL_STRING [ODP_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, @@ -1925,72 +1957,6 @@ static struct genl_ops dp_vport_genl_ops[] = { }, }; -static long openvswitch_ioctl(struct file *f, unsigned int cmd, - unsigned long argp) -{ - int err; - - genl_lock(); - switch (cmd) { - case ODP_FLOW_FLUSH: - err = flush_flows(argp); - goto exit; - - case ODP_FLOW_NEW: - case ODP_FLOW_SET: - err = new_flow(cmd, (struct odp_flow __user *)argp); - goto exit; - - case ODP_FLOW_GET: - case ODP_FLOW_DEL: - err = get_or_del_flow(cmd, (struct odp_flow __user *)argp); - goto exit; - - case ODP_FLOW_DUMP: - err = dump_flow((struct odp_flow __user *)argp); - goto exit; - - default: - err = -ENOIOCTLCMD; - break; - } -exit: - genl_unlock(); - return err; -} - -#ifdef CONFIG_COMPAT -static long openvswitch_compat_ioctl(struct file *f, unsigned int cmd, unsigned long argp) -{ - switch (cmd) { - case ODP_FLOW_FLUSH: - /* Ioctls that don't need any translation at all. */ - return openvswitch_ioctl(f, cmd, argp); - - case ODP_FLOW_NEW: - case ODP_FLOW_DEL: - case ODP_FLOW_GET: - case ODP_FLOW_SET: - case ODP_FLOW_DUMP: - /* Ioctls that just need their pointer argument extended. */ - return openvswitch_ioctl(f, cmd, (unsigned long)compat_ptr(argp)); - - default: - return -ENOIOCTLCMD; - } -} -#endif - -static struct file_operations openvswitch_fops = { - .owner = THIS_MODULE, - .unlocked_ioctl = openvswitch_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = openvswitch_compat_ioctl, -#endif -}; - -static int major; - struct genl_family_and_ops { struct genl_family *family; struct genl_ops *ops; @@ -2005,6 +1971,9 @@ static const struct genl_family_and_ops dp_genl_families[] = { { &dp_vport_genl_family, dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops), &dp_vport_multicast_group }, + { &dp_flow_genl_family, + dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops), + &dp_flow_multicast_group }, { &dp_packet_genl_family, dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops), NULL }, @@ -2073,18 +2042,12 @@ static int __init dp_init(void) if (err) goto error_vport_exit; - major = register_chrdev(0, "openvswitch", &openvswitch_fops); - if (err < 0) - goto error_unreg_notifier; - err = dp_register_genl(); if (err < 0) - goto error_unreg_chrdev; + goto error_unreg_notifier; return 0; -error_unreg_chrdev: - unregister_chrdev(major, "openvswitch"); error_unreg_notifier: unregister_netdevice_notifier(&dp_device_notifier); error_vport_exit: @@ -2099,7 +2062,6 @@ static void dp_cleanup(void) { rcu_barrier(); dp_unregister_genl(ARRAY_SIZE(dp_genl_families)); - unregister_chrdev(major, "openvswitch"); unregister_netdevice_notifier(&dp_device_notifier); vport_exit(); flow_exit(); diff --git a/datapath/flow.c b/datapath/flow.c index 0987fd2a..e90b36a9 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -103,13 +103,11 @@ void flow_used(struct sw_flow *flow, struct sk_buff *skb) spin_unlock_bh(&flow->lock); } -struct sw_flow_actions *flow_actions_alloc(u32 actions_len) +struct sw_flow_actions *flow_actions_alloc(const struct nlattr *actions) { + int actions_len = nla_len(actions); struct sw_flow_actions *sfa; - if (actions_len % NLA_ALIGNTO) - return ERR_PTR(-EINVAL); - /* At least DP_MAX_PORTS actions are required to be able to flood a * packet to every port. Factor of 2 allows for setting VLAN tags, * etc. */ @@ -121,6 +119,7 @@ struct sw_flow_actions *flow_actions_alloc(u32 actions_len) return ERR_PTR(-ENOMEM); sfa->actions_len = actions_len; + memcpy(sfa->actions, nla_data(actions), actions_len); return sfa; } diff --git a/datapath/flow.h b/datapath/flow.h index f9aa44a4..78d5a701 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -83,7 +83,7 @@ struct sw_flow *flow_alloc(void); void flow_deferred_free(struct sw_flow *); void flow_free_tbl(struct tbl_node *); -struct sw_flow_actions *flow_actions_alloc(u32 actions_len); +struct sw_flow_actions *flow_actions_alloc(const struct nlattr *); void flow_deferred_free_acts(struct sw_flow_actions *); void flow_hold(struct sw_flow *); diff --git a/include/openvswitch/datapath-protocol.h b/include/openvswitch/datapath-protocol.h index 81836517..1ba43c5b 100644 --- a/include/openvswitch/datapath-protocol.h +++ b/include/openvswitch/datapath-protocol.h @@ -69,13 +69,6 @@ #include #include - -#define ODP_FLOW_NEW _IOWR('O', 13, struct odp_flow) -#define ODP_FLOW_DEL _IOWR('O', 14, struct odp_flow) -#define ODP_FLOW_GET _IOWR('O', 15, struct odp_flow) -#define ODP_FLOW_SET _IOWR('O', 16, struct odp_flow) -#define ODP_FLOW_DUMP _IOWR('O', 17, struct odp_flow) -#define ODP_FLOW_FLUSH _IO('O', 19) /* Datapaths. */ @@ -288,6 +281,19 @@ enum { }; #define ODP_PATCH_ATTR_MAX (__ODP_PATCH_ATTR_MAX - 1) + +/* Flows. */ + +#define ODP_FLOW_FAMILY "odp_flow" +#define ODP_FLOW_MCGROUP "odp_flow" + +enum odp_flow_cmd { + ODP_FLOW_CMD_UNSPEC, + ODP_FLOW_CMD_NEW, + ODP_FLOW_CMD_DEL, + ODP_FLOW_CMD_GET, + ODP_FLOW_CMD_SET +}; struct odp_flow_stats { uint64_t n_packets; /* Number of matched packets. */ @@ -350,23 +356,32 @@ struct odp_key_arp { }; /** - * struct odp_flow - header with basic information about a flow. - * @dp_idx: Datapath index. - * @len: Length of this structure plus the Netlink attributes following it. - * @total_len: Total space available for kernel reply to request. + * enum odp_flow_attr - attributes for %ODP_FLOW_* commands. + * @ODP_FLOW_ATTR_KEY: Nested %ODP_KEY_ATTR_* attributes specifying the flow + * key. Always present in notifications. Required for all requests (except + * dumps). + * @ODP_FLOW_ATTR_ACTIONS: Nested %ODPAT_* attributes specifying the actions to + * take for packets that match the key. Always present in notifications. + * Required for %ODP_FLOW_CMD_NEW requests, optional on %ODP_FLOW_CMD_SET + * request to change the existing actions, ignored for other requests. + * @ODP_FLOW_ATTR_STATS: &struct odp_flow_stats giving statistics for this + * flow. Present in notifications if the stats would be nonzero. Ignored in + * requests. + * @ODP_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the + * TCP flags seen on packets in this flow. Only present in notifications for + * TCP flows, and only if it would be nonzero. Ignored in requests. + * @ODP_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on + * the system monotonic clock, at which a packet was last processed for this + * flow. Only present in notifications if a packet has been processed for this + * flow. Ignored in requests. + * @ODP_FLOW_ATTR_CLEAR: If present in a %ODP_FLOW_CMD_SET request, clears the + * last-used time, accumulated TCP flags, and statistics for this flow. + * Otherwise ignored in requests. Never present in notifications. * - * Followed by &struct nlattr attributes, whose types are drawn from - * %ODP_FLOW_ATTR_*, up to a length of @len bytes including the &struct - * odp_flow header. + * These attributes follow the &struct odp_header within the Generic Netlink + * payload for %ODP_FLOW_* commands. */ -struct odp_flow { - uint32_t nlmsg_flags; - uint32_t dp_idx; - uint32_t len; - uint32_t total_len; -}; - -enum odp_flow_type { +enum odp_flow_attr { ODP_FLOW_ATTR_UNSPEC, ODP_FLOW_ATTR_KEY, /* Sequence of ODP_KEY_ATTR_* attributes. */ ODP_FLOW_ATTR_ACTIONS, /* Sequence of nested ODPAT_* attributes. */ @@ -374,7 +389,6 @@ enum odp_flow_type { ODP_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */ ODP_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */ ODP_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */ - ODP_FLOW_ATTR_STATE, /* u64 state for ODP_FLOW_DUMP. */ __ODP_FLOW_ATTR_MAX }; diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index b2d65ed8..fe10952d 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -25,12 +25,10 @@ #include #include #include -#include #include #include #include #include -#include #include #include @@ -80,18 +78,18 @@ static int dpif_linux_dp_get(const struct dpif *, struct dpif_linux_dp *reply, struct ofpbuf **bufp); struct dpif_linux_flow { - /* ioctl command argument. */ - int cmd; + /* Generic Netlink header. */ + uint8_t cmd; - /* struct odp_flow header. */ + /* struct odp_header. */ unsigned int nlmsg_flags; uint32_t dp_idx; /* Attributes. * - * The 'stats', 'used', and 'state' members point to 64-bit data that might - * only be aligned on 32-bit boundaries, so get_unaligned_u64() should be - * used to access their values. */ + * The 'stats' and 'used' members point to 64-bit data that might only be + * aligned on 32-bit boundaries, so get_unaligned_u64() should be used to + * access their values. */ const struct nlattr *key; /* ODP_FLOW_ATTR_KEY. */ size_t key_len; const struct nlattr *actions; /* ODP_FLOW_ATTR_ACTIONS. */ @@ -100,10 +98,13 @@ struct dpif_linux_flow { const uint8_t *tcp_flags; /* ODP_FLOW_ATTR_TCP_FLAGS. */ const uint64_t *used; /* ODP_FLOW_ATTR_USED. */ bool clear; /* ODP_FLOW_ATTR_CLEAR. */ - const uint64_t *state; /* ODP_FLOW_ATTR_STATE. */ }; static void dpif_linux_flow_init(struct dpif_linux_flow *); +static int dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *, + const struct ofpbuf *); +static void dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *, + struct ofpbuf *); static int dpif_linux_flow_transact(const struct dpif_linux_flow *request, struct dpif_linux_flow *reply, struct ofpbuf **bufp); @@ -113,7 +114,6 @@ static void dpif_linux_flow_get_stats(const struct dpif_linux_flow *, /* Datapath interface for the openvswitch Linux kernel module. */ struct dpif_linux { struct dpif dpif; - int fd; /* Multicast group messages. */ struct nl_sock *mc_sock; @@ -136,6 +136,7 @@ static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5); /* Generic Netlink family numbers for ODP. */ static int odp_datapath_family; static int odp_vport_family; +static int odp_flow_family; static int odp_packet_family; /* Generic Netlink socket. */ @@ -145,9 +146,6 @@ static int dpif_linux_init(void); static int open_dpif(const struct dpif_linux_dp *, const struct dpif_linux_vport *local_vport, struct dpif **); -static int get_openvswitch_major(void); -static int open_minor(int minor, int *fdp); -static int make_openvswitch_device(int minor, char **fnp); static void dpif_linux_port_changed(const struct rtnetlink_link_change *, void *dpif); @@ -168,7 +166,6 @@ dpif_linux_enumerate(struct svec *all_dps) { struct nl_dump dump; struct ofpbuf msg; - int major; int error; error = dpif_linux_init(); @@ -176,12 +173,6 @@ dpif_linux_enumerate(struct svec *all_dps) return error; } - /* Check that the Open vSwitch module is loaded. */ - major = get_openvswitch_major(); - if (major < 0) { - return -major; - } - dpif_linux_dp_dump_start(&dump); while (nl_dump_next(&dump, &msg)) { struct dpif_linux_dp dp; @@ -252,14 +243,8 @@ open_dpif(const struct dpif_linux_dp *dp, struct dpif_linux *dpif; char *name; int error; - int fd; int i; - error = open_minor(dp_idx, &fd); - if (error) { - goto error; - } - dpif = xmalloc(sizeof *dpif); error = rtnetlink_link_notifier_register(&dpif->port_notifier, dpif_linux_port_changed, dpif); @@ -271,7 +256,6 @@ open_dpif(const struct dpif_linux_dp *dp, dpif_init(&dpif->dpif, &dpif_linux_class, name, dp_idx, dp_idx); free(name); - dpif->fd = fd; dpif->mc_sock = NULL; for (i = 0; i < DPIF_N_UC_TYPES; i++) { dpif->mcgroups[i] = dp->mcgroups[i]; @@ -288,8 +272,6 @@ open_dpif(const struct dpif_linux_dp *dp, error_free: free(dpif); - close(fd); -error: return error; } @@ -300,7 +282,6 @@ dpif_linux_close(struct dpif *dpif_) rtnetlink_link_notifier_unregister(&dpif->port_notifier); shash_destroy(&dpif->changed_ports); free(dpif->local_ifname); - close(dpif->fd); free(dpif); } @@ -472,7 +453,12 @@ static int dpif_linux_flow_flush(struct dpif *dpif_) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); - return ioctl(dpif->fd, ODP_FLOW_FLUSH, dpif->minor) ? errno : 0; + struct dpif_linux_flow flow; + + dpif_linux_flow_init(&flow); + flow.cmd = ODP_FLOW_CMD_DEL; + flow.dp_idx = dpif->minor; + return dpif_linux_flow_transact(&flow, NULL, NULL); } struct dpif_linux_port_state { @@ -574,7 +560,7 @@ dpif_linux_flow_get(const struct dpif *dpif_, int error; dpif_linux_flow_init(&request); - request.cmd = ODP_FLOW_GET; + request.cmd = ODP_FLOW_CMD_GET; request.dp_idx = dpif->minor; request.key = key; request.key_len = key_len; @@ -606,7 +592,7 @@ dpif_linux_flow_put(struct dpif *dpif_, enum dpif_flow_put_flags flags, int error; dpif_linux_flow_init(&request); - request.cmd = flags & DPIF_FP_CREATE ? ODP_FLOW_NEW : ODP_FLOW_SET; + request.cmd = flags & DPIF_FP_CREATE ? ODP_FLOW_CMD_NEW : ODP_FLOW_CMD_SET; request.dp_idx = dpif->minor; request.key = key; request.key_len = key_len; @@ -637,7 +623,7 @@ dpif_linux_flow_del(struct dpif *dpif_, int error; dpif_linux_flow_init(&request); - request.cmd = ODP_FLOW_DEL; + request.cmd = ODP_FLOW_CMD_DEL; request.dp_idx = dpif->minor; request.key = key; request.key_len = key_len; @@ -652,37 +638,48 @@ dpif_linux_flow_del(struct dpif *dpif_, } struct dpif_linux_flow_state { + struct nl_dump dump; struct dpif_linux_flow flow; - struct ofpbuf *buf; struct dpif_flow_stats stats; }; static int -dpif_linux_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep) +dpif_linux_flow_dump_start(const struct dpif *dpif_, void **statep) { - *statep = xzalloc(sizeof(struct dpif_linux_flow_state)); + struct dpif_linux *dpif = dpif_linux_cast(dpif_); + struct dpif_linux_flow_state *state; + struct dpif_linux_flow request; + struct ofpbuf *buf; + + *statep = state = xmalloc(sizeof *state); + + dpif_linux_flow_init(&request); + request.cmd = ODP_DP_CMD_GET; + request.dp_idx = dpif->minor; + + buf = ofpbuf_new(1024); + dpif_linux_flow_to_ofpbuf(&request, buf); + nl_dump_start(&state->dump, genl_sock, buf); + ofpbuf_delete(buf); + return 0; } static int -dpif_linux_flow_dump_next(const struct dpif *dpif_, void *state_, +dpif_linux_flow_dump_next(const struct dpif *dpif_ OVS_UNUSED, void *state_, const struct nlattr **key, size_t *key_len, const struct nlattr **actions, size_t *actions_len, const struct dpif_flow_stats **stats) { - struct dpif_linux *dpif = dpif_linux_cast(dpif_); struct dpif_linux_flow_state *state = state_; - struct ofpbuf *old_buf = state->buf; - struct dpif_linux_flow request; + struct ofpbuf buf; int error; - dpif_linux_flow_init(&request); - request.cmd = ODP_FLOW_DUMP; - request.dp_idx = dpif->minor; - request.state = state->flow.state; - error = dpif_linux_flow_transact(&request, &state->flow, &state->buf); - ofpbuf_delete(old_buf); + if (!nl_dump_next(&state->dump, &buf)) { + return EOF; + } + error = dpif_linux_flow_from_ofpbuf(&state->flow, &buf); if (!error) { if (key) { *key = state->flow.key; @@ -697,17 +694,16 @@ dpif_linux_flow_dump_next(const struct dpif *dpif_, void *state_, *stats = &state->stats; } } - return error == ENODEV ? EOF : error; + return error; } static int dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_) { struct dpif_linux_flow_state *state = state_; - - ofpbuf_delete(state->buf); + int error = nl_dump_done(&state->dump); free(state); - return 0; + return error; } static int @@ -994,8 +990,6 @@ const struct dpif_class dpif_linux_class = { dpif_linux_recv_purge, }; -static int get_major(const char *target); - static int dpif_linux_init(void) { @@ -1004,9 +998,17 @@ dpif_linux_init(void) if (error < 0) { error = nl_lookup_genl_family(ODP_DATAPATH_FAMILY, &odp_datapath_family); + if (error) { + VLOG_ERR("Generic Netlink family '%s' does not exist. " + "The Open vSwitch kernel module is probably not loaded.", + ODP_DATAPATH_FAMILY); + } if (!error) { error = nl_lookup_genl_family(ODP_VPORT_FAMILY, &odp_vport_family); } + if (!error) { + error = nl_lookup_genl_family(ODP_FLOW_FAMILY, &odp_flow_family); + } if (!error) { error = nl_lookup_genl_family(ODP_PACKET_FAMILY, &odp_packet_family); @@ -1037,146 +1039,6 @@ dpif_linux_is_internal_device(const char *name) return reply.type == ODP_VPORT_TYPE_INTERNAL; } -static int -make_openvswitch_device(int minor, char **fnp) -{ - const char dirname[] = "/dev/net"; - int major; - dev_t dev; - struct stat s; - char fn[128]; - - *fnp = NULL; - - major = get_openvswitch_major(); - if (major < 0) { - return -major; - } - dev = makedev(major, minor); - - sprintf(fn, "%s/dp%d", dirname, minor); - if (!stat(fn, &s)) { - if (!S_ISCHR(s.st_mode)) { - VLOG_WARN_RL(&error_rl, "%s is not a character device, fixing", - fn); - } else if (s.st_rdev != dev) { - VLOG_WARN_RL(&error_rl, - "%s is device %u:%u but should be %u:%u, fixing", - fn, major(s.st_rdev), minor(s.st_rdev), - major(dev), minor(dev)); - } else { - goto success; - } - if (unlink(fn)) { - VLOG_WARN_RL(&error_rl, "%s: unlink failed (%s)", - fn, strerror(errno)); - return errno; - } - } else if (errno == ENOENT) { - if (stat(dirname, &s)) { - if (errno == ENOENT) { - if (mkdir(dirname, 0755)) { - VLOG_WARN_RL(&error_rl, "%s: mkdir failed (%s)", - dirname, strerror(errno)); - return errno; - } - } else { - VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", - dirname, strerror(errno)); - return errno; - } - } - } else { - VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", fn, strerror(errno)); - return errno; - } - - /* The device needs to be created. */ - if (mknod(fn, S_IFCHR | 0700, dev)) { - VLOG_WARN_RL(&error_rl, - "%s: creating character device %u:%u failed (%s)", - fn, major(dev), minor(dev), strerror(errno)); - return errno; - } - -success: - *fnp = xstrdup(fn); - return 0; -} - -/* Return the major device number of the Open vSwitch device. If it - * cannot be determined, a negative errno is returned. */ -static int -get_openvswitch_major(void) -{ - static int openvswitch_major = -1; - if (openvswitch_major < 0) { - openvswitch_major = get_major("openvswitch"); - } - return openvswitch_major; -} - -static int -get_major(const char *target) -{ - const char fn[] = "/proc/devices"; - char line[128]; - FILE *file; - int ln; - - file = fopen(fn, "r"); - if (!file) { - VLOG_ERR("opening %s failed (%s)", fn, strerror(errno)); - return -errno; - } - - for (ln = 1; fgets(line, sizeof line, file); ln++) { - char name[64]; - int major; - - if (!strncmp(line, "Character", 9) || line[0] == '\0') { - /* Nothing to do. */ - } else if (!strncmp(line, "Block", 5)) { - /* We only want character devices, so skip the rest of the file. */ - break; - } else if (sscanf(line, "%d %63s", &major, name)) { - if (!strcmp(name, target)) { - fclose(file); - return major; - } - } else { - VLOG_WARN_ONCE("%s:%d: syntax error", fn, ln); - } - } - - fclose(file); - - VLOG_ERR("%s: %s major not found (is the module loaded?)", fn, target); - return -ENODEV; -} - -static int -open_minor(int minor, int *fdp) -{ - int error; - char *fn; - - error = make_openvswitch_device(minor, &fn); - if (error) { - return error; - } - - *fdp = open(fn, O_RDONLY | O_NONBLOCK); - if (*fdp < 0) { - error = errno; - VLOG_WARN("%s: open failed (%s)", fn, strerror(error)); - free(fn); - return error; - } - free(fn); - return 0; -} - static void dpif_linux_port_changed(const struct rtnetlink_link_change *change, void *dpif_) @@ -1196,24 +1058,6 @@ dpif_linux_port_changed(const struct rtnetlink_link_change *change, dpif->change_error = true; } } - -static int -get_dp0_fd(int *dp0_fdp) -{ - static int dp0_fd = -1; - if (dp0_fd < 0) { - int error; - int fd; - - error = open_minor(0, &fd); - if (error) { - return error; - } - dp0_fd = fd; - } - *dp0_fdp = dp0_fd; - return 0; -} /* Parses the contents of 'buf', which contains a "struct odp_header" followed * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a @@ -1588,8 +1432,8 @@ dpif_linux_dp_get(const struct dpif *dpif_, struct dpif_linux_dp *reply, return dpif_linux_dp_transact(&request, reply, bufp); } -/* Parses the contents of 'buf', which contains a "struct odp_flow" followed by - * Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a +/* Parses the contents of 'buf', which contains a "struct odp_header" followed + * by Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a * positive errno value. * * 'flow' will contain pointers into 'buf', so the caller should not free 'buf' @@ -1608,22 +1452,29 @@ dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *flow, [ODP_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true }, [ODP_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true }, /* The kernel never uses ODP_FLOW_ATTR_CLEAR. */ - [ODP_FLOW_ATTR_STATE] = { .type = NL_A_U64, .optional = true }, }; - struct odp_flow *odp_flow; struct nlattr *a[ARRAY_SIZE(odp_flow_policy)]; + struct odp_header *odp_header; + struct nlmsghdr *nlmsg; + struct genlmsghdr *genl; + struct ofpbuf b; dpif_linux_flow_init(flow); - if (!nl_policy_parse(buf, sizeof *odp_flow, odp_flow_policy, - a, ARRAY_SIZE(odp_flow_policy))) { + ofpbuf_use_const(&b, buf->data, buf->size); + nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); + genl = ofpbuf_try_pull(&b, sizeof *genl); + odp_header = ofpbuf_try_pull(&b, sizeof *odp_header); + if (!nlmsg || !genl || !odp_header + || nlmsg->nlmsg_type != odp_flow_family + || !nl_policy_parse(&b, 0, odp_flow_policy, a, + ARRAY_SIZE(odp_flow_policy))) { return EINVAL; } - odp_flow = buf->data; - flow->nlmsg_flags = odp_flow->nlmsg_flags; - flow->dp_idx = odp_flow->dp_idx; + flow->nlmsg_flags = nlmsg->nlmsg_flags; + flow->dp_idx = odp_header->dp_idx; flow->key = nl_attr_get(a[ODP_FLOW_ATTR_KEY]); flow->key_len = nl_attr_get_size(a[ODP_FLOW_ATTR_KEY]); if (a[ODP_FLOW_ATTR_ACTIONS]) { @@ -1636,21 +1487,22 @@ dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *flow, if (a[ODP_FLOW_ATTR_TCP_FLAGS]) { flow->tcp_flags = nl_attr_get(a[ODP_FLOW_ATTR_TCP_FLAGS]); } - if (a[ODP_FLOW_ATTR_STATE]) { - flow->state = nl_attr_get(a[ODP_FLOW_ATTR_STATE]); - } return 0; } -/* Appends to 'buf' (which must initially be empty) a "struct odp_flow" +/* Appends to 'buf' (which must initially be empty) a "struct odp_header" * followed by Netlink attributes corresponding to 'flow'. */ static void dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *flow, struct ofpbuf *buf) { - struct odp_flow *odp_flow; + struct odp_header *odp_header; - ofpbuf_reserve(buf, sizeof odp_flow); + nl_msg_put_genlmsghdr(buf, 0, odp_flow_family, + NLM_F_REQUEST | flow->nlmsg_flags, flow->cmd, 1); + + odp_header = ofpbuf_put_uninit(buf, sizeof *odp_header); + odp_header->dp_idx = flow->dp_idx; if (flow->key_len) { nl_msg_put_unspec(buf, ODP_FLOW_ATTR_KEY, flow->key, flow->key_len); @@ -1669,17 +1521,6 @@ dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *flow, if (flow->clear) { nl_msg_put_flag(buf, ODP_FLOW_ATTR_CLEAR); } - - if (flow->state) { - nl_msg_put_u64(buf, ODP_FLOW_ATTR_STATE, - get_unaligned_u64(flow->state)); - } - - odp_flow = ofpbuf_push_uninit(buf, sizeof *odp_flow); - odp_flow->nlmsg_flags = flow->nlmsg_flags; - odp_flow->dp_idx = flow->dp_idx; - odp_flow->len = buf->size; - odp_flow->total_len = (char *) ofpbuf_end(buf) - (char *) buf->data; } /* Clears 'flow' to "empty" values. */ @@ -1692,49 +1533,32 @@ dpif_linux_flow_init(struct dpif_linux_flow *flow) /* Executes 'request' in the kernel datapath. If the command fails, returns a * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the - * result of the command is expected to be an odp_flow also, which is decoded - * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the - * reply is no longer needed ('reply' will contain pointers into '*bufp'). */ + * result of the command is expected to be a flow also, which is decoded and + * stored in '*reply' and '*bufp'. The caller must free '*bufp' when the reply + * is no longer needed ('reply' will contain pointers into '*bufp'). */ int dpif_linux_flow_transact(const struct dpif_linux_flow *request, struct dpif_linux_flow *reply, struct ofpbuf **bufp) { - struct ofpbuf *buf = NULL; + struct ofpbuf *request_buf; int error; - int fd; assert((reply != NULL) == (bufp != NULL)); - error = get_dp0_fd(&fd); - if (error) { - goto error; - } - - buf = ofpbuf_new(1024); - dpif_linux_flow_to_ofpbuf(request, buf); - - error = ioctl(fd, request->cmd, buf->data) ? errno : 0; - if (error) { - goto error; - } + request_buf = ofpbuf_new(1024); + dpif_linux_flow_to_ofpbuf(request, request_buf); + error = nl_sock_transact(genl_sock, request_buf, bufp); + ofpbuf_delete(request_buf); - if (bufp) { - buf->size = ((struct odp_flow *) buf->data)->len; - error = dpif_linux_flow_from_ofpbuf(reply, buf); + if (reply) { + if (!error) { + error = dpif_linux_flow_from_ofpbuf(reply, *bufp); + } if (error) { - goto error; + dpif_linux_flow_init(reply); + ofpbuf_delete(*bufp); + *bufp = NULL; } - *bufp = buf; - } else { - ofpbuf_delete(buf); - } - return 0; - -error: - ofpbuf_delete(buf); - if (bufp) { - memset(reply, 0, sizeof *reply); - *bufp = NULL; } return error; }