From aaff4b55a7992ac47a143abb6bffac5c931f073a Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 28 Jan 2011 13:55:04 -0800 Subject: [PATCH] datapath: Convert ODP_DP_* commands to use AF_NETLINK socket layer. This commit calls genl_lock(), which wasn't exported before Linux 2.6.35, and thus doesn't support earlier kernels. That problem will be fixed once the whole userspace interface transitions to Generic Netlink a few commits from now. Signed-off-by: Ben Pfaff Acked-by: Jesse Gross --- datapath/datapath.c | 303 ++++++++---------- datapath/linux-2.6/compat-2.6/genetlink.inc | 59 ++++ .../compat-2.6/include/linux/rtnetlink.h | 2 +- .../compat-2.6/include/net/genetlink.h | 51 ++- .../compat-2.6/include/net/netlink.h | 38 +++ include/openvswitch/datapath-protocol.h | 69 ++-- lib/dpif-linux.c | 184 ++++++----- 7 files changed, 425 insertions(+), 281 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index db20cf9e..f42ead18 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -1156,27 +1156,38 @@ exit: } static const struct nla_policy datapath_policy[ODP_DP_ATTR_MAX + 1] = { +#ifdef HAVE_NLA_NUL_STRING [ODP_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, +#endif [ODP_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 }, [ODP_DP_ATTR_SAMPLING] = { .type = NLA_U32 }, }; -/* Called with genl_mutex. 
*/ -static int copy_datapath_to_user(void __user *dst, struct datapath *dp, uint32_t total_len) +static struct genl_family dp_datapath_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = sizeof(struct odp_header), + .name = ODP_DATAPATH_FAMILY, + .version = 1, + .maxattr = ODP_DP_ATTR_MAX +}; + +static struct genl_multicast_group dp_datapath_multicast_group = { + .name = ODP_DATAPATH_MCGROUP +}; + +static int odp_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, + u32 pid, u32 seq, u32 flags, u8 cmd) { - struct odp_datapath *odp_datapath; - struct sk_buff *skb; + struct odp_header *odp_header; struct nlattr *nla; int err; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - err = -ENOMEM; - if (!skb) - goto exit; + odp_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family, + flags, cmd); + if (!odp_header) + goto error; - odp_datapath = (struct odp_datapath*)__skb_put(skb, sizeof(struct odp_datapath)); - odp_datapath->dp_idx = dp->dp_idx; - odp_datapath->total_len = total_len; + odp_header->dp_idx = dp->dp_idx; rcu_read_lock(); err = nla_put_string(skb, ODP_DP_ATTR_NAME, dp_name(dp)); @@ -1203,77 +1214,49 @@ static int copy_datapath_to_user(void __user *dst, struct datapath *dp, uint32_t NLA_PUT_U32(skb, ODP_PACKET_CMD_SAMPLE, packet_mc_group(dp, ODP_PACKET_CMD_SAMPLE)); nla_nest_end(skb, nla); - if (skb->len > total_len) - goto nla_put_failure; - - odp_datapath->len = skb->len; - err = copy_to_user(dst, skb->data, skb->len) ? -EFAULT : 0; - goto exit_free_skb; + return genlmsg_end(skb, odp_header); nla_put_failure: - err = -EMSGSIZE; -exit_free_skb: - kfree_skb(skb); -exit: - return err; + genlmsg_cancel(skb, odp_header); +error: + return -EMSGSIZE; } -/* Called with genl_mutex. 
*/ -static struct sk_buff *copy_datapath_from_user(struct odp_datapath __user *uodp_datapath, struct nlattr *a[ODP_DP_ATTR_MAX + 1]) +static struct sk_buff *odp_dp_cmd_build_info(struct datapath *dp, u32 pid, + u32 seq, u8 cmd) { - struct odp_datapath *odp_datapath; struct sk_buff *skb; - u32 len; - int err; - - if (get_user(len, &uodp_datapath->len)) - return ERR_PTR(-EFAULT); - if (len < sizeof(struct odp_datapath)) - return ERR_PTR(-EINVAL); + int retval; - skb = alloc_skb(len, GFP_KERNEL); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); - err = -EFAULT; - if (copy_from_user(__skb_put(skb, len), uodp_datapath, len)) - goto error_free_skb; - - odp_datapath = (struct odp_datapath *)skb->data; - err = -EINVAL; - if (odp_datapath->len != len) - goto error_free_skb; - - err = nla_parse(a, ODP_DP_ATTR_MAX, - (struct nlattr *)(skb->data + sizeof(struct odp_datapath)), - skb->len - sizeof(struct odp_datapath), datapath_policy); - if (err) - goto error_free_skb; + retval = odp_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd); + if (retval < 0) { + kfree_skb(skb); + return ERR_PTR(retval); + } + return skb; +} +static int odp_dp_cmd_validate(struct nlattr *a[ODP_DP_ATTR_MAX + 1]) +{ if (a[ODP_DP_ATTR_IPV4_FRAGS]) { u32 frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]); - err = -EINVAL; if (frags != ODP_DP_FRAG_ZERO && frags != ODP_DP_FRAG_DROP) - goto error_free_skb; + return -EINVAL; } - err = VERIFY_NUL_STRING(a[ODP_DP_ATTR_NAME], IFNAMSIZ - 1); - if (err) - goto error_free_skb; - - return skb; - -error_free_skb: - kfree_skb(skb); - return ERR_PTR(err); + return VERIFY_NUL_STRING(a[ODP_DP_ATTR_NAME], IFNAMSIZ - 1); } /* Called with genl_mutex and optionally with RTNL lock also. 
*/ -static struct datapath *lookup_datapath(struct odp_datapath *odp_datapath, struct nlattr *a[ODP_DP_ATTR_MAX + 1]) +static struct datapath *lookup_datapath(struct odp_header *odp_header, struct nlattr *a[ODP_DP_ATTR_MAX + 1]) { if (!a[ODP_DP_ATTR_NAME]) { - struct datapath *dp = get_dp(odp_datapath->dp_idx); + struct datapath *dp = get_dp(odp_header->dp_idx); if (!dp) return ERR_PTR(-ENODEV); return dp; @@ -1301,33 +1284,31 @@ static void change_datapath(struct datapath *dp, struct nlattr *a[ODP_DP_ATTR_MA dp->sflow_probability = nla_get_u32(a[ODP_DP_ATTR_SAMPLING]); } -static int new_datapath(struct odp_datapath __user *uodp_datapath) +static int odp_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *a[ODP_DP_ATTR_MAX + 1]; - struct odp_datapath *odp_datapath; + struct nlattr **a = info->attrs; + struct odp_header *odp_header = info->userhdr; struct vport_parms parms; - struct sk_buff *skb; + struct sk_buff *reply; struct datapath *dp; struct vport *vport; int dp_idx; int err; - skb = copy_datapath_from_user(uodp_datapath, a); - err = PTR_ERR(skb); - if (IS_ERR(skb)) - goto err; - odp_datapath = (struct odp_datapath *)skb->data; - err = -EINVAL; if (!a[ODP_DP_ATTR_NAME]) - goto err_free_skb; + goto err; + + err = odp_dp_cmd_validate(a); + if (err) + goto err; rtnl_lock(); err = -ENODEV; if (!try_module_get(THIS_MODULE)) goto err_unlock_rtnl; - dp_idx = odp_datapath->dp_idx; + dp_idx = odp_header->dp_idx; if (dp_idx < 0) { err = -EFBIG; for (dp_idx = 0; dp_idx < ARRAY_SIZE(dps); dp_idx++) { @@ -1385,11 +1366,18 @@ static int new_datapath(struct odp_datapath __user *uodp_datapath) change_datapath(dp, a); + reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW); + err = PTR_ERR(reply); + if (IS_ERR(reply)) + goto err_destroy_local_port; + rcu_assign_pointer(dps[dp_idx], dp); dp_sysfs_add_dp(dp); rtnl_unlock(); + genl_notify(reply, genl_info_net(info), info->snd_pid, + dp_datapath_multicast_group.id, info->nlhdr, 
GFP_KERNEL); return 0; err_destroy_local_port: @@ -1402,30 +1390,31 @@ err_put_module: module_put(THIS_MODULE); err_unlock_rtnl: rtnl_unlock(); -err_free_skb: - kfree_skb(skb); err: return err; } -static int del_datapath(struct odp_datapath __user *uodp_datapath) +static int odp_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *a[ODP_DP_ATTR_MAX + 1]; struct vport *vport, *next_vport; + struct sk_buff *reply; struct datapath *dp; - struct sk_buff *skb; int err; - skb = copy_datapath_from_user(uodp_datapath, a); - err = PTR_ERR(skb); - if (IS_ERR(skb)) + err = odp_dp_cmd_validate(info->attrs); + if (err) goto exit; rtnl_lock(); - dp = lookup_datapath((struct odp_datapath *)skb->data, a); + dp = lookup_datapath(info->userhdr, info->attrs); err = PTR_ERR(dp); if (IS_ERR(dp)) - goto exit_free; + goto exit_unlock; + + reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_DEL); + err = PTR_ERR(reply); + if (IS_ERR(reply)) + goto exit_unlock; list_for_each_entry_safe (vport, next_vport, &dp->port_list, node) if (vport->port_no != ODPP_LOCAL) @@ -1438,96 +1427,108 @@ static int del_datapath(struct odp_datapath __user *uodp_datapath) call_rcu(&dp->rcu, destroy_dp_rcu); module_put(THIS_MODULE); + genl_notify(reply, genl_info_net(info), info->snd_pid, + dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL); err = 0; -exit_free: - kfree_skb(skb); +exit_unlock: rtnl_unlock(); exit: return err; } -static int set_datapath(struct odp_datapath __user *uodp_datapath) +static int odp_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *a[ODP_DP_ATTR_MAX + 1]; + struct sk_buff *reply; struct datapath *dp; - struct sk_buff *skb; int err; - skb = copy_datapath_from_user(uodp_datapath, a); - err = PTR_ERR(skb); - if (IS_ERR(skb)) - goto exit; + err = odp_dp_cmd_validate(info->attrs); + if (err) + return err; - dp = lookup_datapath((struct odp_datapath *)skb->data, a); - err = PTR_ERR(dp); + dp = 
lookup_datapath(info->userhdr, info->attrs); if (IS_ERR(dp)) - goto exit_free; + return PTR_ERR(dp); - change_datapath(dp, a); - err = 0; + change_datapath(dp, info->attrs); -exit_free: - kfree_skb(skb); -exit: - return err; + reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW); + if (IS_ERR(reply)) { + err = PTR_ERR(reply); + netlink_set_err(INIT_NET_GENL_SOCK, 0, + dp_datapath_multicast_group.id, err); + return 0; + } + + genl_notify(reply, genl_info_net(info), info->snd_pid, + dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL); + return 0; } -static int get_datapath(struct odp_datapath __user *uodp_datapath) +static int odp_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *a[ODP_DP_ATTR_MAX + 1]; - struct odp_datapath *odp_datapath; + struct sk_buff *reply; struct datapath *dp; - struct sk_buff *skb; int err; - skb = copy_datapath_from_user(uodp_datapath, a); - err = PTR_ERR(skb); - if (IS_ERR(skb)) - goto exit; - odp_datapath = (struct odp_datapath *)skb->data; - - dp = lookup_datapath(odp_datapath, a); + err = odp_dp_cmd_validate(info->attrs); + if (err) + return err; - err = PTR_ERR(dp); + dp = lookup_datapath(info->userhdr, info->attrs); if (IS_ERR(dp)) - goto exit_free; + return PTR_ERR(dp); - err = copy_datapath_to_user(uodp_datapath, dp, odp_datapath->total_len); -exit_free: - kfree_skb(skb); -exit: - return err; + reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW); + if (IS_ERR(reply)) + return PTR_ERR(reply); + + return genlmsg_reply(reply, info); } -static int dump_datapath(struct odp_datapath __user *uodp_datapath) +static int odp_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct nlattr *a[ODP_DP_ATTR_MAX + 1]; - struct odp_datapath *odp_datapath; - struct sk_buff *skb; u32 dp_idx; - int err; - skb = copy_datapath_from_user(uodp_datapath, a); - err = PTR_ERR(skb); - if (IS_ERR(skb)) - goto exit; - odp_datapath = (struct odp_datapath 
*)skb->data; - - err = -ENODEV; - for (dp_idx = odp_datapath->dp_idx; dp_idx < ARRAY_SIZE(dps); dp_idx++) { + for (dp_idx = cb->args[0]; dp_idx < ARRAY_SIZE(dps); dp_idx++) { struct datapath *dp = get_dp(dp_idx); if (!dp) continue; - - err = copy_datapath_to_user(uodp_datapath, dp, odp_datapath->total_len); - break; + if (odp_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + ODP_DP_CMD_NEW) < 0) + break; } - kfree_skb(skb); -exit: - return err; + + cb->args[0] = dp_idx; + return skb->len; } +static struct genl_ops dp_datapath_genl_ops[] = { + { .cmd = ODP_DP_CMD_NEW, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = datapath_policy, + .doit = odp_dp_cmd_new + }, + { .cmd = ODP_DP_CMD_DEL, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ + .policy = datapath_policy, + .doit = odp_dp_cmd_del + }, + { .cmd = ODP_DP_CMD_GET, + .flags = 0, /* OK for unprivileged users. */ + .policy = datapath_policy, + .doit = odp_dp_cmd_get, + .dumpit = odp_dp_cmd_dump + }, + { .cmd = ODP_DP_CMD_SET, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. 
*/ + .policy = datapath_policy, + .doit = odp_dp_cmd_set, + }, +}; + static const struct nla_policy vport_policy[ODP_VPORT_ATTR_MAX + 1] = { [ODP_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [ODP_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, @@ -1926,26 +1927,6 @@ static long openvswitch_ioctl(struct file *f, unsigned int cmd, genl_lock(); switch (cmd) { - case ODP_DP_NEW: - err = new_datapath((struct odp_datapath __user *)argp); - goto exit; - - case ODP_DP_GET: - err = get_datapath((struct odp_datapath __user *)argp); - goto exit; - - case ODP_DP_DEL: - err = del_datapath((struct odp_datapath __user *)argp); - goto exit; - - case ODP_DP_SET: - err = set_datapath((struct odp_datapath __user *)argp); - goto exit; - - case ODP_DP_DUMP: - err = dump_datapath((struct odp_datapath __user *)argp); - goto exit; - case ODP_VPORT_NEW: err = attach_vport((struct odp_vport __user *)argp); goto exit; @@ -2001,11 +1982,6 @@ static long openvswitch_compat_ioctl(struct file *f, unsigned int cmd, unsigned /* Ioctls that don't need any translation at all. 
*/ return openvswitch_ioctl(f, cmd, argp); - case ODP_DP_NEW: - case ODP_DP_GET: - case ODP_DP_DEL: - case ODP_DP_SET: - case ODP_DP_DUMP: case ODP_VPORT_NEW: case ODP_VPORT_DEL: case ODP_VPORT_GET: @@ -2043,6 +2019,9 @@ struct genl_family_and_ops { }; static const struct genl_family_and_ops dp_genl_families[] = { + { &dp_datapath_genl_family, + dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops), + &dp_datapath_multicast_group }, { &dp_packet_genl_family, dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops), NULL }, diff --git a/datapath/linux-2.6/compat-2.6/genetlink.inc b/datapath/linux-2.6/compat-2.6/genetlink.inc index ea2adaa8..d381e4f0 100644 --- a/datapath/linux-2.6/compat-2.6/genetlink.inc +++ b/datapath/linux-2.6/compat-2.6/genetlink.inc @@ -72,6 +72,65 @@ err_out: } #endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) +/** + * nlmsg_notify - send a notification netlink message + * @sk: netlink socket to use + * @skb: notification message + * @pid: destination netlink pid for reports or 0 + * @group: destination multicast group or 0 + * @report: 1 to report back, 0 to disable + * @flags: allocation flags + */ +int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, + unsigned int group, int report, gfp_t flags) +{ + int err = 0; + + if (group) { + int exclude_pid = 0; + + if (report) { + atomic_inc(&skb->users); + exclude_pid = pid; + } + + /* errors reported via destination sk->sk_err, but propagate + * delivery errors if NETLINK_BROADCAST_ERROR flag is set */ + err = nlmsg_multicast(sk, skb, exclude_pid, group, flags); + } + + if (report) { + int err2; + + err2 = nlmsg_unicast(sk, skb, pid); + if (!err || err == -ESRCH) + err = err2; + } + + return err; +} +#endif + +/* This is analogous to rtnl_notify() but uses genl_sock instead of rtnl. + * + * This is not (yet) in any upstream kernel. 
*/ +void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, + struct nlmsghdr *nlh, gfp_t flags) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32) + struct sock *sk = net->genl_sock; +#else + struct sock *sk = genl_sock; +#endif + int report = 0; + + if (nlh) + report = nlmsg_report(nlh); + + nlmsg_notify(sk, skb, pid, group, report, flags); +} + #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) /* This function wasn't exported before 2.6.30. Lose! */ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) diff --git a/datapath/linux-2.6/compat-2.6/include/linux/rtnetlink.h b/datapath/linux-2.6/compat-2.6/include/linux/rtnetlink.h index 59be83f8..0a021494 100644 --- a/datapath/linux-2.6/compat-2.6/include/linux/rtnetlink.h +++ b/datapath/linux-2.6/compat-2.6/include/linux/rtnetlink.h @@ -12,7 +12,7 @@ static inline void rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, BUG_ON(nlh != NULL); /* not implemented */ if (group) { /* errors reported via destination sk->sk_err */ - nlmsg_multicast(rtnl, skb, 0, group); + nlmsg_multicast(rtnl, skb, 0, group, flags); } } diff --git a/datapath/linux-2.6/compat-2.6/include/net/genetlink.h b/datapath/linux-2.6/compat-2.6/include/net/genetlink.h index f5bff63c..37b25b2e 100644 --- a/datapath/linux-2.6/compat-2.6/include/net/genetlink.h +++ b/datapath/linux-2.6/compat-2.6/include/net/genetlink.h @@ -1,12 +1,44 @@ #ifndef __NET_GENERIC_NETLINK_WRAPPER_H #define __NET_GENERIC_NETLINK_WRAPPER_H 1 - +#include #include + +/* Very special super-nasty workaround here: + * + * Before 2.6.19, nlmsg_multicast() lacked a 'flags' parameter. We work + * around that in our <net/netlink.h> replacement, so that nlmsg_multicast + * is a macro that expands to rpl_nlmsg_multicast, which in turn has the + * 'flags' parameter. + * + * However, also before 2.6.19, <net/genetlink.h> contains an inline definition + * of genlmsg_multicast() that, of course, calls it without the 'flags' + * parameter. This causes a build failure. 
+ * + * This works around the problem by temporarily renaming both nlmsg_multicast + * and genlmsg_multicast with a "busted_" prefix. (Nothing actually defines + * busted_nlmsg_multicast(), so if anything actually tries to call it, then + * we'll get a link error.) + */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) +#undef nlmsg_multicast +#define nlmsg_multicast busted_nlmsg_multicast +#define genlmsg_multicast busted_genlmsg_multicast +extern int busted_nlmsg_multicast(struct sock *sk, struct sk_buff *skb, + u32 pid, unsigned int group); +#endif /* linux kernel < v2.6.19 */ + #include_next + +/* Drop the "busted_" prefix described above. */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) +#undef nlmsg_multicast +#undef genlmsg_multicast +#define nlmsg_multicast rpl_nlmsg_multicast +#endif /* linux kernel < v2.6.19 */ + #include -#include #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) #include @@ -120,4 +152,19 @@ int genl_register_family_with_ops(struct genl_family *family, struct genl_ops *ops, size_t n_ops); #endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) +#define genl_notify(skb, net, pid, group, nlh, flags) \ + genl_notify(skb, pid, group, nlh, flags) +#endif +extern void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, + u32 group, struct nlmsghdr *nlh, gfp_t flags); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19) && \ + LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32) +static inline struct net *genl_info_net(struct genl_info *info) +{ + return &init_net; +} +#endif + #endif /* genetlink.h */ diff --git a/datapath/linux-2.6/compat-2.6/include/net/netlink.h b/datapath/linux-2.6/compat-2.6/include/net/netlink.h index 52238d8b..f4fb8437 100644 --- a/datapath/linux-2.6/compat-2.6/include/net/netlink.h +++ b/datapath/linux-2.6/compat-2.6/include/net/netlink.h @@ -128,4 +128,42 @@ static inline struct nlattr *nla_find_nested(struct nlattr *nla, int attrtype) } #endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) +/** + * nlmsg_report - 
need to report back to application? + * @nlh: netlink message header + * + * Returns 1 if a report back to the application is requested. + */ +static inline int nlmsg_report(const struct nlmsghdr *nlh) +{ + return !!(nlh->nlmsg_flags & NLM_F_ECHO); +} + +extern int nlmsg_notify(struct sock *sk, struct sk_buff *skb, + u32 pid, unsigned int group, int report, + gfp_t flags); +#endif /* linux kernel < 2.6.19 */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) +/* Before 2.6.19 the 'flags' parameter was missing, so replace it. We have to + * #include <net/genetlink.h> first because the 2.6.18 version of that header + * has an inline call to nlmsg_multicast() without, of course, any 'flags' + * argument. */ +#define nlmsg_multicast rpl_nlmsg_multicast +static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, + u32 pid, unsigned int group, gfp_t flags) +{ + int err; + + NETLINK_CB(skb).dst_group = group; + + err = netlink_broadcast(sk, skb, pid, group, flags); + if (err > 0) + err = 0; + + return err; +} +#endif /* linux kernel < 2.6.19 */ + #endif /* net/netlink.h */ diff --git a/include/openvswitch/datapath-protocol.h b/include/openvswitch/datapath-protocol.h index e903f085..39a3365f 100644 --- a/include/openvswitch/datapath-protocol.h +++ b/include/openvswitch/datapath-protocol.h @@ -70,12 +70,6 @@ #include #include -#define ODP_DP_NEW _IOWR('O', 0, struct odp_datapath) -#define ODP_DP_DEL _IOR('O', 1, struct odp_datapath) -#define ODP_DP_GET _IOWR('O', 2, struct odp_datapath) -#define ODP_DP_SET _IOWR('O', 3, struct odp_datapath) -#define ODP_DP_DUMP _IOWR('O', 4, struct odp_datapath) - #define ODP_VPORT_NEW _IOR('O', 7, struct odp_vport) #define ODP_VPORT_DEL _IOR('O', 8, struct odp_vport) #define ODP_VPORT_GET _IOWR('O', 9, struct odp_vport) @@ -88,6 +82,19 @@ #define ODP_FLOW_SET _IOWR('O', 16, struct odp_flow) #define ODP_FLOW_DUMP _IOWR('O', 17, struct odp_flow) #define ODP_FLOW_FLUSH _IO('O', 19) + +/* Datapaths. 
*/ + +#define ODP_DATAPATH_FAMILY "odp_datapath" +#define ODP_DATAPATH_MCGROUP "odp_datapath" + +enum odp_datapath_cmd { + ODP_DP_CMD_UNSPEC, + ODP_DP_CMD_NEW, + ODP_DP_CMD_DEL, + ODP_DP_CMD_GET, + ODP_DP_CMD_SET +}; /** * struct odp_header - header for ODP Generic Netlink messages. @@ -101,22 +108,30 @@ struct odp_header { }; /** - * struct odp_datapath - header with basic information about a datapath. - * @dp_idx: Datapath index (-1 to make a request not specific to a datapath). - * @len: Length of this structure plus the Netlink attributes following it. - * @total_len: Total space available for kernel reply to request. + * enum odp_datapath_attr - attributes for %ODP_DP_* commands. + * @ODP_DP_ATTR_NAME: Name of the network device that serves as the "local + * port". This is the name of the network device whose dp_idx is given in the + * &struct odp_header. Always present in notifications. Required in + * %ODP_DP_CMD_NEW requests. May be used as an alternative to specifying dp_idx on + * other requests (with a dp_idx of %UINT32_MAX). + * @ODP_DP_ATTR_STATS: Statistics about packets that have passed through the + * datapath. Always present in notifications. + * @ODP_DP_ATTR_IPV4_FRAGS: One of %ODP_DP_FRAG_*. Always present in + * notifications. May be included in %ODP_DP_CMD_NEW or %ODP_DP_CMD_SET requests to + * change the fragment handling policy. + * @ODP_DP_ATTR_SAMPLING: 32-bit fraction of packets to sample with + * %ODP_PACKET_CMD_SAMPLE. A value of 0 samples no packets, a value of + * %UINT32_MAX samples all packets, and intermediate values sample intermediate + * fractions of packets. + * @ODP_DP_ATTR_MCGROUPS: Nested attributes with multicast groups. Each nested + * attribute has a %ODP_PACKET_CMD_* type with a 32-bit value giving the + * Generic Netlink multicast group number used for sending this datapath's + * messages with that command type up to userspace. 
* - * Followed by &struct nlattr attributes, whose types are drawn from - * %ODP_DP_ATTR_*, up to a length of @len bytes including the &struct - * odp_datapath header. + * These attributes follow the &struct odp_header within the Generic Netlink + * payload for %ODP_DP_* commands. */ -struct odp_datapath { - int32_t dp_idx; - uint32_t len; - uint32_t total_len; -}; - -enum odp_datapath_type { +enum odp_datapath_attr { ODP_DP_ATTR_UNSPEC, ODP_DP_ATTR_NAME, /* name of dp_ifidx netdev */ ODP_DP_ATTR_STATS, /* struct odp_stats */ @@ -128,7 +143,13 @@ enum odp_datapath_type { #define ODP_DP_ATTR_MAX (__ODP_DP_ATTR_MAX - 1) -/* Values for ODP_DP_ATTR_IPV4_FRAGS. */ +/** + * enum odp_frag_handling - policy for handling received IPv4 fragments. + * @ODP_DP_FRAG_ZERO: Treat IP fragments as IP protocol 0 and transport ports + * zero. + * @ODP_DP_FRAG_DROP: Drop IP fragments. Do not pass them through the flow + * table or up to userspace. + */ enum odp_frag_handling { ODP_DP_FRAG_UNSPEC, ODP_DP_FRAG_ZERO, /* Treat IP fragments as transport port 0. */ @@ -179,12 +200,6 @@ enum odp_packet_cmd { * * These attributes follow the &struct odp_header within the Generic Netlink * payload for %ODP_PACKET_* commands. - * - * The %ODP_PACKET_ATTR_TYPE, %ODP_PACKET_ATTR_PACKET and %ODP_PACKET_ATTR_KEY - * attributes are present for all notifications. For %ODP_PACKET_CMD_ACTION, - * the %ODP_PACKET_ATTR_USERDATA attribute is included if it would be nonzero. - * For %ODP_PACKET_CMD_SAMPLE, the %ODP_PACKET_ATTR_SAMPLE_POOL and - * %ODP_PACKET_ATTR_ACTIONS attributes are included. */ enum odp_packet_attr { ODP_PACKET_ATTR_UNSPEC, diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index e5e66ff0..5fb7035f 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -55,10 +55,10 @@ VLOG_DEFINE_THIS_MODULE(dpif_linux); struct dpif_linux_dp { - /* ioctl command argument. */ - int cmd; + /* Generic Netlink header. */ + uint8_t cmd; - /* struct odp_datapath header. */ + /* struct odp_header. 
*/ uint32_t dp_idx; /* Attributes. */ @@ -70,6 +70,9 @@ struct dpif_linux_dp { }; static void dpif_linux_dp_init(struct dpif_linux_dp *); +static int dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *, + const struct ofpbuf *); +static void dpif_linux_dp_dump_start(struct nl_dump *); static int dpif_linux_dp_transact(const struct dpif_linux_dp *request, struct dpif_linux_dp *reply, struct ofpbuf **bufp); @@ -131,6 +134,7 @@ struct dpif_linux { static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5); /* Generic Netlink family numbers for ODP. */ +static int odp_datapath_family; static int odp_packet_family; /* Generic Netlink socket. */ @@ -156,13 +160,14 @@ dpif_linux_cast(const struct dpif *dpif) static int dpif_linux_enumerate(struct svec *all_dps) { - uint32_t dp_idx; + struct nl_dump dump; + struct ofpbuf msg; int major; - int err; + int error; - err = dpif_linux_init(); - if (err) { - return err; + error = dpif_linux_init(); + if (error) { + return error; } /* Check that the Open vSwitch module is loaded. */ @@ -171,28 +176,15 @@ dpif_linux_enumerate(struct svec *all_dps) return -major; } - dp_idx = 0; - for (;;) { - struct dpif_linux_dp request, reply; - struct ofpbuf *buf; - char devname[16]; - int error; + dpif_linux_dp_dump_start(&dump); + while (nl_dump_next(&dump, &msg)) { + struct dpif_linux_dp dp; - dpif_linux_dp_init(&request); - request.dp_idx = dp_idx; - request.cmd = ODP_DP_DUMP; - - error = dpif_linux_dp_transact(&request, &reply, &buf); - if (error) { - return error == ENODEV ? 0 : error; + if (!dpif_linux_dp_from_ofpbuf(&dp, &msg)) { + svec_add(all_dps, dp.name); } - ofpbuf_delete(buf); - - sprintf(devname, "dp%d", reply.dp_idx); - svec_add(all_dps, devname); - - dp_idx = reply.dp_idx + 1; } + return nl_dump_done(&dump); } static int @@ -215,7 +207,7 @@ dpif_linux_open(const struct dpif_class *class OVS_UNUSED, const char *name, /* Create or look up datapath. */ dpif_linux_dp_init(&dp_request); - dp_request.cmd = create ? 
ODP_DP_NEW : ODP_DP_GET; + dp_request.cmd = create ? ODP_DP_CMD_NEW : ODP_DP_CMD_GET; dp_request.dp_idx = minor; dp_request.name = minor < 0 ? name : NULL; error = dpif_linux_dp_transact(&dp_request, &dp, &buf); @@ -323,7 +315,7 @@ dpif_linux_destroy(struct dpif *dpif_) struct dpif_linux_dp dp; dpif_linux_dp_init(&dp); - dp.cmd = ODP_DP_DEL; + dp.cmd = ODP_DP_CMD_DEL; dp.dp_idx = dpif->minor; return dpif_linux_dp_transact(&dp, NULL, NULL); } @@ -365,7 +357,7 @@ dpif_linux_set_drop_frags(struct dpif *dpif_, bool drop_frags) struct dpif_linux_dp dp; dpif_linux_dp_init(&dp); - dp.cmd = ODP_DP_SET; + dp.cmd = ODP_DP_CMD_SET; dp.dp_idx = dpif->minor; dp.ipv4_frags = drop_frags ? ODP_DP_FRAG_DROP : ODP_DP_FRAG_ZERO; return dpif_linux_dp_transact(&dp, NULL, NULL); @@ -806,7 +798,7 @@ dpif_linux_set_sflow_probability(struct dpif *dpif_, uint32_t probability) struct dpif_linux_dp dp; dpif_linux_dp_init(&dp); - dp.cmd = ODP_DP_SET; + dp.cmd = ODP_DP_CMD_SET; dp.dp_idx = dpif->minor; dp.sampling = &probability; return dpif_linux_dp_transact(&dp, NULL, NULL); @@ -847,6 +839,7 @@ parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall, struct nlmsghdr *nlmsg; struct genlmsghdr *genl; struct ofpbuf b; + int type; ofpbuf_use_const(&b, buf->data, buf->size); @@ -854,18 +847,22 @@ parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall, genl = ofpbuf_try_pull(&b, sizeof *genl); odp_header = ofpbuf_try_pull(&b, sizeof *odp_header); if (!nlmsg || !genl || !odp_header + || nlmsg->nlmsg_type != odp_packet_family || !nl_policy_parse(&b, 0, odp_packet_policy, a, ARRAY_SIZE(odp_packet_policy))) { return EINVAL; } - memset(upcall, 0, sizeof *upcall); - - upcall->type = (genl->cmd == ODP_PACKET_CMD_MISS ? DPIF_UC_MISS - : genl->cmd == ODP_PACKET_CMD_ACTION ? DPIF_UC_ACTION - : genl->cmd == ODP_PACKET_CMD_SAMPLE ? DPIF_UC_SAMPLE - : -1); + type = (genl->cmd == ODP_PACKET_CMD_MISS ? DPIF_UC_MISS + : genl->cmd == ODP_PACKET_CMD_ACTION ? 
DPIF_UC_ACTION + : genl->cmd == ODP_PACKET_CMD_SAMPLE ? DPIF_UC_SAMPLE + : -1); + if (type < 0) { + return EINVAL; + } + memset(upcall, 0, sizeof *upcall); + upcall->type = type; upcall->packet = buf; upcall->packet->data = (void *) nl_attr_get(a[ODP_PACKET_ATTR_PACKET]); upcall->packet->size = nl_attr_get_size(a[ODP_PACKET_ATTR_PACKET]); @@ -990,7 +987,12 @@ dpif_linux_init(void) static int error = -1; if (error < 0) { - error = nl_lookup_genl_family(ODP_PACKET_FAMILY, &odp_packet_family); + error = nl_lookup_genl_family(ODP_DATAPATH_FAMILY, + &odp_datapath_family); + if (!error) { + error = nl_lookup_genl_family(ODP_PACKET_FAMILY, + &odp_packet_family); + } if (!error) { error = nl_sock_create(NETLINK_GENERIC, &genl_sock); } @@ -1391,9 +1393,9 @@ dpif_linux_vport_get(const char *name, struct dpif_linux_vport *reply, return dpif_linux_vport_transact(&request, reply, bufp); } -/* Parses the contents of 'buf', which contains a "struct odp_datapath" - * followed by Netlink attributes, into 'dp'. Returns 0 if successful, - * otherwise a positive errno value. +/* Parses the contents of 'buf', which contains a "struct odp_header" followed + * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a + * positive errno value. * * 'dp' will contain pointers into 'buf', so the caller should not free 'buf' * while 'dp' is still in use. 
*/ @@ -1411,18 +1413,27 @@ dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *dp, const struct ofpbuf *buf) [ODP_DP_ATTR_MCGROUPS] = { .type = NL_A_NESTED, .optional = true }, }; - struct odp_datapath *odp_dp; struct nlattr *a[ARRAY_SIZE(odp_datapath_policy)]; + struct odp_header *odp_header; + struct nlmsghdr *nlmsg; + struct genlmsghdr *genl; + struct ofpbuf b; dpif_linux_dp_init(dp); - if (!nl_policy_parse(buf, sizeof *odp_dp, odp_datapath_policy, - a, ARRAY_SIZE(odp_datapath_policy))) { + ofpbuf_use_const(&b, buf->data, buf->size); + nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); + genl = ofpbuf_try_pull(&b, sizeof *genl); + odp_header = ofpbuf_try_pull(&b, sizeof *odp_header); + if (!nlmsg || !genl || !odp_header + || nlmsg->nlmsg_type != odp_datapath_family + || !nl_policy_parse(&b, 0, odp_datapath_policy, a, + ARRAY_SIZE(odp_datapath_policy))) { return EINVAL; } - odp_dp = buf->data; - dp->dp_idx = odp_dp->dp_idx; + dp->cmd = genl->cmd; + dp->dp_idx = odp_header->dp_idx; dp->name = nl_attr_get_string(a[ODP_DP_ATTR_NAME]); if (a[ODP_DP_ATTR_STATS]) { /* Can't use structure assignment because Netlink doesn't ensure @@ -1468,14 +1479,17 @@ dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *dp, const struct ofpbuf *buf) return 0; } -/* Appends to 'buf' (which must initially be empty) a "struct odp_datapath" - * followed by Netlink attributes corresponding to 'dp'. */ +/* Appends to 'buf' the Generic Netlink message described by 'dp'. 
*/ static void dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp *dp, struct ofpbuf *buf) { - struct odp_datapath *odp_dp; + struct odp_header *odp_header; - ofpbuf_reserve(buf, sizeof odp_dp); + nl_msg_put_genlmsghdr(buf, 0, odp_datapath_family, + NLM_F_REQUEST | NLM_F_ECHO, dp->cmd, 1); + + odp_header = ofpbuf_put_uninit(buf, sizeof *odp_header); + odp_header->dp_idx = dp->dp_idx; if (dp->name) { nl_msg_put_string(buf, ODP_DP_ATTR_NAME, dp->name); @@ -1490,11 +1504,6 @@ dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp *dp, struct ofpbuf *buf) if (dp->sampling) { nl_msg_put_u32(buf, ODP_DP_ATTR_SAMPLING, *dp->sampling); } - - odp_dp = ofpbuf_push_uninit(buf, sizeof *odp_dp); - odp_dp->dp_idx = dp->dp_idx; - odp_dp->len = buf->size; - odp_dp->total_len = (char *) ofpbuf_end(buf) - (char *) buf->data; } /* Clears 'dp' to "empty" values. */ @@ -1505,53 +1514,50 @@ dpif_linux_dp_init(struct dpif_linux_dp *dp) dp->dp_idx = -1; } +static void +dpif_linux_dp_dump_start(struct nl_dump *dump) +{ + struct dpif_linux_dp request; + struct ofpbuf *buf; + + dpif_linux_dp_init(&request); + request.cmd = ODP_DP_CMD_GET; + + buf = ofpbuf_new(1024); + dpif_linux_dp_to_ofpbuf(&request, buf); + nl_dump_start(dump, genl_sock, buf); + ofpbuf_delete(buf); +} + /* Executes 'request' in the kernel datapath. If the command fails, returns a * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the - * result of the command is expected to be an odp_datapath also, which is - * decoded and stored in '*reply' and '*bufp'. The caller must free '*bufp' - * when the reply is no longer needed ('reply' will contain pointers into - * '*bufp'). */ + * result of the command is expected to be of the same form, which is decoded + * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the + * reply is no longer needed ('reply' will contain pointers into '*bufp'). 
*/ int dpif_linux_dp_transact(const struct dpif_linux_dp *request, struct dpif_linux_dp *reply, struct ofpbuf **bufp) { - struct ofpbuf *buf = NULL; + struct ofpbuf *request_buf; int error; - int fd; assert((reply != NULL) == (bufp != NULL)); - error = get_dp0_fd(&fd); - if (error) { - goto error; - } + request_buf = ofpbuf_new(1024); + dpif_linux_dp_to_ofpbuf(request, request_buf); + error = nl_sock_transact(genl_sock, request_buf, bufp); + ofpbuf_delete(request_buf); - buf = ofpbuf_new(1024); - dpif_linux_dp_to_ofpbuf(request, buf); - - error = ioctl(fd, request->cmd, buf->data) ? errno : 0; - if (error) { - goto error; - } - - if (bufp) { - buf->size = ((struct odp_datapath *) buf->data)->len; - error = dpif_linux_dp_from_ofpbuf(reply, buf); + if (reply) { + if (!error) { + error = dpif_linux_dp_from_ofpbuf(reply, *bufp); + } if (error) { - goto error; + dpif_linux_dp_init(reply); + ofpbuf_delete(*bufp); + *bufp = NULL; } - *bufp = buf; - } else { - ofpbuf_delete(buf); - } - return 0; - -error: - ofpbuf_delete(buf); - if (bufp) { - memset(reply, 0, sizeof *reply); - *bufp = NULL; } return error; } @@ -1567,7 +1573,7 @@ dpif_linux_dp_get(const struct dpif *dpif_, struct dpif_linux_dp *reply, struct dpif_linux_dp request; dpif_linux_dp_init(&request); - request.cmd = ODP_DP_GET; + request.cmd = ODP_DP_CMD_GET; request.dp_idx = dpif->minor; return dpif_linux_dp_transact(&request, reply, bufp); -- 2.30.2