+ struct odp_header *odp_header = genlmsg_data(nlmsg_data(cb->nlh));
+ struct datapath *dp;
+
+ dp = get_dp(odp_header->dp_ifindex);
+ if (!dp)
+ return -ENODEV;
+
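+	/* cb->args[0] and cb->args[1] hold the bucket and object offset of
+	 * the last flow that was dumped, so that the dump resumes from that
+	 * position the next time this callback is invoked. */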
+ for (;;) {
+ struct tbl_node *flow_node;
+ struct sw_flow *flow;
+ u32 bucket, obj;
+
+ bucket = cb->args[0];
+ obj = cb->args[1];
+ flow_node = tbl_next(get_table_protected(dp), &bucket, &obj);
+ if (!flow_node)
+ break;
+
+ flow = flow_cast(flow_node);
+ if (odp_flow_cmd_fill_info(flow, dp, skb, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ ODP_FLOW_CMD_NEW) < 0)
+ break;
+
+ cb->args[0] = bucket;
+ cb->args[1] = obj;
+ }
+ return skb->len;
+}
+
+static struct genl_ops dp_flow_genl_ops[] = {
+ { .cmd = ODP_FLOW_CMD_NEW,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .policy = flow_policy,
+ .doit = odp_flow_cmd_new_or_set
+ },
+ { .cmd = ODP_FLOW_CMD_DEL,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .policy = flow_policy,
+ .doit = odp_flow_cmd_del
+ },
+ { .cmd = ODP_FLOW_CMD_GET,
+ .flags = 0, /* OK for unprivileged users. */
+ .policy = flow_policy,
+ .doit = odp_flow_cmd_get,
+ .dumpit = odp_flow_cmd_dump
+ },
+ { .cmd = ODP_FLOW_CMD_SET,
+ .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+ .policy = flow_policy,
+ .doit = odp_flow_cmd_new_or_set,
+ },
+};
+
+static const struct nla_policy datapath_policy[ODP_DP_ATTR_MAX + 1] = {
+#ifdef HAVE_NLA_NUL_STRING
+ [ODP_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+#endif
+ [ODP_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 },
+ [ODP_DP_ATTR_SAMPLING] = { .type = NLA_U32 },
+};
+
+static struct genl_family dp_datapath_genl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = sizeof(struct odp_header),
+ .name = ODP_DATAPATH_FAMILY,
+ .version = 1,
+ .maxattr = ODP_DP_ATTR_MAX
+};
+
+static struct genl_multicast_group dp_datapath_multicast_group = {
+ .name = ODP_DATAPATH_MCGROUP
+};
+
+static int odp_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
+ u32 pid, u32 seq, u32 flags, u8 cmd)
+{
+ struct odp_header *odp_header;
+ struct nlattr *nla;
+ int err;
+
+ odp_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
+ flags, cmd);
+ if (!odp_header)
+ goto error;
+
+ odp_header->dp_ifindex = dp->dp_ifindex;
+
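+	/* The datapath name is RCU-protected, so copy it into the skb under
+	 * rcu_read_lock(). */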
+ rcu_read_lock();
+ err = nla_put_string(skb, ODP_DP_ATTR_NAME, dp_name(dp));
+ rcu_read_unlock();
+ if (err)
+ goto nla_put_failure;
+
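+	/* Reserve space for the stats attribute and fill it in place, which
+	 * avoids copying the stats through an intermediate buffer. */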
+ nla = nla_reserve(skb, ODP_DP_ATTR_STATS, sizeof(struct odp_stats));
+ if (!nla)
+ goto nla_put_failure;
+ get_dp_stats(dp, nla_data(nla));
+
+ NLA_PUT_U32(skb, ODP_DP_ATTR_IPV4_FRAGS,
+ dp->drop_frags ? ODP_DP_FRAG_DROP : ODP_DP_FRAG_ZERO);
+
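+	/* ODP_DP_ATTR_SAMPLING is omitted entirely when sampling is
+	 * disabled. */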
+ if (dp->sflow_probability)
+ NLA_PUT_U32(skb, ODP_DP_ATTR_SAMPLING, dp->sflow_probability);
+
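+	/* Nest the per-datapath multicast group ID for each kind of packet
+	 * notification under ODP_DP_ATTR_MCGROUPS. */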
+ nla = nla_nest_start(skb, ODP_DP_ATTR_MCGROUPS);
+ if (!nla)
+ goto nla_put_failure;
+ NLA_PUT_U32(skb, ODP_PACKET_CMD_MISS, packet_mc_group(dp, ODP_PACKET_CMD_MISS));
+ NLA_PUT_U32(skb, ODP_PACKET_CMD_ACTION, packet_mc_group(dp, ODP_PACKET_CMD_ACTION));
+ NLA_PUT_U32(skb, ODP_PACKET_CMD_SAMPLE, packet_mc_group(dp, ODP_PACKET_CMD_SAMPLE));
+ nla_nest_end(skb, nla);
+
+ return genlmsg_end(skb, odp_header);
+
+nla_put_failure:
+ genlmsg_cancel(skb, odp_header);
+error:
+ return -EMSGSIZE;
+}
+
+static struct sk_buff *odp_dp_cmd_build_info(struct datapath *dp, u32 pid,
+ u32 seq, u8 cmd)
+{
+ struct sk_buff *skb;
+ int retval;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ retval = odp_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
+ if (retval < 0) {
+ kfree_skb(skb);
+ return ERR_PTR(retval);
+ }
+ return skb;
+}
+
+static int odp_dp_cmd_validate(struct nlattr *a[ODP_DP_ATTR_MAX + 1])
+{
+ if (a[ODP_DP_ATTR_IPV4_FRAGS]) {
+ u32 frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]);
+
+ if (frags != ODP_DP_FRAG_ZERO && frags != ODP_DP_FRAG_DROP)
+ return -EINVAL;
+ }
+
+ return CHECK_NUL_STRING(a[ODP_DP_ATTR_NAME], IFNAMSIZ - 1);
+}
+
+/* Called with genl_mutex.  The caller may also hold RTNL lock. */
+static struct datapath *lookup_datapath(struct odp_header *odp_header,
+					struct nlattr *a[ODP_DP_ATTR_MAX + 1])
+{
+ struct datapath *dp;
+
+ if (!a[ODP_DP_ATTR_NAME])
+ dp = get_dp(odp_header->dp_ifindex);
+ else {
+ struct vport *vport;
+
+ rcu_read_lock();
+ vport = vport_locate(nla_data(a[ODP_DP_ATTR_NAME]));
+ dp = vport && vport->port_no == ODPP_LOCAL ? vport->dp : NULL;
+ rcu_read_unlock();
+ }
+ return dp ? dp : ERR_PTR(-ENODEV);
+}
+
+/* Called with genl_mutex. */
+static void change_datapath(struct datapath *dp,
+			    struct nlattr *a[ODP_DP_ATTR_MAX + 1])
+{
+ if (a[ODP_DP_ATTR_IPV4_FRAGS])
+ dp->drop_frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]) == ODP_DP_FRAG_DROP;
+ if (a[ODP_DP_ATTR_SAMPLING])
+ dp->sflow_probability = nla_get_u32(a[ODP_DP_ATTR_SAMPLING]);
+}
+
+static int odp_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr **a = info->attrs;
+ struct vport_parms parms;
+ struct sk_buff *reply;
+ struct datapath *dp;
+ struct vport *vport;
+ int err;
+
+ err = -EINVAL;
+ if (!a[ODP_DP_ATTR_NAME])
+ goto err;
+
+ err = odp_dp_cmd_validate(a);
+ if (err)
+ goto err;
+
+ rtnl_lock();
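+	/* Hold a reference on this module for the lifetime of the datapath;
+	 * it is released when the datapath is destroyed. */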
+ err = -ENODEV;
+ if (!try_module_get(THIS_MODULE))
+ goto err_unlock_rtnl;
+
+ err = -ENOMEM;
+ dp = kzalloc(sizeof(*dp), GFP_KERNEL);
+ if (dp == NULL)
+ goto err_put_module;
+ INIT_LIST_HEAD(&dp->port_list);
+
+ /* Initialize kobject for bridge. This will be added as
+ * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
+ dp->ifobj.kset = NULL;
+ kobject_init(&dp->ifobj, &dp_ktype);
+
+ /* Allocate table. */
+ err = -ENOMEM;
+ rcu_assign_pointer(dp->table, tbl_create(TBL_MIN_BUCKETS));
+ if (!dp->table)
+ goto err_free_dp;
+
+ /* Set up our datapath device. */
+ parms.name = nla_data(a[ODP_DP_ATTR_NAME]);
+ parms.type = ODP_VPORT_TYPE_INTERNAL;
+ parms.options = NULL;
+ parms.dp = dp;
+ parms.port_no = ODPP_LOCAL;
+ vport = new_vport(&parms);
+ if (IS_ERR(vport)) {
+ err = PTR_ERR(vport);
+ if (err == -EBUSY)
+ err = -EEXIST;
+
+ goto err_destroy_table;
+ }
+ dp->dp_ifindex = vport_get_ifindex(vport);
+
+ dp->drop_frags = 0;
+ dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
+ if (!dp->stats_percpu) {
+ err = -ENOMEM;
+ goto err_destroy_local_port;
+ }
+
+ change_datapath(dp, a);
+
+ reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
+ err = PTR_ERR(reply);
+ if (IS_ERR(reply))
+ goto err_destroy_local_port;
+
+ list_add_tail(&dp->list_node, &dps);
+ dp_sysfs_add_dp(dp);
+
+ rtnl_unlock();
+
+ genl_notify(reply, genl_info_net(info), info->snd_pid,
+ dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
+ return 0;
+
+err_destroy_local_port:
+ dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
+err_destroy_table:
+ tbl_destroy(get_table_protected(dp), NULL);
+err_free_dp:
+ kfree(dp);
+err_put_module:
+ module_put(THIS_MODULE);
+err_unlock_rtnl:
+ rtnl_unlock();
+err:
+ return err;
+}
+
+static int odp_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+ struct vport *vport, *next_vport;
+ struct sk_buff *reply;
+ struct datapath *dp;
+ int err;
+
+ err = odp_dp_cmd_validate(info->attrs);
+ if (err)
+ goto exit;
+
+ rtnl_lock();
+ dp = lookup_datapath(info->userhdr, info->attrs);
+ err = PTR_ERR(dp);
+ if (IS_ERR(dp))
+ goto exit_unlock;
+
+ reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_DEL);
+ err = PTR_ERR(reply);
+ if (IS_ERR(reply))
+ goto exit_unlock;
+
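+	/* Detach every port except the local port; the local port is
+	 * detached last, after the datapath has been removed from sysfs and
+	 * from the list of datapaths. */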
+ list_for_each_entry_safe (vport, next_vport, &dp->port_list, node)
+ if (vport->port_no != ODPP_LOCAL)
+ dp_detach_port(vport);
+
+ dp_sysfs_del_dp(dp);
+ list_del(&dp->list_node);
+ dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
+
+ /* rtnl_unlock() will wait until all the references to devices that
+ * are pending unregistration have been dropped. We do it here to
+ * ensure that any internal devices (which contain DP pointers) are
+ * fully destroyed before freeing the datapath.
+ */
+ rtnl_unlock();
+
+ call_rcu(&dp->rcu, destroy_dp_rcu);
+ module_put(THIS_MODULE);