2 * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "dpif-linux.h"
27 #include <linux/types.h>
28 #include <linux/ethtool.h>
29 #include <linux/pkt_sched.h>
30 #include <linux/rtnetlink.h>
31 #include <linux/sockios.h>
33 #include <sys/ioctl.h>
37 #include "dpif-provider.h"
39 #include "netdev-vport.h"
40 #include "netlink-socket.h"
44 #include "openvswitch/tunnel.h"
46 #include "poll-loop.h"
47 #include "rtnetlink.h"
48 #include "rtnetlink-link.h"
51 #include "unaligned.h"
55 VLOG_DEFINE_THIS_MODULE(dpif_linux);
/* Decoded form of a datapath message.  Each member's trailing comment names
 * the ODP_DP_ATTR_* attribute that it mirrors. */
57 struct dpif_linux_dp {
58 /* Generic Netlink header. */
61 /* struct odp_header. */
65 const char *name; /* ODP_DP_ATTR_NAME. */
66 struct odp_stats stats; /* ODP_DP_ATTR_STATS. */
67 enum odp_frag_handling ipv4_frags; /* ODP_DP_ATTR_IPV4_FRAGS. */
/* A pointer rather than a value: readers in this file treat a null pointer
 * as "no sampling attribute". */
68 const uint32_t *sampling; /* ODP_DP_ATTR_SAMPLING. */
/* One multicast group number per upcall type, indexed by DPIF_UC_*. */
69 uint32_t mcgroups[DPIF_N_UC_TYPES]; /* ODP_DP_ATTR_MCGROUPS. */
/* Forward declarations for the struct dpif_linux_dp helpers used below. */
72 static void dpif_linux_dp_init(struct dpif_linux_dp *);
73 static int dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *,
74 const struct ofpbuf *);
75 static void dpif_linux_dp_dump_start(struct nl_dump *);
76 static int dpif_linux_dp_transact(const struct dpif_linux_dp *request,
77 struct dpif_linux_dp *reply,
78 struct ofpbuf **bufp);
79 static int dpif_linux_dp_get(const struct dpif *, struct dpif_linux_dp *reply,
80 struct ofpbuf **bufp);
/* Decoded form of a flow message.  Callers in this file fill in the request
 * members before dpif_linux_flow_transact() and read the pointer members of
 * the reply afterward (for example 'actions' and 'state'). */
82 struct dpif_linux_flow {
83 /* ioctl command argument. */
86 /* struct odp_flow header. */
87 unsigned int nlmsg_flags;
92 * The 'stats', 'used', and 'state' members point to 64-bit data that might
93 * only be aligned on 32-bit boundaries, so get_unaligned_u64() should be
94 * used to access their values. */
95 const struct nlattr *key; /* ODP_FLOW_ATTR_KEY. */
97 const struct nlattr *actions; /* ODP_FLOW_ATTR_ACTIONS. */
99 const struct odp_flow_stats *stats; /* ODP_FLOW_ATTR_STATS. */
100 const uint8_t *tcp_flags; /* ODP_FLOW_ATTR_TCP_FLAGS. */
101 const uint64_t *used; /* ODP_FLOW_ATTR_USED. */
102 bool clear; /* ODP_FLOW_ATTR_CLEAR. */
103 const uint64_t *state; /* ODP_FLOW_ATTR_STATE. */
/* Forward declarations for the struct dpif_linux_flow helpers used below. */
106 static void dpif_linux_flow_init(struct dpif_linux_flow *);
107 static int dpif_linux_flow_transact(const struct dpif_linux_flow *request,
108 struct dpif_linux_flow *reply,
109 struct ofpbuf **bufp);
110 static void dpif_linux_flow_get_stats(const struct dpif_linux_flow *,
111 struct dpif_flow_stats *);
113 /* Datapath interface for the openvswitch Linux kernel module. */
118 /* Multicast group messages. */
/* Null until dpif_linux_recv_set_mask() is given a nonzero mask, and reset to
 * null when the mask returns to zero. */
119 struct nl_sock *mc_sock;
/* Multicast group number per upcall type, copied from the datapath reply when
 * the dpif is opened. */
120 uint32_t mcgroups[DPIF_N_UC_TYPES];
/* Bit (1u << type) is set for each upcall type currently subscribed. */
121 unsigned int listen_mask;
123 /* Used by dpif_linux_get_all_names(). */
127 /* Change notification. */
128 int local_ifindex; /* Ifindex of local port. */
/* Filled by dpif_linux_port_changed() and drained one name at a time by
 * dpif_linux_port_poll(). */
129 struct shash changed_ports; /* Ports that have changed. */
130 struct rtnetlink_notifier port_notifier;
/* Rate limiter shared by the VLOG_WARN_RL() calls in this file. */
134 static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
136 /* Generic Netlink family numbers for ODP. */
/* Each is filled in by dpif_linux_init() through nl_lookup_genl_family(). */
137 static int odp_datapath_family;
138 static int odp_vport_family;
139 static int odp_packet_family;
141 /* Generic Netlink socket. */
/* Created by dpif_linux_init() and used for every request/reply transaction
 * and dump in this file. */
142 static struct nl_sock *genl_sock;
/* Forward declarations for helpers defined later in this file. */
144 static int dpif_linux_init(void);
145 static int open_dpif(const struct dpif_linux_dp *,
146 const struct dpif_linux_vport *local_vport,
148 static int get_openvswitch_major(void);
149 static int open_minor(int minor, int *fdp);
150 static int make_openvswitch_device(int minor, char **fnp);
151 static void dpif_linux_port_changed(const struct rtnetlink_link_change *,
154 static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *,
156 static int dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *,
157 const struct ofpbuf *);
/* Converts the generic 'dpif' into the struct dpif_linux that embeds it,
 * after asserting that 'dpif' really belongs to dpif_linux_class. */
159 static struct dpif_linux *
160 dpif_linux_cast(const struct dpif *dpif)
162 dpif_assert_class(dpif, &dpif_linux_class);
163 return CONTAINER_OF(dpif, struct dpif_linux, dpif);
/* Adds the name of every datapath reported by a kernel datapath dump to
 * 'all_dps'.  The result of nl_dump_done() is the return value on the dump
 * path. */
167 dpif_linux_enumerate(struct svec *all_dps)
174 error = dpif_linux_init();
179 /* Check that the Open vSwitch module is loaded. */
180 major = get_openvswitch_major();
185 dpif_linux_dp_dump_start(&dump);
186 while (nl_dump_next(&dump, &msg)) {
187 struct dpif_linux_dp dp;
/* Messages that fail to parse are skipped rather than ending the dump. */
189 if (!dpif_linux_dp_from_ofpbuf(&dp, &msg)) {
190 svec_add(all_dps, dp.name);
193 return nl_dump_done(&dump);
/* Looks up the datapath called 'name', or creates it if 'create' is true,
 * then looks up its local port and hands both to open_dpif(), which stores
 * the new dpif in '*dpifp'. */
197 dpif_linux_open(const struct dpif_class *class OVS_UNUSED, const char *name,
198 bool create, struct dpif **dpifp)
200 struct dpif_linux_vport vport_request, vport;
201 struct dpif_linux_dp dp_request, dp;
206 error = dpif_linux_init();
/* A name of the form "dp" followed by a digit selects the datapath by index;
 * for any other name 'minor' is -1 and the lookup is done by name. */
211 minor = !strncmp(name, "dp", 2)
212 && isdigit((unsigned char)name[2]) ? atoi(name + 2) : -1;
214 /* Create or look up datapath. */
215 dpif_linux_dp_init(&dp_request);
216 dp_request.cmd = create ? ODP_DP_CMD_NEW : ODP_DP_CMD_GET;
217 dp_request.dp_idx = minor;
218 dp_request.name = minor < 0 ? name : NULL;
219 error = dpif_linux_dp_transact(&dp_request, &dp, &buf);
223 ofpbuf_delete(buf); /* Pointers inside 'dp' are now invalid! */
225 /* Look up local port. */
226 dpif_linux_vport_init(&vport_request);
227 vport_request.cmd = ODP_VPORT_CMD_GET;
/* 'dp.dp_idx' is a plain integer, so it remains usable after 'buf' was
 * freed above. */
228 vport_request.dp_idx = dp.dp_idx;
229 vport_request.port_no = ODPP_LOCAL;
230 vport_request.name = minor < 0 ? name : NULL;
231 error = dpif_linux_vport_transact(&vport_request, &vport, &buf);
234 } else if (vport.port_no != ODPP_LOCAL) {
235 /* This is an Open vSwitch device but not the local port. We
236 * intentionally support only using the name of the local port as the
237 * name of a datapath; otherwise, it would be too difficult to
238 * enumerate all the names of a datapath. */
241 error = open_dpif(&dp, &vport, dpifp);
/* Builds a struct dpif_linux for the datapath described by 'dp' and its local
 * port 'local_vport', and stores the embedded dpif in '*dpifp'.  The datapath
 * index is taken from 'local_vport'. */
248 open_dpif(const struct dpif_linux_dp *dp,
249 const struct dpif_linux_vport *local_vport, struct dpif **dpifp)
251 int dp_idx = local_vport->dp_idx;
252 struct dpif_linux *dpif;
/* Open the device node for this datapath index. */
258 error = open_minor(dp_idx, &fd);
263 dpif = xmalloc(sizeof *dpif);
/* Link changes are delivered to dpif_linux_port_changed() with 'dpif' as its
 * auxiliary argument. */
264 error = rtnetlink_link_notifier_register(&dpif->port_notifier,
265 dpif_linux_port_changed, dpif);
270 name = xasprintf("dp%d", dp_idx);
271 dpif_init(&dpif->dpif, &dpif_linux_class, name, dp_idx, dp_idx);
/* No multicast socket and an empty listen mask until
 * dpif_linux_recv_set_mask() asks for upcalls. */
275 dpif->mc_sock = NULL;
276 for (i = 0; i < DPIF_N_UC_TYPES; i++) {
277 dpif->mcgroups[i] = dp->mcgroups[i];
279 dpif->listen_mask = 0;
/* The name is duplicated because 'local_vport' points into a reply buffer
 * owned by the caller. */
280 dpif->local_ifname = xstrdup(local_vport->name);
281 dpif->local_ifindex = local_vport->ifindex;
282 dpif->minor = dp_idx;
283 shash_init(&dpif->changed_ports);
284 dpif->change_error = false;
285 *dpifp = &dpif->dpif;
/* Releases what open_dpif() set up: the link notifier, the set of changed
 * ports, and the duplicated local interface name. */
297 dpif_linux_close(struct dpif *dpif_)
299 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
300 rtnetlink_link_notifier_unregister(&dpif->port_notifier);
301 shash_destroy(&dpif->changed_ports);
302 free(dpif->local_ifname);
/* Appends both names of this datapath to 'all_names': "dp<minor>" and the
 * name of its local interface. */
308 dpif_linux_get_all_names(const struct dpif *dpif_, struct svec *all_names)
310 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
/* The formatted string is handed over without copying; the interface name is
 * copied because 'dpif' keeps its own. */
312 svec_add_nocopy(all_names, xasprintf("dp%d", dpif->minor));
313 svec_add(all_names, dpif->local_ifname);
/* Asks the kernel to delete this datapath, identified by its index.  Returns
 * the result of the transaction; no reply is requested. */
318 dpif_linux_destroy(struct dpif *dpif_)
320 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
321 struct dpif_linux_dp dp;
323 dpif_linux_dp_init(&dp);
324 dp.cmd = ODP_DP_CMD_DEL;
325 dp.dp_idx = dpif->minor;
326 return dpif_linux_dp_transact(&dp, NULL, NULL);
/* Queries the kernel for this datapath with dpif_linux_dp_get().
 * NOTE(review): presumably the reply's 'stats' member is then copied into
 * '*stats'; confirm against the full function body. */
330 dpif_linux_get_stats(const struct dpif *dpif_, struct odp_stats *stats)
332 struct dpif_linux_dp dp;
336 error = dpif_linux_dp_get(dpif_, &dp, &buf);
/* Queries the datapath and sets '*drop_fragsp' to true exactly when its IPv4
 * fragment handling mode is ODP_DP_FRAG_DROP. */
345 dpif_linux_get_drop_frags(const struct dpif *dpif_, bool *drop_fragsp)
347 struct dpif_linux_dp dp;
351 error = dpif_linux_dp_get(dpif_, &dp, &buf);
353 *drop_fragsp = dp.ipv4_frags == ODP_DP_FRAG_DROP;
/* Sets the datapath's IPv4 fragment handling: ODP_DP_FRAG_DROP when
 * 'drop_frags' is true, ODP_DP_FRAG_ZERO otherwise.  Returns the result of
 * the transaction; no reply is requested. */
360 dpif_linux_set_drop_frags(struct dpif *dpif_, bool drop_frags)
362 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
363 struct dpif_linux_dp dp;
365 dpif_linux_dp_init(&dp);
366 dp.cmd = ODP_DP_CMD_SET;
367 dp.dp_idx = dpif->minor;
368 dp.ipv4_frags = drop_frags ? ODP_DP_FRAG_DROP : ODP_DP_FRAG_ZERO;
369 return dpif_linux_dp_transact(&dp, NULL, NULL);
/* Creates a vport for 'netdev' in this datapath and stores the port number
 * from the kernel's reply in '*port_nop'. */
373 dpif_linux_port_add(struct dpif *dpif_, struct netdev *netdev,
376 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
377 const char *name = netdev_get_name(netdev);
378 const char *type = netdev_get_type(netdev);
379 struct dpif_linux_vport request, reply;
380 const struct ofpbuf *options;
384 dpif_linux_vport_init(&request);
385 request.cmd = ODP_VPORT_CMD_NEW;
386 request.dp_idx = dpif->minor;
387 request.type = netdev_vport_get_vport_type(netdev);
/* A netdev whose type has no vport equivalent cannot be added; this is
 * logged with the netdev's own type string. */
388 if (request.type == ODP_VPORT_TYPE_UNSPEC) {
389 VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has "
390 "unsupported type `%s'",
391 dpif_name(dpif_), name, type);
/* Options are attached only when the netdev supplies a nonempty buffer. */
396 options = netdev_vport_get_options(netdev);
397 if (options && options->size) {
398 request.options = options->data;
399 request.options_len = options->size;
402 error = dpif_linux_vport_transact(&request, &reply, &buf);
404 *port_nop = reply.port_no;
/* Asks the kernel to delete port 'port_no' from this datapath.  Returns the
 * result of the transaction; no reply is requested. */
412 dpif_linux_port_del(struct dpif *dpif_, uint16_t port_no)
414 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
415 struct dpif_linux_vport vport;
417 dpif_linux_vport_init(&vport);
418 vport.cmd = ODP_VPORT_CMD_DEL;
419 vport.dp_idx = dpif->minor;
420 vport.port_no = port_no;
421 return dpif_linux_vport_transact(&vport, NULL, NULL);
/* Shared implementation of the two port queries.  Sends a vport GET request
 * carrying both 'port_no' and 'port_name' (either may be the "unset" value
 * chosen by the caller) and fills 'dpif_port' from the reply. */
425 dpif_linux_port_query__(const struct dpif *dpif, uint32_t port_no,
426 const char *port_name, struct dpif_port *dpif_port)
428 struct dpif_linux_vport request;
429 struct dpif_linux_vport reply;
433 dpif_linux_vport_init(&request);
434 request.cmd = ODP_VPORT_CMD_GET;
435 request.dp_idx = dpif_linux_cast(dpif)->minor;
436 request.port_no = port_no;
437 request.name = port_name;
439 error = dpif_linux_vport_transact(&request, &reply, &buf);
/* The strings are duplicated, so 'dpif_port' does not point into the reply
 * buffer. */
441 dpif_port->name = xstrdup(reply.name);
442 dpif_port->type = xstrdup(netdev_vport_get_netdev_type(&reply));
443 dpif_port->port_no = reply.port_no;
/* Looks up the port numbered 'port_no'; no name is passed to the query. */
450 dpif_linux_port_query_by_number(const struct dpif *dpif, uint16_t port_no,
451 struct dpif_port *dpif_port)
453 return dpif_linux_port_query__(dpif, port_no, NULL, dpif_port);
/* Looks up the port named 'devname'; 0 is passed as the port number. */
457 dpif_linux_port_query_by_name(const struct dpif *dpif, const char *devname,
458 struct dpif_port *dpif_port)
460 return dpif_linux_port_query__(dpif, 0, devname, dpif_port);
/* dpif_class hook for the port limit.  'dpif' is unused, so the answer does
 * not depend on the particular datapath.
 * NOTE(review): the returned value is not visible here; confirm it. */
464 dpif_linux_get_max_ports(const struct dpif *dpif OVS_UNUSED)
466 /* If the datapath increases its range of supported ports, then it should
467 * start reporting that. */
/* Issues the ODP_FLOW_FLUSH ioctl on the datapath's file descriptor, passing
 * the datapath index.  Returns 0 on success, otherwise the errno value left
 * by ioctl(). */
472 dpif_linux_flow_flush(struct dpif *dpif_)
474 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
475 return ioctl(dpif->fd, ODP_FLOW_FLUSH, dpif->minor) ? errno : 0;
/* Iteration state for a port dump: allocated by dpif_linux_port_dump_start()
 * and holding the Netlink dump ('dump') that the _next and _done functions
 * operate on. */
478 struct dpif_linux_port_state {
/* Begins a dump of the vports in this datapath.  Allocates the iteration
 * state, stores it in '*statep', and starts a Netlink dump on 'genl_sock'
 * with a vport GET request for this datapath's index. */
483 dpif_linux_port_dump_start(const struct dpif *dpif_, void **statep)
485 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
486 struct dpif_linux_port_state *state;
487 struct dpif_linux_vport request;
490 *statep = state = xmalloc(sizeof *state);
492 dpif_linux_vport_init(&request);
/* 'request' is a vport message, so it takes a command from the
 * ODP_VPORT_CMD_* set like every other vport request in this file, not one
 * from the datapath (ODP_DP_CMD_*) set. */
493 request.cmd = ODP_VPORT_CMD_GET;
494 request.dp_idx = dpif->minor;
496 buf = ofpbuf_new(1024);
497 dpif_linux_vport_to_ofpbuf(&request, buf);
498 nl_dump_start(&state->dump, genl_sock, buf);
/* Fetches the next vport from the dump in 'state_' and describes it in
 * 'dpif_port'. */
505 dpif_linux_port_dump_next(const struct dpif *dpif OVS_UNUSED, void *state_,
506 struct dpif_port *dpif_port)
508 struct dpif_linux_port_state *state = state_;
509 struct dpif_linux_vport vport;
513 if (!nl_dump_next(&state->dump, &buf)) {
517 error = dpif_linux_vport_from_ofpbuf(&vport, &buf);
/* No copies are made here: the name points into the dump's current message,
 * and the casts only drop const. */
522 dpif_port->name = (char *) vport.name;
523 dpif_port->type = (char *) netdev_vport_get_netdev_type(&vport);
524 dpif_port->port_no = vport.port_no;
/* Finishes the port dump held in 'state_'; 'error' receives the result of
 * nl_dump_done(). */
529 dpif_linux_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
531 struct dpif_linux_port_state *state = state_;
532 int error = nl_dump_done(&state->dump);
/* Reports one pending port change.  If a notification error was recorded,
 * the flag is cleared and all queued names are discarded; otherwise, if any
 * changed port is queued, one name is removed from the set and ownership of
 * it passes to the caller through '*devnamep'. */
538 dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep)
540 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
542 if (dpif->change_error) {
543 dpif->change_error = false;
544 shash_clear(&dpif->changed_ports);
546 } else if (!shash_is_empty(&dpif->changed_ports)) {
547 struct shash_node *node = shash_first(&dpif->changed_ports);
548 *devnamep = shash_steal(&dpif->changed_ports, node);
/* Arranges for the poll loop to wake when dpif_linux_port_poll() has
 * something to report: immediately if a change or error is already pending,
 * and via the rtnetlink link notifier for future changes. */
556 dpif_linux_port_poll_wait(const struct dpif *dpif_)
558 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
559 if (!shash_is_empty(&dpif->changed_ports) || dpif->change_error) {
560 poll_immediate_wake();
562 rtnetlink_link_notifier_wait();
/* Looks up the flow identified by 'key' in this datapath and extracts its
 * statistics and actions from the reply. */
567 dpif_linux_flow_get(const struct dpif *dpif_,
568 const struct nlattr *key, size_t key_len,
569 struct ofpbuf **actionsp, struct dpif_flow_stats *stats)
571 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
572 struct dpif_linux_flow request, reply;
576 dpif_linux_flow_init(&request);
577 request.cmd = ODP_FLOW_GET;
578 request.dp_idx = dpif->minor;
580 request.key_len = key_len;
581 error = dpif_linux_flow_transact(&request, &reply, &buf);
584 dpif_linux_flow_get_stats(&reply, stats);
/* The reply buffer is narrowed in place to just the actions instead of
 * copying them out. */
587 buf->data = (void *) reply.actions;
588 buf->size = reply.actions_len;
/* Adds or modifies the flow 'key' with 'actions' according to 'flags'.  If
 * 'stats' is nonnull, a reply is requested and the flow's statistics are
 * extracted from it. */
598 dpif_linux_flow_put(struct dpif *dpif_, enum dpif_flow_put_flags flags,
599 const struct nlattr *key, size_t key_len,
600 const struct nlattr *actions, size_t actions_len,
601 struct dpif_flow_stats *stats)
603 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
604 struct dpif_linux_flow request, reply;
608 dpif_linux_flow_init(&request);
/* DPIF_FP_CREATE selects the NEW command; without it the SET command is
 * used. */
609 request.cmd = flags & DPIF_FP_CREATE ? ODP_FLOW_NEW : ODP_FLOW_SET;
610 request.dp_idx = dpif->minor;
612 request.key_len = key_len;
613 request.actions = actions;
614 request.actions_len = actions_len;
615 if (flags & DPIF_FP_ZERO_STATS) {
616 request.clear = true;
/* NLM_F_CREATE is set exactly when DPIF_FP_MODIFY is absent. */
618 request.nlmsg_flags = flags & DPIF_FP_MODIFY ? 0 : NLM_F_CREATE;
/* The transact helper is given 'reply' and 'buf' together or not at all. */
619 error = dpif_linux_flow_transact(&request,
620 stats ? &reply : NULL,
621 stats ? &buf : NULL);
622 if (!error && stats) {
623 dpif_linux_flow_get_stats(&reply, stats);
/* Deletes the flow identified by 'key'.  If 'stats' is nonnull, a reply is
 * requested and the deleted flow's statistics are extracted from it. */
630 dpif_linux_flow_del(struct dpif *dpif_,
631 const struct nlattr *key, size_t key_len,
632 struct dpif_flow_stats *stats)
634 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
635 struct dpif_linux_flow request, reply;
639 dpif_linux_flow_init(&request);
640 request.cmd = ODP_FLOW_DEL;
641 request.dp_idx = dpif->minor;
643 request.key_len = key_len;
/* The transact helper is given 'reply' and 'buf' together or not at all. */
644 error = dpif_linux_flow_transact(&request,
645 stats ? &reply : NULL,
646 stats ? &buf : NULL);
647 if (!error && stats) {
648 dpif_linux_flow_get_stats(&reply, stats);
/* Iteration state for a flow dump.  'flow' holds the most recent reply (its
 * 'state' member is fed back into the next request) and 'stats' holds the
 * statistics converted from that reply. */
654 struct dpif_linux_flow_state {
655 struct dpif_linux_flow flow;
657 struct dpif_flow_stats stats;
/* Begins a flow dump by storing zero-filled iteration state in '*statep'.
 * No request is sent to the kernel at this point. */
661 dpif_linux_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
663 *statep = xzalloc(sizeof(struct dpif_linux_flow_state));
/* Fetches the next flow of the dump in 'state_' with one ODP_FLOW_DUMP
 * transaction and exposes its key, actions and statistics through the output
 * parameters.  The outputs point into 'state_', not into fresh copies.  An
 * ENODEV result from the transaction is reported as EOF. */
668 dpif_linux_flow_dump_next(const struct dpif *dpif_, void *state_,
669 const struct nlattr **key, size_t *key_len,
670 const struct nlattr **actions, size_t *actions_len,
671 const struct dpif_flow_stats **stats)
673 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
674 struct dpif_linux_flow_state *state = state_;
675 struct ofpbuf *old_buf = state->buf;
676 struct dpif_linux_flow request;
679 dpif_linux_flow_init(&request);
680 request.cmd = ODP_FLOW_DUMP;
681 request.dp_idx = dpif->minor;
/* Resume from where the previous reply left off.  NOTE(review): this pointer
 * presumably refers to data inside the previous reply buffer, which would be
 * why 'old_buf' is freed only after the transaction below; confirm. */
682 request.state = state->flow.state;
683 error = dpif_linux_flow_transact(&request, &state->flow, &state->buf);
684 ofpbuf_delete(old_buf);
688 *key = state->flow.key;
689 *key_len = state->flow.key_len;
692 *actions = state->flow.actions;
693 *actions_len = state->flow.actions_len;
696 dpif_linux_flow_get_stats(&state->flow, &state->stats);
697 *stats = &state->stats;
700 return error == ENODEV ? EOF : error;
/* Ends a flow dump by releasing the last reply buffer held in 'state_'. */
704 dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
706 struct dpif_linux_flow_state *state = state_;
708 ofpbuf_delete(state->buf);
/* Sends an ODP_PACKET_CMD_EXECUTE request that carries 'packet' and
 * 'actions' for this datapath.  No reply is requested from the
 * transaction. */
714 dpif_linux_execute(struct dpif *dpif_,
715 const struct nlattr *actions, size_t actions_len,
716 const struct ofpbuf *packet)
718 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
719 struct odp_header *execute;
/* Initial capacity: room for both payloads plus 128 bytes for headers and
 * attribute overhead. */
723 buf = ofpbuf_new(128 + actions_len + packet->size);
725 nl_msg_put_genlmsghdr(buf, 0, odp_packet_family, NLM_F_REQUEST,
726 ODP_PACKET_CMD_EXECUTE, 1);
/* The odp_header follows the Generic Netlink header and names the target
 * datapath. */
728 execute = ofpbuf_put_uninit(buf, sizeof *execute);
729 execute->dp_idx = dpif->minor;
731 nl_msg_put_unspec(buf, ODP_PACKET_ATTR_PACKET, packet->data, packet->size);
732 nl_msg_put_unspec(buf, ODP_PACKET_ATTR_ACTIONS, actions, actions_len);
734 error = nl_sock_transact(genl_sock, buf, NULL);
/* Stores the current upcall listen mask in '*listen_mask'. */
740 dpif_linux_recv_get_mask(const struct dpif *dpif_, int *listen_mask)
742 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
743 *listen_mask = dpif->listen_mask;
/* Changes the set of upcall types this dpif listens for to 'listen_mask',
 * where bit (1u << type) selects upcall type 'type'.  The multicast socket
 * is created on the first nonzero mask and destroyed when the mask returns
 * to zero. */
748 dpif_linux_recv_set_mask(struct dpif *dpif_, int listen_mask)
750 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
754 if (listen_mask == dpif->listen_mask) {
756 } else if (!listen_mask) {
757 nl_sock_destroy(dpif->mc_sock);
758 dpif->mc_sock = NULL;
759 dpif->listen_mask = 0;
761 } else if (!dpif->mc_sock) {
762 error = nl_sock_create(NETLINK_GENERIC, &dpif->mc_sock);
768 /* Unsubscribe from old groups. */
769 for (i = 0; i < DPIF_N_UC_TYPES; i++) {
770 if (dpif->listen_mask & (1u << i)) {
771 nl_sock_leave_mcgroup(dpif->mc_sock, dpif->mcgroups[i]);
775 /* Update listen_mask. */
776 dpif->listen_mask = listen_mask;
778 /* Subscribe to new groups. */
780 for (i = 0; i < DPIF_N_UC_TYPES; i++) {
781 if (dpif->listen_mask & (1u << i)) {
784 retval = nl_sock_join_mcgroup(dpif->mc_sock, dpif->mcgroups[i]);
/* Queries the datapath and stores its sampling value in '*probability', or
 * 0 if the reply carried no sampling attribute. */
794 dpif_linux_get_sflow_probability(const struct dpif *dpif_,
795 uint32_t *probability)
797 struct dpif_linux_dp dp;
801 error = dpif_linux_dp_get(dpif_, &dp, &buf);
803 *probability = dp.sampling ? *dp.sampling : 0;
/* Sets the datapath's sampling value to 'probability'.  Returns the result
 * of the transaction; no reply is requested. */
810 dpif_linux_set_sflow_probability(struct dpif *dpif_, uint32_t probability)
812 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
813 struct dpif_linux_dp dp;
815 dpif_linux_dp_init(&dp);
816 dp.cmd = ODP_DP_CMD_SET;
817 dp.dp_idx = dpif->minor;
/* Pointing at the by-value parameter is safe because the request is fully
 * transacted before this function returns. */
818 dp.sampling = &probability;
819 return dpif_linux_dp_transact(&dp, NULL, NULL);
/* Maps 'queue_id' to a traffic-control handle.  For queue ids below 0xf000,
 * '*priority' is set to TC_H_MAKE(1 << 16, queue_id + 1), i.e. major number 1
 * with the queue id shifted up by one as the minor number. */
823 dpif_linux_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
824 uint32_t queue_id, uint32_t *priority)
826 if (queue_id < 0xf000) {
827 *priority = TC_H_MAKE(1 << 16, queue_id + 1);
/* Parses the upcall message in 'buf' into 'upcall' and stores the index of
 * the datapath it came from in '*dp_idx'.  On success 'upcall' takes over
 * 'buf' as its packet, and its key, packet and actions pointers refer to data
 * inside that buffer. */
835 parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall,
838 static const struct nl_policy odp_packet_policy[] = {
839 /* Always present. */
840 [ODP_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC,
841 .min_len = ETH_HEADER_LEN },
842 [ODP_PACKET_ATTR_KEY] = { .type = NL_A_NESTED },
844 /* ODP_PACKET_CMD_ACTION only. */
845 [ODP_PACKET_ATTR_USERDATA] = { .type = NL_A_U64, .optional = true },
847 /* ODP_PACKET_CMD_SAMPLE only. */
848 [ODP_PACKET_ATTR_SAMPLE_POOL] = { .type = NL_A_U32, .optional = true },
849 [ODP_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
852 struct odp_header *odp_header;
853 struct nlattr *a[ARRAY_SIZE(odp_packet_policy)];
854 struct nlmsghdr *nlmsg;
855 struct genlmsghdr *genl;
/* Headers are pulled from 'b', a second view of the same bytes, so that
 * 'buf' itself is untouched until parsing has succeeded. */
859 ofpbuf_use_const(&b, buf->data, buf->size);
861 nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
862 genl = ofpbuf_try_pull(&b, sizeof *genl);
863 odp_header = ofpbuf_try_pull(&b, sizeof *odp_header);
/* Reject messages that are too short for the three headers, belong to
 * another Generic Netlink family, or violate the attribute policy. */
864 if (!nlmsg || !genl || !odp_header
865 || nlmsg->nlmsg_type != odp_packet_family
866 || !nl_policy_parse(&b, 0, odp_packet_policy, a,
867 ARRAY_SIZE(odp_packet_policy))) {
/* Translate the Generic Netlink command into the dpif upcall type. */
871 type = (genl->cmd == ODP_PACKET_CMD_MISS ? DPIF_UC_MISS
872 : genl->cmd == ODP_PACKET_CMD_ACTION ? DPIF_UC_ACTION
873 : genl->cmd == ODP_PACKET_CMD_SAMPLE ? DPIF_UC_SAMPLE
879 memset(upcall, 0, sizeof *upcall);
/* 'buf' is re-pointed at the packet attribute's payload, so the packet data
 * is not copied. */
881 upcall->packet = buf;
882 upcall->packet->data = (void *) nl_attr_get(a[ODP_PACKET_ATTR_PACKET]);
883 upcall->packet->size = nl_attr_get_size(a[ODP_PACKET_ATTR_PACKET]);
884 upcall->key = (void *) nl_attr_get(a[ODP_PACKET_ATTR_KEY]);
885 upcall->key_len = nl_attr_get_size(a[ODP_PACKET_ATTR_KEY]);
886 upcall->userdata = (a[ODP_PACKET_ATTR_USERDATA]
887 ? nl_attr_get_u64(a[ODP_PACKET_ATTR_USERDATA])
889 upcall->sample_pool = (a[ODP_PACKET_ATTR_SAMPLE_POOL]
890 ? nl_attr_get_u32(a[ODP_PACKET_ATTR_SAMPLE_POOL])
892 if (a[ODP_PACKET_ATTR_ACTIONS]) {
893 upcall->actions = (void *) nl_attr_get(a[ODP_PACKET_ATTR_ACTIONS]);
894 upcall->actions_len = nl_attr_get_size(a[ODP_PACKET_ATTR_ACTIONS]);
897 *dp_idx = odp_header->dp_idx;
/* Receives one upcall for this datapath from the multicast socket and stores
 * it in 'upcall'.  At most 50 messages are examined per call. */
903 dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall)
905 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
910 if (!dpif->mc_sock) {
914 for (i = 0; i < 50; i++) {
917 error = nl_sock_recv(dpif->mc_sock, &buf, false);
922 error = parse_odp_packet(buf, upcall, &dp_idx);
/* A message qualifies only if it is for this datapath and of an upcall type
 * that is currently enabled in 'listen_mask'. */
924 && dp_idx == dpif->minor
925 && dpif->listen_mask & (1u << upcall->type)) {
/* Arranges for the poll loop to wake when the multicast socket becomes
 * readable. */
939 dpif_linux_recv_wait(struct dpif *dpif_)
941 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
943 nl_sock_wait(dpif->mc_sock, POLLIN);
/* Discards upcalls queued on the multicast socket by draining it. */
948 dpif_linux_recv_purge(struct dpif *dpif_)
950 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
953 nl_sock_drain(dpif->mc_sock);
/* Function table through which the generic dpif layer reaches this
 * implementation.  The initializer is positional, so each entry's meaning
 * comes from its place in struct dpif_class, not from a field name. */
957 const struct dpif_class dpif_linux_class = {
961 dpif_linux_enumerate,
964 dpif_linux_get_all_names,
966 dpif_linux_get_stats,
967 dpif_linux_get_drop_frags,
968 dpif_linux_set_drop_frags,
971 dpif_linux_port_query_by_number,
972 dpif_linux_port_query_by_name,
973 dpif_linux_get_max_ports,
974 dpif_linux_port_dump_start,
975 dpif_linux_port_dump_next,
976 dpif_linux_port_dump_done,
977 dpif_linux_port_poll,
978 dpif_linux_port_poll_wait,
982 dpif_linux_flow_flush,
983 dpif_linux_flow_dump_start,
984 dpif_linux_flow_dump_next,
985 dpif_linux_flow_dump_done,
987 dpif_linux_recv_get_mask,
988 dpif_linux_recv_set_mask,
989 dpif_linux_get_sflow_probability,
990 dpif_linux_set_sflow_probability,
991 dpif_linux_queue_to_priority,
993 dpif_linux_recv_wait,
994 dpif_linux_recv_purge,
/* Forward declaration; used by get_openvswitch_major() below. */
997 static int get_major(const char *target);
/* Looks up the three ODP Generic Netlink families (datapath, vport, packet)
 * and creates the shared Generic Netlink socket 'genl_sock'.  The result is
 * kept in a function-local static, initially -1.
 * NOTE(review): presumably the lookups run only while that static is still
 * negative, so later calls return the cached result; confirm. */
1000 dpif_linux_init(void)
1002 static int error = -1;
1005 error = nl_lookup_genl_family(ODP_DATAPATH_FAMILY,
1006 &odp_datapath_family);
1008 error = nl_lookup_genl_family(ODP_VPORT_FAMILY, &odp_vport_family);
1011 error = nl_lookup_genl_family(ODP_PACKET_FAMILY,
1012 &odp_packet_family);
1015 error = nl_sock_create(NETLINK_GENERIC, &genl_sock);
/* Queries the kernel for the vport named 'name' and reports whether it is of
 * type ODP_VPORT_TYPE_INTERNAL.  A query failure other than ENODEV is logged
 * as a rate-limited warning. */
1023 dpif_linux_is_internal_device(const char *name)
1025 struct dpif_linux_vport reply;
1029 error = dpif_linux_vport_get(name, &reply, &buf);
1032 } else if (error != ENODEV) {
1033 VLOG_WARN_RL(&error_rl, "%s: vport query failed (%s)",
1034 name, strerror(error));
1037 return reply.type == ODP_VPORT_TYPE_INTERNAL;
/* Makes sure that "/dev/net/dp<minor>" is a character device whose device
 * number is the Open vSwitch major combined with 'minor', creating the
 * directory and the node when they are missing.  A node of the wrong kind or
 * with the wrong device number is logged as "fixing".
 * NOTE(review): presumably the path is returned through '*fnp'; the
 * assignment is not visible here. */
1041 make_openvswitch_device(int minor, char **fnp)
1043 const char dirname[] = "/dev/net";
1051 major = get_openvswitch_major();
1055 dev = makedev(major, minor);
1057 sprintf(fn, "%s/dp%d", dirname, minor);
/* First case: something already exists at the path. */
1058 if (!stat(fn, &s)) {
1059 if (!S_ISCHR(s.st_mode)) {
1060 VLOG_WARN_RL(&error_rl, "%s is not a character device, fixing",
1062 } else if (s.st_rdev != dev) {
1063 VLOG_WARN_RL(&error_rl,
1064 "%s is device %u:%u but should be %u:%u, fixing",
1065 fn, major(s.st_rdev), minor(s.st_rdev),
1066 major(dev), minor(dev));
1071 VLOG_WARN_RL(&error_rl, "%s: unlink failed (%s)",
1072 fn, strerror(errno));
/* Second case: the node does not exist, so check its directory too. */
1075 } else if (errno == ENOENT) {
1076 if (stat(dirname, &s)) {
1077 if (errno == ENOENT) {
1078 if (mkdir(dirname, 0755)) {
1079 VLOG_WARN_RL(&error_rl, "%s: mkdir failed (%s)",
1080 dirname, strerror(errno));
1084 VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)",
1085 dirname, strerror(errno));
1090 VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", fn, strerror(errno));
1094 /* The device needs to be created. */
1095 if (mknod(fn, S_IFCHR | 0700, dev)) {
1096 VLOG_WARN_RL(&error_rl,
1097 "%s: creating character device %u:%u failed (%s)",
1098 fn, major(dev), minor(dev), strerror(errno));
1107 /* Return the major device number of the Open vSwitch device. If it
1108 * cannot be determined, a negative errno is returned. */
/* The value is cached in a function-local static.  The lookup is repeated on
 * every call for as long as the cached value is negative, so a failed lookup
 * is retried on the next call. */
1110 get_openvswitch_major(void)
1112 static int openvswitch_major = -1;
1113 if (openvswitch_major < 0) {
1114 openvswitch_major = get_major("openvswitch");
1116 return openvswitch_major;
/* Scans "/proc/devices" for the character device named 'target' and reports
 * its major number.  Only the character-device section is searched.  Failure
 * to open the file and failure to find 'target' are both logged as errors. */
1120 get_major(const char *target)
1122 const char fn[] = "/proc/devices";
1127 file = fopen(fn, "r");
1129 VLOG_ERR("opening %s failed (%s)", fn, strerror(errno));
1133 for (ln = 1; fgets(line, sizeof line, file); ln++) {
1137 if (!strncmp(line, "Character", 9) || line[0] == '\0') {
1138 /* Nothing to do. */
1139 } else if (!strncmp(line, "Block", 5)) {
1140 /* We only want character devices, so skip the rest of the file. */
/* Require both conversions to succeed.  sscanf() returns 1 when only the
 * number matched and EOF when nothing did; both are nonzero, and in either
 * case 'name' would be compared below without having been assigned.  Such
 * lines now fall through to the syntax-error warning instead. */
1142 } else if (sscanf(line, "%d %63s", &major, name) == 2) {
1143 if (!strcmp(name, target)) {
1148 VLOG_WARN_ONCE("%s:%d: syntax error", fn, ln);
1154 VLOG_ERR("%s: %s major not found (is the module loaded?)", fn, target);
/* Ensures that the device node for datapath 'minor' exists, then opens it
 * read-only and non-blocking, storing the file descriptor in '*fdp'.  An open
 * failure is logged as a warning. */
1159 open_minor(int minor, int *fdp)
1164 error = make_openvswitch_device(minor, &fn);
1169 *fdp = open(fn, O_RDONLY | O_NONBLOCK);
1172 VLOG_WARN("%s: open failed (%s)", fn, strerror(error));
/* rtnetlink link-change callback registered by open_dpif(); 'dpif_' is the
 * struct dpif_linux that was passed at registration.  When 'change' is an
 * RTM_NEWLINK or RTM_DELLINK message whose master ifindex is this datapath's
 * local port, the interface name is recorded (once) in 'changed_ports' for
 * dpif_linux_port_poll() to report.  The function can also set
 * 'change_error'.
 * NOTE(review): presumably that happens when 'change' is null, i.e. when
 * notifications were lost; confirm. */
1181 dpif_linux_port_changed(const struct rtnetlink_link_change *change,
1184 struct dpif_linux *dpif = dpif_;
1187 if (change->master_ifindex == dpif->local_ifindex
1188 && (change->nlmsg_type == RTM_NEWLINK
1189 || change->nlmsg_type == RTM_DELLINK))
1191 /* Our datapath changed, either adding a new port or deleting an
1193 shash_add_once(&dpif->changed_ports, change->ifname, NULL);
1196 dpif->change_error = true;
/* Obtains a file descriptor for datapath minor 0 via open_minor().  The
 * descriptor is kept in a function-local static that starts at -1.
 * NOTE(review): presumably the device is opened only once and the cached
 * descriptor is returned through '*dp0_fdp' afterward; confirm. */
1201 get_dp0_fd(int *dp0_fdp)
1203 static int dp0_fd = -1;
1208 error = open_minor(0, &fd);
1218 /* Parses the contents of 'buf', which contains a "struct odp_header" followed
1219 * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a
1220 * positive errno value.
1222 * 'vport' will contain pointers into 'buf', so the caller should not free
1223 * 'buf' while 'vport' is still in use. */
1225 dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *vport,
1226 const struct ofpbuf *buf)
1228 static const struct nl_policy odp_vport_policy[] = {
1229 [ODP_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 },
1230 [ODP_VPORT_ATTR_TYPE] = { .type = NL_A_U32 },
1231 [ODP_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
1232 [ODP_VPORT_ATTR_STATS] = { .type = NL_A_UNSPEC,
1233 .min_len = sizeof(struct rtnl_link_stats64),
1234 .max_len = sizeof(struct rtnl_link_stats64),
1236 [ODP_VPORT_ATTR_ADDRESS] = { .type = NL_A_UNSPEC,
1237 .min_len = ETH_ADDR_LEN,
1238 .max_len = ETH_ADDR_LEN,
1240 [ODP_VPORT_ATTR_MTU] = { .type = NL_A_U32, .optional = true },
1241 [ODP_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true },
1242 [ODP_VPORT_ATTR_IFINDEX] = { .type = NL_A_U32, .optional = true },
1243 [ODP_VPORT_ATTR_IFLINK] = { .type = NL_A_U32, .optional = true },
1246 struct nlattr *a[ARRAY_SIZE(odp_vport_policy)];
1247 struct odp_header *odp_header;
1248 struct nlmsghdr *nlmsg;
1249 struct genlmsghdr *genl;
/* Start from the "empty" values so that members whose attributes are absent
 * from the message keep well-defined defaults. */
1252 dpif_linux_vport_init(vport);
/* Headers are pulled from 'b', a second view of the same bytes, which leaves
 * 'buf' itself unmodified. */
1254 ofpbuf_use_const(&b, buf->data, buf->size);
1255 nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
1256 genl = ofpbuf_try_pull(&b, sizeof *genl);
1257 odp_header = ofpbuf_try_pull(&b, sizeof *odp_header);
/* Reject messages that are too short for the three headers, belong to
 * another Generic Netlink family, or violate the attribute policy. */
1258 if (!nlmsg || !genl || !odp_header
1259 || nlmsg->nlmsg_type != odp_vport_family
1260 || !nl_policy_parse(&b, 0, odp_vport_policy, a,
1261 ARRAY_SIZE(odp_vport_policy))) {
/* Port number, type and name are read unconditionally; every attribute
 * after them is copied only if it was present in the message. */
1265 vport->cmd = genl->cmd;
1266 vport->dp_idx = odp_header->dp_idx;
1267 vport->port_no = nl_attr_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);
1268 vport->type = nl_attr_get_u32(a[ODP_VPORT_ATTR_TYPE]);
1269 vport->name = nl_attr_get_string(a[ODP_VPORT_ATTR_NAME]);
1270 if (a[ODP_VPORT_ATTR_STATS]) {
1271 vport->stats = nl_attr_get(a[ODP_VPORT_ATTR_STATS]);
1273 if (a[ODP_VPORT_ATTR_ADDRESS]) {
1274 vport->address = nl_attr_get(a[ODP_VPORT_ATTR_ADDRESS]);
1276 if (a[ODP_VPORT_ATTR_MTU]) {
1277 vport->mtu = nl_attr_get_u32(a[ODP_VPORT_ATTR_MTU]);
1279 if (a[ODP_VPORT_ATTR_OPTIONS]) {
1280 vport->options = nl_attr_get(a[ODP_VPORT_ATTR_OPTIONS]);
1281 vport->options_len = nl_attr_get_size(a[ODP_VPORT_ATTR_OPTIONS]);
1283 if (a[ODP_VPORT_ATTR_IFINDEX]) {
1284 vport->ifindex = nl_attr_get_u32(a[ODP_VPORT_ATTR_IFINDEX]);
1286 if (a[ODP_VPORT_ATTR_IFLINK]) {
1287 vport->iflink = nl_attr_get_u32(a[ODP_VPORT_ATTR_IFLINK]);
1292 /* Appends to 'buf' (which must initially be empty) a "struct odp_header"
1293 * followed by Netlink attributes corresponding to 'vport'. */
/* A member is emitted as an attribute only when it differs from the "empty"
 * value for that member (UINT32_MAX for the port number,
 * ODP_VPORT_TYPE_UNSPEC for the type, null or zero for the members tested
 * below). */
1295 dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *vport,
1298 struct odp_header *odp_header;
1300 nl_msg_put_genlmsghdr(buf, 0, odp_vport_family, NLM_F_REQUEST | NLM_F_ECHO,
1303 odp_header = ofpbuf_put_uninit(buf, sizeof *odp_header);
1304 odp_header->dp_idx = vport->dp_idx;
1306 if (vport->port_no != UINT32_MAX) {
1307 nl_msg_put_u32(buf, ODP_VPORT_ATTR_PORT_NO, vport->port_no);
1310 if (vport->type != ODP_VPORT_TYPE_UNSPEC) {
1311 nl_msg_put_u32(buf, ODP_VPORT_ATTR_TYPE, vport->type);
1315 nl_msg_put_string(buf, ODP_VPORT_ATTR_NAME, vport->name);
1319 nl_msg_put_unspec(buf, ODP_VPORT_ATTR_STATS,
1320 vport->stats, sizeof *vport->stats);
1323 if (vport->address) {
1324 nl_msg_put_unspec(buf, ODP_VPORT_ATTR_ADDRESS,
1325 vport->address, ETH_ADDR_LEN);
1329 nl_msg_put_u32(buf, ODP_VPORT_ATTR_MTU, vport->mtu);
1332 if (vport->options) {
1333 nl_msg_put_nested(buf, ODP_VPORT_ATTR_OPTIONS,
1334 vport->options, vport->options_len);
1337 if (vport->ifindex) {
1338 nl_msg_put_u32(buf, ODP_VPORT_ATTR_IFINDEX, vport->ifindex);
1341 if (vport->iflink) {
1342 nl_msg_put_u32(buf, ODP_VPORT_ATTR_IFLINK, vport->iflink);
1346 /* Clears 'vport' to "empty" values. */
/* Every member becomes zero or null except 'dp_idx' and 'port_no', whose
 * empty value is UINT32_MAX. */
1348 dpif_linux_vport_init(struct dpif_linux_vport *vport)
1350 memset(vport, 0, sizeof *vport);
1351 vport->dp_idx = UINT32_MAX;
1352 vport->port_no = UINT32_MAX;
1355 /* Executes 'request' in the kernel datapath. If the command fails, returns a
1356 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
1357 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
1358 * result of the command is expected to be an odp_vport also, which is decoded
1359 * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the
1360 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
1362 dpif_linux_vport_transact(const struct dpif_linux_vport *request,
1363 struct dpif_linux_vport *reply,
1364 struct ofpbuf **bufp)
1366 struct ofpbuf *request_buf;
/* 'reply' and 'bufp' must be both null or both nonnull. */
1369 assert((reply != NULL) == (bufp != NULL));
/* The encoded request is only needed for the duration of the transaction. */
1371 request_buf = ofpbuf_new(1024);
1372 dpif_linux_vport_to_ofpbuf(request, request_buf);
1373 error = nl_sock_transact(genl_sock, request_buf, bufp);
1374 ofpbuf_delete(request_buf);
1378 error = dpif_linux_vport_from_ofpbuf(reply, *bufp);
/* NOTE(review): presumably the next two statements run only on failure, so
 * the caller never sees a half-filled reply or a buffer it must free;
 * confirm. */
1381 dpif_linux_vport_init(reply);
1382 ofpbuf_delete(*bufp);
1389 /* Obtains information about the kernel vport named 'name' and stores it into
1390 * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no
1391 * longer needed ('reply' will contain pointers into '*bufp'). */
/* The request carries only the command and the name; the datapath index and
 * port number keep their "empty" values from dpif_linux_vport_init(). */
1393 dpif_linux_vport_get(const char *name, struct dpif_linux_vport *reply,
1394 struct ofpbuf **bufp)
1396 struct dpif_linux_vport request;
1398 dpif_linux_vport_init(&request);
1399 request.cmd = ODP_VPORT_CMD_GET;
1400 request.name = name;
1402 return dpif_linux_vport_transact(&request, reply, bufp);
1405 /* Parses the contents of 'buf', which contains a "struct odp_header" followed
 * by Netlink attributes, into 'dp'.  Returns 0 if successful, otherwise a
 * positive errno value.
 *
 * The message is expected to begin with a Netlink header and a Generic
 * Netlink header ahead of the "struct odp_header"; all three are pulled off a
 * separate view of the data before the attributes are parsed.  'dp' is reset
 * with dpif_linux_dp_init() before anything else happens.
 *
 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
 * while 'dp' is still in use. */
1412 dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *dp, const struct ofpbuf *buf)
/* Attribute policy.  ODP_DP_ATTR_NAME carries no 'optional' marker and is
 * read unconditionally further down; IPV4_FRAGS, SAMPLING and MCGROUPS are
 * explicitly optional.  STATS must be exactly sizeof(struct odp_stats) bytes
 * when present. */
1414 static const struct nl_policy odp_datapath_policy[] = {
1415 [ODP_DP_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
1416 [ODP_DP_ATTR_STATS] = { .type = NL_A_UNSPEC,
1417 .min_len = sizeof(struct odp_stats),
1418 .max_len = sizeof(struct odp_stats),
1420 [ODP_DP_ATTR_IPV4_FRAGS] = { .type = NL_A_U32, .optional = true },
1421 [ODP_DP_ATTR_SAMPLING] = { .type = NL_A_U32, .optional = true },
1422 [ODP_DP_ATTR_MCGROUPS] = { .type = NL_A_NESTED, .optional = true },
/* One slot per policy entry, filled in by nl_policy_parse(). */
1425 struct nlattr *a[ARRAY_SIZE(odp_datapath_policy)];
1426 struct odp_header *odp_header;
1427 struct nlmsghdr *nlmsg;
1428 struct genlmsghdr *genl;
1431 dpif_linux_dp_init(dp);
/* Pull the headers from 'b', a const view over the same bytes, so that
 * 'buf' itself is left unmodified. */
1433 ofpbuf_use_const(&b, buf->data, buf->size);
1434 nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
1435 genl = ofpbuf_try_pull(&b, sizeof *genl);
1436 odp_header = ofpbuf_try_pull(&b, sizeof *odp_header);
/* Reject the message when any header is missing, when the Netlink message
 * type is not the datapath family, or when the remaining attributes do not
 * satisfy the policy. */
1437 if (!nlmsg || !genl || !odp_header
1438 || nlmsg->nlmsg_type != odp_datapath_family
1439 || !nl_policy_parse(&b, 0, odp_datapath_policy, a,
1440 ARRAY_SIZE(odp_datapath_policy))) {
/* Fixed fields come from the two inner headers; 'name' points into 'buf'. */
1444 dp->cmd = genl->cmd;
1445 dp->dp_idx = odp_header->dp_idx;
1446 dp->name = nl_attr_get_string(a[ODP_DP_ATTR_NAME]);
1447 if (a[ODP_DP_ATTR_STATS]) {
1448 /* Can't use structure assignment because Netlink doesn't ensure
 * sufficient alignment for 64-bit members. */
1450 memcpy(&dp->stats, nl_attr_get(a[ODP_DP_ATTR_STATS]),
1453 if (a[ODP_DP_ATTR_IPV4_FRAGS]) {
1454 dp->ipv4_frags = nl_attr_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]);
1456 if (a[ODP_DP_ATTR_SAMPLING]) {
/* Stored as a pointer to the attribute payload inside 'buf', not as a
 * copied value. */
1457 dp->sampling = nl_attr_get(a[ODP_DP_ATTR_SAMPLING]);
1460 if (a[ODP_DP_ATTR_MCGROUPS]) {
/* The nested attribute is indexed by ODP_PACKET_CMD_* and every entry is
 * optional; each one present is copied into the matching DPIF_UC_* slot of
 * 'dp->mcgroups'.  Slots without an entry keep the value left by
 * dpif_linux_dp_init(). */
1461 static const struct nl_policy odp_mcgroup_policy[] = {
1462 [ODP_PACKET_CMD_MISS] = { .type = NL_A_U32, .optional = true },
1463 [ODP_PACKET_CMD_ACTION] = { .type = NL_A_U32, .optional = true },
1464 [ODP_PACKET_CMD_SAMPLE] = { .type = NL_A_U32, .optional = true },
1467 struct nlattr *mcgroups[ARRAY_SIZE(odp_mcgroup_policy)];
1469 if (!nl_parse_nested(a[ODP_DP_ATTR_MCGROUPS], odp_mcgroup_policy,
1470 mcgroups, ARRAY_SIZE(odp_mcgroup_policy))) {
1474 if (mcgroups[ODP_PACKET_CMD_MISS]) {
1475 dp->mcgroups[DPIF_UC_MISS]
1476 = nl_attr_get_u32(mcgroups[ODP_PACKET_CMD_MISS]);
1478 if (mcgroups[ODP_PACKET_CMD_ACTION]) {
1479 dp->mcgroups[DPIF_UC_ACTION]
1480 = nl_attr_get_u32(mcgroups[ODP_PACKET_CMD_ACTION]);
1482 if (mcgroups[ODP_PACKET_CMD_SAMPLE]) {
1483 dp->mcgroups[DPIF_UC_SAMPLE]
1484 = nl_attr_get_u32(mcgroups[ODP_PACKET_CMD_SAMPLE]);
1491 /* Appends to 'buf' the Generic Netlink message described by 'dp'.
 *
 * The message consists of a Generic Netlink header for
 * 'odp_datapath_family' (flags NLM_F_REQUEST | NLM_F_ECHO, command
 * 'dp->cmd'), a "struct odp_header" carrying 'dp->dp_idx', and then the
 * NAME, IPV4_FRAGS and SAMPLING attributes.  Neither ODP_DP_ATTR_STATS nor
 * ODP_DP_ATTR_MCGROUPS is emitted by this function. */
1493 dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp *dp, struct ofpbuf *buf)
1495 struct odp_header *odp_header;
1497 nl_msg_put_genlmsghdr(buf, 0, odp_datapath_family,
1498 NLM_F_REQUEST | NLM_F_ECHO, dp->cmd, 1);
/* NOTE(review): the header space comes from ofpbuf_put_uninit() and only
 * 'dp_idx' is assigned; confirm struct odp_header has no other members that
 * would go out uninitialized. */
1500 odp_header = ofpbuf_put_uninit(buf, sizeof *odp_header);
1501 odp_header->dp_idx = dp->dp_idx;
/* NOTE(review): 'dp->name' can be null, since dpif_linux_dp_init() zero-fills
 * the struct; presumably this call is guarded by a null check, confirm. */
1504 nl_msg_put_string(buf, ODP_DP_ATTR_NAME, dp->name);
1507 /* Skip ODP_DP_ATTR_STATS since we never have a reason to serialize it. */
/* A zero 'ipv4_frags' is treated as "not specified" and is not sent. */
1509 if (dp->ipv4_frags) {
1510 nl_msg_put_u32(buf, ODP_DP_ATTR_IPV4_FRAGS, dp->ipv4_frags);
/* NOTE(review): 'dp->sampling' is a pointer and is dereferenced here;
 * presumably guarded by a null check, confirm. */
1514 nl_msg_put_u32(buf, ODP_DP_ATTR_SAMPLING, *dp->sampling);
1518 /* Clears 'dp' to "empty" values.
 *
 * The whole structure is zero-filled first, which leaves the pointer members
 * 'name' and 'sampling' null and 'stats', 'ipv4_frags' and 'mcgroups' zero.
 * The serializer relies on this: a zero 'ipv4_frags' is not emitted. */
1520 dpif_linux_dp_init(struct dpif_linux_dp *dp)
1522 memset(dp, 0, sizeof *dp);
/* Starts a Netlink dump on 'genl_sock', tracked by '*dump', using an
 * ODP_DP_CMD_GET request.  Apart from 'cmd', the request keeps the values
 * assigned by dpif_linux_dp_init(). */
1527 dpif_linux_dp_dump_start(struct nl_dump *dump)
1529 struct dpif_linux_dp request;
1532 dpif_linux_dp_init(&request);
1533 request.cmd = ODP_DP_CMD_GET;
/* Serialize the request into a new buffer and hand it to nl_dump_start().
 * NOTE(review): confirm who owns 'buf' once nl_dump_start() returns. */
1535 buf = ofpbuf_new(1024);
1536 dpif_linux_dp_to_ofpbuf(&request, buf);
1537 nl_dump_start(dump, genl_sock, buf);
1541 /* Executes 'request' in the kernel datapath.  If the command fails, returns a
 * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
 * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
 * result of the command is expected to be of the same form, which is decoded
 * and stored in '*reply' and '*bufp'.  The caller must free '*bufp' when the
 * reply is no longer needed ('reply' will contain pointers into '*bufp').
 *
 * The request is serialized with dpif_linux_dp_to_ofpbuf() and sent with
 * nl_sock_transact() on 'genl_sock'. */
1548 dpif_linux_dp_transact(const struct dpif_linux_dp *request,
1549 struct dpif_linux_dp *reply, struct ofpbuf **bufp)
1551 struct ofpbuf *request_buf;
/* 'reply' and 'bufp' must be both null or both nonnull. */
1554 assert((reply != NULL) == (bufp != NULL));
/* The request buffer is temporary: it is released as soon as the
 * transaction call returns, whatever the outcome. */
1556 request_buf = ofpbuf_new(1024);
1557 dpif_linux_dp_to_ofpbuf(request, request_buf);
1558 error = nl_sock_transact(genl_sock, request_buf, bufp);
1559 ofpbuf_delete(request_buf);
/* Decode the reply buffer into '*reply'; the decoder's result replaces
 * 'error'. */
1563 error = dpif_linux_dp_from_ofpbuf(reply, *bufp);
/* Reset '*reply' and release the reply buffer.
 * NOTE(review): presumably this runs only when the transaction or the
 * decode failed; confirm the guarding condition. */
1566 dpif_linux_dp_init(reply);
1567 ofpbuf_delete(*bufp);
1574 /* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
 * The caller must free '*bufp' when the reply is no longer needed ('reply'
 * will contain pointers into '*bufp').
 *
 * The datapath is identified by index: the request is an ODP_DP_CMD_GET
 * whose 'dp_idx' is the 'minor' of the dpif_linux behind 'dpif_'.  The return
 * value is whatever dpif_linux_dp_transact() returns. */
1578 dpif_linux_dp_get(const struct dpif *dpif_, struct dpif_linux_dp *reply,
1579 struct ofpbuf **bufp)
1581 struct dpif_linux *dpif = dpif_linux_cast(dpif_);
1582 struct dpif_linux_dp request;
1584 dpif_linux_dp_init(&request);
1585 request.cmd = ODP_DP_CMD_GET;
1586 request.dp_idx = dpif->minor;
1588 return dpif_linux_dp_transact(&request, reply, bufp);
1591 /* Parses the contents of 'buf', which contains a "struct odp_flow" followed by
1592 * Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a
1593 * positive errno value.
1595 * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
1596 * while 'flow' is still in use. */
1598 dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *flow,
1599 const struct ofpbuf *buf)
1601 static const struct nl_policy odp_flow_policy[] = {
1602 [ODP_FLOW_ATTR_KEY] = { .type = NL_A_NESTED },
1603 [ODP_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
1604 [ODP_FLOW_ATTR_STATS] = { .type = NL_A_UNSPEC,
1605 .min_len = sizeof(struct odp_flow_stats),
1606 .max_len = sizeof(struct odp_flow_stats),
1608 [ODP_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true },
1609 [ODP_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true },
1610 /* The kernel never uses ODP_FLOW_ATTR_CLEAR. */
1611 [ODP_FLOW_ATTR_STATE] = { .type = NL_A_U64, .optional = true },
1614 struct odp_flow *odp_flow;
1615 struct nlattr *a[ARRAY_SIZE(odp_flow_policy)];
1617 dpif_linux_flow_init(flow);
1619 if (!nl_policy_parse(buf, sizeof *odp_flow, odp_flow_policy,
1620 a, ARRAY_SIZE(odp_flow_policy))) {
1623 odp_flow = buf->data;
1625 flow->nlmsg_flags = odp_flow->nlmsg_flags;
1626 flow->dp_idx = odp_flow->dp_idx;
1627 flow->key = nl_attr_get(a[ODP_FLOW_ATTR_KEY]);
1628 flow->key_len = nl_attr_get_size(a[ODP_FLOW_ATTR_KEY]);
1629 if (a[ODP_FLOW_ATTR_ACTIONS]) {
1630 flow->actions = nl_attr_get(a[ODP_FLOW_ATTR_ACTIONS]);
1631 flow->actions_len = nl_attr_get_size(a[ODP_FLOW_ATTR_ACTIONS]);
1633 if (a[ODP_FLOW_ATTR_STATS]) {
1634 flow->stats = nl_attr_get(a[ODP_FLOW_ATTR_STATS]);
1636 if (a[ODP_FLOW_ATTR_TCP_FLAGS]) {
1637 flow->tcp_flags = nl_attr_get(a[ODP_FLOW_ATTR_TCP_FLAGS]);
1639 if (a[ODP_FLOW_ATTR_STATE]) {
1640 flow->state = nl_attr_get(a[ODP_FLOW_ATTR_STATE]);
1645 /* Appends to 'buf' (which must initially be empty) a "struct odp_flow"
1646 * followed by Netlink attributes corresponding to 'flow'. */
1648 dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *flow,
1651 struct odp_flow *odp_flow;
1653 ofpbuf_reserve(buf, sizeof odp_flow);
1655 if (flow->key_len) {
1656 nl_msg_put_unspec(buf, ODP_FLOW_ATTR_KEY, flow->key, flow->key_len);
1659 if (flow->actions_len) {
1660 nl_msg_put_unspec(buf, ODP_FLOW_ATTR_ACTIONS,
1661 flow->actions, flow->actions_len);
1664 /* We never need to send these to the kernel. */
1665 assert(!flow->stats);
1666 assert(!flow->tcp_flags);
1667 assert(!flow->used);
1670 nl_msg_put_flag(buf, ODP_FLOW_ATTR_CLEAR);
1674 nl_msg_put_u64(buf, ODP_FLOW_ATTR_STATE,
1675 get_unaligned_u64(flow->state));
1678 odp_flow = ofpbuf_push_uninit(buf, sizeof *odp_flow);
1679 odp_flow->nlmsg_flags = flow->nlmsg_flags;
1680 odp_flow->dp_idx = flow->dp_idx;
1681 odp_flow->len = buf->size;
1682 odp_flow->total_len = (char *) ofpbuf_end(buf) - (char *) buf->data;
1685 /* Clears 'flow' to "empty" values.
 *
 * Every member is zero-filled.  Other code in this file tests the pointer
 * members ('stats', 'tcp_flags', 'used', 'state') and the length members
 * ('key_len', 'actions_len') against zero to mean "not present". */
1687 dpif_linux_flow_init(struct dpif_linux_flow *flow)
1689 memset(flow, 0, sizeof *flow);
1692 /* Executes 'request' in the kernel datapath.  If the command fails, returns a
 * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
 * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
 * result of the command is expected to be an odp_flow also, which is decoded
 * and stored in '*reply' and '*bufp'.  The caller must free '*bufp' when the
 * reply is no longer needed ('reply' will contain pointers into '*bufp').
 *
 * The command is issued with ioctl() on the descriptor obtained from
 * get_dp0_fd(), with 'request->cmd' as the ioctl number and the serialized
 * request as its argument. */
1699 dpif_linux_flow_transact(const struct dpif_linux_flow *request,
1700 struct dpif_linux_flow *reply, struct ofpbuf **bufp)
1702 struct ofpbuf *buf = NULL;
/* 'reply' and 'bufp' must be both null or both nonnull. */
1706 assert((reply != NULL) == (bufp != NULL));
1708 error = get_dp0_fd(&fd);
/* Serialize the request.  The same buffer is what ioctl() receives and what
 * the reply is later decoded from. */
1713 buf = ofpbuf_new(1024);
1714 dpif_linux_flow_to_ofpbuf(request, buf);
/* A nonzero ioctl() result is reported as the current errno value. */
1716 error = ioctl(fd, request->cmd, buf->data) ? errno : 0;
/* Set the buffer size from the 'len' member of the leading struct odp_flow,
 * then decode the buffer into '*reply'. */
1722 buf->size = ((struct odp_flow *) buf->data)->len;
1723 error = dpif_linux_flow_from_ofpbuf(reply, buf);
/* Clear '*reply'.
 * NOTE(review): presumably reached only on failure and only when 'reply' is
 * nonnull; confirm the surrounding control flow. */
1736 memset(reply, 0, sizeof *reply);
/* Converts the statistics carried by 'flow' into '*stats'.
 *
 * 64-bit values are read through get_unaligned_u64().  'used' and 'tcp_flags'
 * fall back to 0 when the corresponding pointer in 'flow' is null. */
1743 dpif_linux_flow_get_stats(const struct dpif_linux_flow *flow,
1744 struct dpif_flow_stats *stats)
/* NOTE(review): the two groups of counter assignments below are presumably
 * the two arms of a null check on 'flow->stats'; confirm. */
1747 stats->n_packets = get_unaligned_u64(&flow->stats->n_packets);
1748 stats->n_bytes = get_unaligned_u64(&flow->stats->n_bytes);
1750 stats->n_packets = 0;
/* 'flow->used' is a pointer to the 64-bit value; 'flow->tcp_flags' is a
 * pointer to a single byte. */
1753 stats->used = flow->used ? get_unaligned_u64(flow->used) : 0;
1754 stats->tcp_flags = flow->tcp_flags ? *flow->tcp_flags : 0;