+/* Builds an OVS_DP_CMD_GET request from a freshly initialized
+ * "struct dpif_linux_dp" (only 'cmd' is set afterward), serializes it, and
+ * passes it together with 'dump' and 'genl_sock' to nl_dump_start().  The
+ * temporary request buffer is deleted before returning. */
+static void
+dpif_linux_dp_dump_start(struct nl_dump *dump)
+{
+    struct dpif_linux_dp query;
+    struct ofpbuf *msg;
+
+    dpif_linux_dp_init(&query);
+    query.cmd = OVS_DP_CMD_GET;
+
+    /* The serialized request is only needed for the duration of the
+     * nl_dump_start() call. */
+    msg = ofpbuf_new(1024);
+    dpif_linux_dp_to_ofpbuf(&query, msg);
+    nl_dump_start(dump, genl_sock, msg);
+    ofpbuf_delete(msg);
+}
+
+/* Sends 'request' to the kernel datapath over 'genl_sock' and returns 0 on
+ * success or a positive errno value on failure.
+ *
+ * 'reply' and 'bufp' must be both null or both nonnull (this is asserted).
+ * When they are null, nothing beyond the transaction itself is done.  When
+ * they are nonnull, the response is parsed as a "struct dpif_linux_dp" into
+ * '*reply', which will point into '*bufp'; the caller must free '*bufp' once
+ * it is done with the reply.  On any failure '*reply' is reinitialized and
+ * '*bufp' is deleted and set to null. */
+static int
+dpif_linux_dp_transact(const struct dpif_linux_dp *request,
+                       struct dpif_linux_dp *reply, struct ofpbuf **bufp)
+{
+    struct ofpbuf *msg;
+    int retval;
+
+    assert((reply != NULL) == (bufp != NULL));
+
+    msg = ofpbuf_new(1024);
+    dpif_linux_dp_to_ofpbuf(request, msg);
+    retval = nl_sock_transact(genl_sock, msg, bufp);
+    ofpbuf_delete(msg);
+
+    if (!reply) {
+        /* Caller is not interested in the response. */
+        return retval;
+    }
+
+    if (!retval) {
+        retval = dpif_linux_dp_from_ofpbuf(reply, *bufp);
+    }
+    if (retval) {
+        /* Either the transaction or the parse failed: hand back an empty
+         * reply and no buffer. */
+        dpif_linux_dp_init(reply);
+        ofpbuf_delete(*bufp);
+        *bufp = NULL;
+    }
+    return retval;
+}
+
+/* Queries the kernel for the datapath identified by 'dpif_' (by its
+ * 'dp_ifindex') using OVS_DP_CMD_GET.  The result is stored in '*reply' and
+ * '*bufp' via dpif_linux_dp_transact(), whose return value is passed through.
+ *
+ * '*reply' will contain pointers into '*bufp', so the caller must free
+ * '*bufp' only once the reply is no longer needed. */
+static int
+dpif_linux_dp_get(const struct dpif *dpif_, struct dpif_linux_dp *reply,
+                  struct ofpbuf **bufp)
+{
+    struct dpif_linux *linux_dpif = dpif_linux_cast(dpif_);
+    struct dpif_linux_dp query;
+
+    dpif_linux_dp_init(&query);
+    query.dp_ifindex = linux_dpif->dp_ifindex;
+    query.cmd = OVS_DP_CMD_GET;
+
+    return dpif_linux_dp_transact(&query, reply, bufp);
+}
+\f
+/* Decodes 'buf' into 'flow'.  'buf' is expected to hold a Netlink header, a
+ * Generic Netlink header, and a "struct ovs_header", followed by Netlink
+ * attributes.  Returns 0 on success, otherwise EINVAL (a header is missing,
+ * the message type is not 'ovs_flow_family', or the attributes do not satisfy
+ * the policy below).
+ *
+ * 'flow' is always reset first, so on failure it is left in its "empty"
+ * state.  On success 'flow' points into 'buf', so the caller must keep 'buf'
+ * alive for as long as 'flow' is in use. */
+static int
+dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *flow,
+                            const struct ofpbuf *buf)
+{
+    static const struct nl_policy ovs_flow_policy[] = {
+        [OVS_FLOW_ATTR_KEY] = { .type = NL_A_NESTED },
+        [OVS_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
+        [OVS_FLOW_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats),
+                                  .optional = true },
+        [OVS_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true },
+        [OVS_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true },
+        /* The kernel never uses OVS_FLOW_ATTR_CLEAR. */
+    };
+
+    struct nlattr *attrs[ARRAY_SIZE(ovs_flow_policy)];
+    struct ovs_header *dp_hdr;
+    struct genlmsghdr *genl_hdr;
+    struct nlmsghdr *nl_hdr;
+    struct ofpbuf rest;
+
+    dpif_linux_flow_init(flow);
+
+    /* Work on a private view of 'buf' so that pulling the headers does not
+     * modify the caller's buffer. */
+    ofpbuf_use_const(&rest, buf->data, buf->size);
+    nl_hdr = ofpbuf_try_pull(&rest, sizeof *nl_hdr);
+    genl_hdr = ofpbuf_try_pull(&rest, sizeof *genl_hdr);
+    dp_hdr = ofpbuf_try_pull(&rest, sizeof *dp_hdr);
+
+    if (!nl_hdr || !genl_hdr || !dp_hdr) {
+        return EINVAL;
+    }
+    if (nl_hdr->nlmsg_type != ovs_flow_family) {
+        return EINVAL;
+    }
+    if (!nl_policy_parse(&rest, 0, ovs_flow_policy, attrs,
+                         ARRAY_SIZE(ovs_flow_policy))) {
+        return EINVAL;
+    }
+
+    flow->nlmsg_flags = nl_hdr->nlmsg_flags;
+    flow->dp_ifindex = dp_hdr->dp_ifindex;
+
+    /* The key is the only attribute the policy makes mandatory. */
+    flow->key = nl_attr_get(attrs[OVS_FLOW_ATTR_KEY]);
+    flow->key_len = nl_attr_get_size(attrs[OVS_FLOW_ATTR_KEY]);
+
+    /* Optional attributes: the corresponding fields keep their "empty"
+     * values when the attribute is absent. */
+    if (attrs[OVS_FLOW_ATTR_ACTIONS]) {
+        flow->actions = nl_attr_get(attrs[OVS_FLOW_ATTR_ACTIONS]);
+        flow->actions_len = nl_attr_get_size(attrs[OVS_FLOW_ATTR_ACTIONS]);
+    }
+    if (attrs[OVS_FLOW_ATTR_STATS]) {
+        flow->stats = nl_attr_get(attrs[OVS_FLOW_ATTR_STATS]);
+    }
+    if (attrs[OVS_FLOW_ATTR_TCP_FLAGS]) {
+        flow->tcp_flags = nl_attr_get(attrs[OVS_FLOW_ATTR_TCP_FLAGS]);
+    }
+    if (attrs[OVS_FLOW_ATTR_USED]) {
+        flow->used = nl_attr_get(attrs[OVS_FLOW_ATTR_USED]);
+    }
+    return 0;
+}
+
+/* Serializes 'flow' into 'buf', which must initially be empty: a Generic
+ * Netlink header for 'ovs_flow_family' carrying 'flow->cmd', then a
+ * "struct ovs_header", then one Netlink attribute per field of 'flow' that is
+ * set.
+ *
+ * 'flow->stats', 'flow->tcp_flags', and 'flow->used' must all be unset; this
+ * is asserted, because they are never sent to the kernel. */
+static void
+dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *flow,
+                          struct ofpbuf *buf)
+{
+    struct ovs_header *dp_hdr;
+
+    nl_msg_put_genlmsghdr(buf, 0, ovs_flow_family,
+                          NLM_F_REQUEST | flow->nlmsg_flags,
+                          flow->cmd, OVS_FLOW_VERSION);
+
+    dp_hdr = ofpbuf_put_uninit(buf, sizeof *dp_hdr);
+    dp_hdr->dp_ifindex = flow->dp_ifindex;
+
+    /* An empty key is omitted entirely. */
+    if (flow->key_len) {
+        nl_msg_put_unspec(buf, OVS_FLOW_ATTR_KEY, flow->key, flow->key_len);
+    }
+
+    /* The actions attribute is emitted whenever either the pointer or the
+     * length is nonzero, so a nonnull 'actions' with a zero length still
+     * produces an (empty) attribute. */
+    if (flow->actions || flow->actions_len) {
+        nl_msg_put_unspec(buf, OVS_FLOW_ATTR_ACTIONS,
+                          flow->actions, flow->actions_len);
+    }
+
+    /* We never need to send these to the kernel. */
+    assert(!flow->stats);
+    assert(!flow->tcp_flags);
+    assert(!flow->used);
+
+    if (flow->clear) {
+        nl_msg_put_flag(buf, OVS_FLOW_ATTR_CLEAR);
+    }
+}
+
+/* Resets every byte of '*flow' to zero, which is the "empty" value for all of
+ * its members. */
+static void
+dpif_linux_flow_init(struct dpif_linux_flow *flow)
+{
+    memset(flow, '\0', sizeof *flow);
+}
+
+/* Sends 'request' to the kernel datapath over 'genl_sock' and returns 0 on
+ * success or a positive errno value on failure.
+ *
+ * 'reply' and 'bufp' must be both null or both nonnull (this is asserted).
+ * When they are null, nothing beyond the transaction itself is done.  When
+ * they are nonnull, NLM_F_ECHO is ORed into 'request->nlmsg_flags' (note that
+ * this modifies the caller's 'request') and the response is parsed as a flow
+ * into '*reply', which will point into '*bufp'; the caller must free '*bufp'
+ * once it is done with the reply.  On any failure '*reply' is reinitialized
+ * and '*bufp' is deleted and set to null. */
+static int
+dpif_linux_flow_transact(struct dpif_linux_flow *request,
+                         struct dpif_linux_flow *reply, struct ofpbuf **bufp)
+{
+    struct ofpbuf *msg;
+    int retval;
+
+    assert((reply != NULL) == (bufp != NULL));
+
+    if (reply) {
+        request->nlmsg_flags |= NLM_F_ECHO;
+    }
+
+    msg = ofpbuf_new(1024);
+    dpif_linux_flow_to_ofpbuf(request, msg);
+    retval = nl_sock_transact(genl_sock, msg, bufp);
+    ofpbuf_delete(msg);
+
+    if (!reply) {
+        /* Caller is not interested in the response. */
+        return retval;
+    }
+
+    if (!retval) {
+        retval = dpif_linux_flow_from_ofpbuf(reply, *bufp);
+    }
+    if (retval) {
+        /* Either the transaction or the parse failed: hand back an empty
+         * reply and no buffer. */
+        dpif_linux_flow_init(reply);
+        ofpbuf_delete(*bufp);
+        *bufp = NULL;
+    }
+    return retval;
+}
+
+/* Fills in '*stats' from the optional statistics fields of 'flow'.  Any field
+ * whose source pointer in 'flow' is null is reported as 0. */
+static void
+dpif_linux_flow_get_stats(const struct dpif_linux_flow *flow,
+                          struct dpif_flow_stats *stats)
+{
+    /* Packet and byte counters come from the same attribute, so they are
+     * either both present or both absent. */
+    stats->n_packets = flow->stats
+                       ? get_unaligned_u64(&flow->stats->n_packets)
+                       : 0;
+    stats->n_bytes = flow->stats
+                     ? get_unaligned_u64(&flow->stats->n_bytes)
+                     : 0;
+
+    if (flow->used) {
+        stats->used = get_32aligned_u64(flow->used);
+    } else {
+        stats->used = 0;
+    }
+
+    if (flow->tcp_flags) {
+        stats->tcp_flags = *flow->tcp_flags;
+    } else {
+        stats->tcp_flags = 0;
+    }
+}
+\f
+/* Metwally "space-saving" algorithm implementation. */
+
+/* Updates 'ch' to record that a packet was received on 'port_no'.
+ *
+ * 'ch->sketches' holds N_SKETCHES counting elements kept sorted by decreasing
+ * 'hits', so the last element is always the one with the fewest hits.  If
+ * 'port_no' already has a counting element, its hit count is incremented.
+ * Otherwise the last element is taken over for 'port_no': its previous hit
+ * count is kept and also recorded in 'error', as the amount by which 'hits'
+ * may overstate the true count for 'port_no'. */
+static void
+update_sketch(struct dpif_channel *ch, uint32_t port_no)
+{
+    struct dpif_sketch *sk;
+
+    /* Find an existing counting element for 'port_no' or, if none, replace the
+     * counting element with the fewest hits by 'port_no'. */
+    for (sk = ch->sketches; ; sk++) {
+        if (port_no == sk->port_no) {
+            break;
+        } else if (sk == &ch->sketches[N_SKETCHES - 1]) {
+            sk->port_no = port_no;
+            sk->error = sk->hits;
+            break;
+        }
+    }
+
+    /* Increment the hit count, then re-sort the counting elements (usually
+     * nothing needs to be done).
+     *
+     * Only 'sk' grew, so it can only need to move toward the front, past
+     * predecessors that now have strictly fewer hits.  This keeps the array
+     * in decreasing order, which the eviction above relies on. */
+    sk->hits++;
+    while (sk > ch->sketches && sk[-1].hits < sk->hits) {
+        struct dpif_sketch tmp = sk[-1];
+        sk[-1] = *sk;
+        *sk = tmp;
+        sk--;
+    }
+}
+
+/* Divide the counts of all the the counting elements in 'dpif' by 2. See the
+ * comment on SCALE_INTERVAL. */
+static void
+scale_sketches(struct dpif *dpif_)
+{
+ struct dpif_linux *dpif = dpif_linux_cast(dpif_);
+ struct dpif_channel *ch;
+
+ for (ch = dpif->channels; ch < &dpif->channels[N_CHANNELS]; ch++) {
+ struct dpif_sketch *sk;
+
+ for (sk = ch->sketches; sk < &ch->sketches[N_SKETCHES]; sk++) {
+ sk->hits /= 2;
+ sk->error /= 2;
+ }
+ }
+}
+
+/* Logs, at warning level and subject to rate limiting, information about a
+ * packet that was recently lost in 'ch' (in 'dpif_').
+ *
+ * The message names the channel by its index within 'dpif_', says how long
+ * ago the channel was last polled (omitted when 'ch->last_poll' is
+ * LLONG_MIN), and lists every counting element of 'ch' that has a nonzero hit
+ * count, in array order.  For an element with a nonzero 'error', the count is
+ * printed as the range "hits - error to hits". */
+static void
+report_loss(struct dpif *dpif_, struct dpif_channel *ch)
+{
+    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
+    struct dpif_sketch *sk;
+    struct ds s;
+
+    /* Check the rate limit at the same level the message is logged at, so
+     * that no work is done (and no rate-limit token is spent) when warnings
+     * are disabled. */
+    if (VLOG_DROP_WARN(&rl)) {
+        return;
+    }
+
+    ds_init(&s);
+    if (ch->last_poll != LLONG_MIN) {
+        ds_put_format(&s, " (last polled %lld ms ago)",
+                      time_msec() - ch->last_poll);
+    }
+    ds_put_cstr(&s, ", most frequent sources are");
+    for (sk = ch->sketches; sk < &ch->sketches[N_SKETCHES]; sk++) {
+        if (sk->hits) {
+            struct dpif_port port;
+
+            ds_put_format(&s, " %"PRIu32, sk->port_no);
+            /* Add the port's name when the lookup succeeds; otherwise only
+             * the number is shown. */
+            if (!dpif_port_query_by_number(dpif_, sk->port_no, &port)) {
+                ds_put_format(&s, "(%s)", port.name);
+                dpif_port_destroy(&port);
+            }
+            if (sk->error) {
+                ds_put_format(&s, ": %u to %u,",
+                              sk->hits - sk->error, sk->hits);
+            } else {
+                ds_put_format(&s, ": %u,", sk->hits);
+            }
+        }
+    }
+    /* Drop the trailing separator left by the last entry, if any. */
+    ds_chomp(&s, ',');
+
+    VLOG_WARN("%s: lost packet on channel %td%s",
+              dpif_name(dpif_), ch - dpif->channels, ds_cstr(&s));
+    ds_destroy(&s);
+}