X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=lib%2Fdpif-netdev.c;h=3c22e6dc1b72dc1af770a5b007ce523c78ae9d50;hb=abfec865566e6cce961cc8660de1ddfdc85dae5f;hp=cae6d2319a83a9d1eb4dae5f7116da9e0043a92d;hpb=a5e54d9b6f8002f34cc792df69e6eda68cf95223;p=openvswitch diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index cae6d231..3c22e6dc 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009 Nicira Networks. + * Copyright (c) 2009, 2010 Nicira Networks. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,16 +22,13 @@ #include #include #include -#include -#include -#include -#include #include +#include +#include #include #include #include #include -#include #include #include "csum.h" @@ -48,9 +45,9 @@ #include "queue.h" #include "timeval.h" #include "util.h" - #include "vlog.h" -#define THIS_MODULE VLM_dpif_netdev + +VLOG_DEFINE_THIS_MODULE(dpif_netdev) /* Configuration parameters. */ enum { N_QUEUES = 2 }; /* Number of queues for dpif_recv(). */ @@ -68,7 +65,7 @@ struct dp_netdev { struct list node; int dp_idx; int open_cnt; - bool deleted; + bool destroyed; bool drop_frags; /* Drop all IP fragments, if true. */ struct ovs_queue queues[N_QUEUES]; /* Messages queued for dpif_recv(). */ @@ -102,10 +99,9 @@ struct dp_netdev_flow { flow_t key; /* Statistics. */ - struct timeval used; /* Last used time, in milliseconds. */ + struct timespec used; /* Last used time. */ long long int packet_count; /* Number of packets matched. */ long long int byte_count; /* Number of bytes matched. */ - uint8_t ip_tos; /* IP TOS value. */ uint16_t tcp_ctl; /* Bitwise-OR of seen tcp_ctl values. */ /* Actions. 
*/ @@ -141,7 +137,7 @@ static int do_del_port(struct dp_netdev *, uint16_t port_no); static int dp_netdev_output_control(struct dp_netdev *, const struct ofpbuf *, int queue_no, int port_no, uint32_t arg); static int dp_netdev_execute_actions(struct dp_netdev *, - struct ofpbuf *, flow_t *, + struct ofpbuf *, const flow_t *, const union odp_action *, int n); static struct dpif_netdev * @@ -160,7 +156,7 @@ get_dp_netdev(const struct dpif *dpif) static int name_to_dp_idx(const char *name) { - if (!strncmp(name, "dp", 2) && isdigit(name[2])) { + if (!strncmp(name, "dp", 2) && isdigit((unsigned char)name[2])) { int dp_idx = atoi(name + 2); if (dp_idx >= 0 && dp_idx < N_DP_NETDEVS) { return dp_idx; @@ -200,7 +196,7 @@ create_dpif_netdev(struct dp_netdev *dp) dp->open_cnt++; - dpname = xasprintf("netdev:dp%d", dp->dp_idx); + dpname = xasprintf("dp%d", dp->dp_idx); dpif = xmalloc(sizeof *dpif); dpif_init(&dpif->dpif, &dpif_netdev_class, dpname, dp->dp_idx, dp->dp_idx); dpif->dp = dp; @@ -223,7 +219,7 @@ create_dp_netdev(const char *name, int dp_idx, struct dpif **dpifp) } /* Create datapath. 
*/ - dp_netdevs[dp_idx] = dp = xcalloc(1, sizeof *dp); + dp_netdevs[dp_idx] = dp = xzalloc(sizeof *dp); list_push_back(&dp_netdev_list, &dp->node); dp->dp_idx = dp_idx; dp->open_cnt = 0; @@ -241,7 +237,7 @@ create_dp_netdev(const char *name, int dp_idx, struct dpif **dpifp) error = do_add_port(dp, name, ODP_PORT_INTERNAL, ODPP_LOCAL); if (error) { dp_netdev_free(dp); - return error; + return ENODEV; } *dpifp = create_dpif_netdev(dp); @@ -249,20 +245,20 @@ create_dp_netdev(const char *name, int dp_idx, struct dpif **dpifp) } static int -dpif_netdev_open(const char *name UNUSED, char *suffix, bool create, +dpif_netdev_open(const char *name, const char *type OVS_UNUSED, bool create, struct dpif **dpifp) { if (create) { - if (find_dp_netdev(suffix)) { + if (find_dp_netdev(name)) { return EEXIST; } else { - int dp_idx = name_to_dp_idx(suffix); + int dp_idx = name_to_dp_idx(name); if (dp_idx >= 0) { - return create_dp_netdev(suffix, dp_idx, dpifp); + return create_dp_netdev(name, dp_idx, dpifp); } else { /* Scan for unused dp_idx number. 
*/ for (dp_idx = 0; dp_idx < N_DP_NETDEVS; dp_idx++) { - int error = create_dp_netdev(suffix, dp_idx, dpifp); + int error = create_dp_netdev(name, dp_idx, dpifp); if (error != EBUSY) { return error; } @@ -273,7 +269,7 @@ dpif_netdev_open(const char *name UNUSED, char *suffix, bool create, } } } else { - struct dp_netdev *dp = find_dp_netdev(suffix); + struct dp_netdev *dp = find_dp_netdev(name); if (dp) { *dpifp = create_dpif_netdev(dp); return 0; @@ -311,17 +307,17 @@ dpif_netdev_close(struct dpif *dpif) { struct dp_netdev *dp = get_dp_netdev(dpif); assert(dp->open_cnt > 0); - if (--dp->open_cnt == 0 && dp->deleted) { + if (--dp->open_cnt == 0 && dp->destroyed) { dp_netdev_free(dp); } free(dpif); } static int -dpif_netdev_delete(struct dpif *dpif) +dpif_netdev_destroy(struct dpif *dpif) { struct dp_netdev *dp = get_dp_netdev(dpif); - dp->deleted = true; + dp->destroyed = true; return 0; } @@ -367,6 +363,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, uint16_t flags, { bool internal = (flags & ODP_PORT_INTERNAL) != 0; struct dp_netdev_port *port; + struct netdev_options netdev_options; struct netdev *netdev; int mtu; int error; @@ -374,11 +371,14 @@ do_add_port(struct dp_netdev *dp, const char *devname, uint16_t flags, /* XXX reject devices already in some dp_netdev. */ /* Open and validate network device. 
*/ - if (!internal) { - error = netdev_open(devname, NETDEV_ETH_TYPE_ANY, &netdev); - } else { - error = netdev_open_tap(devname, &netdev); + memset(&netdev_options, 0, sizeof netdev_options); + netdev_options.name = devname; + netdev_options.ethertype = NETDEV_ETH_TYPE_ANY; + if (internal) { + netdev_options.type = "tap"; } + + error = netdev_open(&netdev_options, &netdev); if (error) { return error; } @@ -396,7 +396,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, uint16_t flags, port->netdev = netdev; port->internal = internal; - mtu = netdev_get_mtu(netdev); + netdev_get_mtu(netdev, &mtu); if (mtu > max_mtu) { max_mtu = mtu; } @@ -422,7 +422,7 @@ dpif_netdev_port_add(struct dpif *dpif, const char *devname, uint16_t flags, return do_add_port(dp, devname, flags, port_no); } } - return EXFULL; + return EFBIG; } static int @@ -470,6 +470,7 @@ static int do_del_port(struct dp_netdev *dp, uint16_t port_no) { struct dp_netdev_port *port; + char *name; int error; error = get_port_by_number(dp, port_no, &port); @@ -482,7 +483,10 @@ do_del_port(struct dp_netdev *dp, uint16_t port_no) dp->n_ports--; dp->serial++; + name = xstrdup(netdev_get_name(port->netdev)); netdev_close(port->netdev); + + free(name); free(port); return 0; @@ -575,7 +579,7 @@ dpif_netdev_port_list(const struct dpif *dpif, struct odp_port *ports, int n) } static int -dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep UNUSED) +dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep OVS_UNUSED) { struct dpif_netdev *dpif = dpif_netdev_cast(dpif_); if (dpif->dp_serial != dpif->dp->serial) { @@ -656,7 +660,7 @@ dp_netdev_lookup_flow(const struct dp_netdev *dp, const flow_t *key) { struct dp_netdev_flow *flow; - assert(key->reserved == 0); + assert(!key->reserved[0] && !key->reserved[1] && !key->reserved[2]); HMAP_FOR_EACH_WITH_HASH (flow, struct dp_netdev_flow, node, flow_hash(key, 0), &dp->flow_table) { if (flow_equal(&flow->key, key)) { @@ -667,7 +671,7 @@ 
dp_netdev_lookup_flow(const struct dp_netdev *dp, const flow_t *key) } static void -answer_flow_query(const struct dp_netdev_flow *flow, +answer_flow_query(struct dp_netdev_flow *flow, uint32_t query_flags, struct odp_flow *odp_flow) { if (flow) { @@ -675,9 +679,9 @@ answer_flow_query(const struct dp_netdev_flow *flow, odp_flow->stats.n_packets = flow->packet_count; odp_flow->stats.n_bytes = flow->byte_count; odp_flow->stats.used_sec = flow->used.tv_sec; - odp_flow->stats.used_nsec = flow->used.tv_usec * 1000; + odp_flow->stats.used_nsec = flow->used.tv_nsec; odp_flow->stats.tcp_flags = TCP_FLAGS(flow->tcp_ctl); - odp_flow->stats.ip_tos = flow->ip_tos; + odp_flow->stats.reserved = 0; odp_flow->stats.error = 0; if (odp_flow->n_actions > 0) { unsigned int n = MIN(odp_flow->n_actions, flow->n_actions); @@ -685,6 +689,11 @@ answer_flow_query(const struct dp_netdev_flow *flow, n * sizeof *odp_flow->actions); odp_flow->n_actions = flow->n_actions; } + + if (query_flags & ODPFF_ZERO_TCP_FLAGS) { + flow->tcp_ctl = 0; + } + } else { odp_flow->stats.error = ENOENT; } @@ -698,7 +707,8 @@ dpif_netdev_flow_get(const struct dpif *dpif, struct odp_flow flows[], int n) for (i = 0; i < n; i++) { struct odp_flow *odp_flow = &flows[i]; - answer_flow_query(dp_netdev_lookup_flow(dp, &odp_flow->key), odp_flow); + answer_flow_query(dp_netdev_lookup_flow(dp, &odp_flow->key), + odp_flow->flags, odp_flow); } return 0; } @@ -707,41 +717,48 @@ static int dpif_netdev_validate_actions(const union odp_action *actions, int n_actions, bool *mutates) { - unsigned int i; + unsigned int i; *mutates = false; - for (i = 0; i < n_actions; i++) { - const union odp_action *a = &actions[i]; - switch (a->type) { - case ODPAT_OUTPUT: - if (a->output.port >= MAX_PORTS) { - return EINVAL; + for (i = 0; i < n_actions; i++) { + const union odp_action *a = &actions[i]; + switch (a->type) { + case ODPAT_OUTPUT: + if (a->output.port >= MAX_PORTS) { + return EINVAL; } - break; + break; - case ODPAT_OUTPUT_GROUP: + 
case ODPAT_OUTPUT_GROUP: *mutates = true; - if (a->output_group.group >= N_GROUPS) { - return EINVAL; + if (a->output_group.group >= N_GROUPS) { + return EINVAL; } - break; + break; case ODPAT_CONTROLLER: break; - case ODPAT_SET_VLAN_VID: + case ODPAT_SET_VLAN_VID: *mutates = true; - if (a->vlan_vid.vlan_vid & htons(~VLAN_VID_MASK)) { - return EINVAL; + if (a->vlan_vid.vlan_vid & htons(~VLAN_VID_MASK)) { + return EINVAL; } - break; + break; - case ODPAT_SET_VLAN_PCP: + case ODPAT_SET_VLAN_PCP: *mutates = true; - if (a->vlan_pcp.vlan_pcp & ~VLAN_PCP_MASK) { - return EINVAL; + if (a->vlan_pcp.vlan_pcp & ~(VLAN_PCP_MASK >> VLAN_PCP_SHIFT)) { + return EINVAL; } - break; + break; + + case ODPAT_SET_NW_TOS: + *mutates = true; + if (a->nw_tos.nw_tos & IP_ECN_MASK) { + return EINVAL; + } + break; case ODPAT_STRIP_VLAN: case ODPAT_SET_DL_SRC: @@ -753,11 +770,11 @@ dpif_netdev_validate_actions(const union odp_action *actions, int n_actions, *mutates = true; break; - default: + default: return EOPNOTSUPP; - } - } - return 0; + } + } + return 0; } static int @@ -790,9 +807,9 @@ add_flow(struct dpif *dpif, struct odp_flow *odp_flow) struct dp_netdev_flow *flow; int error; - flow = xcalloc(1, sizeof *flow); + flow = xzalloc(sizeof *flow); flow->key = odp_flow->key; - flow->key.reserved = 0; + memset(flow->key.reserved, 0, sizeof flow->key.reserved); error = set_flow_actions(flow, odp_flow); if (error) { @@ -808,10 +825,9 @@ static void clear_stats(struct dp_netdev_flow *flow) { flow->used.tv_sec = 0; - flow->used.tv_usec = 0; + flow->used.tv_nsec = 0; flow->packet_count = 0; flow->byte_count = 0; - flow->ip_tos = 0; flow->tcp_ctl = 0; } @@ -827,7 +843,7 @@ dpif_netdev_flow_put(struct dpif *dpif, struct odp_flow_put *put) if (hmap_count(&dp->flow_table) < MAX_FLOWS) { return add_flow(dpif, &put->flow); } else { - return EXFULL; + return EFBIG; } } else { return ENOENT; @@ -854,7 +870,7 @@ dpif_netdev_flow_del(struct dpif *dpif, struct odp_flow *odp_flow) flow = 
dp_netdev_lookup_flow(dp, &odp_flow->key); if (flow) { - answer_flow_query(flow, odp_flow); + answer_flow_query(flow, 0, odp_flow); dp_netdev_free_flow(dp, flow); return 0; } else { @@ -874,7 +890,7 @@ dpif_netdev_flow_list(const struct dpif *dpif, struct odp_flow flows[], int n) if (i >= n) { break; } - answer_flow_query(flow, &flows[i++]); + answer_flow_query(flow, 0, &flows[i++]); } return hmap_count(&dp->flow_table); } @@ -890,7 +906,7 @@ dpif_netdev_execute(struct dpif *dpif, uint16_t in_port, flow_t flow; int error; - if (packet->size < ETH_HLEN || packet->size > UINT16_MAX) { + if (packet->size < ETH_HEADER_LEN || packet->size > UINT16_MAX) { return EINVAL; } @@ -912,7 +928,7 @@ dpif_netdev_execute(struct dpif *dpif, uint16_t in_port, * if we don't. */ copy = *packet; } - flow_extract(&copy, in_port, &flow); + flow_extract(&copy, 0, in_port, &flow); error = dp_netdev_execute_actions(dp, &copy, &flow, actions, n_actions); if (mutates) { ofpbuf_uninit(&copy); @@ -985,17 +1001,12 @@ static void dp_netdev_flow_used(struct dp_netdev_flow *flow, const flow_t *key, const struct ofpbuf *packet) { - time_timeval(&flow->used); + time_timespec(&flow->used); flow->packet_count++; flow->byte_count += packet->size; - if (key->dl_type == htons(ETH_P_IP)) { - struct ip_header *nh = packet->l3; - flow->ip_tos = nh->ip_tos; - - if (key->nw_proto == IPPROTO_TCP) { - struct tcp_header *th = packet->l4; - flow->tcp_ctl |= th->tcp_ctl; - } + if (key->dl_type == htons(ETH_TYPE_IP) && key->nw_proto == IPPROTO_TCP) { + struct tcp_header *th = packet->l4; + flow->tcp_ctl |= th->tcp_ctl; } } @@ -1006,7 +1017,7 @@ dp_netdev_port_input(struct dp_netdev *dp, struct dp_netdev_port *port, struct dp_netdev_flow *flow; flow_t key; - if (flow_extract(packet, port->port_no, &key) && dp->drop_frags) { + if (flow_extract(packet, 0, port->port_no, &key) && dp->drop_frags) { dp->n_frags++; return; } @@ -1066,14 +1077,23 @@ dp_netdev_wait(void) } } + +/* Modify the TCI field of 'packet'. 
If a VLAN tag is not present, one + * is added with the TCI field set to 'tci'. If a VLAN tag is present, + * then 'mask' bits are cleared before 'tci' is logically OR'd into the + * TCI field. + * + * Note that the function does not ensure that 'tci' does not affect + * bits outside of 'mask'. + */ static void -dp_netdev_modify_vlan_tci(struct ofpbuf *packet, flow_t *key, +dp_netdev_modify_vlan_tci(struct ofpbuf *packet, const flow_t *key, uint16_t tci, uint16_t mask) { struct vlan_eth_header *veh; if (key->dl_vlan != htons(ODP_VLAN_NONE)) { - /* Modify 'mask' bits, but maintain other TCI bits. */ + /* Clear 'mask' bits, but maintain other TCI bits. */ veh = packet->l2; veh->veth_tci &= ~htons(mask); veh->veth_tci |= htons(tci); @@ -1091,12 +1111,10 @@ dp_netdev_modify_vlan_tci(struct ofpbuf *packet, flow_t *key, memcpy(veh, &tmp, sizeof tmp); packet->l2 = (char*)packet->l2 - VLAN_HEADER_LEN; } - - key->dl_vlan = veh->veth_tci & htons(VLAN_VID_MASK); } static void -dp_netdev_strip_vlan(struct ofpbuf *packet, flow_t *key) +dp_netdev_strip_vlan(struct ofpbuf *packet) { struct vlan_eth_header *veh = packet->l2; if (veh->veth_type == htons(ETH_TYPE_VLAN)) { @@ -1110,29 +1128,25 @@ dp_netdev_strip_vlan(struct ofpbuf *packet, flow_t *key) packet->data = (char*)packet->data + VLAN_HEADER_LEN; packet->l2 = (char*)packet->l2 + VLAN_HEADER_LEN; memcpy(packet->data, &tmp, sizeof tmp); - - key->dl_vlan = htons(ODP_VLAN_NONE); } } static void -dp_netdev_set_dl_src(struct ofpbuf *packet, - const uint8_t dl_addr[ETH_ADDR_LEN]) +dp_netdev_set_dl_src(struct ofpbuf *packet, const uint8_t dl_addr[ETH_ADDR_LEN]) { struct eth_header *eh = packet->l2; memcpy(eh->eth_src, dl_addr, sizeof eh->eth_src); } static void -dp_netdev_set_dl_dst(struct ofpbuf *packet, - const uint8_t dl_addr[ETH_ADDR_LEN]) +dp_netdev_set_dl_dst(struct ofpbuf *packet, const uint8_t dl_addr[ETH_ADDR_LEN]) { struct eth_header *eh = packet->l2; memcpy(eh->eth_dst, dl_addr, sizeof eh->eth_dst); } static void 
-dp_netdev_set_nw_addr(struct ofpbuf *packet, flow_t *key, +dp_netdev_set_nw_addr(struct ofpbuf *packet, const flow_t *key, const struct odp_action_nw_addr *a) { if (key->dl_type == htons(ETH_TYPE_IP)) { @@ -1158,10 +1172,27 @@ dp_netdev_set_nw_addr(struct ofpbuf *packet, flow_t *key, } static void -dp_netdev_set_tp_port(struct ofpbuf *packet, flow_t *key, +dp_netdev_set_nw_tos(struct ofpbuf *packet, const flow_t *key, + const struct odp_action_nw_tos *a) +{ + if (key->dl_type == htons(ETH_TYPE_IP)) { + struct ip_header *nh = packet->l3; + uint8_t *field = &nh->ip_tos; + + /* Set the DSCP bits and preserve the ECN bits. */ + uint8_t new = a->nw_tos | (nh->ip_tos & IP_ECN_MASK); + + nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t)*field), + htons((uint16_t)a->nw_tos)); + *field = new; + } +} + +static void +dp_netdev_set_tp_port(struct ofpbuf *packet, const flow_t *key, const struct odp_action_tp_port *a) { - if (key->dl_type == htons(ETH_P_IP)) { + if (key->dl_type == htons(ETH_TYPE_IP)) { uint16_t *field; if (key->nw_proto == IPPROTO_TCP) { struct tcp_header *th = packet->l4; @@ -1173,6 +1204,8 @@ dp_netdev_set_tp_port(struct ofpbuf *packet, flow_t *key, field = a->type == ODPAT_SET_TP_SRC ? 
&uh->udp_src : &uh->udp_dst; uh->udp_csum = recalc_csum16(uh->udp_csum, *field, a->tp_port); *field = a->tp_port; + } else { + return; } } } @@ -1217,7 +1250,8 @@ dp_netdev_output_control(struct dp_netdev *dp, const struct ofpbuf *packet, } msg_size = sizeof *header + packet->size; - msg = ofpbuf_new(msg_size); + msg = ofpbuf_new(msg_size + DPIF_RECV_MSG_PADDING); + ofpbuf_reserve(msg, DPIF_RECV_MSG_PADDING); header = ofpbuf_put_uninit(msg, sizeof *header); header->type = queue_no; header->length = msg_size; @@ -1231,7 +1265,7 @@ dp_netdev_output_control(struct dp_netdev *dp, const struct ofpbuf *packet, static int dp_netdev_execute_actions(struct dp_netdev *dp, - struct ofpbuf *packet, flow_t *key, + struct ofpbuf *packet, const flow_t *key, const union odp_action *actions, int n_actions) { int i; @@ -1259,12 +1293,13 @@ dp_netdev_execute_actions(struct dp_netdev *dp, break; case ODPAT_SET_VLAN_PCP: - dp_netdev_modify_vlan_tci(packet, key, a->vlan_pcp.vlan_pcp << 13, - VLAN_PCP_MASK); + dp_netdev_modify_vlan_tci( + packet, key, a->vlan_pcp.vlan_pcp << VLAN_PCP_SHIFT, + VLAN_PCP_MASK); break; case ODPAT_STRIP_VLAN: - dp_netdev_strip_vlan(packet, key); + dp_netdev_strip_vlan(packet); break; case ODPAT_SET_DL_SRC: @@ -1280,6 +1315,10 @@ dp_netdev_execute_actions(struct dp_netdev *dp, dp_netdev_set_nw_addr(packet, key, &a->nw_addr); break; + case ODPAT_SET_NW_TOS: + dp_netdev_set_nw_tos(packet, key, &a->nw_tos); + break; + case ODPAT_SET_TP_SRC: case ODPAT_SET_TP_DST: dp_netdev_set_tp_port(packet, key, &a->tp_port); @@ -1290,7 +1329,6 @@ dp_netdev_execute_actions(struct dp_netdev *dp, } const struct dpif_class dpif_netdev_class = { - "netdev", "netdev", dp_netdev_run, dp_netdev_wait, @@ -1298,7 +1336,7 @@ const struct dpif_class dpif_netdev_class = { dpif_netdev_open, dpif_netdev_close, NULL, /* get_all_names */ - dpif_netdev_delete, + dpif_netdev_destroy, dpif_netdev_get_stats, dpif_netdev_get_drop_frags, dpif_netdev_set_drop_frags, @@ -1319,6 +1357,9 @@ const struct 
dpif_class dpif_netdev_class = { dpif_netdev_execute, dpif_netdev_recv_get_mask, dpif_netdev_recv_set_mask, + NULL, /* get_sflow_probability */ + NULL, /* set_sflow_probability */ + NULL, /* queue_to_priority */ dpif_netdev_recv, dpif_netdev_recv_wait, };