ofproto-dpif: Allow setting of flow eviction threshold
author Simon Horman <horms@verge.net.au>
Thu, 28 Jul 2011 23:38:50 +0000 (16:38 -0700)
committer Ben Pfaff <blp@nicira.com>
Thu, 28 Jul 2011 23:39:29 +0000 (16:39 -0700)
Allow setting the number of flows in the flow hash at which
eviction of entries from the kernel flow hash will begin to
occur.

The value may be set using a bridge's other-config column.

e.g.

ovs-vsctl set bridge br3 other-config:flow-eviction-threshold=10000

The default is 1000, matching the constant value previously used.

Increasing this value can reduce both CPU usage and packet
loss in situations where the number of active flows is
significantly larger than 1000.

ofproto/ofproto-dpif.c
ofproto/ofproto-provider.h
ofproto/ofproto.c
ofproto/ofproto.h
vswitchd/bridge.c
vswitchd/vswitch.xml

index 8bd6f75298aa38b4e1b0d265ea76ec628a5f6b36..cdc21bca65c12516403e52c589a97ec25fb6f2cd 100644 (file)
@@ -1871,11 +1871,12 @@ facet_max_idle(const struct ofproto_dpif *ofproto)
      * N_BUCKETS buckets whose width is BUCKET_WIDTH msecs each.  Each facet
      * that is installed in the kernel gets dropped in the appropriate bucket.
      * After the histogram has been built, we compute the cutoff so that only
-     * the most-recently-used 1% of facets (but at least 1000 flows) are kept
-     * cached.  At least the most-recently-used bucket of facets is kept, so
-     * actually an arbitrary number of facets can be kept in any given
-     * expiration run (though the next run will delete most of those unless
-     * they receive additional data).
+     * the most-recently-used 1% of facets (but at least
+     * ofproto->up.flow_eviction_threshold flows) are kept cached.  At least
+     * the most-recently-used bucket of facets is kept, so actually an
+     * arbitrary number of facets can be kept in any given expiration run
+     * (though the next run will delete most of those unless they receive
+     * additional data).
      *
      * This requires a second pass through the facets, in addition to the pass
      * made by update_stats(), because the former function never looks
@@ -1890,7 +1891,7 @@ facet_max_idle(const struct ofproto_dpif *ofproto)
     int i;
 
     total = hmap_count(&ofproto->facets);
-    if (total <= 1000) {
+    if (total <= ofproto->up.flow_eviction_threshold) {
         return N_BUCKETS * BUCKET_WIDTH;
     }
 
@@ -1908,7 +1909,8 @@ facet_max_idle(const struct ofproto_dpif *ofproto)
     subtotal = bucket = 0;
     do {
         subtotal += buckets[bucket++];
-    } while (bucket < N_BUCKETS && subtotal < MAX(1000, total / 100));
+    } while (bucket < N_BUCKETS &&
+             subtotal < MAX(ofproto->up.flow_eviction_threshold, total / 100));
 
     if (VLOG_IS_DBG_ENABLED()) {
         struct ds s;
index 62fb035346bd3ddbc85abf31576f373e8977bce8..be1e4de6cbc12f66d2415055756d641338a38eb3 100644 (file)
@@ -38,6 +38,9 @@ struct ofproto {
     /* Settings. */
     uint64_t fallback_dpid;     /* Datapath ID if no better choice found. */
     uint64_t datapath_id;       /* Datapath ID. */
+    unsigned flow_eviction_threshold; /* Threshold at which to begin flow
+                                       * table eviction. Only affects the
+                                       * ofproto-dpif implementation */
     char *mfr_desc;             /* Manufacturer. */
     char *hw_desc;              /* Hardware. */
     char *sw_desc;              /* Software version. */
index 201488de47a8b4500cc9a491dfe7a2a2c6ef501d..f40f99590acad5deb690379d1784d06cdbfc4003 100644 (file)
@@ -320,6 +320,8 @@ ofproto_create(const char *datapath_name, const char *datapath_type,
     hmap_insert(&all_ofprotos, &ofproto->hmap_node,
                 hash_string(ofproto->name, 0));
     ofproto->datapath_id = 0;
+    ofproto_set_flow_eviction_threshold(ofproto,
+                                        OFPROTO_FLOW_EVICTON_THRESHOLD_DEFAULT);
     ofproto->fallback_dpid = pick_fallback_dpid();
     ofproto->mfr_desc = xstrdup(DEFAULT_MFR_DESC);
     ofproto->hw_desc = xstrdup(DEFAULT_HW_DESC);
@@ -407,6 +409,18 @@ ofproto_set_in_band_queue(struct ofproto *ofproto, int queue_id)
     connmgr_set_in_band_queue(ofproto->connmgr, queue_id);
 }
 
+/* Configures the flow count at which 'ofproto' begins evicting flows from the
+ * kernel flow table.  A 'threshold' smaller than
+ * OFPROTO_FLOW_EVICTION_THRESHOLD_MIN is raised to that minimum. */
+void
+ofproto_set_flow_eviction_threshold(struct ofproto *ofproto, unsigned threshold)
+{
+    unsigned effective = threshold;
+
+    if (effective < OFPROTO_FLOW_EVICTION_THRESHOLD_MIN) {
+        effective = OFPROTO_FLOW_EVICTION_THRESHOLD_MIN;
+    }
+    ofproto->flow_eviction_threshold = effective;
+}
+
 void
 ofproto_set_desc(struct ofproto *p,
                  const char *mfr_desc, const char *hw_desc,
index 53ed69128b53c9a7f3c2b5a0ec630daf52a9f55c..4975a8d3b0bc60bd801b5af3424af7d86af9e0a7 100644 (file)
@@ -141,6 +141,9 @@ int ofproto_port_dump_done(struct ofproto_port_dump *);
           : (ofproto_port_dump_done(DUMP), false));         \
         )
 
+/* Default flow eviction threshold, in flows.
+ *
+ * NOTE(review): "EVICTON" is a misspelling of "EVICTION"; correcting it
+ * requires renaming every user of this macro in the same change. */
+#define OFPROTO_FLOW_EVICTON_THRESHOLD_DEFAULT 1000
+/* Smallest flow eviction threshold that ofproto_set_flow_eviction_threshold()
+ * will apply; smaller requests are raised to this value. */
+#define OFPROTO_FLOW_EVICTION_THRESHOLD_MIN    100
+
 int ofproto_port_add(struct ofproto *, struct netdev *, uint16_t *ofp_portp);
 int ofproto_port_del(struct ofproto *, uint16_t ofp_port);
 
@@ -156,6 +159,7 @@ void ofproto_reconnect_controllers(struct ofproto *);
 void ofproto_set_extra_in_band_remotes(struct ofproto *,
                                        const struct sockaddr_in *, size_t n);
 void ofproto_set_in_band_queue(struct ofproto *, int queue_id);
+void ofproto_set_flow_eviction_threshold(struct ofproto *, unsigned threshold);
 void ofproto_set_desc(struct ofproto *,
                       const char *mfr_desc, const char *hw_desc,
                       const char *sw_desc, const char *serial_desc,
index 35ee3e737357d1477a2e531d8d3263e33d3f13cd..6a4ebe58d2741155e9d679f41c92a34229019207 100644 (file)
@@ -148,6 +148,7 @@ static void bridge_add_ofproto_ports(struct bridge *);
 static void bridge_del_ofproto_ports(struct bridge *);
 static void bridge_refresh_ofp_port(struct bridge *);
 static void bridge_configure_datapath_id(struct bridge *);
+static void bridge_configure_flow_eviction_threshold(struct bridge *);
 static void bridge_configure_netflow(struct bridge *);
 static void bridge_configure_sflow(struct bridge *, int *sflow_bridge_number);
 static void bridge_configure_remotes(struct bridge *,
@@ -412,6 +413,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
         }
         bridge_configure_mirrors(br);
         bridge_configure_datapath_id(br);
+        bridge_configure_flow_eviction_threshold(br);
         bridge_configure_remotes(br, managers, n_managers);
         bridge_configure_netflow(br);
         bridge_configure_sflow(br, &sflow_bridge_number);
@@ -962,6 +964,22 @@ bridge_get_other_config(const struct ovsrec_bridge *br_cfg, const char *key)
                                 &ovsrec_bridge_col_other_config, key);
 }
 
+/* Configures the flow eviction threshold of 'br''s ofproto from the
+ * "flow-eviction-threshold" key of the bridge's other-config column.
+ *
+ * When the key is absent, or its value is not a well-formed nonnegative
+ * decimal integer, OFPROTO_FLOW_EVICTON_THRESHOLD_DEFAULT is used.  A value
+ * too large for 'unsigned' saturates at the largest 'unsigned' value instead
+ * of being silently truncated to a small one. */
+static void
+bridge_configure_flow_eviction_threshold(struct bridge *br)
+{
+    const unsigned threshold_max = ~0u; /* Largest 'unsigned' value. */
+    unsigned threshold = OFPROTO_FLOW_EVICTON_THRESHOLD_DEFAULT;
+    const char *threshold_str;
+
+    threshold_str = bridge_get_other_config(br->cfg, "flow-eviction-threshold");
+    if (threshold_str) {
+        char *tail;
+        long long value = strtoll(threshold_str, &tail, 10);
+
+        /* Accept only strings that are entirely a nonnegative number.  Parsing
+         * as signed lets us reject "-5" outright, which strtoul() would have
+         * silently converted to a huge positive value. */
+        if (tail != threshold_str && *tail == '\0' && value >= 0) {
+            threshold = value > threshold_max ? threshold_max : value;
+        }
+    }
+    ofproto_set_flow_eviction_threshold(br->ofproto, threshold);
+}
+
 static void
 bridge_pick_local_hw_addr(struct bridge *br, uint8_t ea[ETH_ADDR_LEN],
                           struct iface **hw_addr_iface)
index e72401fdcc4c2636700c572e07f97cb69c3c222b..b3029eb2633241a7b5b29da10e0aa2e346922b79 100644 (file)
             does not have QoS configured, or if the port does not have a queue
             with the specified ID, the default queue is used instead.
           </dd>
+          <dt><code>flow-eviction-threshold</code></dt>
+          <dd>
+            A number of flows as a nonnegative integer.  This sets the number
+            of flows at which eviction from the kernel flow table will
+            be triggered.
+            If there are a large number of flows then increasing this
+            value to around the number of flows present
+            can reduce both CPU usage and packet loss.
+          </dd>
+          <dd>
+            The default is 1000.
+          </dd>
+          <dd>
+            Values below 100 will be rounded up to 100.
+          </dd>
         </dl>
       </column>
     </group>