/* Userspace communication. */
static DEFINE_SPINLOCK(brc_lock); /* Ensure atomic access to these vars. */
static DECLARE_COMPLETION(brc_done); /* Userspace signaled operation done? */
-static int brc_err; /* Error code from userspace. */
+static struct sk_buff *brc_reply; /* Reply from userspace. */
static u32 brc_seq; /* Sequence number for current op. */
-static int brc_send_command(const char *bridge, const char *port, int op);
+static struct sk_buff *brc_send_command(struct sk_buff *, struct nlattr **attrs);
+static int brc_send_simple_command(struct sk_buff *);
static int
get_dp_ifindices(int *indices, int num)
rcu_read_unlock();
}
+/* Builds a request for userspace with generic netlink command 'op' that
+ * names datapath 'bridge' and, if 'port' is nonnull, interface 'port'.  The
+ * netlink sequence number is left as 0; brc_send_command() assigns the real
+ * one when the request is sent.
+ *
+ * Returns the new message, which the caller owns, or NULL if the message
+ * could not be allocated or composed. */
+static struct sk_buff *
+brc_make_request(int op, const char *bridge, const char *port)
+{
+	struct sk_buff *skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		goto error;
+
+	/* genlmsg_put() returns NULL if the header does not fit.  Never hand
+	 * the caller a message that lacks its header. */
+	if (!genlmsg_put(skb, 0, 0, &brc_genl_family, 0, op))
+		goto nla_put_failure;
+
+	/* The NLA_PUT_* macros jump to nla_put_failure on error. */
+	NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge);
+	if (port)
+		NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port);
+	return skb;
+
+nla_put_failure:
+	kfree_skb(skb);
+error:
+	return NULL;
+}
+
+/* Sends 'request' to userspace through brc_send_command() and interprets the
+ * reply as a bare status.  Returns the error from brc_send_command() if the
+ * exchange itself failed, otherwise the negated BRC_GENL_A_ERR_CODE value
+ * taken from the reply. */
+static int brc_send_simple_command(struct sk_buff *request)
+{
+	struct nlattr *attrs[BRC_GENL_A_MAX + 1];
+	struct sk_buff *reply = brc_send_command(request, attrs);
+	int err_code;
+
+	if (IS_ERR(reply))
+		return PTR_ERR(reply);
+
+	/* Fetch the code before releasing the reply that 'attrs' was parsed
+	 * from. */
+	err_code = nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
+	kfree_skb(reply);
+
+	return -err_code;
+}
+
static int brc_add_del_bridge(char __user *uname, int add)
{
+ struct sk_buff *request;
char name[IFNAMSIZ];
if (copy_from_user(name, uname, IFNAMSIZ))
return -EFAULT;
name[IFNAMSIZ - 1] = 0;
- return brc_send_command(name, NULL,
- add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL);
+ request = brc_make_request(add ? BRC_GENL_C_DP_ADD : BRC_GENL_C_DP_DEL,
+ name, NULL);
+ if (!request)
+ return -ENOMEM;
+
+ return brc_send_simple_command(request);
}
static int brc_get_bridges(int __user *uindices, int n)
static int
brc_add_del_port(struct net_device *dev, int port_ifindex, int add)
{
+ struct sk_buff *request;
struct net_device *port;
- char dev_name[IFNAMSIZ], port_name[IFNAMSIZ];
int err;
port = __dev_get_by_index(&init_net, port_ifindex);
return -EINVAL;
/* Save name of dev and port because there's a race between the
- * rtnl_unlock() and the brc_send_command(). */
- strcpy(dev_name, dev->name);
- strcpy(port_name, port->name);
+ * rtnl_unlock() and the brc_send_simple_command(). */
+ request = brc_make_request(add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL,
+ dev->name, port->name);
+ if (!request)
+ return -ENOMEM;
rtnl_unlock();
- err = brc_send_command(dev_name, port_name,
- add ? BRC_GENL_C_PORT_ADD : BRC_GENL_C_PORT_DEL);
+ err = brc_send_simple_command(request);
rtnl_lock();
return err;
return num;
}
+/*
+ * Format up to a page worth of forwarding table entries
+ * userbuf -- where to copy result
+ * maxnum -- maximum number of entries desired
+ * (limited to a page for sanity)
+ * offset -- number of records to skip
+ *
+ * The entries are obtained by sending a BRC_GENL_C_FDB_QUERY request for
+ * 'dev' to userspace and copying the BRC_GENL_A_FDB_DATA payload of the
+ * reply to 'userbuf'.
+ *
+ * Returns the number of entries copied, or a negative errno value.
+ *
+ * The function releases RTNL around the userspace round trip and takes it
+ * again before returning, so it expects to be entered with RTNL held.
+ */
+static int brc_get_fdb_entries(struct net_device *dev, void __user *userbuf,
+ unsigned long maxnum, unsigned long offset)
+{
+ struct nlattr *attrs[BRC_GENL_A_MAX + 1];
+ struct sk_buff *request, *reply;
+ int retval;
+ int len;
+
+ /* Clamp size to PAGE_SIZE, test maxnum to avoid overflow */
+ if (maxnum > PAGE_SIZE/sizeof(struct __fdb_entry))
+ maxnum = PAGE_SIZE/sizeof(struct __fdb_entry);
+
+ request = brc_make_request(BRC_GENL_C_FDB_QUERY, dev->name, NULL);
+ if (!request)
+ return -ENOMEM;
+ /* NOTE(review): no explicit goto targets nla_put_failure, so these
+  * macros presumably jump there when an attribute does not fit. */
+ NLA_PUT_U64(request, BRC_GENL_A_FDB_COUNT, maxnum);
+ NLA_PUT_U64(request, BRC_GENL_A_FDB_SKIP, offset);
+
+ /* RTNL is not held while waiting for userspace to answer; every path
+  * from here on leaves through 'exit', which takes it again. */
+ rtnl_unlock();
+ reply = brc_send_command(request, attrs);
+ retval = PTR_ERR(reply);
+ if (IS_ERR(reply))
+ goto exit;
+
+ /* Userspace reports a positive error code; negate it for the caller. */
+ retval = -nla_get_u32(attrs[BRC_GENL_A_ERR_CODE]);
+ if (retval < 0)
+ goto exit_free_skb;
+
+ /* A successful reply must carry FDB data that is a whole number of
+  * entries and no more than was asked for. */
+ retval = -EINVAL;
+ if (!attrs[BRC_GENL_A_FDB_DATA])
+ goto exit_free_skb;
+ len = nla_len(attrs[BRC_GENL_A_FDB_DATA]);
+ if (len % sizeof(struct __fdb_entry) ||
+ len / sizeof(struct __fdb_entry) > maxnum)
+ goto exit_free_skb;
+
+ retval = len / sizeof(struct __fdb_entry);
+ if (copy_to_user(userbuf, nla_data(attrs[BRC_GENL_A_FDB_DATA]), len))
+ retval = -EFAULT;
+
+exit_free_skb:
+ kfree_skb(reply);
+exit:
+ rtnl_lock();
+ return retval;
+
+ /* Reached only before rtnl_unlock(), so RTNL is still held here and
+  * 'request' has not been sent yet. */
+nla_put_failure:
+ kfree_skb(request);
+ return -ENOMEM;
+}
+
/* Legacy ioctl's through SIOCDEVPRIVATE. Called with rtnl_lock. */
static int
old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
case BRCTL_GET_PORT_LIST:
return brc_get_port_list(dev, (int __user *)args[1], args[2]);
+
+ case BRCTL_GET_FDB_ENTRIES:
+ return brc_get_fdb_entries(dev, (void __user *)args[1],
+ args[2], args[3]);
}
return -EOPNOTSUPP;
/* Attribute policy: what each attribute may contain. */
static struct nla_policy brc_genl_policy[BRC_GENL_A_MAX + 1] = {
[BRC_GENL_A_ERR_CODE] = { .type = NLA_U32 },
+
[BRC_GENL_A_PROC_DIR] = { .type = NLA_NUL_STRING },
[BRC_GENL_A_PROC_NAME] = { .type = NLA_NUL_STRING },
[BRC_GENL_A_PROC_DATA] = { .type = NLA_NUL_STRING },
+
+ [BRC_GENL_A_FDB_DATA] = { .type = NLA_UNSPEC },
};
static int
if (!info->attrs[BRC_GENL_A_ERR_CODE])
return -EINVAL;
+ skb = skb_clone(skb, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
spin_lock_irqsave(&brc_lock, flags);
if (brc_seq == info->snd_seq) {
- brc_err = nla_get_u32(info->attrs[BRC_GENL_A_ERR_CODE]);
+ brc_seq++;
+
+ if (brc_reply)
+ kfree_skb(brc_reply);
+ brc_reply = skb;
+
complete(&brc_done);
err = 0;
} else {
+ kfree_skb(skb);
err = -ESTALE;
}
spin_unlock_irqrestore(&brc_lock, flags);
.dumpit = NULL
};
-static int brc_send_command(const char *bridge, const char *port, int op)
+static struct sk_buff *brc_send_command(struct sk_buff *request, struct nlattr **attrs)
{
unsigned long int flags;
- struct sk_buff *skb;
- void *data;
+ struct sk_buff *reply;
int error;
mutex_lock(&brc_serial);
/* Increment sequence number first, so that we ignore any replies
* to stale requests. */
spin_lock_irqsave(&brc_lock, flags);
- brc_seq++;
+ nlmsg_hdr(request)->nlmsg_seq = ++brc_seq;
INIT_COMPLETION(brc_done);
spin_unlock_irqrestore(&brc_lock, flags);
- /* Compose message. */
- skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
- error = -ENOMEM;
- if (skb == NULL)
- goto exit_unlock;
- data = genlmsg_put(skb, 0, brc_seq, &brc_genl_family, 0, op);
-
- NLA_PUT_STRING(skb, BRC_GENL_A_DP_NAME, bridge);
- if (port)
- NLA_PUT_STRING(skb, BRC_GENL_A_PORT_NAME, port);
-
- genlmsg_end(skb, data);
+ nlmsg_end(request, nlmsg_hdr(request));
/* Send message. */
- error = genlmsg_multicast(skb, 0, brc_mc_group.id, GFP_KERNEL);
+ error = genlmsg_multicast(request, 0, brc_mc_group.id, GFP_KERNEL);
if (error < 0)
- goto exit_unlock;
+ goto error;
/* Wait for reply. */
error = -ETIMEDOUT;
if (!wait_for_completion_timeout(&brc_done, BRC_TIMEOUT))
- goto exit_unlock;
+ goto error;
+
+ /* Grab reply. */
+ spin_lock_irqsave(&brc_lock, flags);
+ reply = brc_reply;
+ brc_reply = NULL;
+ spin_unlock_irqrestore(&brc_lock, flags);
- error = -brc_err;
- goto exit_unlock;
+ mutex_unlock(&brc_serial);
-nla_put_failure:
- kfree_skb(skb);
-exit_unlock:
+ /* Re-parse message. Can't fail, since it parsed correctly once
+ * already. */
+ error = nlmsg_parse(nlmsg_hdr(reply), GENL_HDRLEN,
+ attrs, BRC_GENL_A_MAX, brc_genl_policy);
+ WARN_ON(error);
+
+ return reply;
+
+error:
mutex_unlock(&brc_serial);
- return error;
+ return ERR_PTR(error);
}
int brc_add_dp(struct datapath *dp)
dp = kzalloc(sizeof *dp, GFP_KERNEL);
if (dp == NULL)
goto err_put_module;
-
+ INIT_LIST_HEAD(&dp->port_list);
mutex_init(&dp->mutex);
dp->dp_idx = dp_idx;
for (i = 0; i < DP_N_QUEUES; i++)
skb_queue_head_init(&dp->queues[i]);
init_waitqueue_head(&dp->waitqueue);
+ /* Allocate table. */
+ err = -ENOMEM;
+ rcu_assign_pointer(dp->table, dp_table_create(DP_L1_SIZE));
+ if (!dp->table)
+ goto err_free_dp;
+
/* Setup our datapath device */
dp_dev = dp_dev_create(dp, devname, ODPP_LOCAL);
err = PTR_ERR(dp_dev);
if (IS_ERR(dp_dev))
- goto err_free_dp;
-
- err = -ENOMEM;
- rcu_assign_pointer(dp->table, dp_table_create(DP_L1_SIZE));
- if (!dp->table)
- goto err_destroy_dp_dev;
- INIT_LIST_HEAD(&dp->port_list);
+ goto err_destroy_table;
err = new_nbp(dp, dp_dev, ODPP_LOCAL);
- if (err)
+ if (err) {
+ dp_dev_destroy(dp_dev);
goto err_destroy_table;
+ }
dp->drop_frags = 0;
dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
return 0;
err_destroy_local_port:
- dp_del_port(dp->ports[ODPP_LOCAL], NULL);
+ dp_del_port(dp->ports[ODPP_LOCAL]);
err_destroy_table:
dp_table_destroy(dp->table, 0);
-err_destroy_dp_dev:
- dp_dev_destroy(dp_dev);
err_free_dp:
kfree(dp);
err_put_module:
return err;
}
-static void do_destroy_dp(struct datapath *dp, struct list_head *dp_devs)
+static void do_destroy_dp(struct datapath *dp)
{
struct net_bridge_port *p, *n;
int i;
list_for_each_entry_safe (p, n, &dp->port_list, node)
if (p->port_no != ODPP_LOCAL)
- dp_del_port(p, dp_devs);
+ dp_del_port(p);
if (dp_del_dp_hook)
dp_del_dp_hook(dp);
rcu_assign_pointer(dps[dp->dp_idx], NULL);
- dp_del_port(dp->ports[ODPP_LOCAL], dp_devs);
+ dp_del_port(dp->ports[ODPP_LOCAL]);
dp_table_destroy(dp->table, 1);
static int destroy_dp(int dp_idx)
{
- struct dp_dev *dp_dev, *next;
struct datapath *dp;
- LIST_HEAD(dp_devs);
int err;
rtnl_lock();
if (!dp)
goto err_unlock;
- do_destroy_dp(dp, &dp_devs);
+ do_destroy_dp(dp);
err = 0;
err_unlock:
mutex_unlock(&dp_mutex);
rtnl_unlock();
- list_for_each_entry_safe (dp_dev, next, &dp_devs, list)
- free_netdev(dp_dev->dev);
return err;
}
return err;
}
-int dp_del_port(struct net_bridge_port *p, struct list_head *dp_devs)
+int dp_del_port(struct net_bridge_port *p)
{
ASSERT_RTNL();
if (is_dp_dev(p->dev)) {
dp_dev_destroy(p->dev);
- if (dp_devs) {
- struct dp_dev *dp_dev = dp_dev_priv(p->dev);
- list_add(&dp_dev->list, dp_devs);
- }
}
if (p->port_no != ODPP_LOCAL && dp_del_if_hook) {
dp_del_if_hook(p);
static int del_port(int dp_idx, int port_no)
{
- struct dp_dev *dp_dev, *next;
struct net_bridge_port *p;
struct datapath *dp;
LIST_HEAD(dp_devs);
if (!p)
goto out_unlock_dp;
- err = dp_del_port(p, &dp_devs);
+ err = dp_del_port(p);
out_unlock_dp:
mutex_unlock(&dp->mutex);
out_unlock_rtnl:
rtnl_unlock();
out:
- list_for_each_entry_safe (dp_dev, next, &dp_devs, list)
- free_netdev(dp_dev->dev);
return err;
}
struct sw_flow *flow;
WARN_ON_ONCE(skb_shared(skb));
- WARN_ON_ONCE(skb->destructor);
/* BHs are off so we don't have to use get_cpu()/put_cpu() here. */
stats = percpu_ptr(dp->stats_percpu, smp_processor_id());
#include "flow.h"
#include "brc_sysfs.h"
-struct sk_buff;
-
/* Mask for the priority bits in a vlan header. If we ever merge upstream
* then this should go into include/linux/if_vlan.h. */
#define VLAN_PCP_MASK 0xe000
void *aux);
void dp_process_received_packet(struct sk_buff *, struct net_bridge_port *);
-int dp_del_port(struct net_bridge_port *, struct list_head *);
-int dp_output_port(struct datapath *, struct sk_buff *, int out_port,
- int ignore_no_fwd);
+int dp_del_port(struct net_bridge_port *);
int dp_output_control(struct datapath *, struct sk_buff *, int, u32 arg);
-void dp_set_origin(struct datapath *, u16, struct sk_buff *);
struct datapath *get_dp(int dp_idx);
#include "datapath.h"
#include "dp_dev.h"
+/* Packet and byte counters for a datapath device, kept separately for each
+ * CPU.  dp_dev_recv() and dp_dev_xmit() update the instance for the CPU they
+ * run on; dp_dev_get_stats() adds up the instances of all possible CPUs. */
+struct pcpu_lstats {
+ unsigned long rx_packets;
+ unsigned long rx_bytes;
+ unsigned long tx_packets;
+ unsigned long tx_bytes;
+};
+
struct datapath *dp_dev_get_dp(struct net_device *netdev)
{
return dp_dev_priv(netdev)->dp;
static struct net_device_stats *dp_dev_get_stats(struct net_device *netdev)
{
struct dp_dev *dp_dev = dp_dev_priv(netdev);
- return &dp_dev->stats;
+ struct net_device_stats *stats;
+ int i;
+
+ stats = &dp_dev->stats;
+ memset(stats, 0, sizeof *stats);
+ for_each_possible_cpu(i) {
+ const struct pcpu_lstats *lb_stats;
+
+ lb_stats = per_cpu_ptr(dp_dev->lstats, i);
+ stats->rx_bytes += lb_stats->rx_bytes;
+ stats->rx_packets += lb_stats->rx_packets;
+ stats->tx_bytes += lb_stats->tx_bytes;
+ stats->tx_packets += lb_stats->tx_packets;
+ }
+ return stats;
}
int dp_dev_recv(struct net_device *netdev, struct sk_buff *skb)
{
struct dp_dev *dp_dev = dp_dev_priv(netdev);
+ struct pcpu_lstats *lb_stats;
int len;
len = skb->len;
skb->pkt_type = PACKET_HOST;
else
netif_rx_ni(skb);
netdev->last_rx = jiffies;
- dp_dev->stats.rx_packets++;
- dp_dev->stats.rx_bytes += len;
+ lb_stats = per_cpu_ptr(dp_dev->lstats, smp_processor_id());
+ lb_stats->rx_packets++;
+ lb_stats->rx_bytes += len;
return len;
}
return 0;
}
+/* Not reentrant (because it is called with BHs disabled), but may be called
+ * simultaneously on different CPUs. */
static int dp_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
{
struct dp_dev *dp_dev = dp_dev_priv(netdev);
+ struct pcpu_lstats *lb_stats;
- /* By orphaning 'skb' we will screw up socket accounting slightly, but
- * the effect is limited to the device queue length. If we don't
- * do this, then the sk_buff will be destructed eventually, but it is
- * harder to predict when. */
- skb_orphan(skb);
-
- /* We are going to modify 'skb', by sticking it on &dp_dev->xmit_queue,
- * so we need to have our own clone. (At any rate, fwd_port_input()
- * will need its own clone, so there's no benefit to queuing any other
- * way.) */
+ /* dp_process_received_packet() needs its own clone. */
skb = skb_share_check(skb, GFP_ATOMIC);
if (!skb)
return 0;
- dp_dev->stats.tx_packets++;
- dp_dev->stats.tx_bytes += skb->len;
-
- if (skb_queue_len(&dp_dev->xmit_queue) >= netdev->tx_queue_len) {
- /* Queue overflow. Stop transmitter. */
- netif_stop_queue(netdev);
-
- /* We won't see all dropped packets individually, so overrun
- * error is appropriate. */
- dp_dev->stats.tx_fifo_errors++;
- }
- skb_queue_tail(&dp_dev->xmit_queue, skb);
- netdev->trans_start = jiffies;
+ lb_stats = per_cpu_ptr(dp_dev->lstats, smp_processor_id());
+ lb_stats->tx_packets++;
+ lb_stats->tx_bytes += skb->len;
- schedule_work(&dp_dev->xmit_work);
+ skb_reset_mac_header(skb);
+ rcu_read_lock_bh();
+ dp_process_received_packet(skb, dp_dev->dp->ports[dp_dev->port_no]);
+ rcu_read_unlock_bh();
return 0;
}
-static void dp_dev_do_xmit(struct work_struct *work)
-{
- struct dp_dev *dp_dev = container_of(work, struct dp_dev, xmit_work);
- struct datapath *dp = dp_dev->dp;
- struct sk_buff *skb;
-
- while ((skb = skb_dequeue(&dp_dev->xmit_queue)) != NULL) {
- skb_reset_mac_header(skb);
- rcu_read_lock_bh();
- dp_process_received_packet(skb, dp->ports[dp_dev->port_no]);
- rcu_read_unlock_bh();
- }
- netif_wake_queue(dp_dev->dev);
-}
-
static int dp_dev_open(struct net_device *netdev)
{
netif_start_queue(netdev);
.get_tso = ethtool_op_get_tso,
};
+/* 'init' hook installed by do_setup(): allocates the per-CPU statistics
+ * counters of 'netdev'.  Returns 0 on success, -ENOMEM if the allocation
+ * fails. */
+static int dp_dev_init(struct net_device *netdev)
+{
+	struct dp_dev *priv = dp_dev_priv(netdev);
+
+	priv->lstats = alloc_percpu(struct pcpu_lstats);
+	return priv->lstats ? 0 : -ENOMEM;
+}
+
+/* 'destructor' hook installed by do_setup(): releases the per-CPU statistics
+ * counters of 'netdev' and then the net_device itself. */
+static void dp_dev_free(struct net_device *netdev)
+{
+	/* The counters are reached through 'netdev', so release them before
+	 * the device is freed. */
+	free_percpu(dp_dev_priv(netdev)->lstats);
+	free_netdev(netdev);
+}
+
static void
do_setup(struct net_device *netdev)
{
netdev->open = dp_dev_open;
SET_ETHTOOL_OPS(netdev, &dp_ethtool_ops);
netdev->stop = dp_dev_stop;
- netdev->tx_queue_len = 100;
+ netdev->tx_queue_len = 0;
netdev->set_mac_address = dp_dev_mac_addr;
+ netdev->init = dp_dev_init;
+ netdev->destructor = dp_dev_free;
netdev->flags = IFF_BROADCAST | IFF_MULTICAST;
+ netdev->features = NETIF_F_LLTX; /* XXX other features? */
random_ether_addr(netdev->dev_addr);
dp_dev->dp = dp;
dp_dev->port_no = port_no;
dp_dev->dev = netdev;
- skb_queue_head_init(&dp_dev->xmit_queue);
- INIT_WORK(&dp_dev->xmit_work, dp_dev_do_xmit);
return netdev;
}
/* Called with RTNL lock and dp_mutex.*/
void dp_dev_destroy(struct net_device *netdev)
{
- struct dp_dev *dp_dev = dp_dev_priv(netdev);
-
- netif_tx_disable(netdev);
- synchronize_net();
- skb_queue_purge(&dp_dev->xmit_queue);
unregister_netdevice(netdev);
}
#ifndef DP_DEV_H
#define DP_DEV_H 1
+#include <linux/percpu.h>
+
struct dp_dev {
struct datapath *dp;
int port_no;
struct net_device *dev;
struct net_device_stats stats;
- struct sk_buff_head xmit_queue;
- struct work_struct xmit_work;
-
- struct list_head list;
+ struct pcpu_lstats *lstats;
};
static inline struct dp_dev *dp_dev_priv(struct net_device *netdev)
if (event == NETDEV_UNREGISTER && p) {
struct datapath *dp = p->dp;
mutex_lock(&dp->mutex);
- dp_del_port(p, NULL);
+ dp_del_port(p);
mutex_unlock(&dp->mutex);
}
return NOTIFY_DONE;
{
return alloc_skb(size, flags);
}
-
#endif /* linux kernel < 2.6.19 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
+/* Compatibility definition, compiled only for kernels older than 2.6.22 (see
+ * the enclosing #if): returns the start of the data in 'skb' viewed as a
+ * netlink message header. */
+static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
+{
+ return (struct nlmsghdr *)skb->data;
+}
+#endif
#endif
argv += optind;
/* Make sure that the ezio3 terminfo entry is available. */
- dummy_fd = open("/dev/null", O_RDWR);
+ dummy_fd = get_null_fd();
if (dummy_fd >= 0) {
if (setupterm("ezio3", dummy_fd, &retval) == ERR) {
if (retval == 0) {
}
}
del_curterm(cur_term);
- close(dummy_fd);
- } else {
- ovs_error(errno, "failed to open /dev/null");
}
/* Lock serial port. */
const char *is_connected, *local_ip;
dict_lookup(dict, "local.is-connected", &is_connected);
- dict_lookup(dict, "in-band.local-ip", &local_ip);
+ dict_lookup(dict, "remote.local-ip", &local_ip);
if (!is_connected && !local_ip) {
/* If we're not connected to the datapath and don't have a local IP,
* then we won't have anything useful to show anyhow. */
static bool inited = false;
dict_lookup(dict, "local.is-connected", &is_connected);
- dict_lookup(dict, "in-band.local-ip", &local_ip);
+ dict_lookup(dict, "remote.local-ip", &local_ip);
if (!is_connected && !local_ip) {
/* If we're not connected to the datapath and don't have a local IP,
* then we won't have anything useful to show anyhow. */
/* Attributes that can be attached to the datapath's netlink messages. */
enum {
BRC_GENL_A_UNSPEC,
- BRC_GENL_A_DP_NAME, /* Datapath name. */
- BRC_GENL_A_PORT_NAME, /* Interface name. */
- BRC_GENL_A_ERR_CODE, /* Positive error code. */
- BRC_GENL_A_MC_GROUP, /* Generic netlink multicast group. */
- BRC_GENL_A_PROC_DIR, /* Name of subdirectory in /proc. */
- BRC_GENL_A_PROC_NAME, /* Name of file in /proc. */
- BRC_GENL_A_PROC_DATA, /* Contents of file in /proc. */
+
+ /*
+ * "K:" messages appear in messages from the kernel to userspace.
+ * "U:" messages appear in messages from userspace to the kernel.
+ */
+
+ /* BRC_GENL_C_DP_ADD, BRC_GENL_C_DP_DEL. */
+ BRC_GENL_A_DP_NAME, /* K: Datapath name. */
+
+ /* BRC_GENL_C_DP_ADD, BRC_GENL_C_DP_DEL,
+ BRC_GENL_C_PORT_ADD, BRC_GENL_C_PORT_DEL. */
+ BRC_GENL_A_PORT_NAME, /* K: Interface name. */
+
+ /* BRC_GENL_C_DP_RESULT. */
+ BRC_GENL_A_ERR_CODE, /* U: Positive error code. */
+
+ /* BRC_GENL_C_QUERY_MC. */
+ BRC_GENL_A_MC_GROUP, /* K: Generic netlink multicast group. */
+
+ /* BRC_GENL_C_SET_PROC. */
+ BRC_GENL_A_PROC_DIR, /* U: Name of subdirectory in /proc. */
+ BRC_GENL_A_PROC_NAME, /* U: Name of file in /proc. */
+ BRC_GENL_A_PROC_DATA, /* U: Contents of file in /proc. */
+
+ /* BRC_GENL_C_FDB_QUERY. */
+ BRC_GENL_A_FDB_COUNT, /* K: Number of FDB entries to read. */
+ BRC_GENL_A_FDB_SKIP, /* K: Record offset into FDB to start reading. */
+
+ /* BRC_GENL_C_DP_RESULT. */
+ BRC_GENL_A_FDB_DATA, /* U: FDB records. */
__BRC_GENL_A_MAX,
BRC_GENL_A_MAX = __BRC_GENL_A_MAX - 1
* "K:" messages are sent by the kernel to userspace.
* "U:" messages are sent by userspace to the kernel.
*/
- BRC_GENL_C_DP_ADD, /* K: Datapath created. */
- BRC_GENL_C_DP_DEL, /* K: Datapath destroyed. */
- BRC_GENL_C_DP_RESULT, /* U: Return code from ovs-brcompatd. */
- BRC_GENL_C_PORT_ADD, /* K: Port added to datapath. */
- BRC_GENL_C_PORT_DEL, /* K: Port removed from datapath. */
- BRC_GENL_C_QUERY_MC, /* U: Get multicast group for brcompat. */
- BRC_GENL_C_SET_PROC, /* U: Set contents of file in /proc. */
+ BRC_GENL_C_DP_ADD, /* K: Datapath created. */
+ BRC_GENL_C_DP_DEL, /* K: Datapath destroyed. */
+ BRC_GENL_C_DP_RESULT, /* U: Return code from ovs-brcompatd. */
+ BRC_GENL_C_PORT_ADD, /* K: Port added to datapath. */
+ BRC_GENL_C_PORT_DEL, /* K: Port removed from datapath. */
+ BRC_GENL_C_QUERY_MC, /* U: Get multicast group for brcompat. */
+ BRC_GENL_C_SET_PROC, /* U: Set contents of file in /proc. */
+ BRC_GENL_C_FDB_QUERY, /* K: Read records from forwarding database. */
__BRC_GENL_C_MAX,
BRC_GENL_C_MAX = __BRC_GENL_C_MAX - 1
static bool
parse_mac(const char *s, uint8_t mac[6])
{
- return sscanf(s, "%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8,
- &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]) == 6;
+ return (sscanf(s, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))
+ == ETH_ADDR_SCAN_COUNT);
}
static bool
#include "coverage-counters.h"
#include "dynamic-string.h"
#include "hash.h"
+#include "unixctl.h"
#include "util.h"
#define THIS_MODULE VLM_coverage
static unsigned int epoch;
+/* Handler for the "coverage/log" unixctl command: logs the coverage counters
+ * at warning level, with duplicate suppression turned off, and then sends an
+ * empty reply with code 200 on 'conn'.  'args' is not used. */
+static void
+coverage_unixctl_log(struct unixctl_conn *conn, const char *args UNUSED)
+{
+    const bool suppress_dups = false;
+
+    coverage_log(VLL_WARN, suppress_dups);
+    unixctl_command_reply(conn, 200, NULL);
+}
+
+/* Registers the "coverage/log" unixctl command, which is served by
+ * coverage_unixctl_log(). */
+void
+coverage_init(void)
+{
+ unixctl_command_register("coverage/log", coverage_unixctl_log);
+}
+
/* Sorts coverage counters in descending order by count, within equal counts
* alphabetically by name. */
static int
VLOG(level, "%-24s %5u / %9llu", c->name, c->count, c->count + c->total);
}
-/* Logs the coverage counters at the given vlog 'level'. */
+/* Logs the coverage counters at the given vlog 'level'. If
+ * 'suppress_dups' is true, then duplicate events are not displayed. */
void
-coverage_log(enum vlog_level level)
+coverage_log(enum vlog_level level, bool suppress_dups)
{
size_t n_never_hit;
uint32_t hash;
}
hash = coverage_hash();
- if (coverage_hit(hash)) {
- VLOG(level, "Skipping details of duplicate event coverage for "
- "hash=%08"PRIx32" in epoch %u", hash, epoch);
- return;
+ if (suppress_dups) {
+ if (coverage_hit(hash)) {
+ VLOG(level, "Skipping details of duplicate event coverage for "
+ "hash=%08"PRIx32" in epoch %u", hash, epoch);
+ return;
+ }
}
n_never_hit = 0;
NAME##_count.count += AMOUNT; \
} while (0)
-void coverage_log(enum vlog_level);
+void coverage_init(void);
+void coverage_log(enum vlog_level, bool suppress_dups);
void coverage_clear(void);
#endif /* coverage.h */
/*
- * Copyright (c) 2008 Nicira Networks.
+ * Copyright (c) 2008, 2009 Nicira Networks.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
return ds->string;
}
+/* Hands the current contents of 'ds' over to the caller as a null-terminated
+ * string and reinitializes 'ds'.  The caller is expected to release the
+ * returned string with free(). */
+char *
+ds_steal_cstr(struct ds *ds)
+{
+    char *stolen = ds_cstr(ds);
+
+    ds_init(ds);
+    return stolen;
+}
+
void
ds_destroy(struct ds *ds)
{
/*
- * Copyright (c) 2008 Nicira Networks.
+ * Copyright (c) 2008, 2009 Nicira Networks.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
int ds_get_line(struct ds *, FILE *);
char *ds_cstr(struct ds *);
+char *ds_steal_cstr(struct ds *);
void ds_destroy(struct ds *);
int ds_last(const struct ds *);
#define THIS_MODULE VLM_mac_learning
#include "vlog.h"
+/* Returns the age of 'e' in seconds, that is, the time elapsed since it was
+ * last learned.  The age is derived from the time left before 'e' expires,
+ * because learning sets the expiration MAC_ENTRY_IDLE_TIME seconds ahead. */
+int
+mac_entry_age(const struct mac_entry *e)
+{
+    time_t seconds_left = e->expires - time_now();
+
+    return MAC_ENTRY_IDLE_TIME - seconds_left;
+}
+
static uint32_t
mac_table_hash(const uint8_t mac[ETH_ADDR_LEN], uint16_t vlan)
{
/* Make the entry most-recently-used. */
list_remove(&e->lru_node);
list_push_back(&ml->lrus, &e->lru_node);
- e->expires = time_now() + 60;
+ e->expires = time_now() + MAC_ENTRY_IDLE_TIME;
/* Did we learn something? */
if (e->port != src_port) {
#define MAC_MAX 1024
+/* Time, in seconds, before expiring a mac_entry due to inactivity. */
+#define MAC_ENTRY_IDLE_TIME 60
+
/* A MAC learning table entry. */
struct mac_entry {
struct list hash_node; /* Element in a mac_learning 'table' list. */
tag_type tag; /* Tag for this learning entry. */
};
+int mac_entry_age(const struct mac_entry *);
+
/* MAC learning table. */
struct mac_learning {
struct list free; /* Not-in-use entries. */
/* If 'netdev' has an assigned IPv4 address, sets '*in4' to that address (if
* 'in4' is non-null) and returns true. Otherwise, returns false. */
bool
-netdev_get_in4(const struct netdev *netdev, struct in_addr *in4)
+netdev_nodev_get_in4(const char *netdev_name, struct in_addr *in4)
{
struct ifreq ifr;
struct in_addr ip = { INADDR_ANY };
- strncpy(ifr.ifr_name, netdev->name, sizeof ifr.ifr_name);
+ init_netdev();
+
+ strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name);
ifr.ifr_addr.sa_family = AF_INET;
COVERAGE_INC(netdev_get_in4);
if (ioctl(af_inet_sock, SIOCGIFADDR, &ifr) == 0) {
ip = sin->sin_addr;
} else {
VLOG_DBG_RL(&rl, "%s: ioctl(SIOCGIFADDR) failed: %s",
- netdev->name, strerror(errno));
+ netdev_name, strerror(errno));
}
if (in4) {
*in4 = ip;
return ip.s_addr != INADDR_ANY;
}
+/* Variant of netdev_nodev_get_in4() that takes a 'struct netdev' and performs
+ * the lookup using that device's name. */
+bool
+netdev_get_in4(const struct netdev *netdev, struct in_addr *in4)
+{
+ return netdev_nodev_get_in4(netdev->name, in4);
+}
+
static void
make_in4_sockaddr(struct sockaddr *sa, struct in_addr addr)
{
* returns 0. Otherwise, it returns a positive errno value; in particular,
* ENXIO indicates that there is not ARP table entry for 'ip' on 'netdev'. */
int
-netdev_arp_lookup(const struct netdev *netdev,
- uint32_t ip, uint8_t mac[ETH_ADDR_LEN])
+netdev_nodev_arp_lookup(const char *netdev_name, uint32_t ip,
+ uint8_t mac[ETH_ADDR_LEN])
{
struct arpreq r;
struct sockaddr_in *pa;
int retval;
+ init_netdev();
+
memset(&r, 0, sizeof r);
pa = (struct sockaddr_in *) &r.arp_pa;
pa->sin_family = AF_INET;
pa->sin_port = 0;
r.arp_ha.sa_family = ARPHRD_ETHER;
r.arp_flags = 0;
- strncpy(r.arp_dev, netdev->name, sizeof r.arp_dev);
+ strncpy(r.arp_dev, netdev_name, sizeof r.arp_dev);
COVERAGE_INC(netdev_arp_lookup);
retval = ioctl(af_inet_sock, SIOCGARP, &r) < 0 ? errno : 0;
if (!retval) {
memcpy(mac, r.arp_ha.sa_data, ETH_ADDR_LEN);
} else if (retval != ENXIO) {
VLOG_WARN_RL(&rl, "%s: could not look up ARP entry for "IP_FMT": %s",
- netdev->name, IP_ARGS(&ip), strerror(retval));
+ netdev_name, IP_ARGS(&ip), strerror(retval));
}
return retval;
}
+/* Variant of netdev_nodev_arp_lookup() that takes a 'struct netdev' and
+ * performs the lookup using that device's name. */
+int
+netdev_arp_lookup(const struct netdev *netdev, uint32_t ip,
+ uint8_t mac[ETH_ADDR_LEN])
+{
+ return netdev_nodev_arp_lookup(netdev->name, ip, mac);
+}
+
static int
get_stats_via_netlink(int ifindex, struct netdev_stats *stats)
{
}
}
+/* Searches for the network device whose IPv4 address is '*in4'.
+ *
+ * On entry, '*netdev_name' is either NULL or a malloc'd string naming the
+ * device that the caller considers most likely.  The hint is tried first; if
+ * it does not match, it is freed.
+ *
+ * Returns true if a device was found, in which case '*netdev_name' is its
+ * name (either the original hint or a newly allocated string) and the caller
+ * is responsible for freeing it.  Returns false otherwise, in which case
+ * '*netdev_name' is NULL. */
+bool
+netdev_find_dev_by_in4(const struct in_addr *in4, char **netdev_name)
+{
+    struct svec dev_list;
+    struct in_addr dev_in4;
+    bool found = false;
+    int i;
+
+    /* Fast path: the caller's hint still has the address. */
+    if (*netdev_name
+        && netdev_nodev_get_in4(*netdev_name, &dev_in4)
+        && dev_in4.s_addr == in4->s_addr) {
+        return true;
+    }
+
+    /* No usable hint.  Drop it and try each device that netdev_enumerate()
+     * reports, stopping at the first match. */
+    free(*netdev_name);
+    *netdev_name = NULL;
+
+    netdev_enumerate(&dev_list);
+    for (i = 0; i < dev_list.n && !found; i++) {
+        const char *candidate = dev_list.names[i];
+
+        if (netdev_nodev_get_in4(candidate, &dev_in4)
+            && dev_in4.s_addr == in4->s_addr) {
+            *netdev_name = xstrdup(candidate);
+            found = true;
+        }
+    }
+    svec_destroy(&dev_list);
+
+    return found;
+}
+
/* Obtains the current flags for the network device named 'netdev_name' and
* stores them into '*flagsp'. Returns 0 if successful, otherwise a positive
* errno value. On error, stores 0 into '*flagsp'.
uint32_t kbits_burst);
void netdev_enumerate(struct svec *);
+bool netdev_find_dev_by_in4(const struct in_addr *in4, char **netdev_name);
int netdev_nodev_get_flags(const char *netdev_name, enum netdev_flags *);
+bool netdev_nodev_get_in4(const char *netdev_name, struct in_addr *);
int netdev_nodev_set_etheraddr(const char *name, const uint8_t mac[6]);
int netdev_nodev_get_etheraddr(const char *netdev_name, uint8_t mac[6]);
int netdev_nodev_set_policing(const char *netdev_name, uint32_t kbits_rate,
uint32_t kbits_burst);
+int netdev_nodev_arp_lookup(const char *netdev_name, uint32_t ip,
+ uint8_t mac[6]);
int netdev_nodev_get_carrier(const char *netdev_name, bool *carrier);
int netdev_get_vlan_vid(const char *netdev_name, int *vlan_vid);
#ifndef PACKETS_H
#define PACKETS_H 1
+#include <inttypes.h>
#include <stdint.h>
#include <string.h>
#include "compiler.h"
&& (ea[5] & 0xf0) == 0x00);
}
+/* Example:
+ *
+ * uint8_t mac[ETH_ADDR_LEN];
+ * [...]
+ * printf("The Ethernet address is "ETH_ADDR_FMT"\n", ETH_ADDR_ARGS(mac));
+ *
+ */
#define ETH_ADDR_FMT \
"%02"PRIx8":%02"PRIx8":%02"PRIx8":%02"PRIx8":%02"PRIx8":%02"PRIx8
#define ETH_ADDR_ARGS(ea) \
(ea)[0], (ea)[1], (ea)[2], (ea)[3], (ea)[4], (ea)[5]
+/* Example:
+ *
+ * char *string = "1 00:11:22:33:44:55 2";
+ * uint8_t mac[ETH_ADDR_LEN];
+ * int a, b;
+ *
+ * if (sscanf(string, "%d"ETH_ADDR_SCAN_FMT"%d",
+ * &a, ETH_ADDR_SCAN_ARGS(mac), &b) == 1 + ETH_ADDR_SCAN_COUNT + 1) {
+ * ...
+ * }
+ */
+#define ETH_ADDR_SCAN_FMT "%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8
+#define ETH_ADDR_SCAN_ARGS(ea) \
+ &(ea)[0], &(ea)[1], &(ea)[2], &(ea)[3], &(ea)[4], &(ea)[5]
+#define ETH_ADDR_SCAN_COUNT 6
+
#define ETH_TYPE_IP 0x0800
#define ETH_TYPE_ARP 0x0806
#define ETH_TYPE_VLAN 0x8100
#include <unistd.h>
#include "coverage.h"
#include "dynamic-string.h"
+#include "fatal-signal.h"
#include "list.h"
#include "poll-loop.h"
#include "socket-util.h"
/* All processes. */
static struct list all_processes = LIST_INITIALIZER(&all_processes);
+static bool sigchld_is_blocked(void);
static void block_sigchld(sigset_t *);
static void unblock_sigchld(const sigset_t *);
static void sigchld_handler(int signr UNUSED);
return ds_cstr(&ds);
}
+/* Performs the checks and logging that precede starting a subprocess whose
+ * command line is the null-terminated 'argv' array.  Returns 0 if the
+ * program named by argv[0] was found, otherwise ENOENT. */
+static int
+process_prestart(char **argv)
+{
+    char *path;
+
+    process_init();
+
+    /* Record the full command line when debug logging is on. */
+    if (VLOG_IS_DBG_ENABLED()) {
+        char *escaped = process_escape_args(argv);
+        VLOG_DBG("starting subprocess: %s", escaped);
+        free(escaped);
+    }
+
+    /* execvp() would also search PATH, but it could only report a failure
+     * after the fork, where the error is more obscure. */
+    path = process_search_path(argv[0]);
+    if (path) {
+        free(path);
+        return 0;
+    }
+
+    VLOG_ERR("%s not found in PATH", argv[0]);
+    return ENOENT;
+}
+
+/* Allocates a struct process for the subprocess 'pid', names it after the
+ * last path component of 'name', appends it to the list of all processes, and
+ * returns it.
+ *
+ * SIGCHLD must be blocked, and must have been blocked since before the
+ * fork() that created the subprocess; otherwise this is racy. */
+static struct process *
+process_register(const char *name, pid_t pid)
+{
+    const char *base;
+    struct process *proc;
+
+    assert(sigchld_is_blocked());
+
+    /* Strip any leading directories from 'name'. */
+    base = strrchr(name, '/');
+    base = base ? base + 1 : name;
+
+    proc = xcalloc(1, sizeof *proc);
+    proc->pid = pid;
+    proc->name = xstrdup(base);
+    proc->exited = false;
+    list_push_back(&all_processes, &proc->node);
+
+    return proc;
+}
+
/* Starts a subprocess with the arguments in the null-terminated argv[] array.
* argv[0] is used as the name of the process. Searches the PATH environment
* variable to find the program to execute.
struct process **pp)
{
sigset_t oldsigs;
- char *binary;
pid_t pid;
+ int error;
*pp = NULL;
- process_init();
COVERAGE_INC(process_start);
-
- if (VLOG_IS_DBG_ENABLED()) {
- char *args = process_escape_args(argv);
- VLOG_DBG("starting subprocess: %s", args);
- free(args);
- }
-
- /* execvp() will search PATH too, but the error in that case is more
- * obscure, since it is only reported post-fork. */
- binary = process_search_path(argv[0]);
- if (!binary) {
- VLOG_ERR("%s not found in PATH", argv[0]);
- return ENOENT;
+ error = process_prestart(argv);
+ if (error) {
+ return error;
}
- free(binary);
block_sigchld(&oldsigs);
+ fatal_signal_block();
pid = fork();
if (pid < 0) {
+ fatal_signal_unblock();
unblock_sigchld(&oldsigs);
VLOG_WARN("fork failed: %s", strerror(errno));
return errno;
} else if (pid) {
/* Running in parent process. */
- struct process *p;
- const char *slash;
-
- p = xcalloc(1, sizeof *p);
- p->pid = pid;
- slash = strrchr(argv[0], '/');
- p->name = xstrdup(slash ? slash + 1 : argv[0]);
- p->exited = false;
-
- list_push_back(&all_processes, &p->node);
+ *pp = process_register(argv[0], pid);
+ fatal_signal_unblock();
unblock_sigchld(&oldsigs);
-
- *pp = p;
return 0;
} else {
/* Running in child process. */
int fd_max = get_max_fds();
int fd;
+ fatal_signal_fork();
+ fatal_signal_unblock();
unblock_sigchld(&oldsigs);
for (fd = 0; fd < fd_max; fd++) {
if (is_member(fd, null_fds, n_null_fds)) {
+ /* We can't use get_null_fd() here because we might have
+ * already closed its fd. */
int nullfd = open("/dev/null", O_RDWR);
dup2(nullfd, fd);
close(nullfd);
return NULL;
}
\f
+/* process_run_capture() and supporting functions. */
+
+/* State for capturing one output stream (stdout or stderr) of a subprocess
+ * started by process_run_capture(). */
+struct stream {
+ struct ds log; /* Bytes read from the pipe so far (see stream_read()). */
+ int fds[2]; /* Pipe from pipe(): fds[0] is the nonblocking read end, set to
+              * -1 by stream_read() once it has been closed; fds[1] is the
+              * write end handed to the child. */
+};
+
+/* Initializes 's' with an empty log and a newly created pipe whose read end
+ * (fds[0]) is put into nonblocking mode.
+ *
+ * Returns 0 if successful, otherwise the positive errno value reported by
+ * pipe().  On failure 's->fds' is not valid and stream_close() must not be
+ * called on 's'. */
+static int
+stream_open(struct stream *s)
+{
+    ds_init(&s->log);
+    if (pipe(s->fds)) {
+        /* Save errno before logging, because the logging call may overwrite
+         * it and we must return the value that pipe() reported. */
+        int error = errno;
+
+        VLOG_WARN("failed to create pipe: %s", strerror(error));
+        return error;
+    }
+    set_nonblocking(s->fds[0]);
+    return 0;
+}
+
+/* Drains whatever is currently readable from the read end of 's' into
+ * 's->log'.  Does nothing if the read end has already been closed.
+ *
+ * The read end is closed (and 's->fds[0]' set to -1) on end of file, on a
+ * read error other than EAGAIN/EWOULDBLOCK, or once more than
+ * PROCESS_MAX_CAPTURE bytes have been accumulated.  The size limit is checked
+ * after every read, including reads that stop short with EAGAIN, so a
+ * subprocess that writes in small bursts cannot grow the log without bound. */
+static void
+stream_read(struct stream *s)
+{
+    if (s->fds[0] < 0) {
+        return;
+    }
+
+    for (;;) {
+        char buffer[512];
+        bool would_block;
+        size_t n;
+        int error;
+
+        /* read_fully() reports in 'n' how much it read even when it returns
+         * an error, so always append before looking at 'error'. */
+        error = read_fully(s->fds[0], buffer, sizeof buffer, &n);
+        ds_put_buffer(&s->log, buffer, n);
+
+        would_block = error == EAGAIN || error == EWOULDBLOCK;
+        if (error && !would_block) {
+            if (error != EOF) {
+                VLOG_WARN("error reading subprocess pipe: %s",
+                          strerror(error));
+            }
+            break;
+        }
+
+        if (s->log.length > PROCESS_MAX_CAPTURE) {
+            VLOG_WARN("subprocess output overflowed %d-byte buffer",
+                      PROCESS_MAX_CAPTURE);
+            break;
+        }
+
+        if (would_block) {
+            /* Nothing more to read for now; keep the pipe open. */
+            return;
+        }
+    }
+    close(s->fds[0]);
+    s->fds[0] = -1;
+}
+
+/* Registers the read end of 's' with poll_fd_wait() for POLLIN.  Does nothing
+ * once the read end has been closed (fds[0] is negative). */
+static void
+stream_wait(struct stream *s)
+{
+    if (s->fds[0] < 0) {
+        return;
+    }
+    poll_fd_wait(s->fds[0], POLLIN);
+}
+
+/* Releases what 's' holds: its log buffer and whichever ends of its pipe
+ * still have a nonnegative fd. */
+static void
+stream_close(struct stream *s)
+{
+    int i;
+
+    ds_destroy(&s->log);
+    for (i = 0; i < 2; i++) {
+        int fd = s->fds[i];
+
+        if (fd >= 0) {
+            close(fd);
+        }
+    }
+}
+
+/* Starts the process whose arguments are given in the null-terminated array
+ * 'argv' and waits for it to exit.  On success returns 0 and stores the
+ * process exit value (suitable for passing to process_status_msg()) in
+ * '*status'.  On failure, returns a positive errno value and stores 0 in
+ * '*status'.
+ *
+ * If 'stdout_log' is nonnull, then the subprocess's output to stdout (up to a
+ * limit of PROCESS_MAX_CAPTURE bytes) is captured in a memory buffer, which
+ * when this function returns 0 is stored as a null-terminated string in
+ * '*stdout_log'.  The caller is responsible for freeing '*stdout_log' (by
+ * passing it to free()).  When this function returns an error, '*stdout_log'
+ * is set to NULL.
+ *
+ * If 'stderr_log' is nonnull, then it is treated like 'stdout_log' except
+ * that it captures the subprocess's output to stderr.
+ *
+ * In the child, stdin is duplicated from get_null_fd(), stdout and stderr are
+ * the write ends of the capture pipes, and every fd from 3 up to
+ * get_max_fds() is closed before execvp(). */
+int
+process_run_capture(char **argv, char **stdout_log, char **stderr_log,
+                    int *status)
+{
+    struct stream s_stdout, s_stderr;
+    sigset_t oldsigs;
+    pid_t pid;
+    int error;
+
+    COVERAGE_INC(process_run_capture);
+    if (stdout_log) {
+        *stdout_log = NULL;
+    }
+    if (stderr_log) {
+        *stderr_log = NULL;
+    }
+    *status = 0;
+    error = process_prestart(argv);
+    if (error) {
+        return error;
+    }
+
+    error = stream_open(&s_stdout);
+    if (error) {
+        return error;
+    }
+
+    error = stream_open(&s_stderr);
+    if (error) {
+        stream_close(&s_stdout);
+        return error;
+    }
+
+    /* SIGCHLD must be blocked from before the fork() until the child has
+     * been registered; process_register() asserts this. */
+    block_sigchld(&oldsigs);
+    fatal_signal_block();
+    pid = fork();
+    if (pid < 0) {
+        /* Save errno before any other call can overwrite it. */
+        error = errno;
+
+        fatal_signal_unblock();
+        unblock_sigchld(&oldsigs);
+        VLOG_WARN("fork failed: %s", strerror(error));
+
+        stream_close(&s_stdout);
+        stream_close(&s_stderr);
+        *status = 0;
+        return error;
+    } else if (pid) {
+        /* Running in parent process. */
+        struct process *p;
+
+        p = process_register(argv[0], pid);
+        fatal_signal_unblock();
+        unblock_sigchld(&oldsigs);
+
+        /* The parent only reads.  Mark the write ends as closed so that the
+         * stream_close() calls below do not close the same fd numbers a
+         * second time (by then they could belong to unrelated files). */
+        close(s_stdout.fds[1]);
+        s_stdout.fds[1] = -1;
+        close(s_stderr.fds[1]);
+        s_stderr.fds[1] = -1;
+
+        while (!process_exited(p)) {
+            stream_read(&s_stdout);
+            stream_read(&s_stderr);
+
+            stream_wait(&s_stdout);
+            stream_wait(&s_stderr);
+            process_wait(p);
+            poll_block();
+        }
+        /* Pick up anything written between the last read and the exit. */
+        stream_read(&s_stdout);
+        stream_read(&s_stderr);
+
+        if (stdout_log) {
+            *stdout_log = ds_steal_cstr(&s_stdout.log);
+        }
+        if (stderr_log) {
+            *stderr_log = ds_steal_cstr(&s_stderr.log);
+        }
+
+        stream_close(&s_stdout);
+        stream_close(&s_stderr);
+
+        *status = process_status(p);
+        process_destroy(p);
+        return 0;
+    } else {
+        /* Running in child process. */
+        int max_fds;
+        int i;
+
+        fatal_signal_fork();
+        fatal_signal_unblock();
+        unblock_sigchld(&oldsigs);
+
+        dup2(get_null_fd(), 0);
+        dup2(s_stdout.fds[1], 1);
+        dup2(s_stderr.fds[1], 2);
+
+        max_fds = get_max_fds();
+        for (i = 3; i < max_fds; i++) {
+            close(i);
+        }
+
+        execvp(argv[0], argv);
+        fprintf(stderr, "execvp(\"%s\") failed: %s\n",
+                argv[0], strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+}
+\f
static void
sigchld_handler(int signr UNUSED)
{
return false;
}
+/* Reports whether SIGCHLD is in the calling thread's current signal mask.
+ * Calls ovs_fatal() if the mask cannot be queried. */
+static bool
+sigchld_is_blocked(void)
+{
+    sigset_t current;
+
+    /* A null 'set' argument makes sigprocmask() a pure query. */
+    if (sigprocmask(SIG_SETMASK, NULL, &current) != 0) {
+        ovs_fatal(errno, "sigprocmask");
+    }
+    return sigismember(&current, SIGCHLD);
+}
+
static void
block_sigchld(sigset_t *oldsigs)
{
char *process_search_path(const char *);
+#define PROCESS_MAX_CAPTURE 65536
+int process_run_capture(char **argv, char **stdout_log, char **stderr_log,
+ int *status);
+
#endif /* process.h */
/* Returns the IP address of the peer, or 0 if the peer is not connected over
* an IP-based protocol or if its IP address is not known. */
uint32_t
-rconn_get_ip(const struct rconn *rconn)
+rconn_get_remote_ip(const struct rconn *rconn)
{
- return rconn->vconn ? vconn_get_ip(rconn->vconn) : 0;
+ return rconn->vconn ? vconn_get_remote_ip(rconn->vconn) : 0;
+}
+
+/* Returns the peer's transport port as reported by the underlying vconn, or
+ * 0 if 'rconn' currently has no vconn, the connection has no port, or the
+ * port is not known. */
+uint16_t
+rconn_get_remote_port(const struct rconn *rconn)
+{
+    if (!rconn->vconn) {
+        return 0;
+    }
+    return vconn_get_remote_port(rconn->vconn);
+}
+
+/* Returns the local IP address used to connect to the peer, as reported by
+ * the underlying vconn, or 0 if 'rconn' currently has no vconn, the
+ * connection is not over an IP-based protocol, or the address is not
+ * known. */
+uint32_t
+rconn_get_local_ip(const struct rconn *rconn)
+{
+    if (!rconn->vconn) {
+        return 0;
+    }
+    return vconn_get_local_ip(rconn->vconn);
+}
+
+/* Returns the local transport port used to connect to the peer, as reported
+ * by the underlying vconn, or 0 if 'rconn' currently has no vconn, the
+ * connection has no port, or the port is not known. */
+uint16_t
+rconn_get_local_port(const struct rconn *rconn)
+{
+    if (!rconn->vconn) {
+        return 0;
+    }
+    return vconn_get_local_port(rconn->vconn);
}
/* If 'rconn' can't connect to the peer, it could be for any number of reasons.
int rconn_failure_duration(const struct rconn *);
bool rconn_is_connectivity_questionable(struct rconn *);
-uint32_t rconn_get_ip(const struct rconn *);
+uint32_t rconn_get_remote_ip(const struct rconn *);
+uint16_t rconn_get_remote_port(const struct rconn *);
+uint32_t rconn_get_local_ip(const struct rconn *);
+uint16_t rconn_get_local_port(const struct rconn *);
const char *rconn_get_state(const struct rconn *);
unsigned int rconn_get_attempted_connections(const struct rconn *);
return error ? -error : fd;
}
+/* Returns an fd open for reading and writing on /dev/null, or a negative
+ * errno value if it cannot be opened.  The fd is opened on first use and then
+ * cached, so every caller gets the same fd and none of them may close it.  A
+ * failed open is not cached; the next call tries again. */
+int
+get_null_fd(void)
+{
+    static int null_fd = -1;
+
+    if (null_fd >= 0) {
+        return null_fd;
+    }
+
+    null_fd = open("/dev/null", O_RDWR);
+    if (null_fd >= 0) {
+        return null_fd;
+    } else {
+        int error = errno;
+
+        VLOG_ERR("could not open /dev/null: %s", strerror(error));
+        return -error;
+    }
+}
+
int
read_fully(int fd, void *p_, size_t size, size_t *bytes_read)
{
const char *bind_path, const char *connect_path);
int get_unix_name_len(socklen_t sun_len);
uint32_t guess_netmask(uint32_t ip);
+int get_null_fd(void);
int tcp_open_active(const char *target, uint16_t default_port,
struct sockaddr_in *sinp, int *fdp);
return;
}
+ coverage_init();
+
inited = true;
gettimeofday(&now, NULL);
tick = false;
rusage.ru_nvcsw - last_rusage->ru_nvcsw,
rusage.ru_nivcsw - last_rusage->ru_nivcsw);
}
- coverage_log(VLL_WARN);
+ coverage_log(VLL_WARN, true);
}
/* Update exponentially weighted moving average. With these parameters, a
int error;
int min_version;
int version;
- uint32_t ip;
+ uint32_t remote_ip;
+ uint16_t remote_port;
+ uint32_t local_ip;
+ uint16_t local_port;
char *name;
bool reconnectable;
};
void vconn_init(struct vconn *, struct vconn_class *, int connect_status,
- uint32_t ip, const char *name, bool reconnectable);
+ const char *name, bool reconnectable);
+void vconn_set_remote_ip(struct vconn *, uint32_t remote_ip);
+void vconn_set_remote_port(struct vconn *, uint16_t remote_port);
+void vconn_set_local_ip(struct vconn *, uint32_t local_ip);
+void vconn_set_local_port(struct vconn *, uint16_t local_port);
static inline void vconn_assert_class(const struct vconn *vconn,
const struct vconn_class *class)
{
static int
new_ssl_vconn(const char *name, int fd, enum session_type type,
- enum ssl_state state, const struct sockaddr_in *sin,
+ enum ssl_state state, const struct sockaddr_in *remote,
struct vconn **vconnp)
{
+ struct sockaddr_in local;
+ socklen_t local_len = sizeof local;
struct ssl_vconn *sslv;
SSL *ssl = NULL;
int on = 1;
goto error;
}
+ /* Get the local IP and port information */
+ retval = getsockname(fd, (struct sockaddr *) &local, &local_len);
+ if (retval) {
+ memset(&local, 0, sizeof local);
+ }
+
/* Disable Nagle. */
retval = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof on);
if (retval) {
/* Create and return the ssl_vconn. */
sslv = xmalloc(sizeof *sslv);
- vconn_init(&sslv->vconn, &ssl_vconn_class, EAGAIN, sin->sin_addr.s_addr,
- name, true);
+ vconn_init(&sslv->vconn, &ssl_vconn_class, EAGAIN, name, true);
+ vconn_set_remote_ip(&sslv->vconn, remote->sin_addr.s_addr);
+ vconn_set_remote_port(&sslv->vconn, remote->sin_port);
+ vconn_set_local_ip(&sslv->vconn, local.sin_addr.s_addr);
+ vconn_set_local_port(&sslv->vconn, local.sin_port);
sslv->state = state;
sslv->type = type;
sslv->fd = fd;
int
new_stream_vconn(const char *name, int fd, int connect_status,
- uint32_t ip, bool reconnectable, struct vconn **vconnp)
+ bool reconnectable, struct vconn **vconnp)
{
struct stream_vconn *s;
s = xmalloc(sizeof *s);
- vconn_init(&s->vconn, &stream_vconn_class, connect_status, ip, name,
- reconnectable);
+ vconn_init(&s->vconn, &stream_vconn_class, connect_status,
+ name, reconnectable);
s->fd = fd;
s->txbuf = NULL;
s->tx_waiter = NULL;
/*
- * Copyright (c) 2008 Nicira Networks.
+ * Copyright (c) 2008, 2009 Nicira Networks.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
struct sockaddr;
int new_stream_vconn(const char *name, int fd, int connect_status,
- uint32_t ip, bool reconnectable, struct vconn **vconnp);
+ bool reconnectable, struct vconn **vconnp);
int new_pstream_pvconn(const char *name, int fd,
int (*accept_cb)(int fd, const struct sockaddr *,
size_t sa_len, struct vconn **),
static int
new_tcp_vconn(const char *name, int fd, int connect_status,
- const struct sockaddr_in *sin, struct vconn **vconnp)
+ const struct sockaddr_in *remote, struct vconn **vconnp)
{
+ struct sockaddr_in local;
+ socklen_t local_len = sizeof local;
int on = 1;
int retval;
+ /* Get the local IP and port information */
+ retval = getsockname(fd, (struct sockaddr *)&local, &local_len);
+ if (retval) {
+ memset(&local, 0, sizeof local);
+ }
+
retval = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof on);
if (retval) {
VLOG_ERR("%s: setsockopt(TCP_NODELAY): %s", name, strerror(errno));
return errno;
}
- return new_stream_vconn(name, fd, connect_status, sin->sin_addr.s_addr,
- true, vconnp);
+ retval = new_stream_vconn(name, fd, connect_status, true, vconnp);
+ if (!retval) {
+ struct vconn *vconn = *vconnp;
+ vconn_set_remote_ip(vconn, remote->sin_addr.s_addr);
+ vconn_set_remote_port(vconn, remote->sin_port);
+ vconn_set_local_ip(vconn, local.sin_addr.s_addr);
+ vconn_set_local_port(vconn, local.sin_port);
+ }
+ return retval;
}
static int
}
return new_stream_vconn(name, fd, check_connection_completion(fd),
- 0, true, vconnp);
+ true, vconnp);
}
struct vconn_class unix_vconn_class = {
} else {
strcpy(name, "unix");
}
- return new_stream_vconn(name, fd, 0, 0, true, vconnp);
+ return new_stream_vconn(name, fd, 0, true, vconnp);
}
struct pvconn_class punix_pvconn_class = {
/* Returns the IP address of the peer, or 0 if the peer is not connected over
* an IP-based protocol or if its IP address is not yet known. */
uint32_t
-vconn_get_ip(const struct vconn *vconn)
+vconn_get_remote_ip(const struct vconn *vconn)
{
- return vconn->ip;
+ return vconn->remote_ip;
+}
+
+/* Returns the transport port of the peer, or 0 if the connection does not
+ * contain a port or if the port is not yet known.
+ *
+ * The value is returned exactly as stored by vconn_set_remote_port().  The
+ * TCP and SSL vconns store a 'sin_port' there without conversion, so for
+ * those it is in network byte order. */
+uint16_t
+vconn_get_remote_port(const struct vconn *vconn)
+{
+ return vconn->remote_port;
+}
+
+/* Returns the IP address used to connect to the peer, or 0 if the
+ * connection is not an IP-based protocol or if its IP address is not
+ * yet known.
+ *
+ * The value is returned exactly as stored by vconn_set_local_ip().  The TCP
+ * and SSL vconns store the 'sin_addr.s_addr' obtained from getsockname()
+ * there, or 0 if getsockname() failed. */
+uint32_t
+vconn_get_local_ip(const struct vconn *vconn)
+{
+ return vconn->local_ip;
+}
+
+/* Returns the transport port used to connect to the peer, or 0 if the
+ * connection does not contain a port or if the port is not yet known.
+ *
+ * The value is returned exactly as stored by vconn_set_local_port().  The
+ * TCP and SSL vconns store the 'sin_port' obtained from getsockname() there
+ * without conversion, so for those it is in network byte order. */
+uint16_t
+vconn_get_local_port(const struct vconn *vconn)
+{
+ return vconn->local_port;
}
static void
void
vconn_init(struct vconn *vconn, struct vconn_class *class, int connect_status,
- uint32_t ip, const char *name, bool reconnectable)
+ const char *name, bool reconnectable)
{
vconn->class = class;
vconn->state = (connect_status == EAGAIN ? VCS_CONNECTING
vconn->error = connect_status;
vconn->version = -1;
vconn->min_version = -1;
- vconn->ip = ip;
+ vconn->remote_ip = 0;
+ vconn->remote_port = 0;
+ vconn->local_ip = 0;
+ vconn->local_port = 0;
vconn->name = xstrdup(name);
vconn->reconnectable = reconnectable;
}
+/* Records 'ip' as the peer IP address of 'vconn'; vconn_get_remote_ip() will
+ * return it unchanged. */
+void
+vconn_set_remote_ip(struct vconn *vconn, uint32_t ip)
+{
+ vconn->remote_ip = ip;
+}
+
+/* Records 'port' as the peer transport port of 'vconn';
+ * vconn_get_remote_port() will return it unchanged, so the caller chooses the
+ * byte order. */
+void
+vconn_set_remote_port(struct vconn *vconn, uint16_t port)
+{
+ vconn->remote_port = port;
+}
+
+/* Records 'ip' as the local IP address of 'vconn'; vconn_get_local_ip() will
+ * return it unchanged. */
+void
+vconn_set_local_ip(struct vconn *vconn, uint32_t ip)
+{
+ vconn->local_ip = ip;
+}
+
+/* Records 'port' as the local transport port of 'vconn';
+ * vconn_get_local_port() will return it unchanged, so the caller chooses the
+ * byte order. */
+void
+vconn_set_local_port(struct vconn *vconn, uint16_t port)
+{
+ vconn->local_port = port;
+}
+
void
pvconn_init(struct pvconn *pvconn, struct pvconn_class *class,
const char *name)
int vconn_open(const char *name, int min_version, struct vconn **);
void vconn_close(struct vconn *);
const char *vconn_get_name(const struct vconn *);
-uint32_t vconn_get_ip(const struct vconn *);
+uint32_t vconn_get_remote_ip(const struct vconn *);
+uint16_t vconn_get_remote_port(const struct vconn *);
+uint32_t vconn_get_local_ip(const struct vconn *);
+uint16_t vconn_get_local_port(const struct vconn *);
int vconn_connect(struct vconn *);
int vconn_recv(struct vconn *, struct ofpbuf **);
int vconn_send(struct vconn *, struct ofpbuf *);
};
/* File descriptors for waking up when a child dies. */
-static int signal_fds[2];
-
-/* File descriptor for /dev/null. */
-static int null_fd = -1;
+static int signal_fds[2] = {-1, -1};
static void send_child_status(struct rconn *, uint32_t xid, uint32_t status,
const void *data, size_t size);
* subprocesses at once? Would also want to catch fatal signals and
* kill them at the same time though. */
fatal_signal_fork();
- dup2(null_fd, 0);
+ dup2(get_null_fd(), 0);
dup2(output_fds[1], 1);
- dup2(null_fd, 2);
+ dup2(get_null_fd(), 2);
max_fds = get_max_fds();
for (i = 3; i < max_fds; i++) {
close(i);
struct sigaction sa;
*executerp = NULL;
- if (null_fd == -1) {
+ if (signal_fds[0] == -1) {
+ /* Make sure we can get a fd for /dev/null. */
+ int null_fd = get_null_fd();
+ if (null_fd < 0) {
+ return -null_fd;
+ }
+
/* Create pipe for notifying us that SIGCHLD was invoked. */
if (pipe(signal_fds)) {
VLOG_ERR("pipe failed: %s", strerror(errno));
}
set_nonblocking(signal_fds[0]);
set_nonblocking(signal_fds[1]);
-
- /* Open /dev/null. */
- null_fd = open("/dev/null", O_RDWR);
- if (null_fd < 0) {
- int error = errno;
- VLOG_ERR("could not open /dev/null: %s", strerror(error));
- close(signal_fds[0]);
- close(signal_fds[1]);
- return error;
- }
}
/* Set up signal handler. */
#include <inttypes.h>
#include <net/if.h>
#include <string.h>
-#include "dpif.h"
+#include <stdlib.h>
#include "flow.h"
#include "mac-learning.h"
#include "netdev.h"
#define IB_BASE_PRIORITY 18181800
enum {
- IBR_FROM_LOCAL_PORT, /* Sent by ofproto local port. */
- IBR_TO_LOCAL_PORT, /* Sent to ofproto local port. */
+ IBR_FROM_LOCAL_PORT, /* Sent by the local port. */
+ IBR_OFP_TO_LOCAL, /* Sent to secure channel on local port. */
+ IBR_ARP_FROM_LOCAL, /* ARP from the local port. */
IBR_ARP_FROM_CTL, /* ARP from the controller. */
IBR_TO_CTL_OFP_SRC, /* To controller, OpenFlow source port. */
IBR_TO_CTL_OFP_DST, /* To controller, OpenFlow dest port. */
struct in_band {
struct ofproto *ofproto;
- struct netdev *netdev;
struct rconn *controller;
struct status_category *ss_cat;
uint32_t last_ip; /* Last known IP, 0 if never known. */
uint8_t mac[ETH_ADDR_LEN]; /* Current MAC, 0 if unknown. */
uint8_t last_mac[ETH_ADDR_LEN]; /* Last known MAC, 0 if never known */
+ char *dev_name;
time_t next_refresh; /* Next time to refresh MAC address. */
/* Keeping track of the local port's MAC address. */
get_controller_mac(struct in_band *ib)
{
time_t now = time_now();
- uint32_t ip;
+ uint32_t controller_ip;
- ip = rconn_get_ip(ib->controller);
- if (ip != ib->ip || now >= ib->next_refresh) {
+ controller_ip = rconn_get_remote_ip(ib->controller);
+ if (controller_ip != ib->ip || now >= ib->next_refresh) {
bool have_mac;
- ib->ip = ip;
+ ib->ip = controller_ip;
/* Look up MAC address. */
memset(ib->mac, 0, sizeof ib->mac);
if (ib->ip) {
- int retval = netdev_arp_lookup(ib->netdev, ib->ip, ib->mac);
- if (retval) {
- VLOG_DBG_RL(&rl, "cannot look up controller hw address "
- "("IP_FMT"): %s",
- IP_ARGS(&ib->ip), strerror(retval));
+ uint32_t local_ip = rconn_get_local_ip(ib->controller);
+ struct in_addr in4;
+ int retval;
+
+ in4.s_addr = local_ip;
+ if (netdev_find_dev_by_in4(&in4, &ib->dev_name)) {
+ retval = netdev_nodev_arp_lookup(ib->dev_name, ib->ip,
+ ib->mac);
+ if (retval) {
+ VLOG_DBG_RL(&rl, "cannot look up controller MAC address "
+ "("IP_FMT"): %s",
+ IP_ARGS(&ib->ip), strerror(retval));
+ }
+ } else {
+ VLOG_DBG_RL(&rl, "cannot find device with IP address "IP_FMT,
+ IP_ARGS(&local_ip));
}
}
have_mac = !eth_addr_is_zero(ib->mac);
time_t now = time_now();
if (now >= ib->next_local_refresh) {
uint8_t ea[ETH_ADDR_LEN];
- if (!netdev_nodev_get_etheraddr(netdev_get_name(ib->netdev), ea)) {
+ if (ib->dev_name && (!netdev_nodev_get_etheraddr(ib->dev_name, ea))) {
memcpy(ib->local_mac, ea, ETH_ADDR_LEN);
}
ib->next_local_refresh = now + 1;
in_band_status_cb(struct status_reply *sr, void *in_band_)
{
struct in_band *in_band = in_band_;
- struct in_addr local_ip;
const uint8_t *local_mac;
- uint32_t controller_ip;
const uint8_t *controller_mac;
- if (netdev_get_in4(in_band->netdev, &local_ip)) {
- status_reply_put(sr, "local-ip="IP_FMT, IP_ARGS(&local_ip.s_addr));
- }
local_mac = get_local_mac(in_band);
if (local_mac) {
status_reply_put(sr, "local-mac="ETH_ADDR_FMT,
ETH_ADDR_ARGS(local_mac));
}
- controller_ip = rconn_get_ip(in_band->controller);
- if (controller_ip) {
- status_reply_put(sr, "controller-ip="IP_FMT,
- IP_ARGS(&controller_ip));
- }
controller_mac = get_controller_mac(in_band);
if (controller_mac) {
status_reply_put(sr, "controller-mac="ETH_ADDR_FMT,
controller_mac = get_controller_mac(in_band);
local_mac = get_local_mac(in_band);
- /* Switch traffic sent from the local port. */
+ /* Switch traffic sent by the local port. */
memset(&flow, 0, sizeof flow);
flow.in_port = ODPP_LOCAL;
setup_flow(in_band, IBR_FROM_LOCAL_PORT, &flow, OFPFW_IN_PORT,
OFPP_NORMAL);
- /* Deliver traffic sent to the local port. */
if (local_mac) {
+ /* Deliver traffic sent to the connection's interface. */
memset(&flow, 0, sizeof flow);
memcpy(flow.dl_dst, local_mac, ETH_ADDR_LEN);
- setup_flow(in_band, IBR_TO_LOCAL_PORT, &flow, OFPFW_DL_DST,
- OFPP_NORMAL);
+ setup_flow(in_band, IBR_OFP_TO_LOCAL, &flow, OFPFW_DL_DST,
+ OFPP_NORMAL);
+
+ /* Allow the connection's interface to be the source of ARP traffic. */
+ memset(&flow, 0, sizeof flow);
+ flow.dl_type = htons(ETH_TYPE_ARP);
+ memcpy(flow.dl_src, local_mac, ETH_ADDR_LEN);
+ setup_flow(in_band, IBR_ARP_FROM_LOCAL, &flow,
+ OFPFW_DL_TYPE | OFPFW_DL_SRC, OFPP_NORMAL);
} else {
- drop_flow(in_band, IBR_TO_LOCAL_PORT);
+ drop_flow(in_band, IBR_OFP_TO_LOCAL);
+ drop_flow(in_band, IBR_ARP_FROM_LOCAL);
}
if (controller_mac) {
}
}
-int
-in_band_create(struct ofproto *ofproto,
- struct dpif *dpif, struct switch_status *ss,
+void
+in_band_create(struct ofproto *ofproto, struct switch_status *ss,
struct rconn *controller, struct in_band **in_bandp)
{
struct in_band *in_band;
- struct netdev *netdev;
- char local_name[IF_NAMESIZE];
- int error;
-
- *in_bandp = NULL;
- error = dpif_port_get_name(dpif, ODPP_LOCAL,
- local_name, sizeof local_name);
- if (error) {
- return error;
- }
-
- error = netdev_open(local_name, NETDEV_ETH_TYPE_NONE, &netdev);
- if (error) {
- VLOG_ERR("failed to open %s network device: %s",
- local_name, strerror(error));
- return error;
- }
in_band = xcalloc(1, sizeof *in_band);
in_band->ofproto = ofproto;
- in_band->netdev = netdev;
in_band->controller = controller;
in_band->ss_cat = switch_status_register(ss, "in-band",
in_band_status_cb, in_band);
in_band->next_refresh = TIME_MIN;
in_band->next_local_refresh = TIME_MIN;
+ in_band->dev_name = NULL;
*in_bandp = in_band;
- return 0;
}
void
in_band_destroy(struct in_band *in_band)
{
if (in_band) {
- netdev_close(in_band->netdev);
switch_status_unregister(in_band->ss_cat);
/* We don't own the rconn. */
}
struct settings;
struct switch_status;
-int in_band_create(struct ofproto *, struct dpif *, struct switch_status *,
- struct rconn *controller, struct in_band **);
+void in_band_create(struct ofproto *, struct switch_status *,
+ struct rconn *controller, struct in_band **);
void in_band_destroy(struct in_band *);
void in_band_run(struct in_band *);
void in_band_wait(struct in_band *);
{
if (in_band != (p->in_band != NULL)) {
if (in_band) {
- return in_band_create(p, p->dpif, p->switch_status,
- p->controller->rconn, &p->in_band);
+ in_band_create(p, p->switch_status, p->controller->rconn,
+ &p->in_band);
+ return 0;
} else {
ofproto_set_discovery(p, false, NULL, true);
in_band_destroy(p->in_band);
return ofproto->datapath_id;
}
+/* Returns the management id currently stored in 'ofproto' (its 'mgmt_id'
+ * member). */
+uint64_t
+ofproto_get_mgmt_id(const struct ofproto *ofproto)
+{
+ return ofproto->mgmt_id;
+}
+
int
ofproto_get_probe_interval(const struct ofproto *ofproto)
{
/* Configuration querying. */
uint64_t ofproto_get_datapath_id(const struct ofproto *);
+uint64_t ofproto_get_mgmt_id(const struct ofproto *);
int ofproto_get_probe_interval(const struct ofproto *);
int ofproto_get_max_backoff(const struct ofproto *);
bool ofproto_get_in_band(const struct ofproto *);
#include <arpa/inet.h>
#include <assert.h>
#include <errno.h>
+#include <inttypes.h>
#include <stdlib.h>
#include <unistd.h>
#include "dynamic-string.h"
#include "ofpbuf.h"
#include "ofproto.h"
#include "openflow/nicira-ext.h"
+#include "packets.h"
#include "rconn.h"
#include "svec.h"
#include "timeval.h"
{
struct rconn *rconn = rconn_;
time_t now = time_now();
+ uint32_t remote_ip = rconn_get_remote_ip(rconn);
+ uint32_t local_ip = rconn_get_local_ip(rconn);
status_reply_put(sr, "name=%s", rconn_get_name(rconn));
+ if (remote_ip) {
+ status_reply_put(sr, "remote-ip="IP_FMT, IP_ARGS(&remote_ip));
+ status_reply_put(sr, "remote-port=%d",
+ ntohs(rconn_get_remote_port(rconn)));
+ status_reply_put(sr, "local-ip="IP_FMT, IP_ARGS(&local_ip));
+ status_reply_put(sr, "local-port=%d",
+ ntohs(rconn_get_local_port(rconn)));
+ }
status_reply_put(sr, "state=%s", rconn_get_state(rconn));
status_reply_put(sr, "backoff=%d", rconn_get_backoff(rconn));
status_reply_put(sr, "probe-interval=%d", rconn_get_probe_interval(rconn));
config_status_cb(struct status_reply *sr, void *ofproto_)
{
const struct ofproto *ofproto = ofproto_;
+ uint64_t datapath_id, mgmt_id;
struct svec listeners;
int probe_interval, max_backoff;
size_t i;
+ datapath_id = ofproto_get_datapath_id(ofproto);
+ if (datapath_id) {
+ status_reply_put(sr, "datapath-id=%"PRIx64, datapath_id);
+ }
+
+ mgmt_id = ofproto_get_mgmt_id(ofproto);
+ if (mgmt_id) {
+ status_reply_put(sr, "mgmt-id=%"PRIx64, mgmt_id);
+ }
+
svec_init(&listeners);
ofproto_get_listeners(ofproto, &listeners);
for (i = 0; i < listeners.n; i++) {
If \fImiss-len\fR is provided, \fBovs\-ofctl\fR sends an OpenFlow ``set
configuration'' message at connection setup time that requests
\fImiss-len\fR bytes of each packet that misses the flow table. The
-OpenFlow reference implementation not send these messages to the
+OpenFlow reference implementation does not send these messages to the
\fBovs\-ofctl monitor\fR client connection unless a nonzero value is
specified on this argument.
static void
str_to_mac(const char *str, uint8_t mac[6])
{
- if (sscanf(str, "%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8,
- &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]) != 6) {
+ if (sscanf(str, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))
+ != ETH_ADDR_SCAN_COUNT) {
ovs_fatal(0, "invalid mac address %s", str);
}
}
const char *devname);
static uint64_t dpid_from_hash(const void *, size_t nbytes);
+static void bridge_unixctl_fdb_show(struct unixctl_conn *, const char *args);
+
static void bond_init(void);
static void bond_run(struct bridge *);
static void bond_wait(struct bridge *);
struct svec dpif_names;
size_t i;
+ unixctl_command_register("fdb/show", bridge_unixctl_fdb_show);
+
dp_enumerate(&dpif_names);
for (i = 0; i < dpif_names.n; i++) {
const char *dpif_name = dpif_names.names[i];
}
}
\f
+/* Bridge unixctl user interface functions. */
+/* unixctl handler for "fdb/show".  'args' is looked up as a bridge name.  If
+ * no such bridge exists, replies with code 501.  Otherwise replies with code
+ * 200 and a table that has one row (port, VLAN, MAC, age) for each entry on
+ * the LRU list of the bridge's MAC learning table; only the heading is sent
+ * if the bridge has no learning table. */
+static void
+bridge_unixctl_fdb_show(struct unixctl_conn *conn, const char *args)
+{
+    const struct bridge *bridge = bridge_lookup(args);
+    struct ds table = DS_EMPTY_INITIALIZER;
+
+    if (bridge == NULL) {
+        unixctl_command_reply(conn, 501, "no such bridge");
+        return;
+    }
+
+    ds_put_cstr(&table, " port VLAN MAC Age\n");
+    if (bridge->ml != NULL) {
+        const struct mac_entry *entry;
+
+        LIST_FOR_EACH (entry, struct mac_entry, lru_node, &bridge->ml->lrus) {
+            ds_put_format(&table, "%5d %4d "ETH_ADDR_FMT" %3d\n",
+                          entry->port, entry->vlan,
+                          ETH_ADDR_ARGS(entry->mac), mac_entry_age(entry));
+        }
+    }
+    unixctl_command_reply(conn, 200, ds_cstr(&table));
+    ds_destroy(&table);
+}
+\f
/* Bridge reconfiguration functions. */
static struct bridge *
int rate_limit, burst_limit;
if (!strcmp(controller, "discover")) {
+ bool update_resolv_conf = true;
+
+ if (cfg_has("%s.update-resolv.conf", pfx)) {
+ update_resolv_conf = cfg_get_bool(0, "%s.update-resolv.conf",
+ pfx);
+ }
ofproto_set_discovery(br->ofproto, true,
cfg_get_string(0, "%s.accept-regex", pfx),
- cfg_get_bool(0, "%s.update-resolv.conf",
- pfx));
+ update_resolv_conf);
} else {
char local_name[IF_NAMESIZE];
struct netdev *netdev;
iface->delay_expires = LLONG_MAX;
VLOG_INFO_RL(&rl, "interface %s: will not be %s",
iface->name, carrier ? "disabled" : "enabled");
+ } else if (carrier && port->updelay && port->active_iface < 0) {
+ iface->delay_expires = time_msec();
+ VLOG_INFO_RL(&rl, "interface %s: skipping %d ms updelay since no "
+ "other interface is up", iface->name, port->updelay);
} else {
int delay = carrier ? port->updelay : port->downdelay;
iface->delay_expires = time_msec() + delay;
iface->enabled = enable;
if (!iface->enabled) {
- VLOG_WARN("interface %s: enabled", iface->name);
+ VLOG_WARN("interface %s: disabled", iface->name);
ofproto_revalidate(br->ofproto, iface->tag);
if (iface->port_ifidx == port->active_iface) {
ofproto_revalidate(br->ofproto,
}
bond_send_learning_packets(port);
} else {
- VLOG_WARN("interface %s: disabled", iface->name);
+ VLOG_WARN("interface %s: enabled", iface->name);
if (port->active_iface < 0) {
ofproto_revalidate(br->ofproto, port->no_ifaces_tag);
bond_choose_active_iface(port);
return;
}
- if (sscanf(hash_s, "%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8,
- &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]) == 6) {
+ if (sscanf(hash_s, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))
+ == ETH_ADDR_SCAN_COUNT) {
hash = bond_hash(mac);
} else if (strspn(hash_s, "0123456789") == strlen(hash_s)) {
hash = atoi(hash_s) & BOND_MASK;
\fBovs\-vswitchd\fR to reload its configuration file.
.PP
.SH OPTIONS
-.IP "\fB--reload-command=\fIcommand\fR"
-Sets the command that \fBovs\-brcompatd\fR runs to force \fBovs\-vswitchd\fR to
-reload its configuration file to \fIcommand\fR. The command is run in
-a subshell, so it may contain arbitrary shell metacharacters, etc.
-The \fB--help\fR option displays the default reload command.
+.IP "\fB--appctl-command=\fIcommand\fR"
+Sets the command that \fBovs\-brcompatd\fR runs to communicate with
+\fBovs\-vswitchd\fR. The command is run in \fB/bin/sh\fR as a shell
+command, so \fIcommand\fR may contain arbitrary shell metacharacters,
+etc. The \fB--help\fR option displays the default command.
+.IP
+\fIcommand\fR must contain exactly one instance of \fB%s\fR, which
+\fBovs\-brcompatd\fR replaces by a command from the set understood by
+\fBovs\-vswitchd\fR. Any instances of \fB%%\fR in \fIcommand\fR are
+replaced by a single \fB%\fR. The \fB%\fR character may not otherwise
+appear in \fIcommand\fR.
+.IP
+The commands that are substituted into \fIcommand\fR are those that
+can be listed by passing \fB-e help\fR to \fBovs\-appctl\fR with
+\fBovs\-vswitchd\fR as target. The command that is substituted may
+include whitespace-separated arguments, so \fIcommand\fR should include
+shell quotes around \fB%s\fR.
+.IP
+\fIcommand\fR must not redirect \fBovs\-appctl\fR's standard output or
+standard error streams, because \fBovs\-brcompatd\fR expects to read
+both of these streams separately.
.TP
\fB--prune-timeout=\fIsecs\fR
.
#include <config.h>
+#include <asm/param.h>
#include <assert.h>
#include <errno.h>
#include <getopt.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <time.h>
#include <fcntl.h>
#include <unistd.h>
#include "daemon.h"
#include "dirs.h"
#include "dpif.h"
+#include "dynamic-string.h"
#include "fatal-signal.h"
#include "fault.h"
#include "leak-checker.h"
#include "netlink.h"
#include "ofpbuf.h"
#include "openvswitch/brcompat-netlink.h"
+#include "packets.h"
#include "poll-loop.h"
#include "process.h"
#include "signals.h"
/* Config file shared with ovs-vswitchd (usually ovs-vswitchd.conf). */
static char *config_file;
-/* Command to run (via system()) to reload the ovs-vswitchd configuration
- * file. */
-static char *reload_command;
+/* Shell command template used to send a control command to the running
+ * ovs-vswitchd process.  execute_appctl_command() expands it with xasprintf()
+ * and runs the result through "/bin/sh -c" using process_run_capture().  The
+ * string must contain one instance of %s, which is replaced by the control
+ * command. */
+static char *appctl_command;
/* Netlink socket to listen for interface changes. */
static struct nl_sock *rtnl_sock;
return cfg_has_section("bridge.%s", name);
}
+/* Substitutes 'unixctl_command' for the %s in 'appctl_command' and runs the
+ * resulting string with "/bin/sh -c", capturing the child's stdout and
+ * stderr.
+ *
+ * Returns 0 if the command ran and exited with status 0. Otherwise returns
+ * the nonzero error reported by process_run_capture(), or ECHILD if the
+ * command ran but exited with a nonzero status. Failures are logged.
+ *
+ * If 'output' is nonnull, the captured stdout is stored in '*output' whether
+ * or not an error is returned, and is no longer freed here. If 'output' is
+ * null, the captured stdout is freed. Anything written to stderr is logged
+ * at INFO level. */
+static int
+execute_appctl_command(const char *unixctl_command, char **output)
+{
+    char *captured_stdout, *captured_stderr;
+    char *shell_command;
+    int retval, exit_status;
+    char *argv[5];
+
+    /* Build "/bin/sh -c <command>". */
+    shell_command = xasprintf(appctl_command, unixctl_command);
+    argv[0] = "/bin/sh";
+    argv[1] = "-c";
+    argv[2] = shell_command;
+    argv[3] = NULL;
+
+    /* Run the process and report how it went. */
+    retval = process_run_capture(argv, &captured_stdout, &captured_stderr,
+                                 &exit_status);
+    if (retval) {
+        VLOG_ERR("failed to execute %s command via ovs-appctl: %s",
+                 unixctl_command, strerror(retval));
+    } else if (exit_status) {
+        char *status_msg = process_status_msg(exit_status);
+        VLOG_ERR("ovs-appctl exited with error (%s)", status_msg);
+        free(status_msg);
+        retval = ECHILD;
+    }
+
+    /* Hand stdout to the caller if it asked for it, otherwise discard it. */
+    if (!output) {
+        free(captured_stdout);
+    } else {
+        *output = captured_stdout;
+    }
+
+    /* Anything on stderr is worth a log entry, but nothing more. */
+    if (captured_stderr && *captured_stderr) {
+        VLOG_INFO("ovs-appctl wrote to stderr:\n%s", captured_stderr);
+    }
+    free(captured_stderr);
+
+    free(shell_command);
+
+    return retval;
+}
+
static int
rewrite_and_reload_config(void)
{
int error1 = cfg_write();
int error2 = cfg_read();
long long int reload_start = time_msec();
- int error3 = system(reload_command);
+ int error3 = execute_appctl_command("vswitchd/reload", NULL);
long long int elapsed = time_msec() - reload_start;
COVERAGE_INC(brcompatd_reload);
if (elapsed > 0) {
VLOG_INFO("reload command executed in %lld ms", elapsed);
}
- if (error3 == -1) {
- VLOG_ERR("failed to execute reload command: %s", strerror(errno));
- } else if (error3 != 0) {
- char *msg = process_status_msg(error3);
- VLOG_ERR("reload command exited with error (%s)", msg);
- free(msg);
- }
- return error1 ? error1 : error2 ? error2 : error3 ? ECHILD : 0;
+ return error1 ? error1 : error2 ? error2 : error3;
}
return 0;
}
+/* Initializes 'ifaces' and fills it with the name of every interface
+ * configured on 'bridge'. A port that has a "bonding.<port>" section is
+ * replaced by the keys of "bonding.<port>.slave", so that bonded ports are
+ * broken down into their constituent interfaces; any other port is added
+ * under its own name. */
+static void
+get_bridge_ifaces(const char *bridge, struct svec *ifaces)
+{
+    struct svec port_names;
+    int idx;
+
+    svec_init(&port_names);
+    svec_init(ifaces);
+    cfg_get_all_keys(&port_names, "bridge.%s.port", bridge);
+
+    for (idx = 0; idx < port_names.n; idx++) {
+        const char *name = port_names.names[idx];
+        struct svec members;
+
+        if (!cfg_has_section("bonding.%s", name)) {
+            /* Ordinary port: it is its own interface. */
+            svec_add(ifaces, name);
+            continue;
+        }
+
+        /* Bonded port: substitute its slave interfaces. */
+        svec_init(&members);
+        cfg_get_all_keys(&members, "bonding.%s.slave", name);
+        svec_append(ifaces, &members);
+        svec_destroy(&members);
+    }
+
+    svec_destroy(&port_names);
+}
+
/* Go through the configuration file and remove any ports that no longer
* exist associated with a bridge. */
static void
cfg_get_subsections(&bridges, "bridge");
for (i=0; i<bridges.n; i++) {
const char *br_name = bridges.names[i];
- struct svec ports, ifaces;
-
- svec_init(&ports);
-
- /* Get all the interfaces for the given bridge, breaking bonded
- * interfaces down into their constituent parts. */
- svec_init(&ifaces);
- cfg_get_all_keys(&ports, "bridge.%s.port", br_name);
- for (j=0; j<ports.n; j++) {
- const char *port_name = ports.names[j];
- if (cfg_has_section("bonding.%s", port_name)) {
- struct svec slaves;
- svec_init(&slaves);
- cfg_get_all_keys(&slaves, "bonding.%s.slave", port_name);
- svec_append(&ifaces, &slaves);
- svec_destroy(&slaves);
- } else {
- svec_add(&ifaces, port_name);
- }
- }
- svec_destroy(&ports);
+ struct svec ifaces;
- /* Check that the interfaces exist. */
+ /* Check that each bridge interface exists. */
+ get_bridge_ifaces(br_name, &ifaces);
for (j = 0; j < ifaces.n; j++) {
const char *iface_name = ifaces.names[j];
enum netdev_flags flags;
static int
parse_command(struct ofpbuf *buffer, uint32_t *seq, const char **br_name,
- const char **port_name)
+ const char **port_name, uint64_t *count, uint64_t *skip)
{
static const struct nl_policy policy[] = {
[BRC_GENL_A_DP_NAME] = { .type = NL_A_STRING },
[BRC_GENL_A_PORT_NAME] = { .type = NL_A_STRING, .optional = true },
+ [BRC_GENL_A_FDB_COUNT] = { .type = NL_A_U64, .optional = true },
+ [BRC_GENL_A_FDB_SKIP] = { .type = NL_A_U64, .optional = true },
};
struct nlattr *attrs[ARRAY_SIZE(policy)];
if (!nl_policy_parse(buffer, NLMSG_HDRLEN + GENL_HDRLEN, policy,
attrs, ARRAY_SIZE(policy))
- || (port_name && !attrs[BRC_GENL_A_PORT_NAME])) {
+ || (port_name && !attrs[BRC_GENL_A_PORT_NAME])
+ || (count && !attrs[BRC_GENL_A_FDB_COUNT])
+ || (skip && !attrs[BRC_GENL_A_FDB_SKIP])) {
return EINVAL;
}
if (port_name) {
*port_name = nl_attr_get_string(attrs[BRC_GENL_A_PORT_NAME]);
}
+ if (count) {
+ *count = nl_attr_get_u64(attrs[BRC_GENL_A_FDB_COUNT]);
+ }
+ if (skip) {
+ *skip = nl_attr_get_u64(attrs[BRC_GENL_A_FDB_SKIP]);
+ }
return 0;
}
static void
-send_reply(uint32_t seq, int error)
+send_reply(uint32_t seq, int error, struct ofpbuf *fdb_query_data)
{
struct ofpbuf msg;
int retval;
BRC_GENL_C_DP_RESULT, 1);
((struct nlmsghdr *) msg.data)->nlmsg_seq = seq;
nl_msg_put_u32(&msg, BRC_GENL_A_ERR_CODE, error);
+ if (fdb_query_data) {
+ nl_msg_put_unspec(&msg, BRC_GENL_A_FDB_DATA,
+ fdb_query_data->data, fdb_query_data->size);
+ }
/* Send reply. */
retval = nl_sock_send(brc_sock, &msg, false);
uint32_t seq;
int error;
- error = parse_command(buffer, &seq, &br_name, NULL);
+ error = parse_command(buffer, &seq, &br_name, NULL, NULL, NULL);
if (!error) {
error = add ? add_bridge(br_name) : del_bridge(br_name);
if (!error) {
error = rewrite_and_reload_config();
}
- send_reply(seq, error);
+ send_reply(seq, error, NULL);
}
return error;
}
uint32_t seq;
int error;
- error = parse_command(buffer, &seq, &br_name, &port_name);
+ error = parse_command(buffer, &seq, &br_name, &port_name, NULL, NULL);
if (!error) {
if (!bridge_exists(br_name)) {
VLOG_WARN("%s %s %s: no bridge named %s",
VLOG_INFO("%s %s %s: success", cmd_name, br_name, port_name);
error = rewrite_and_reload_config();
}
- send_reply(seq, error);
+ send_reply(seq, error, NULL);
}
return error;
}
+/* Handles a BRC_GENL_C_FDB_QUERY request in 'buffer'.
+ *
+ * Runs "fdb/show <bridge>" through ovs-appctl, converts up to the requested
+ * number of entries of its text output (after skipping the requested number
+ * of valid entries) into an array of binary fdb entries, and sends that
+ * array back in the reply as BRC_GENL_A_FDB_DATA.
+ *
+ * Returns 0 if a reply carrying the data was sent. Returns a nonzero error
+ * if the request could not be parsed (no reply is sent) or if the ovs-appctl
+ * command failed (an error reply is sent). */
+static int
+handle_fdb_query_cmd(struct ofpbuf *buffer)
+{
+    /* This structure is copied directly from the Linux 2.6.30 header files.
+     * It would be more straightforward to #include <linux/if_bridge.h>, but
+     * the 'port_hi' member was only introduced in Linux 2.6.26 and so systems
+     * with old header files won't have it. */
+    struct __fdb_entry {
+        __u8 mac_addr[6];
+        __u8 port_no;
+        __u8 is_local;
+        __u32 ageing_timer_value;
+        __u8 port_hi;
+        __u8 pad0;
+        __u16 unused;
+    };
+
+    struct mac {
+        uint8_t addr[6];
+    };
+    struct mac *local_macs;
+    int n_local_macs;
+    int i;
+
+    struct ofpbuf query_data;
+    char *unixctl_command;
+    uint64_t count, skip;
+    const char *br_name;
+    struct svec ifaces;
+    char *output;
+    char *save_ptr;
+    uint32_t seq;
+    int error;
+
+    /* Parse the command received from brcompat_mod. */
+    error = parse_command(buffer, &seq, &br_name, NULL, &count, &skip);
+    if (error) {
+        return error;
+    }
+
+    /* Fetch the forwarding database using ovs-appctl. */
+    unixctl_command = xasprintf("fdb/show %s", br_name);
+    error = execute_appctl_command(unixctl_command, &output);
+    free(unixctl_command);
+    if (error) {
+        /* execute_appctl_command() hands over whatever stdout it captured
+         * even when it reports an error, so release it here rather than
+         * leaking it. */
+        free(output);
+        send_reply(seq, error, NULL);
+        return error;
+    }
+
+    /* Fetch the MAC address for each interface on the bridge, so that we can
+     * fill in the is_local field in the response. */
+    cfg_read();
+    get_bridge_ifaces(br_name, &ifaces);
+    local_macs = xmalloc(ifaces.n * sizeof *local_macs);
+    n_local_macs = 0;
+    for (i = 0; i < ifaces.n; i++) {
+        const char *iface_name = ifaces.names[i];
+        struct mac *mac = &local_macs[n_local_macs];
+        /* Only keep the slot if the address lookup succeeded. */
+        if (!netdev_nodev_get_etheraddr(iface_name, mac->addr)) {
+            n_local_macs++;
+        }
+    }
+    svec_destroy(&ifaces);
+
+    /* Parse the response from ovs-appctl and convert it to binary format to
+     * pass back to the kernel. */
+    ofpbuf_init(&query_data, sizeof(struct __fdb_entry) * 8);
+    save_ptr = NULL;
+    strtok_r(output, "\n", &save_ptr); /* Skip header line. */
+    while (count > 0) {
+        struct __fdb_entry *entry;
+        int port, vlan, age;
+        uint8_t mac[ETH_ADDR_LEN];
+        char *line;
+        bool is_local;
+
+        line = strtok_r(NULL, "\n", &save_ptr);
+        if (!line) {
+            break;
+        }
+
+        if (sscanf(line, "%d %d "ETH_ADDR_SCAN_FMT" %d",
+                   &port, &vlan, ETH_ADDR_SCAN_ARGS(mac), &age)
+            != 2 + ETH_ADDR_SCAN_COUNT + 1) {
+            /* Must be static: a rate limiter that is re-initialized on
+             * every iteration never accumulates state and so never limits
+             * anything. */
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+            VLOG_INFO_RL(&rl, "fdb/show output has invalid format: %s", line);
+            continue;
+        }
+
+        /* Only well-formed entries count toward 'skip'. */
+        if (skip > 0) {
+            skip--;
+            continue;
+        }
+
+        /* Is this the MAC address of an interface on the bridge? */
+        is_local = false;
+        for (i = 0; i < n_local_macs; i++) {
+            if (eth_addr_equals(local_macs[i].addr, mac)) {
+                is_local = true;
+                break;
+            }
+        }
+
+        /* The port number is split into a low byte and a high byte. */
+        entry = ofpbuf_put_uninit(&query_data, sizeof *entry);
+        memcpy(entry->mac_addr, mac, ETH_ADDR_LEN);
+        entry->port_no = port & 0xff;
+        entry->is_local = is_local;
+        entry->ageing_timer_value = age * HZ;
+        entry->port_hi = (port & 0xff00) >> 8;
+        entry->pad0 = 0;
+        entry->unused = 0;
+        count--;
+    }
+    free(local_macs);
+    free(output);
+
+    send_reply(seq, 0, &query_data);
+    ofpbuf_uninit(&query_data);
+
+    return 0;
+}
+
static int
brc_recv_update(void)
{
retval = handle_port_cmd(buffer, false);
break;
+ case BRC_GENL_C_FDB_QUERY:
+ retval = handle_fdb_query_cmd(buffer);
+ break;
+
default:
retval = EPROTO;
}
return 0;
}
+/* Checks that 'appctl_command' is safe to use as a printf-style format with a
+ * single string argument: every '%' must begin either "%%" or "%s", and "%s"
+ * must occur exactly once. Calls ovs_fatal() with an explanatory message
+ * otherwise. */
+static void
+validate_appctl_command(void)
+{
+    const char *pct = strchr(appctl_command, '%');
+    int n_subst = 0;
+
+    while (pct) {
+        switch (pct[1]) {
+        case '%':
+            /* Escaped percent sign: nothing to count. */
+            break;
+
+        case 's':
+            n_subst++;
+            break;
+
+        default:
+            ovs_fatal(0, "only '%%s' and '%%%%' allowed in --appctl-command");
+        }
+
+        /* Resume the search after the two-character directive. */
+        pct = strchr(pct + 2, '%');
+    }
+
+    if (n_subst != 1) {
+        ovs_fatal(0, "'%%s' must appear exactly once in --appctl-command");
+    }
+}
+
static void
parse_options(int argc, char *argv[])
{
enum {
OPT_LOCK_TIMEOUT = UCHAR_MAX + 1,
OPT_PRUNE_TIMEOUT,
- OPT_RELOAD_COMMAND,
+ OPT_APPCTL_COMMAND,
VLOG_OPTION_ENUMS,
LEAK_CHECKER_OPTION_ENUMS
};
{"version", no_argument, 0, 'V'},
{"lock-timeout", required_argument, 0, OPT_LOCK_TIMEOUT},
{"prune-timeout", required_argument, 0, OPT_PRUNE_TIMEOUT},
- {"reload-command", required_argument, 0, OPT_RELOAD_COMMAND},
+ {"appctl-command", required_argument, 0, OPT_APPCTL_COMMAND},
DAEMON_LONG_OPTIONS,
VLOG_LONG_OPTIONS,
LEAK_CHECKER_LONG_OPTIONS,
char *short_options = long_options_to_short_options(long_options);
int error;
- reload_command = xasprintf("%s/ovs-appctl -t "
+ appctl_command = xasprintf("%s/ovs-appctl -t "
"%s/ovs-vswitchd.`cat %s/ovs-vswitchd.pid`.ctl "
- "-e vswitchd/reload 2>&1 "
- "| /usr/bin/logger -t brcompatd-reload",
+ "-e '%%s'",
ovs_bindir, ovs_rundir, ovs_rundir);
for (;;) {
int c;
prune_timeout = atoi(optarg) * 1000;
break;
- case OPT_RELOAD_COMMAND:
- reload_command = optarg;
+ case OPT_APPCTL_COMMAND:
+ appctl_command = optarg;
break;
VLOG_OPTION_HANDLERS
}
free(short_options);
+ validate_appctl_command();
+
argc -= optind;
argv += optind;
"CONFIG is the configuration file used by ovs-vswitchd.\n",
program_name, program_name);
printf("\nConfiguration options:\n"
- " --reload-command=COMMAND shell command to reload ovs-vswitchd\n"
+ " --appctl-command=COMMAND shell command to run ovs-appctl\n"
" --prune-timeout=SECS wait at most SECS before pruning ports\n"
" --lock-timeout=MSECS wait at most MSECS for CONFIG to unlock\n"
);
" -h, --help display this help message\n"
" -V, --version display version information\n");
leak_checker_usage();
- printf("\nThe default reload command is:\n%s\n", reload_command);
+ printf("\nThe default appctl command is:\n%s\n", appctl_command);
exit(EXIT_SUCCESS);
}
\fBbonding.\fIname\fB.updelay\fR or
\fBbonding.\fIname\fB.downdelay\fR, respectively, to a positive
integer, interpreted in milliseconds.
+The \fBupdelay\fR setting is honored only when at least one bonded
+interface is already enabled. When no interfaces are enabled, the
+first bonded interface to come up is enabled immediately. The
+\fBdowndelay\fR setting is always honored.
.PP
The following syntax bonds \fBeth0\fR and \fBeth1\fR into a bonding
device named \fBbond0\fR, which is added to bridge \fBmybr\fR along
valgrind_opt="valgrind --log-file=$BRCOMPATD_VALGRIND_LOG $BRCOMPATD_VALGRIND_OPT"
daemonize="n"
fi
- reload_cmd='/root/vswitch/bin/ovs-appctl -t /var/run/ovs-vswitchd.`cat /var/run/ovs-vswitchd.pid`.ctl -e vswitchd/reload 2>&1 | /usr/bin/logger -t brcompatd-reload'
+ appctl_cmd="$appctl -t /var/run/ovs-vswitchd.\`cat $VSWITCHD_PIDFILE\`.ctl -e '%s'"
if [ "$daemonize" != "y" ]; then
# Start in background and force a "success" message
action "Starting ovs-brcompatd ($strace_opt$valgrind_opt)" true
- (nice -n "$VSWITCHD_PRIORITY" $strace_opt $valgrind_opt "$brcompatd" --reload-command="$reload_cmd" -P$BRCOMPATD_PIDFILE -vANY:CONSOLE:EMER $syslog_opt $logfile_level_opt $logfile_file_opt $leak_opt "$VSWITCHD_CONF") &
+ (nice -n "$VSWITCHD_PRIORITY" $strace_opt $valgrind_opt "$brcompatd" --appctl-command="$appctl_cmd" -P$BRCOMPATD_PIDFILE -vANY:CONSOLE:EMER $syslog_opt $logfile_level_opt $logfile_file_opt $leak_opt "$VSWITCHD_CONF") &
else
- action "Starting ovs-brcompatd" nice -n "$BRCOMPATD_PRIORITY" $strace_opt $valgrind_opt "$brcompatd" --reload-command="$reload_cmd" -P$BRCOMPATD_PIDFILE -D -vANY:CONSOLE:EMER $syslog_opt $logfile_level_opt $logfile_file_opt $leak_opt "$VSWITCHD_CONF"
+ action "Starting ovs-brcompatd" nice -n "$BRCOMPATD_PRIORITY" $strace_opt $valgrind_opt "$brcompatd" --appctl-command="$appctl_cmd" -P$BRCOMPATD_PIDFILE -D -vANY:CONSOLE:EMER $syslog_opt $logfile_level_opt $logfile_file_opt $leak_opt "$VSWITCHD_CONF"
fi
}