From 2dc7590de5e0adae1e332ff84c0c8af264f403b9 Mon Sep 17 00:00:00 2001
From: Ben Pfaff
Date: Tue, 1 May 2012 15:36:44 -0700
Subject: [PATCH] ovs-ctl: Make "force-reload-kmod" warn when DHCP clients must be restarted.

This should make it more obvious when the admin needs to restart a DHCP
client (or other daemon). Without this, unless the admin carefully reads
the documentation, the first notice he gets about a need to restart the
DHCP client can easily be when the lease expires and the machine drops off
the network.

Bug #5391.
Tested-by: Gurucharan Shetty
Suggested-by: Duffie Cooley
Signed-off-by: Ben Pfaff
---
 debian/openvswitch-switch.install |   1 +
 rhel/openvswitch.spec.in          |   1 +
 utilities/automake.mk             |   6 +-
 utilities/ovs-check-dead-ifs.in   | 120 ++++++++++++++++++++++++++++++
 utilities/ovs-ctl.8               |  14 ++--
 utilities/ovs-ctl.in              |   2 +
 xenserver/openvswitch-xen.spec.in |   1 +
 7 files changed, 139 insertions(+), 6 deletions(-)
 create mode 100755 utilities/ovs-check-dead-ifs.in

diff --git a/debian/openvswitch-switch.install b/debian/openvswitch-switch.install
index a7006ac0..557429b4 100644
--- a/debian/openvswitch-switch.install
+++ b/debian/openvswitch-switch.install
@@ -6,6 +6,7 @@ usr/bin/ovs-vsctl
 usr/bin/ovsdb-tool
 usr/sbin/ovs-vswitchd
 usr/sbin/ovsdb-server
+usr/share/openvswitch/scripts/ovs-check-dead-ifs
 usr/share/openvswitch/scripts/ovs-ctl
 usr/share/openvswitch/scripts/ovs-lib
 usr/share/openvswitch/scripts/ovs-save
diff --git a/rhel/openvswitch.spec.in b/rhel/openvswitch.spec.in
index e9997574..00d1aa75 100644
--- a/rhel/openvswitch.spec.in
+++ b/rhel/openvswitch.spec.in
@@ -146,6 +146,7 @@ exit 0
 /usr/share/openvswitch/bugtool-plugins/
 /usr/share/openvswitch/python/
 /usr/share/openvswitch/scripts/ovs-bugtool-*
+/usr/share/openvswitch/scripts/ovs-check-dead-ifs
 /usr/share/openvswitch/scripts/ovs-ctl
 /usr/share/openvswitch/scripts/ovs-lib
 /usr/share/openvswitch/scripts/ovs-save
diff --git a/utilities/automake.mk b/utilities/automake.mk
index 0e93ffba..9da7f828 100644
--- a/utilities/automake.mk
+++ b/utilities/automake.mk
@@ -13,10 +13,14 @@ bin_SCRIPTS += \
 	utilities/ovs-vlan-test
 endif
 noinst_SCRIPTS += utilities/ovs-pki-cgi
-scripts_SCRIPTS += utilities/ovs-ctl utilities/ovs-save
+scripts_SCRIPTS += \
+	utilities/ovs-check-dead-ifs \
+	utilities/ovs-ctl \
+	utilities/ovs-save
 scripts_DATA += utilities/ovs-lib
 
 EXTRA_DIST += \
+	utilities/ovs-check-dead-ifs.in \
 	utilities/ovs-ctl.in \
 	utilities/ovs-lib.in \
 	utilities/ovs-parse-leaks.in \
diff --git a/utilities/ovs-check-dead-ifs.in b/utilities/ovs-check-dead-ifs.in
new file mode 100755
index 00000000..53185d66
--- /dev/null
+++ b/utilities/ovs-check-dead-ifs.in
@@ -0,0 +1,120 @@
+#! @PYTHON@
+
+import os
+import re
+import stat
+import sys
+
+if "--help" in sys.argv:
+    sys.stdout.write("""\
+ovs-check-dead-ifs: Check for packet sockets for nonexistent network devices.
+
+One side effect of the "force-reload-kmod" command that reloads the
+Open vSwitch kernel module is that all the network devices that the
+Open vSwitch kernel module implemented get destroyed and then replaced
+by new instances with the same names. Unfortunately, programs that
+are listening for packets on the original network devices will not
+receive packets that arrive on the new instances. This causes some
+services, such as DHCP, to silently fail. This program looks for such
+problems and, if it finds any, prints information about programs that
+are in such a state. The system administrator should then take some
+action to fix the problem, such as restarting these programs.
+""")
+    sys.exit(0)
+elif len(sys.argv) > 1:
+    sys.stderr.write("ovs-check-dead-ifs: no arguments or options accepted "
+                     "(use --help for help)\n")
+    sys.exit(1)
+
+# Get the set of all valid ifindexes.
+#
+# 0 is always valid for our purposes because it means "any interface", so
+# it must be seeded here: it is not the ifindex of any listed device.
+valid_ifindexes = set([0])
+for ifname in os.listdir("/sys/class/net"):
+    fn = "/sys/class/net/%s/ifindex" % ifname
+    try:
+        ifindex_file = open(fn)
+        try:
+            valid_ifindexes.add(int(ifindex_file.readline()))
+        finally:
+            ifindex_file.close()
+    except IOError:
+        # Entry has no readable ifindex file; it contributes nothing.
+        pass
+    except ValueError:
+        # The extra blank line matches what this warning has always printed.
+        sys.stdout.write("%s: unexpected format\n\n" % fn)
+
+# Get inodes for all packet sockets whose ifindexes don't exist.
+#
+# In each row of /proc/net/packet, zero-based field 4 is read as the
+# ifindex the socket is bound to and field 8 as the socket's inode.
+invalid_inodes = set()
+f = open("/proc/net/packet")
+try:
+    f.readline()                    # Skip header line.
+    for line in f:
+        fields = line.split()
+        ifindex = int(fields[4])
+        if ifindex not in valid_ifindexes:
+            invalid_inodes.add(int(fields[8]))
+finally:
+    f.close()
+
+if not invalid_inodes:
+    sys.exit(0)
+
+# Now find the processes that are using those packet sockets.
+inode_re = re.compile(r'socket:\[([0-9]+)\]$')
+bad_pids = set()
+for pid in os.listdir("/proc"):
+    try:
+        pid = int(pid)
+    except ValueError:
+        continue
+
+    # A process can exit between the two listdir() calls, and its fd
+    # directory may be unreadable.  Skip it either way instead of aborting
+    # the whole scan with a traceback.
+    try:
+        fds = os.listdir("/proc/%d/fd" % pid)
+    except OSError:
+        continue
+
+    for fd in fds:
+        try:
+            fd = int(fd)
+        except ValueError:
+            continue
+
+        try:
+            s = os.stat("/proc/%d/fd/%d" % (pid, fd))
+        except OSError:
+            continue
+
+        if not stat.S_ISSOCK(s.st_mode):
+            continue
+
+        try:
+            linkname = os.readlink("/proc/%d/fd/%d" % (pid, fd))
+        except OSError:
+            continue
+
+        m = inode_re.match(linkname)
+        if not m:
+            continue
+
+        inode = int(m.group(1))
+        if inode in invalid_inodes:
+            bad_pids.add(pid)
+
+if bad_pids:
+    sys.stdout.write("""
+The following processes are listening for packets to arrive on network devices
+that no longer exist. You may want to restart them.
+""")
+    # exec replaces this process without flushing Python's buffers, so the
+    # warning would be lost whenever stdout is a pipe or a file.
+    sys.stdout.flush()
+    os.execvp("ps", ["ps"] + [str(pid) for pid in sorted(bad_pids)])
diff --git a/utilities/ovs-ctl.8 b/utilities/ovs-ctl.8
index 06ccfeca..988b5008 100644
--- a/utilities/ovs-ctl.8
+++ b/utilities/ovs-ctl.8
@@ -261,11 +261,15 @@ reloads the kernel module and restarts the OVS daemons (including
 .IP 6.
 Restores the kernel configuration state that was saved in step 3.
 .
-.PP
-The steps above are often enough to hot-upgrade a new kernel module
-with only a few seconds of downtime. DHCP is a common problem: if the
-ISC DHCP client is running on an OVS internal interface, then it will
-have to be restarted after completing the above procedure.
+.IP 7.
+Checks for daemons that may need to be restarted because they have
+packet sockets that are listening on old instances of Open vSwitch
+kernel interfaces and, if it finds any, prints a warning on stdout.
+DHCP is a common example: if the ISC DHCP client is running on an OVS
+internal interface, then it will have to be restarted after completing
+the above procedure. (It would be nice if \fBovs\-ctl\fR could restart
+daemons automatically, but the details are far too specific to a
+particular distribution and installation.)
 .
 .PP
 \fBforce\-kmod\-reload\fR internally stops and starts OVS, so it
diff --git a/utilities/ovs-ctl.in b/utilities/ovs-ctl.in
index ed558346..68853826 100755
--- a/utilities/ovs-ctl.in
+++ b/utilities/ovs-ctl.in
@@ -311,6 +311,8 @@ force_reload_kmod () {
     log="logger -p daemon.$level -t ovs-save"
     $log "force-reload-kmod interface restore script exited with status $rc:"
     $log -f "$script"
+
+    "$datadir/scripts/ovs-check-dead-ifs"
 }
 
 ## --------------- ##
diff --git a/xenserver/openvswitch-xen.spec.in b/xenserver/openvswitch-xen.spec.in
index 1e8d764f..46b6f60e 100644
--- a/xenserver/openvswitch-xen.spec.in
+++ b/xenserver/openvswitch-xen.spec.in
@@ -407,6 +407,7 @@ exit 0
 /etc/logrotate.d/openvswitch
 /etc/profile.d/openvswitch.sh
 /usr/share/openvswitch/python/
+/usr/share/openvswitch/scripts/ovs-check-dead-ifs
 /usr/share/openvswitch/scripts/ovs-xapi-sync
 /usr/share/openvswitch/scripts/interface-reconfigure
 /usr/share/openvswitch/scripts/InterfaceReconfigure.py
-- 
2.30.2