[Pkg-samba-maint] [ctdb] 01/10: Imported Upstream version 2.3+debian0

Thu Aug 15 18:42:46 UTC 2013

This is an automated email from the git hooks/post-receive script.

sathieu pushed a commit to branch debian-sid
in repository ctdb.

commit b8de429a6bc7561ea6cbe544bd15f848d16eab19
Author: Mathieu Parent <math.parent at gmail.com>
Date:   Sat Aug 10 19:22:10 2013 +0200

    Imported Upstream version 2.3+debian0
---
 Makefile.in                                  |  102 ++++---
 NEWS                                         |   70 +++++
 client/ctdb_client.c                         |   14 +-
 common/ctdb_io.c                             |    2 +-
 common/ctdb_logging.c                        |   15 +-
 common/ctdb_ltdb.c                           |    2 +-
 common/ctdb_message.c                        |    2 +-
 common/ctdb_util.c                           |   11 +-
 common/system_aix.c                          |    6 +
 common/system_common.c                       |    2 +-
 common/system_freebsd.c                      |    6 +
 common/system_gnu.c                          |    6 +
 common/system_kfreebsd.c                     |    6 +
 common/system_linux.c                        |   26 +-
 config/ctdb-crash-cleanup.sh                 |    2 +-
 config/ctdb.init                             |  389 ++++----------------------
 config/ctdb.service                          |   15 +
 config/ctdb.sysconfig                        |   19 ++
 config/ctdbd_wrapper                         |  275 ++++++++++++++++++
 config/debug_locks.sh                        |   45 +++
 config/events.d/00.ctdb                      |  113 +++++++-
 config/events.d/13.per_ip_routing            |    2 +-
 config/events.d/60.ganesha                   |  114 ++++----
 config/events.d/60.nfs                       |   25 ++
 config/functions                             |   40 ++-
 configure                                    |   18 +-
 doc/ctdb.1                                   |   86 +++++-
 doc/ctdb.1.html                              |  204 ++++++++------
 doc/ctdb.1.xml                               |   70 ++++-
 doc/ctdbd.1                                  |   20 +-
 doc/ctdbd.1.html                             |  187 ++++++-------
 doc/ctdbd.1.xml                              |   33 +--
 doc/ltdbtool.1                               |    4 +-
 doc/ltdbtool.1.html                          |   12 +-
 doc/onnode.1                                 |    4 +-
 doc/onnode.1.html                            |   16 +-
 doc/ping_pong.1                              |    4 +-
 doc/ping_pong.1.html                         |   10 +-
 doc/recovery-process.txt                     |    6 +-
 include/ctdb_private.h                       |   16 +-
 include/ctdb_protocol.h                      |    7 -
 include/ctdb_version.h                       |    2 +-
 lib/util/db_wrap.c                           |    2 +-
 libctdb/control.c                            |   33 +--
 libctdb/ctdb.c                               |    1 +
 packaging/RPM/ctdb.spec                      |   71 +++--
 packaging/RPM/ctdb.spec.in                   |   69 +++--
 server/ctdb_banning.c                        |   39 ++-
 server/ctdb_call.c                           |    3 +-
 server/ctdb_control.c                        |   32 ++-
 server/ctdb_daemon.c                         |   44 ++-
 server/ctdb_freeze.c                         |   34 +--
 server/ctdb_lock.c                           |  205 +++-----------
 server/ctdb_logging.c                        |   45 ++-
 server/ctdb_ltdb_server.c                    |   69 +----
 server/ctdb_monitor.c                        |   16 +-
 server/ctdb_persistent.c                     |    2 +-
 server/ctdb_recover.c                        |    7 +-
 server/ctdb_recoverd.c                       |  298 ++++++++++----------
 server/ctdb_server.c                         |    2 +-
 server/ctdb_takeover.c                       |   78 +++++-
 server/ctdb_traverse.c                       |   92 +++---
 server/ctdb_tunables.c                       |    1 -
 server/ctdb_update_record.c                  |    3 +-
 server/ctdb_vacuum.c                         |    4 +-
 server/eventscript.c                         |   21 +-
 tcp/tcp_connect.c                            |   13 +-
 tcp/tcp_init.c                               |    2 +-
 tcp/tcp_io.c                                 |    2 +-
 tests/eventscripts/00.ctdb.init.001.sh       |   13 +
 tests/eventscripts/00.ctdb.init.002.sh       |   17 ++
 tests/eventscripts/00.ctdb.init.003.sh       |   16 ++
 tests/eventscripts/00.ctdb.init.004.sh       |   22 ++
 tests/eventscripts/00.ctdb.init.005.sh       |   20 ++
 tests/eventscripts/00.ctdb.init.006.sh       |   25 ++
 tests/eventscripts/00.ctdb.init.007.sh       |   16 ++
 tests/eventscripts/00.ctdb.init.008.sh       |   19 ++
 tests/eventscripts/00.ctdb.init.021.sh       |   11 +
 tests/eventscripts/00.ctdb.init.022.sh       |   18 ++
 tests/eventscripts/00.ctdb.init.023.sh       |   23 ++
 tests/eventscripts/60.ganesha.monitor.101.sh |   11 +
 tests/eventscripts/60.ganesha.monitor.131.sh |   17 ++
 tests/eventscripts/60.ganesha.monitor.141.sh |   39 +++
 tests/eventscripts/60.nfs.monitor.102.sh     |   15 +
 tests/eventscripts/60.nfs.monitor.103.sh     |   15 +
 tests/eventscripts/60.nfs.monitor.104.sh     |   18 ++
 tests/eventscripts/60.nfs.monitor.113.sh     |   18 ++
 tests/eventscripts/60.nfs.monitor.114.sh     |   18 ++
 tests/eventscripts/etc-ctdb/rc.local         |    6 +
 tests/eventscripts/scripts/local.sh          |   53 +++-
 tests/eventscripts/stubs/date                |    7 +
 tests/eventscripts/stubs/ip                  |   25 +-
 tests/eventscripts/stubs/pidof               |   10 +
 tests/eventscripts/stubs/tdbdump             |    9 +
 tests/eventscripts/stubs/tdbtool             |   15 +
 tests/scripts/integration.bash               |   21 +-
 tests/src/ctdb_bench.c                       |    3 +
 tests/src/ctdb_fetch.c                       |    5 +
 tests/src/ctdb_fetch_readonly_loop.c         |    3 +
 tests/src/ctdb_trackingdb_test.c             |    3 +
 tests/src/ctdb_traverse.c                    |    3 +
 tests/src/ctdb_update_record.c               |    3 +
 tests/src/ctdb_update_record_persistent.c    |    5 +-
 tests/src/ctdbd_test.c                       |    2 +-
 tools/ctdb.c                                 |   56 ++--
 utils/nagios/check_ctdb                      |    6 +-
 utils/ping_pong/ping_pong.c                  |    4 +
 web/samba.html                               |   12 +-
 108 files changed, 2387 insertions(+), 1411 deletions(-)

diff --git a/Makefile.in b/Makefile.in
index bfeccf8..678141f 100755
--- a/Makefile.in
+++ b/Makefile.in
@@ -50,8 +50,16 @@ PMDA_LIBS = -lpcp -lpcp_pmda
 PMDA_INSTALL = @CTDB_PMDA_INSTALL@
 PMDA_DEST_DIR = /var/lib/pcp/pmdas/ctdb
 
+WRAPPER=@
+ifeq ($(V),1)
+WRAPPER=
+endif
+ifeq ($(VERBOSE),1)
+WRAPPER=
+endif
+
 ifeq ($(CC),gcc)
-EXTRA_CFLAGS=-Wno-format-zero-length -fPIC
+EXTRA_CFLAGS=-Wno-format-zero-length -Wno-deprecated-declarations -fPIC
 endif
 
 CFLAGS=@CPPFLAGS@ -g -I$(srcdir)/include -Iinclude -Ilib -Ilib/util -I$(srcdir) \
@@ -59,7 +67,7 @@ CFLAGS=@CPPFLAGS@ -g -I$(srcdir)/include -Iinclude -Ilib -Ilib/util -I$(srcdir)
 	-DVARDIR=\"$(localstatedir)\" -DETCDIR=\"$(etcdir)\" \
 	-DLOGDIR=\"$(logdir)\" -DBINDIR=\"$(bindir)\" \
 	-DSOCKPATH=\"$(sockpath)\" \
-	-DUSE_MMAP=1 -DTEVENT_DEPRECATED_QUIET=1 @CFLAGS@ $(POPT_CFLAGS) \
+	-DUSE_MMAP=1 @CFLAGS@ $(POPT_CFLAGS) \
 	$(EXTRA_CFLAGS)
 
 LDSHFLAGS=-fPIC -shared
@@ -145,139 +153,139 @@ showlayout::
 .c.o:
 	@echo Compiling $*.c
 	@mkdir -p `dirname $@`
-	@$(CC) $(CFLAGS) -c $< -o $@
+	$(WRAPPER) $(CC) $(CFLAGS) -c $< -o $@
 
 dirs:
-	@mkdir -p $(DIRS)
+	$(WRAPPER) mkdir -p $(DIRS)
 
 $(CTDB_VERSION_H):
 	@echo Generating $@
-	@./packaging/mkversion.sh
+	$(WRAPPER) ./packaging/mkversion.sh
 
 bin/ctdbd: $(CTDB_SERVER_OBJ)
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ $(CTDB_SERVER_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ $(CTDB_SERVER_OBJ) $(LIB_FLAGS)
 
 libctdb/libctdb.a: $(CTDB_LIB_OBJ)
 	@echo Linking $@
 	-rm -f libctdb.a
-	@$(AR) $(ARFLAGS) libctdb/libctdb.a $(CTDB_LIB_OBJ)
-	@$(RANLIB) libctdb/libctdb.a
+	$(WRAPPER) $(AR) $(ARFLAGS) libctdb/libctdb.a $(CTDB_LIB_OBJ)
+	$(WRAPPER) $(RANLIB) libctdb/libctdb.a
 
 libctdb/libctdb.so.0: $(CTDB_LIB_OBJ)
 	@echo Linking $@
-	@$(SHLD) -Wl,-soname=libctdb.so.0 $(CTDB_LIB_OBJ)
+	$(WRAPPER) $(SHLD) -Wl,-soname=libctdb.so.0 $(CTDB_LIB_OBJ)
 
 libctdb/libctdb.so: libctdb/libctdb.so.0
 	@echo Creating $@
-	-@rm -f libctdb/libctdb.so
-	@ln -s libctdb.so.0 libctdb/libctdb.so
+	$(WRAPPER) rm -f libctdb/libctdb.so
+	$(WRAPPER) ln -s libctdb.so.0 libctdb/libctdb.so
 
 bin/scsi_io: $(CTDB_CLIENT_OBJ) utils/scsi_io/scsi_io.o 
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ utils/scsi_io/scsi_io.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ utils/scsi_io/scsi_io.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
 
 bin/ctdb: $(CTDB_CLIENT_OBJ) tools/ctdb.o tools/ctdb_vacuum.o libctdb/libctdb.a
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tools/ctdb.o tools/ctdb_vacuum.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS) libctdb/libctdb.a
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tools/ctdb.o tools/ctdb_vacuum.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS) libctdb/libctdb.a
 
 bin/ltdbtool: tools/ltdbtool.o $(TDB_OBJ)
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ $+ $(TDB_LIBS) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ $+ $(TDB_LIBS) $(LIB_FLAGS)
 
 bin/ctdb_lock_helper: server/ctdb_lock_helper.o lib/util/util_file.o $(CTDB_EXTERNAL_OBJ)
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ server/ctdb_lock_helper.o lib/util/util_file.o $(CTDB_EXTERNAL_OBJ) $(TDB_LIBS) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ server/ctdb_lock_helper.o lib/util/util_file.o $(CTDB_EXTERNAL_OBJ) $(TDB_LIBS) $(LIB_FLAGS)
 
 bin/smnotify: utils/smnotify/gen_xdr.o utils/smnotify/gen_smnotify.o utils/smnotify/smnotify.o $(POPT_OBJ)
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ utils/smnotify/smnotify.o utils/smnotify/gen_xdr.o utils/smnotify/gen_smnotify.o $(POPT_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ utils/smnotify/smnotify.o utils/smnotify/gen_xdr.o utils/smnotify/gen_smnotify.o $(POPT_OBJ) $(LIB_FLAGS)
 
 utils/smnotify/smnotify.o: utils/smnotify/smnotify.c utils/smnotify/smnotify.h
 
 utils/smnotify/smnotify.h:  utils/smnotify/smnotify.x
 	@echo Generating $@
-	rpcgen -h utils/smnotify/smnotify.x > utils/smnotify/smnotify.h
+	$(WRAPPER) rpcgen -h utils/smnotify/smnotify.x > utils/smnotify/smnotify.h
 
 utils/smnotify/gen_xdr.c: utils/smnotify/smnotify.x utils/smnotify/smnotify.h
 	@echo Generating $@
-	rpcgen -c utils/smnotify/smnotify.x | grep -Ev '^[[:space:]]+register int32_t \*buf;' > utils/smnotify/gen_xdr.c 
+	$(WRAPPER) rpcgen -c utils/smnotify/smnotify.x | grep -Ev '^[[:space:]]+register int32_t \*buf;' > utils/smnotify/gen_xdr.c 
 
 utils/smnotify/gen_smnotify.c: utils/smnotify/smnotify.x utils/smnotify/smnotify.h
 	@echo Generating $@
-	rpcgen -l utils/smnotify/smnotify.x > utils/smnotify/gen_smnotify.c 
+	$(WRAPPER) rpcgen -l utils/smnotify/smnotify.x > utils/smnotify/gen_smnotify.c 
 
 bin/ping_pong: utils/ping_pong/ping_pong.o
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ utils/ping_pong/ping_pong.o $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ utils/ping_pong/ping_pong.o $(LIB_FLAGS)
 
 bin/pmdactdb: $(CTDB_CLIENT_OBJ) utils/pmda/pmda_ctdb.o
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ utils/pmda/pmda_ctdb.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS) $(PMDA_LIBS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ utils/pmda/pmda_ctdb.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS) $(PMDA_LIBS)
 
 tests/bin/rb_test: $(CTDB_CLIENT_OBJ) tests/src/rb_test.o 
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tests/src/rb_test.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tests/src/rb_test.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
 
 tests/bin/ctdb_bench: $(CTDB_CLIENT_OBJ) tests/src/ctdb_bench.o 
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_bench.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tests/src/ctdb_bench.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
 
 tests/bin/ctdb_fetch: $(CTDB_CLIENT_OBJ) tests/src/ctdb_fetch.o 
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_fetch.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tests/src/ctdb_fetch.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
 
 tests/bin/ctdb_fetch_one: $(CTDB_CLIENT_OBJ) tests/src/ctdb_fetch_one.o 
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_fetch_one.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tests/src/ctdb_fetch_one.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
 
 tests/bin/ctdb_fetch_lock_once: libctdb/libctdb.a tests/src/ctdb_fetch_lock_once.o $(CTDB_EXTERNAL_OBJ)
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_fetch_lock_once.o $(CTDB_EXTERNAL_OBJ) libctdb/libctdb.a $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tests/src/ctdb_fetch_lock_once.o $(CTDB_EXTERNAL_OBJ) libctdb/libctdb.a $(LIB_FLAGS)
 
 tests/bin/ctdb_fetch_readonly_once: libctdb/libctdb.a tests/src/ctdb_fetch_readonly_once.o $(CTDB_EXTERNAL_OBJ)
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_fetch_readonly_once.o $(CTDB_EXTERNAL_OBJ) libctdb/libctdb.a $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tests/src/ctdb_fetch_readonly_once.o $(CTDB_EXTERNAL_OBJ) libctdb/libctdb.a $(LIB_FLAGS)
 
 tests/bin/ctdb_fetch_readonly_loop: $(CTDB_CLIENT_OBJ) tests/src/ctdb_fetch_readonly_loop.o
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_fetch_readonly_loop.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tests/src/ctdb_fetch_readonly_loop.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
 
 tests/bin/ctdb_trackingdb_test: $(CTDB_CLIENT_OBJ) tests/src/ctdb_trackingdb_test.o
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_trackingdb_test.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tests/src/ctdb_trackingdb_test.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
 
 tests/bin/ctdb_update_record: $(CTDB_CLIENT_OBJ) tests/src/ctdb_update_record.o
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_update_record.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tests/src/ctdb_update_record.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
 
 tests/bin/ctdb_update_record_persistent: $(CTDB_CLIENT_OBJ) tests/src/ctdb_update_record_persistent.o
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_update_record_persistent.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tests/src/ctdb_update_record_persistent.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
 
 tests/bin/ctdb_store: $(CTDB_CLIENT_OBJ) tests/src/ctdb_store.o
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_store.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tests/src/ctdb_store.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
 
 tests/bin/ctdb_traverse: $(CTDB_CLIENT_OBJ) tests/src/ctdb_traverse.o
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_traverse.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tests/src/ctdb_traverse.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
 
 tests/bin/ctdb_randrec: $(CTDB_CLIENT_OBJ) tests/src/ctdb_randrec.o
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_randrec.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tests/src/ctdb_randrec.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
 
 tests/bin/ctdb_persistent: $(CTDB_CLIENT_OBJ) tests/src/ctdb_persistent.o
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_persistent.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tests/src/ctdb_persistent.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
 
 tests/bin/ctdb_porting_tests: $(CTDB_CLIENT_OBJ) tests/src/ctdb_porting_tests.o
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_porting_tests.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tests/src/ctdb_porting_tests.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
 
 tests/bin/ctdb_transaction: $(CTDB_CLIENT_OBJ) tests/src/ctdb_transaction.o
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_transaction.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tests/src/ctdb_transaction.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
 
 CTDB_SERVER_MOST_OBJ = $(CTDB_SERVER_OBJ:server/ctdbd.o=)
 CTDBD_TEST_C = $(CTDB_SERVER_MOST_OBJ:.o=.c) tests/src/ctdbd_test.c
@@ -291,23 +299,23 @@ tests/src/ctdb_takeover_tests.o: tests/src/ctdb_takeover_tests.c $(CTDBD_TEST_C)
 
 tests/bin/ctdb_takeover_tests: $(CTDB_TEST_OBJ) tests/src/ctdb_takeover_tests.o
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_takeover_tests.o $(CTDB_TEST_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tests/src/ctdb_takeover_tests.o $(CTDB_TEST_OBJ) $(LIB_FLAGS)
 
 tests/src/ctdb_tool_libctdb.o: tests/src/ctdb_tool_libctdb.c tests/src/libctdb_test.c $(CTDB_TEST_C)
 
 tests/bin/ctdb_tool_libctdb: $(CTDB_TEST_OBJ) tests/src/ctdb_tool_libctdb.o
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_tool_libctdb.o $(CTDB_TEST_OBJ) $(POPT_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tests/src/ctdb_tool_libctdb.o $(CTDB_TEST_OBJ) $(POPT_OBJ) $(LIB_FLAGS)
 
 tests/src/ctdb_tool_stubby.o: tests/src/ctdb_tool_stubby.c tests/src/libctdb_test.c $(CTDB_TEST_C)
 
 tests/bin/ctdb_tool_stubby: $(CTDB_TEST_OBJ) tests/src/ctdb_tool_stubby.o
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_tool_stubby.o $(CTDB_TEST_OBJ) $(POPT_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ tests/src/ctdb_tool_stubby.o $(CTDB_TEST_OBJ) $(POPT_OBJ) $(LIB_FLAGS)
 
 tests/bin/ibwrapper_test: $(CTDB_CLIENT_OBJ) ib/ibwrapper_test.o
 	@echo Linking $@
-	@$(CC) $(CFLAGS) -o $@ ib/ibwrapper_test.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+	$(WRAPPER) $(CC) $(CFLAGS) -o $@ ib/ibwrapper_test.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
 
 manpages:
 	$(MAKE) -C doc
@@ -333,7 +341,6 @@ install: all manpages $(PMDA_INSTALL)
 	mkdir -p $(DESTDIR)$(etcdir)/ctdb/nfs-rpc-checks.d
 	mkdir -p $(DESTDIR)$(etcdir)/sudoers.d/
 	mkdir -p $(DESTDIR)$(etcdir)/ctdb/notify.d
-	mkdir -p $(DESTDIR)$(docdir)/ctdb
 	${INSTALLCMD} -m 644 ctdb.pc $(DESTDIR)$(libdir)/pkgconfig
 	${INSTALLCMD} -m 755 bin/ctdb $(DESTDIR)$(bindir)
 	${INSTALLCMD} -m 755 bin/ctdbd $(DESTDIR)$(sbindir)
@@ -350,10 +357,7 @@ install: all manpages $(PMDA_INSTALL)
 	${INSTALLCMD} -m 440 config/ctdb.sudoers $(DESTDIR)$(etcdir)/sudoers.d/ctdb
 	${INSTALLCMD} -m 644 config/functions $(DESTDIR)$(etcdir)/ctdb
 	${INSTALLCMD} -m 755 config/statd-callout $(DESTDIR)$(etcdir)/ctdb
-	${INSTALLCMD} -m 644 README $(DESTDIR)$(docdir)/ctdb/README
-	${INSTALLCMD} -m 644 COPYING $(DESTDIR)$(docdir)/ctdb/COPYING
-	${INSTALLCMD} -m 644 config/events.d/README $(DESTDIR)$(docdir)/ctdb/README.eventscripts
-	${INSTALLCMD} -m 644 doc/recovery-process.txt $(DESTDIR)$(docdir)/ctdb/recovery-process.txt
+	${INSTALLCMD} -m 755 config/ctdbd_wrapper $(DESTDIR)$(sbindir)
 	${INSTALLCMD} -m 755 config/events.d/00.ctdb $(DESTDIR)$(etcdir)/ctdb/events.d
 	${INSTALLCMD} -m 755 config/events.d/01.reclock $(DESTDIR)$(etcdir)/ctdb/events.d
 	${INSTALLCMD} -m 755 config/events.d/10.interface $(DESTDIR)$(etcdir)/ctdb/events.d
@@ -385,13 +389,7 @@ install: all manpages $(PMDA_INSTALL)
 	if [ -f doc/onnode.1 ];then ${INSTALLCMD} -m 644 doc/onnode.1 $(DESTDIR)$(mandir)/man1; fi
 	if [ -f doc/ltdbtool.1 ]; then ${INSTALLCMD} -m 644 doc/ltdbtool.1 $(DESTDIR)$(mandir)/man1; fi
 	if [ -f doc/ping_pong.1 ];then ${INSTALLCMD} -m 644 doc/ping_pong.1 $(DESTDIR)$(mandir)/man1; fi
-	if [ -f doc/ctdb.1.html ];then ${INSTALLCMD} -m 644 doc/ctdb.1.html $(DESTDIR)$(docdir)/ctdb; fi
-	if [ -f doc/ctdbd.1.html ];then ${INSTALLCMD} -m 644 doc/ctdbd.1.html $(DESTDIR)$(docdir)/ctdb; fi
-	if [ -f doc/onnode.1.html ];then ${INSTALLCMD} -m 644 doc/onnode.1.html $(DESTDIR)$(docdir)/ctdb; fi
-	if [ -f doc/ltdbtool.1.html ];then ${INSTALLCMD} -m 644 doc/ltdbtool.1.html $(DESTDIR)$(docdir)/ctdb; fi
-	if [ -f doc/ping_pong.1.html ];then ${INSTALLCMD} -m 644 doc/ping_pong.1.html $(DESTDIR)$(docdir)/ctdb; fi
 	if [ ! -f $(DESTDIR)$(etcdir)/ctdb/notify.sh ];then ${INSTALLCMD} -m 755 config/notify.sh $(DESTDIR)$(etcdir)/ctdb; fi
-	if [ ! -f $(DESTDIR)$(etcdir)/ctdb/notify.d/README ];then ${INSTALLCMD} -m 755 config/notify.d.README $(DESTDIR)$(etcdir)/ctdb/notify.d/README ; fi
 	${INSTALLCMD} -m 755 config/debug-hung-script.sh $(DESTDIR)$(etcdir)/ctdb
 	if [ ! -f $(DESTDIR)$(etcdir)/ctdb/ctdb-crash-cleanup.sh ];then ${INSTALLCMD} -m 755 config/ctdb-crash-cleanup.sh $(DESTDIR)$(etcdir)/ctdb; fi
 	if [ ! -f $(DESTDIR)$(etcdir)/ctdb/gcore_trace.sh ];then ${INSTALLCMD} -m 755 config/gcore_trace.sh $(DESTDIR)$(etcdir)/ctdb; fi
diff --git a/NEWS b/NEWS
index b4a6169..12aec37 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,73 @@
+Changes in CTDB 2.3
+===================
+
+User-visible changes
+--------------------
+
+* 2 new configuration variables for 60.nfs eventscript:
+
+  - CTDB_MONITOR_NFS_THREAD_COUNT
+  - CTDB_NFS_DUMP_STUCK_THREADS
+
+  See ctdb.sysconfig for details.
+
+* Removed DeadlockTimeout tunable.  To enable debug of locking issues set
+
+   CTDB_DEBUG_LOCKS=/etc/ctdb/debug_locks.sh
+
+* In overall statistics and database statistics, lock buckets have been
+  updated to use following timings:
+
+   < 1ms, < 10ms, < 100ms, < 1s, < 2s, < 4s, < 8s, < 16s, < 32s, < 64s, >= 64s
+
+* Initscript is now simplified with most CTDB-specific functionality
+  split out to ctdbd_wrapper, which is used to start and stop ctdbd.
+
+* Add systemd support.
+
+* CTDB subprocesses are now given informative names to allow them to
+  be easily distinguished when using programs like "top" or "perf".
+
+Important bug fixes
+-------------------
+
+* ctdb tool should not exit from a retry loop if a control times out
+  (e.g. under high load).  This simple fix will stop an exit from the
+  retry loop on any error.
+
+* When updating flags on all nodes, use the correct updated flags.  This
+  should avoid wrong flag change messages in the logs.
+
+* The recovery daemon will not ban other nodes if the current node
+  is banned.
+
+* ctdb dbstatistics command now correctly outputs database statistics.
+
+* Fixed a panic with overlapping shutdowns (regression in 2.2).
+
+* Fixed 60.ganesha "monitor" event (regression in 2.2).
+
+* Fixed a buffer overflow in the "reloadips" implementation.
+
+* Fixed segmentation faults in ping_pong (called with incorrect
+  argument) and test binaries (called when ctdbd not running).
+
+Important internal changes
+--------------------------
+
+* The recovery daemon on stopped or banned node will stop participating in any
+  cluster activity.
+
+* Improve cluster wide database traverse by sending the records directly from
+  traverse child process to requesting node.
+
+* TDB checking and dropping of all IPs moved from initscript to "init"
+  event in 00.ctdb.
+
+* To avoid "rogue IPs" the release IP callback now fails if the
+  released IP is still present on an interface.
+
+
 Changes in CTDB 2.2
 ===================
 
diff --git a/client/ctdb_client.c b/client/ctdb_client.c
index 334d830..08e4903 100644
--- a/client/ctdb_client.c
+++ b/client/ctdb_client.c
@@ -20,7 +20,7 @@
 
 #include "includes.h"
 #include "db_wrap.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "lib/util/dlinklist.h"
 #include "system/network.h"
 #include "system/filesys.h"
@@ -207,8 +207,8 @@ void ctdb_client_read_cb(uint8_t *data, size_t cnt, void *args)
 	talloc_steal(tmp_ctx, hdr);
 
 	if (cnt == 0) {
-		DEBUG(DEBUG_INFO,("Daemon has exited - shutting down client\n"));
-		exit(0);
+		DEBUG(DEBUG_CRIT,("Daemon has exited - shutting down client\n"));
+		exit(1);
 	}
 
 	if (cnt < sizeof(*hdr)) {
@@ -541,12 +541,8 @@ int ctdb_client_send_message(struct ctdb_context *ctdb, uint32_t pnn,
 	memcpy(&r->data[0], data.dptr, data.dsize);
 	
 	res = ctdb_client_queue_pkt(ctdb, &r->hdr);
-	if (res != 0) {
-		return res;
-	}
-
 	talloc_free(r);
-	return 0;
+	return res;
 }
 
 
@@ -3315,7 +3311,7 @@ static void async_callback(struct ctdb_client_control_state *state)
 	struct ctdb_context *ctdb = talloc_get_type(state->ctdb, struct ctdb_context);
 	int ret;
 	TDB_DATA outdata;
-	int32_t res;
+	int32_t res = -1;
 	uint32_t destnode = state->c->hdr.destnode;
 
 	/* one more node has responded with recmode data */
diff --git a/common/ctdb_io.c b/common/ctdb_io.c
index 4e592b9..aee8864 100644
--- a/common/ctdb_io.c
+++ b/common/ctdb_io.c
@@ -21,7 +21,7 @@
 */
 
 #include "includes.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "lib/util/dlinklist.h"
 #include "system/network.h"
 #include "system/filesys.h"
diff --git a/common/ctdb_logging.c b/common/ctdb_logging.c
index 408fda8..ba3e861 100644
--- a/common/ctdb_logging.c
+++ b/common/ctdb_logging.c
@@ -18,7 +18,7 @@
 */
 
 #include "includes.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "system/time.h"
 #include "../include/ctdb_private.h"
 #include "../include/ctdb_client.h"
@@ -129,7 +129,7 @@ void ctdb_collect_log(struct ctdb_context *ctdb, struct ctdb_get_log_addr *log_a
 		tm = localtime(&log_entries[tmp_entry].t.tv_sec);
 		strftime(tbuf, sizeof(tbuf)-1,"%Y/%m/%d %H:%M:%S", tm);
 
-		if (log_entries[tmp_entry].message) {
+		if (log_entries[tmp_entry].message[0] != '\0') {
 			fprintf(f, "%s:%s %s", tbuf,
 				get_debug_by_level(log_entries[tmp_entry].level),
 				log_entries[tmp_entry].message);
@@ -137,9 +137,17 @@ void ctdb_collect_log(struct ctdb_context *ctdb, struct ctdb_get_log_addr *log_a
 	}
 
 	fsize = ftell(f);
+	if (fsize < 0) {
+		fclose(f);
+		DEBUG(DEBUG_ERR, ("Cannot get file size for log entries\n"));
+		return;
+	}
 	rewind(f);
 	data.dptr = talloc_size(NULL, fsize);
-	CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
+	if (data.dptr == NULL) {
+		fclose(f);
+		CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
+	}
 	data.dsize = fread(data.dptr, 1, fsize, f);
 	fclose(f);
 
@@ -166,6 +174,7 @@ int32_t ctdb_control_get_log(struct ctdb_context *ctdb, TDB_DATA addr)
 	}
 
 	if (child == 0) {
+		ctdb_set_process_name("ctdb_log_collector");
 		if (switch_from_server_to_client(ctdb, "log-collector") != 0) {
 			DEBUG(DEBUG_CRIT, (__location__ "ERROR: failed to switch log collector child into client mode.\n"));
 			_exit(1);
diff --git a/common/ctdb_ltdb.c b/common/ctdb_ltdb.c
index dab88f3..0bfc377 100644
--- a/common/ctdb_ltdb.c
+++ b/common/ctdb_ltdb.c
@@ -19,7 +19,7 @@
 */
 
 #include "includes.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "system/network.h"
 #include "system/filesys.h"
 #include "../include/ctdb_private.h"
diff --git a/common/ctdb_message.c b/common/ctdb_message.c
index b0d2ea0..0e19761 100644
--- a/common/ctdb_message.c
+++ b/common/ctdb_message.c
@@ -22,7 +22,7 @@
   protocol design and packet details
 */
 #include "includes.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "system/network.h"
 #include "system/filesys.h"
 #include "../include/ctdb_private.h"
diff --git a/common/ctdb_util.c b/common/ctdb_util.c
index a910a0c..a2da3bc 100644
--- a/common/ctdb_util.c
+++ b/common/ctdb_util.c
@@ -18,7 +18,7 @@
 */
 
 #include "includes.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "system/network.h"
 #include "system/filesys.h"
 #include "system/wait.h"
@@ -59,6 +59,15 @@ void ctdb_fatal(struct ctdb_context *ctdb, const char *msg)
 	abort();
 }
 
+/*
+  like ctdb_fatal() but a core/backtrace would not be useful
+*/
+void ctdb_die(struct ctdb_context *ctdb, const char *msg)
+{
+	DEBUG(DEBUG_ALERT,("ctdb exiting with error: %s\n", msg));
+	exit(1);
+}
+
 /* Invoke an external program to do some sort of tracing on the CTDB
  * process.  This might block for a little while.  The external
  * program is specified by the environment variable
diff --git a/common/system_aix.c b/common/system_aix.c
index 35363d3..41f61ae 100644
--- a/common/system_aix.c
+++ b/common/system_aix.c
@@ -380,6 +380,12 @@ char *ctdb_get_process_name(pid_t pid)
 	return NULL;
 }
 
+int ctdb_set_process_name(const char *name)
+{
+	/* FIXME AIX: set_process_name not implemented */
+	return -ENOSYS;
+}
+
 bool ctdb_get_lock_info(pid_t req_pid, struct ctdb_lock_info *lock_info)
 {
 	/* FIXME AIX: get_lock_info not implemented */
diff --git a/common/system_common.c b/common/system_common.c
index 6ee615f..01ac2bf 100644
--- a/common/system_common.c
+++ b/common/system_common.c
@@ -50,7 +50,7 @@ bool ctdb_sys_have_ip(ctdb_sock_addr *_addr)
 	int ret;
 	ctdb_sock_addr __addr = *_addr;
 	ctdb_sock_addr *addr = &__addr;
-	socklen_t addrlen;
+	socklen_t addrlen = 0;
 
 	switch (addr->sa.sa_family) {
 	case AF_INET:
diff --git a/common/system_freebsd.c b/common/system_freebsd.c
index 641e77a..9597a7a 100644
--- a/common/system_freebsd.c
+++ b/common/system_freebsd.c
@@ -390,6 +390,12 @@ char *ctdb_get_process_name(pid_t pid)
 	return NULL;
 }
 
+int ctdb_set_process_name(const char *name)
+{
+	/* FIXME FreeBSD: set_process_name not implemented */
+	return -ENOSYS;
+}
+
 bool ctdb_get_lock_info(pid_t req_pid, struct ctdb_lock_info *lock_info)
 {
 	/* FIXME FreeBSD: get_lock_info not implemented */
diff --git a/common/system_gnu.c b/common/system_gnu.c
index 0d79ab5..2ab1399 100644
--- a/common/system_gnu.c
+++ b/common/system_gnu.c
@@ -369,6 +369,12 @@ char *ctdb_get_process_name(pid_t pid)
 	return NULL;
 }
 
+int ctdb_set_process_name(const char *name)
+{
+	/* FIXME GNU/Hurd: set_process_name not implemented */
+	return -ENOSYS;
+}
+
 bool ctdb_get_lock_info(pid_t req_pid, struct ctdb_lock_info *lock_info)
 {
 	/* FIXME GNU/Hurd: get_lock_info not implemented */
diff --git a/common/system_kfreebsd.c b/common/system_kfreebsd.c
index 2c05c9e..41aa4d6 100644
--- a/common/system_kfreebsd.c
+++ b/common/system_kfreebsd.c
@@ -382,6 +382,12 @@ char *ctdb_get_process_name(pid_t pid)
 	return strdup(ptr);
 }
 
+int ctdb_set_process_name(const char *name)
+{
+	/* FIXME kFreeBSD: set_process_name not implemented */
+	return -ENOSYS;
+}
+
 bool ctdb_get_lock_info(pid_t req_pid, struct ctdb_lock_info *lock_info)
 {
 	/* FIXME kFreeBSD: get_lock_info not implemented */
diff --git a/common/system_linux.c b/common/system_linux.c
index 9d4d4ec..ab232f0 100644
--- a/common/system_linux.c
+++ b/common/system_linux.c
@@ -28,6 +28,7 @@
 #include <netinet/icmp6.h>
 #include <net/if_arp.h>
 #include <netpacket/packet.h>
+#include <sys/prctl.h>
 
 #ifndef ETHERTYPE_IP6
 #define ETHERTYPE_IP6 0x86dd
@@ -74,7 +75,7 @@ int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
 	struct ether_header *eh;
 	struct arphdr *ah;
 	struct ip6_hdr *ip6;
-	struct icmp6_hdr *icmp6;
+	struct nd_neighbor_solicit *nd_ns;
 	struct ifreq if_hwaddr;
 	unsigned char buffer[78]; /* ipv6 neigh solicitation size */
 	char *ptr;
@@ -222,17 +223,18 @@ int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
 
 		ip6 = (struct ip6_hdr *)(eh+1);
 		ip6->ip6_vfc  = 0x60;
-		ip6->ip6_plen = htons(24);
+		ip6->ip6_plen = htons(sizeof(*nd_ns));
 		ip6->ip6_nxt  = IPPROTO_ICMPV6;
 		ip6->ip6_hlim = 255;
 		ip6->ip6_dst  = addr->ip6.sin6_addr;
 
-		icmp6 = (struct icmp6_hdr *)(ip6+1);
-		icmp6->icmp6_type = ND_NEIGHBOR_SOLICIT;
-		icmp6->icmp6_code = 0;
-		memcpy(&icmp6->icmp6_data32[1], &addr->ip6.sin6_addr, 16);
+		nd_ns = (struct nd_neighbor_solicit *)(ip6+1);
+		nd_ns->nd_ns_type = ND_NEIGHBOR_SOLICIT;
+		nd_ns->nd_ns_code = 0;
+		nd_ns->nd_ns_reserved = 0;
+		nd_ns->nd_ns_target = addr->ip6.sin6_addr;
 
-		icmp6->icmp6_cksum = tcp_checksum6((uint16_t *)icmp6, ntohs(ip6->ip6_plen), ip6);
+		nd_ns->nd_ns_cksum = tcp_checksum6((uint16_t *)nd_ns, ntohs(ip6->ip6_plen), ip6);
 
 		sall.sll_family = AF_PACKET;
 		sall.sll_halen = 6;
@@ -596,6 +598,16 @@ char *ctdb_get_process_name(pid_t pid)
 	return strdup(ptr);
 }
 
+/*
+ * Set process name
+ */
+int ctdb_set_process_name(const char *name)
+{
+	char procname[16];
+
+	strncpy(procname, name, 15);
+	return prctl(PR_SET_NAME, (unsigned long)procname, 0, 0, 0);
+}
 
 /*
  * Parsing a line from /proc/locks,
diff --git a/config/ctdb-crash-cleanup.sh b/config/ctdb-crash-cleanup.sh
index d26838e..78eaa93 100755
--- a/config/ctdb-crash-cleanup.sh
+++ b/config/ctdb-crash-cleanup.sh
@@ -22,7 +22,7 @@ loadconfig ctdb
 [ -f "$CTDB_PUBLIC_ADDRESSES" ] || \
     die "No public addresses file found. Can't clean up."
 
-drop_all_public_ips "ctdb-crash-cleanup.sh"
+drop_all_public_ips 2>&1 | script_log "ctdb-crash-cleanup.sh"
 
 if [ -n "$CTDB_NATGW_PUBLIC_IP" ] ; then
     drop_ip "$CTDB_NATGW_PUBLIC_IP" "ctdb-crash-cleanup.sh"
diff --git a/config/ctdb.init b/config/ctdb.init
index 2ceb45f..e761fec 100755
--- a/config/ctdb.init
+++ b/config/ctdb.init
@@ -1,395 +1,133 @@
 #!/bin/sh
+
+# Start and stop CTDB (Clustered TDB daemon)
 #
-##############################
-# ctdb:                        Starts the clustered tdb daemon
-#
-# chkconfig:           - 90 01
-#
-# description:                 Starts and stops the clustered tdb daemon
-# pidfile:             /var/run/ctdb/ctdbd.pid
+# chkconfig: - 90 01
 #
+# description: Starts and stops CTDB
+# pidfile: /var/run/ctdb/ctdbd.pid
+# config: /etc/sysconfig/ctdb
 
 ### BEGIN INIT INFO
 # Provides:            ctdb
-# Required-Start:      $network
-# Required-Stop:       $network
-# Default-Stop:
-# Default-Start:       3 5
+# Required-Start:      $local_fs $syslog $network
+# Required-Stop:       $local_fs $syslog $network
+# Default-Start:       2 3 4 5
+# Default-Stop:        0 1 6
 # Short-Description:   start and stop ctdb service
-# Description:         initscript for the ctdb service
+# Description:         Start and stop CTDB (Clustered TDB daemon)
 ### END INIT INFO
 
 # Source function library.
 if [ -f /etc/init.d/functions ] ; then
+    # Red Hat
     . /etc/init.d/functions
 elif [ -f /etc/rc.d/init.d/functions ] ; then
+    # Red Hat
     . /etc/rc.d/init.d/functions
-fi
-
-[ -f /etc/rc.status ] && {
+elif [ -f /etc/rc.status ] ; then
+    # SUSE
     . /etc/rc.status
     rc_reset
     LC_ALL=en_US.UTF-8
-}
-
-if [ -f /lib/lsb/init-functions ] ; then
+elif [ -f /lib/lsb/init-functions ] ; then
+    # Debian
     . /lib/lsb/init-functions
 fi
 
 # Avoid using root's TMPDIR
 unset TMPDIR
 
-[ -z "$CTDB_BASE" ] && {
-    export CTDB_BASE="/etc/ctdb"
-}
+[ -n "$CTDB_BASE" ] || export CTDB_BASE="/etc/ctdb"
 
-. $CTDB_BASE/functions
-loadconfig network
-loadconfig ctdb
+. "${CTDB_BASE}/functions"
+loadconfig "network"
+loadconfig "ctdb"
 
 # check networking is up (for redhat)
-[ "$NETWORKING" = "no" ] && exit 0
+if [ "$NETWORKING" = "no" ] ; then
+    exit 0
+fi
 
 detect_init_style
 export CTDB_INIT_STYLE
 
-ctdbd=${CTDBD:-/usr/sbin/ctdbd}
-pidfile="/var/run/ctdb/ctdbd.pid"
-
-if [ "$CTDB_VALGRIND" = "yes" ]; then
-    init_style="valgrind"
-else
-    init_style="$CTDB_INIT_STYLE"
-fi
-
-build_ctdb_options () {
-
-    maybe_set () {
-	# If the 2nd arg is null then return - don't set anything.
-	# Else if the 3rd arg is set and it doesn't match the 2nd arg
-	# then return
-	[ -z "$2" -o \( -n "$3" -a "$3" != "$2" \) ] && return
+ctdbd="${CTDBD:-/usr/sbin/ctdbd}"
+ctdbd_wrapper="${CTDBD_WRAPPER:-/usr/sbin/ctdbd_wrapper}"
+pidfile="${CTDB_PIDFILE:-/var/run/ctdb/ctdbd.pid}"
 
-	val="'$2'"
-	case "$1" in
-	    --*) sep="=" ;;
-	    -*)  sep=" " ;;
-	esac
-	# For these options we're only passing a value-less flag.
-	[ -n "$3" ] && {
-	    val=""
-	    sep=""
-	}
+############################################################
 
-	CTDB_OPTIONS="${CTDB_OPTIONS}${CTDB_OPTIONS:+ }${1}${sep}${val}"
-    }
-
-    [ -z "$CTDB_RECOVERY_LOCK" ] && {
-        echo "No recovery lock specified. Starting CTDB without split brain prevention"
-    }
-    maybe_set "--reclock"                "$CTDB_RECOVERY_LOCK"
-
-    mkdir -p $(dirname "$pidfile")
-    maybe_set "--pidfile"                "$pidfile"
-
-    # build up CTDB_OPTIONS variable from optional parameters
-    maybe_set "--logfile"                "$CTDB_LOGFILE"
-    maybe_set "--nlist"                  "$CTDB_NODES"
-    maybe_set "--socket"                 "$CTDB_SOCKET"
-    maybe_set "--public-addresses"       "$CTDB_PUBLIC_ADDRESSES"
-    maybe_set "--public-interface"       "$CTDB_PUBLIC_INTERFACE"
-    maybe_set "--dbdir"                  "$CTDB_DBDIR"
-    maybe_set "--dbdir-persistent"       "$CTDB_DBDIR_PERSISTENT"
-    maybe_set "--event-script-dir"       "$CTDB_EVENT_SCRIPT_DIR"
-    maybe_set "--transport"              "$CTDB_TRANSPORT"
-    maybe_set "-d"                       "$CTDB_DEBUGLEVEL"
-    maybe_set "--notification-script"    "$CTDB_NOTIFY_SCRIPT"
-    maybe_set "--start-as-disabled"      "$CTDB_START_AS_DISABLED"    "yes"
-    maybe_set "--start-as-stopped "      "$CTDB_START_AS_STOPPED"     "yes"
-    maybe_set "--no-recmaster"           "$CTDB_CAPABILITY_RECMASTER" "no"
-    maybe_set "--no-lmaster"             "$CTDB_CAPABILITY_LMASTER"   "no"
-    maybe_set "--lvs --single-public-ip" "$CTDB_LVS_PUBLIC_IP"
-    maybe_set "--script-log-level"       "$CTDB_SCRIPT_LOG_LEVEL"
-    maybe_set "--log-ringbuf-size"       "$CTDB_LOG_RINGBUF_SIZE"
-    maybe_set "--syslog"                 "$CTDB_SYSLOG"               "yes"
-    maybe_set "--max-persistent-check-errors" "$CTDB_MAX_PERSISTENT_CHECK_ERRORS"
-}
-
-export_debug_variables ()
-{
-    export CTDB_DEBUG_HUNG_SCRIPT CTDB_EXTERNAL_TRACE
-}
-
-# Log given message or stdin to either syslog or a CTDB log file
-do_log ()
-{
-    script_log "ctdb.init" "$@"
-}
-
-select_tdb_checker ()
+start()
 {
-    # Find the best TDB consistency check available.
-    use_tdb_tool_check=false
-    if which tdbtool >/dev/null 2>&1 && \
-	echo "help" | tdbtool | grep -q check ; then
-
-	use_tdb_tool_check=true
-    elif which tdbtool >/dev/null 2>&1 && which tdbdump >/dev/null 2>&1 ; then
-	    do_log <<EOF
-WARNING: The installed 'tdbtool' does not offer the 'check' subcommand.
- Using 'tdbdump' for database checks.
- Consider updating 'tdbtool' for better checks!
-EOF
-    elif which tdbdump >/dev/null 2>&1 ; then
-	do_log <<EOF
-WARNING: 'tdbtool' is not available.
- Using 'tdbdump' to check the databases.
- Consider installing a recent 'tdbtool' for better checks!
-EOF
-    else
-	do_log <<EOF
-WARNING: Cannot check databases since neither
- 'tdbdump' nor 'tdbtool check' is available.
- Consider installing tdbtool or at least tdbdump!
-EOF
-        return 1
-    fi
-}
-
-check_tdb ()
-{
-    _db="$1"
-
-    if $use_tdb_tool_check ; then
-	# tdbtool always exits with 0  :-(
-	if tdbtool "$_db" check 2>/dev/null |
-	    grep -q "Database integrity is OK" ; then
-	    return 0
-	else
-	    return 1
-	fi
-    else
-	tdbdump "$_db" >/dev/null 2>/dev/null
-	return $?
-    fi
-}
-
-check_persistent_databases ()
-{
-    _dir="${CTDB_DBDIR_PERSISTENT:-${CTDB_DBDIR:-/var/ctdb}/persistent}"
-    mkdir -p "$_dir" 2>/dev/null
-
-    [ "${CTDB_MAX_PERSISTENT_CHECK_ERRORS:-0}" = "0" ] || return 0
-
-    for _db in $(ls "$_dir/"*.tdb.*[0-9] 2>/dev/null) ; do
-	check_tdb $_db || {
-	    do_log "Persistent database $_db is corrupted! CTDB will not start."
-	    return 1
-	}
-    done
-}
-
-check_non_persistent_databases ()
-{
-    _dir="${CTDB_DBDIR:-/var/ctdb}"
-    mkdir -p "$_dir" 2>/dev/null
-
-    for _db in $(ls "${_dir}/"*.tdb.*[0-9] 2>/dev/null) ; do
-	check_tdb $_db || {
-	    _backup="${_db}.$(date +'%Y%m%d.%H%M%S.%N').corrupt"
-	    do_log <<EOF
-WARNING: database ${_db} is corrupted.
- Moving to backup ${_backup} for later analysis.
-EOF
-	    mv "$_db" "$_backup"
-
-	    # Now remove excess backups
-	    ls -td "${_db}."*".corrupt" |
-	    tail -n +$((${CTDB_MAX_CORRUPT_DB_BACKUPS:-10} + 1)) |
-	    xargs rm -f
-	    
-	}
-    done
-}
-
-set_retval() {
-    return $1
-}
-
-wait_until_ready () {
-    _timeout="${1:-10}" # default is 10 seconds
-
-    _count=0
-    while ! ctdb runstate first_recovery startup running >/dev/null 2>&1 ; do
-	if [ $_count -ge $_timeout ] ; then
-	    return 1
-	fi
-	sleep 1
-	_count=$(($_count + 1))
-    done
-}
-
-start() {
-    echo -n $"Starting ctdbd service: "
-
-    ctdb ping >/dev/null 2>&1 && {
-	echo $"CTDB is already running"
-	return 0
-    }
-
-    # About to start new $ctdbd.  The ping above has failed and any
-    # new $ctdbd will destroy the Unix domain socket, so any processes
-    # that aren't yet completely useless soon will be...  so kill
-    # them.
-    pkill -9 -f "$ctdbd"
-
-    build_ctdb_options
-
-    export_debug_variables
-
-    # make sure we drop any ips that might still be held if previous
-    # instance of ctdb got killed with -9 or similar
-    drop_all_public_ips "ctdb.init"
-
-    if select_tdb_checker ; then
-	check_persistent_databases || return $?
-	check_non_persistent_databases
-    fi
-
-    if [ "$CTDB_SUPPRESS_COREFILE" = "yes" ]; then
-	ulimit -c 0
-    else
-	ulimit -c unlimited
-    fi
+    echo -n "Starting ctdbd service: "
 
-    case $init_style in
-	valgrind)
-	    eval valgrind -q --log-file=/var/log/ctdb_valgrind \
-		$ctdbd --valgrinding "$CTDB_OPTIONS"
-	    RETVAL=$?
-	    echo
-	    ;;
+    case "$CTDB_INIT_STYLE" in
 	suse)
-	    eval startproc $ctdbd "$CTDB_OPTIONS"
-	    RETVAL=$?
+	    startproc \
+		"$ctdbd_wrapper" "$pidfile" "start"
+	    rc_status -v
 	    ;;
 	redhat)
-	    eval $ctdbd "$CTDB_OPTIONS"
+	    daemon --pidfile "$pidfile" \
+		"$ctdbd_wrapper" "$pidfile" "start"
 	    RETVAL=$?
+	    echo
 	    [ $RETVAL -eq 0 ] && touch /var/lock/subsys/ctdb || RETVAL=1
+	    return $RETVAL
 	    ;;
 	debian)
-	    eval start-stop-daemon --start --quiet --background \
-		--exec $ctdbd -- "$CTDB_OPTIONS"
-	    RETVAL=$?
+	    eval start-stop-daemon --start --quiet --background --exec \
+		"$ctdbd_wrapper" "$pidfile" "start"
 	    ;;
     esac
-
-    if [ $RETVAL -eq 0 ] ; then
-	if ! wait_until_ready ; then
-	    RETVAL=1
-	    echo "Timed out waiting for initialisation - killing CTDB"
-	    pkill -9 -f $ctdbd >/dev/null 2>&1
-	fi
-    fi
-
-    case $init_style in
-	suse)
-	    set_retval $RETVAL
-	    rc_status -v
-	    ;;
-	redhat)
-	    [ $RETVAL -eq 0 ] && success || failure
-	    echo
-	    ;;
-    esac
-
-    return $RETVAL
 }
 
-stop() {
-    echo -n $"Shutting down ctdbd service: "
-    pkill -0 -f $ctdbd || {
-	echo -n "  Warning: ctdbd not running ! "
-	case $init_style in
-	    suse)
-		rc_status -v
-		;;
-	    redhat)
-		echo ""
-		;;
-	esac
-	return 0
-    }
-    ctdb shutdown >/dev/null 2>&1
-    RETVAL=$?
-    count=0
-    while pkill -0 -f $ctdbd ; do
-	sleep 1
-	count=$(($count + 1))
-	[ $count -gt 30 ] && {
-	    echo -n $"killing ctdbd "
-	    pkill -9 -f $ctdbd
-	    pkill -9 -f $CTDB_BASE/events.d/
-	}
-    done
-    # make sure all ips are dropped, pfkill -9 might leave them hanging around
-    drop_all_public_ips
-
-    rm -f "$pidfile"
+stop()
+{
+    echo -n "Shutting down ctdbd service: "
 
-    case $init_style in
+    case "$CTDB_INIT_STYLE" in
 	suse)
-	    # re-set the return code to the recorded RETVAL in order
-	    # to print the correct status message
-	    set_retval $RETVAL
+	    "$ctdbd_wrapper" "$pidfile" "stop"
 	    rc_status -v
 	    ;;
 	redhat)
+	    "$ctdbd_wrapper" "$pidfile" "stop"
+	    RETVAL=$?
             [ $RETVAL -eq 0 ] && success || failure
-	    [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/ctdb
 	    echo ""
+	    [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/ctdb
+	    return $RETVAL
+	    ;;
+	debian)
+	    "$ctdbd_wrapper" "$pidfile" "stop"
+	    log_end_msg $?
 	    ;;
     esac
-
-    return $RETVAL
 }
 
-restart() {
+restart()
+{
     stop
     start
 }
 
-# Given that CTDB_VALGRIND is a debug option we don't support the pid
-# file.  We just do a quick and dirty hack instead.  Otherwise we just
-# end up re-implementing each distro's pidfile support...
-check_status_valgrind ()
-{
-    if pkill -0 -f "valgrind.*${ctdbd}" ; then
-	echo "ctdbd is running under valgrind..."
-	return 0
-    else
-	echo "ctdbd is not running"
-	return 1
-    fi
-}
-
 check_status ()
 {
     # Backward compatibility.  When we arrange to pass --pidfile to
     # ctdbd we also create the directory that will contain it.  If
     # that directory is missing then we don't use the pidfile to check
-    # status.
+    # status.  Note that this probably won't work if
+    # $CTDB_VALGRIND="yes" but this doesn't need full backward
+    # compatibility because it is a debug option.
     if [ -d $(dirname "$pidfile") ] ; then
 	_pf_opt="-p $pidfile"
     else
 	_pf_opt=""
     fi
 
-    case "$init_style" in
-	valgrind)
-	    check_status_valgrind
-	    ;;
+    case "$CTDB_INIT_STYLE" in
 	suse)
 	    checkproc $_pf_opt "$ctdbd"
 	    rc_status -v
@@ -403,8 +141,7 @@ check_status ()
     esac
 }
 
-
-[ -x "$CTDB_BASE/rc.ctdb" ] && "$CTDB_BASE/rc.ctdb" $1
+############################################################
 
 case "$1" in
     start)
@@ -426,11 +163,9 @@ case "$1" in
 	;;
     cron)
 	# used from cron to auto-restart ctdb
-  	check_status >/dev/null || restart
+  	check_status >/dev/null 2>&1 || restart
 	;;
     *)
-	echo $"Usage: $0 {start|stop|restart|reload|force-reload|status|cron|condrestart|try-restart}"
+	echo "Usage: $0 {start|stop|restart|reload|force-reload|status|cron|condrestart|try-restart}"
 	exit 1
 esac
-
-exit $?
diff --git a/config/ctdb.service b/config/ctdb.service
new file mode 100644
index 0000000..4ad03a8
--- /dev/null
+++ b/config/ctdb.service
@@ -0,0 +1,15 @@
+[Unit]
+Description=CTDB
+After=network.target
+
+[Service]
+Type=forking
+LimitCORE=infinity
+PIDFile=/run/ctdbd/ctdbd.pid
+ExecStart=/usr/sbin/ctdbd_wrapper /run/ctdbd/ctdbd.pid start
+ExecStop=/usr/sbin/ctdbd_wrapper /run/ctdbd/ctdbd.pid stop
+KillMode=control-group
+Restart=no
+
+[Install]
+WantedBy=multi-user.target
diff --git a/config/ctdb.sysconfig b/config/ctdb.sysconfig
index 6f58e8f..7e775a2 100644
--- a/config/ctdb.sysconfig
+++ b/config/ctdb.sysconfig
@@ -129,6 +129,25 @@ CTDB_RECOVERY_LOCK="/some/place/on/shared/storage"
 # CTDB_MONITOR_FREE_MEMORY_WARN=100
 # CTDB_MONITOR_FREE_MEMORY=10
 
+# Should the 60.nfs monitor event try to correct the number of nfsd
+# threads?  This works around a limitation in some NFS initscripts
+# where some threads can be stuck in host filesystem calls (perhaps
+# due to slow storage), a restart occurs, some threads don't exit, the
+# start only adds the missing number of threads, the stuck threads
+# exit, and the result is a lower than expected thread count.  Note
+# that you must also set $RPCNFSDCOUNT (RedHat/Debian) or
+# $USE_KERNEL_NFSD_NUMBER (SUSE) in your NFS configuration so the
+# monitoring code knows how many threads there should be - if neither
+# of these are set then this option will be ignored.  The default is
+# to not do this check.
+# CTDB_MONITOR_NFS_THREAD_COUNT="yes"
+
+
+# The number of nfsd threads to dump stack traces for if some are
+# still alive after stopping NFS during a restart.  The default is to
+# dump no stack traces.
+# CTDB_NFS_DUMP_STUCK_THREADS=5
+
 # When set to yes, the CTDB node will start in DISABLED mode and not host
 # any public ip addresses. The administrator needs to explicitely enable
 # the node with "ctdb enable"
diff --git a/config/ctdbd_wrapper b/config/ctdbd_wrapper
new file mode 100755
index 0000000..33bef06
--- /dev/null
+++ b/config/ctdbd_wrapper
@@ -0,0 +1,275 @@
+#!/bin/sh
+
+# ctdbd wrapper - start or stop CTDB
+
+usage ()
+{
+    echo "usage: ctdbd_wrapper <pidfile> { start | stop }"
+    exit 1
+}
+
+[ $# -eq 2 ] || usage
+
+pidfile="$1"
+action="$2"
+
+############################################################
+
+[ -n "$CTDB_BASE" ] || export CTDB_BASE="/etc/ctdb"
+
+. "${CTDB_BASE}/functions"
+loadconfig "ctdb"
+
+ctdbd="${CTDBD:-/usr/sbin/ctdbd}"
+
+############################################################
+
+# ctdbd_is_running()
+
+# 1. Check if ctdbd is running.
+#    - If the PID file is being used then, if the PID file is present,
+#      ctdbd is only considered to be running if the PID in the file is
+#      active.
+#    - If the PID file is not being used (i.e. we're upgrading from a
+#      version that doesn't support it) then the presence of any ctdbd
+#      processes is enough proof.
+
+# 2. Print a comma-separated list of PIDs that can be
+#    used with "pkill -s".
+#    - If the PID file is being used then this is just the PID in that
+#      file.  This also happens to be the session ID, so can be used
+#      to kill all CTDB processes.
+#    - If the PID file is not being used (i.e. upgrading) then this is
+#      just any ctdbd processes that are running.  Hopefully one of
+#      them is the session ID so that it can be used to kill all CTDB
+#      processes.
+
+# Combining these 2 checks is an optimisation to avoid potentially
+# running too many pgrep/pkill processes on an already loaded system.
+# Trawling through /proc/ can be very expensive.
+
+ctdbd_is_running ()
+{
+    # If the directory for the PID file exists then respect the
+    # existence of a PID file.
+    _pidfile_dir=$(dirname "$pidfile")
+    if [ -d "$_pidfile_dir" ] ; then
+	if read _pid 2>/dev/null <"$pidfile" ; then
+	    echo "$_pid"
+
+	    # Return value of kill is used as the function's status
+	    kill -0 "$_pid" 2>/dev/null
+	else
+	    # Missing/empty PID file
+	    return 1
+	fi
+    else
+	if _pid=$(pgrep -f "${ctdbd}\>") ; then
+	    echo $_pid | sed -e 's@ @,@g'  # unquoted: newlines -> spaces -> commas
+	    return 0
+	else
+	    return 1
+	fi
+    fi
+}
+
+############################################################
+
+build_ctdb_options ()
+{
+    # Append ctdbd options derived from configuration variables to $CTDB_OPTIONS
+    maybe_set ()
+    {
+	# If the given variable isn't set then do nothing
+	[ -n "$2" ] || return
+	# If a required value is given for the variable and it doesn't
+	# match, then do nothing
+	[ -z "$3" ] || [ "$3" = "$2" ] || return
+
+	val="'$2'"
+	case "$1" in
+	    --*) sep="=" ;;
+	    -*)  sep=" " ;;
+	esac
+	# For these options we're only passing a value-less flag.
+	if [ -n "$3" ] ; then
+	    val=""
+	    sep=""
+	fi
+
+	CTDB_OPTIONS="${CTDB_OPTIONS}${CTDB_OPTIONS:+ }${1}${sep}${val}"
+    }
+
+    if [ -z "$CTDB_RECOVERY_LOCK" ] ; then
+        echo "No recovery lock specified. Starting CTDB without split brain prevention"
+    fi
+    maybe_set "--reclock"                "$CTDB_RECOVERY_LOCK"
+
+    maybe_set "--pidfile"                "$pidfile"
+
+    # build up CTDB_OPTIONS variable from optional parameters
+    maybe_set "--logfile"                "$CTDB_LOGFILE"
+    maybe_set "--nlist"                  "$CTDB_NODES"
+    maybe_set "--socket"                 "$CTDB_SOCKET"
+    maybe_set "--public-addresses"       "$CTDB_PUBLIC_ADDRESSES"
+    maybe_set "--public-interface"       "$CTDB_PUBLIC_INTERFACE"
+    maybe_set "--dbdir"                  "$CTDB_DBDIR"
+    maybe_set "--dbdir-persistent"       "$CTDB_DBDIR_PERSISTENT"
+    maybe_set "--event-script-dir"       "$CTDB_EVENT_SCRIPT_DIR"
+    maybe_set "--transport"              "$CTDB_TRANSPORT"
+    maybe_set "-d"                       "$CTDB_DEBUGLEVEL"
+    maybe_set "--notification-script"    "$CTDB_NOTIFY_SCRIPT"
+    maybe_set "--start-as-disabled"      "$CTDB_START_AS_DISABLED"    "yes"
+    maybe_set "--start-as-stopped "      "$CTDB_START_AS_STOPPED"     "yes"
+    maybe_set "--no-recmaster"           "$CTDB_CAPABILITY_RECMASTER" "no"
+    maybe_set "--no-lmaster"             "$CTDB_CAPABILITY_LMASTER"   "no"
+    maybe_set "--lvs --single-public-ip" "$CTDB_LVS_PUBLIC_IP"
+    maybe_set "--script-log-level"       "$CTDB_SCRIPT_LOG_LEVEL"
+    maybe_set "--log-ringbuf-size"       "$CTDB_LOG_RINGBUF_SIZE"
+    maybe_set "--syslog"                 "$CTDB_SYSLOG"               "yes"
+    maybe_set "--max-persistent-check-errors" "$CTDB_MAX_PERSISTENT_CHECK_ERRORS"
+}
+
+export_debug_variables ()
+{
+    export CTDB_DEBUG_HUNG_SCRIPT CTDB_EXTERNAL_TRACE CTDB_DEBUG_LOCKS
+}
+
+kill_ctdbd ()
+{
+    _session="$1"
+
+    if [ -n "$_session" ] ; then
+	pkill -9 -s "$_session" 2>/dev/null
+    fi
+    rm -f "$pidfile"
+}
+
+############################################################
+
+start()
+{
+    if _session=$(ctdbd_is_running) ; then
+	echo "CTDB is already running"
+	return 0
+    fi
+
+    # About to start new $ctdbd.  The main daemon is not running but
+    # there may still be other processes around, so do some cleanup.
+    # Note that starting ctdbd below will destroy the Unix domain
+    # socket, so any processes that aren't yet completely useless soon
+    # will be, so this can really do no harm.
+    kill_ctdbd "$_session"
+
+    build_ctdb_options
+
+    export_debug_variables
+
+    if [ "$CTDB_SUPPRESS_COREFILE" = "yes" ]; then
+	ulimit -c 0
+    else
+	ulimit -c unlimited
+    fi
+
+    mkdir -p "$(dirname "$pidfile")"
+
+    if [ -n "$CTDB_VALGRIND" ] && [ "$CTDB_VALGRIND" != "no" ] ; then
+	if [ "$CTDB_VALGRIND" = "yes" ] ; then
+	    ctdbd="valgrind -q --log-file=/var/log/ctdb_valgrind ${ctdbd}"
+	else
+	    ctdbd="${CTDB_VALGRIND} ${ctdbd}"
+	fi
+	CTDB_OPTIONS="${CTDB_OPTIONS} --valgrinding"
+    fi
+
+    # This is ugly but will improve when we get rid of $CTDB_OPTIONS
+    # and use only $CTDB_SYSLOG.
+    case "$CTDB_OPTIONS" in
+	*--syslog*) : ;;
+	*)
+	    logger -t ctdbd "CTDB is being run without syslog enabled.  Logs will be in ${CTDB_LOGFILE:-/var/log/log.ctdb}"
+    esac
+
+    eval "$ctdbd" "$CTDB_OPTIONS" || return 1
+
+    # Wait until ctdbd has started and is ready to respond to clients.
+    _pid=""
+    _timeout="${CTDB_STARTUP_TIMEOUT:-10}"
+    _count=0
+    while [ $_count -lt $_timeout ] ; do
+	# If we don't have the PID then try to read it.
+	[ -n "$_pid" ] || read _pid 2>/dev/null <"$pidfile"
+
+	# If we got the PID but the PID file has gone or the process
+	# is no longer running then stop waiting... CTDB is dead.
+	if [ -n "$_pid" ] ; then
+	    if [ ! -e "$pidfile" ] || ! kill -0 "$_pid" 2>/dev/null ; then
+		echo "CTDB exited during initialisation - check logs."
+		kill_ctdbd "$_pid"
+		drop_all_public_ips >/dev/null 2>&1
+		return 1
+	    fi
+
+	    if ctdb runstate first_recovery startup running >/dev/null 2>&1 ; then
+		return 0
+	    fi
+	fi
+
+	_count=$(($_count + 1))
+	sleep 1
+    done
+
+    echo "Timed out waiting for initialisation - check logs - killing CTDB"
+    kill_ctdbd "$_pid"
+    drop_all_public_ips >/dev/null 2>&1
+    return 1
+}
+
+stop()
+{
+    if ! _session=$(ctdbd_is_running) ; then
+	echo "CTDB is not running"
+	return 0
+    fi
+
+    ctdb shutdown
+
+    # Wait for remaining CTDB processes to exit...
+    _timeout=${CTDB_SHUTDOWN_TIMEOUT:-30}
+    _count=0
+    while [ $_count -lt $_timeout ] ; do
+	pkill -0 -s "$_session" 2>/dev/null || return 0
+
+	_count=$(($_count + 1))
+	sleep 1
+    done
+
+    echo "Timed out waiting for CTDB to shutdown.  Killing CTDB processes."
+    kill_ctdbd "$_session"
+    drop_all_public_ips >/dev/null 2>&1
+
+    sleep 1
+
+    if pkill -0 -s "$_session" ; then
+	# If SIGKILL didn't work then things are bad...
+	echo "Failed to kill all CTDB processes.  Giving up."
+	return 1
+    fi
+
+    return 0
+}
+
+############################################################
+
+# Allow notifications for start/stop.
+if [ -x "$CTDB_BASE/rc.ctdb" ] ; then
+    "$CTDB_BASE/rc.ctdb" "$action"
+fi
+
+case "$action" in
+    start) start ;;
+    stop)  stop  ;;
+    *)
+	echo "usage: $0 {start|stop}"
+	exit 1
+esac
diff --git a/config/debug_locks.sh b/config/debug_locks.sh
new file mode 100644
index 0000000..0dde861
--- /dev/null
+++ b/config/debug_locks.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+# This script parses /proc/locks and finds the processes that are holding
+# locks on CTDB databases.  For all those processes the script dumps a
+# stack trace using gstack.
+#
+# This script can be used only if Samba is configured to use fcntl locks
+# rather than mutex locks.
+
+# Create sed expression to convert inodes to names
+sed_cmd=$( ls -li /var/ctdb/*.tdb.* /var/ctdb/persistent/*.tdb.* |
+	   sed -e "s#/var/ctdb[/persistent]*/\(.*\)#\1#" |
+	   awk '{printf "s#[0-9]*:[0-9]*:%s #%s #\n", $1, $10}' )
+
+# Parse /proc/locks and extract following information
+#    pid process_name tdb_name offsets [W]
+out=$( cat /proc/locks |
+    grep -F "POSIX  ADVISORY  WRITE" |
+    awk '{ if($2 == "->") { print $6, $7, $8, $9, "W" } else { print $5, $6, $7, $8 } }' |
+    while read pid rest ; do
+	pname=$(readlink /proc/$pid/exe)
+	echo $pid $pname $rest
+    done | sed -e "$sed_cmd" | grep "\.tdb" )
+
+if [ -n "$out" ]; then
+    # Log information about locks
+    echo "$out" | logger -t "ctdbd-lock"
+
+    # Find the non-ctdbd processes holding locks on databases that are waited on
+    dbs=$(echo "$out" | grep "W$" | awk '{print $3}')
+    all_pids=""
+    for db in $dbs ; do
+	pids=$(echo "$out" | grep -v "W$" | grep -F "$db" | grep -v ctdbd | awk '{print $1}')
+	all_pids="$all_pids $pids"
+    done
+    pids=$(echo $all_pids | tr " " "\n" | sort -u)  # one PID per line so sort -u dedupes
+
+    # For each such lock-holding process, log stack trace
+    for pid in $pids ; do
+	gstack $pid | logger -t "ctdbd-lock $pid"
+#	gcore -o /var/log/core-deadlock-ctdb $pid
+    done
+fi
+
+exit 0
diff --git a/config/events.d/00.ctdb b/config/events.d/00.ctdb
index 02d1569..d56c7c4 100755
--- a/config/events.d/00.ctdb
+++ b/config/events.d/00.ctdb
@@ -17,7 +17,94 @@ loadconfig
 
 ctdb_setup_service_state_dir "ctdb"
 
-#
+############################################################
+
+select_tdb_checker ()
+{
+    # Find the best TDB consistency check available.
+    use_tdb_tool_check=false
+    if which tdbtool >/dev/null 2>&1 && \
+	echo "help" | tdbtool | grep -q check ; then
+
+	use_tdb_tool_check=true
+    elif which tdbtool >/dev/null 2>&1 && which tdbdump >/dev/null 2>&1 ; then
+	    cat <<EOF
+WARNING: The installed 'tdbtool' does not offer the 'check' subcommand.
+ Using 'tdbdump' for database checks.
+ Consider updating 'tdbtool' for better checks!
+EOF
+    elif which tdbdump >/dev/null 2>&1 ; then
+	cat <<EOF
+WARNING: 'tdbtool' is not available.
+ Using 'tdbdump' to check the databases.
+ Consider installing a recent 'tdbtool' for better checks!
+EOF
+    else
+	cat <<EOF
+WARNING: Cannot check databases since neither
+ 'tdbdump' nor 'tdbtool check' is available.
+ Consider installing tdbtool or at least tdbdump!
+EOF
+        return 1
+    fi
+}
+
+check_tdb ()
+{
+    _db="$1"
+
+    if $use_tdb_tool_check ; then
+	# tdbtool always exits with 0  :-(
+	if tdbtool "$_db" check 2>/dev/null |
+	    grep -q "Database integrity is OK" ; then
+	    return 0
+	else
+	    return 1
+	fi
+    else
+	tdbdump "$_db" >/dev/null 2>/dev/null
+	return $?
+    fi
+}
+
+check_persistent_databases ()
+{
+    _dir="${CTDB_DBDIR_PERSISTENT:-${CTDB_DBDIR:-/var/ctdb}/persistent}"
+    mkdir -p "$_dir" 2>/dev/null
+
+    [ "${CTDB_MAX_PERSISTENT_CHECK_ERRORS:-0}" = "0" ] || return 0
+
+    for _db in "$_dir/"*.tdb.*[0-9] ; do
+	[ ! -e "$_db" ] || check_tdb "$_db" || {
+	    echo "Persistent database $_db is corrupted! CTDB will not start."
+	    return 1
+	}
+    done
+}
+
+check_non_persistent_databases ()
+{
+    _dir="${CTDB_DBDIR:-/var/ctdb}"
+    mkdir -p "$_dir" 2>/dev/null
+
+    for _db in "${_dir}/"*.tdb.*[0-9] ; do
+	[ ! -e "$_db" ] || check_tdb "$_db" || {
+	    _backup="${_db}.$(date +'%Y%m%d.%H%M%S.%N').corrupt"
+	    cat <<EOF
+WARNING: database ${_db} is corrupted.
+ Moving to backup ${_backup} for later analysis.
+EOF
+	    mv "$_db" "$_backup"
+
+	    # Now remove excess backups
+	    ls -td "${_db}."*".corrupt" |
+	    tail -n +$((${CTDB_MAX_CORRUPT_DB_BACKUPS:-10} + 1)) |
+	    xargs rm -f
+
+	}
+    done
+}
+
 update_config_from_tdb() {
 
     # Pull optional ctdb configuration data out of config.tdb
@@ -49,18 +136,7 @@ set_ctdb_variables () {
     done
 }
 
-wait_until_ready () {
-    _timeout="${1:-10}" # default is 10 seconds
-
-    _count=0
-    while ! ctdb runstate setup >/dev/null 2>&1 ; do
-	if [ $_count -ge $_timeout ] ; then
-	    return 1
-	fi
-	sleep 1
-	_count=$(($_count + 1))
-    done
-}
+############################################################
 
 ctdb_check_args "$@"
 
@@ -75,11 +151,18 @@ case "$1" in
 	    echo "mkdir -p $CTDB_VARDIR/state - failed - $ret"
 	    exit $ret
 	}
+
+	# make sure we drop any ips that might still be held if
+	# previous instance of ctdb got killed with -9 or similar
+	drop_all_public_ips
+
+	if select_tdb_checker ; then
+	    check_persistent_databases || exit $?
+	    check_non_persistent_databases
+	fi
 	;;
 
      setup)
-        # Make sure CTDB daemon is ready to process requests
-	wait_until_ready || die "CTDB did not become ready for setup"
 	# Set any tunables from the config file
 	set_ctdb_variables || die "Failed to set CTDB tunables"
 	;;
diff --git a/config/events.d/13.per_ip_routing b/config/events.d/13.per_ip_routing
index 4b13546..de153a6 100755
--- a/config/events.d/13.per_ip_routing
+++ b/config/events.d/13.per_ip_routing
@@ -191,7 +191,7 @@ get_config_for_ip ()
 	    if [ "$_ip" = "$_i" ] ; then
 		echo -n "$_ip "; ipv4_host_addr_to_net "$_ip" "$_maskbits"
 	    fi
-	done <"${CTDB_PUBLIC_ADDRESSES:-${CTDB_BASE:-/dev/null}${CTDB_BASE:+/public_addresses}}"
+	done <"${CTDB_PUBLIC_ADDRESSES:-/dev/null}"
     else
 	while read _i _rest ; do
 	    if [ "$_ip" = "$_i" ] ; then
diff --git a/config/events.d/60.ganesha b/config/events.d/60.ganesha
index 0066c54..09860d0 100755
--- a/config/events.d/60.ganesha
+++ b/config/events.d/60.ganesha
@@ -88,6 +88,63 @@ create_ganesha_recdirs ()
     mkdir -p $GANRECDIR3
 }
 
+monitor_ganesha_nfsd ()
+{
+	create_ganesha_recdirs
+	service_name=${service_name}_process
+
+	PIDFILE="/var/run/ganesha.pid"
+	CUR_STATE=`get_cluster_fs_state`
+	GANESHA="/usr/bin/$CTDB_CLUSTER_FILESYSTEM_TYPE.ganesha.nfsd"
+	if { read PID < "$PIDFILE" && \
+	    grep "$GANESHA" "/proc/$PID/cmdline" ; } >/dev/null 2>&1 ; then
+		ctdb_counter_init "$service_name"
+	else
+	    if [ "$CUR_STATE" = "active" ]; then
+		echo "Trying fast restart of NFS service"
+		startstop_ganesha restart
+		ctdb_counter_incr "$service_name"
+		ctdb_check_counter "error" "-ge" "6" "$service_name"
+	    fi
+	fi
+
+	service_name="nfs-ganesha-$CTDB_CLUSTER_FILESYSTEM_TYPE"_service
+	# check that NFS is posting forward progress
+	if [ "$CUR_STATE" = "active" -a "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" != "yes" ] ; then
+	    MAXREDS=2
+	    MAXSTALL=120
+	    RESTART=0
+
+	    NUMREDS=`ls $GANRECDIR3 | grep "red" | wc -l`
+	    LASTONE=`ls -t $GANRECDIR3 | sed 's/_/ /' | awk 'NR > 1 {next} {printf $1} '`
+	    # Beware of startup
+	    if [ -z "$LASTONE" ] ; then
+		LASTONE=`date +"%s"`
+	    fi
+	    TNOW=$(date +"%s")
+	    TSTALL=$(($TNOW - $LASTONE))
+	    if [ $NUMREDS -ge $MAXREDS ] ; then
+		echo restarting because of $NUMREDS red conditions
+		RESTART=1
+		ctdb_counter_incr "$service_name"
+		ctdb_check_counter "error" "-ge" "6" "$service_name"
+	    fi
+	    if [ $TSTALL -ge $MAXSTALL ] ; then
+		echo restarting because of $TSTALL second stall
+		RESTART=1
+		ctdb_counter_incr "$service_name"
+		ctdb_check_counter "error" "-ge" "6" "$service_name"
+	    fi
+	    if [ $RESTART -gt 0 ] ; then
+		startstop_ganesha restart
+	    else
+		ctdb_counter_init "$service_name"
+	    fi
+	fi
+}
+
+############################################################
+
 case "$1" in
      init)
 	# read statd from persistent database
@@ -131,8 +188,7 @@ case "$1" in
 
      monitor)
 	update_tickles 2049
-	create_ganesha_recdirs
-	service_name=${service_name}_process
+
 	# check that statd responds to rpc requests
 	# if statd is not running we try to restart it
 	# we only do this IF we have a rpc.statd command.
@@ -140,64 +196,18 @@ case "$1" in
         # the check completely
 	p="rpc.statd"
 	which $p >/dev/null 2>/dev/null && \
-	    nfs_check_rpc_service "statd" 1 \
+	    nfs_check_rpc_service "statd" \
 		-ge 6 "verbose unhealthy" \
 		-eq 4 "verbose restart" \
 		-eq 2 "restart:bs"
 
-	PIDFILE="/var/run/ganesha.pid"
-	CUR_STATE=`get_cluster_fs_state`
-	GANESHA="/usr/bin/$CTDB_CLUSTER_FILESYSTEM_TYPE.ganesha.nfsd"
-	if { read PID < $PIDFILE && \
-	    grep "$GANESHA" "/proc/$PID/cmdline" ; } >/dev/null 2>&1 ; then
-		ctdb_counter_init "$service_name"
-	else
-	    if [ $CUR_STATE = "active" ]; then
-		echo "Trying fast restart of NFS service"
-		startstop_ganesha restart
-		ctdb_counter_incr "$service_name"
-		ctdb_check_counter "error" "-ge" "6" "$service_name"
-	    fi
+	if [ "$CTDB_SKIP_GANESHA_NFSD_CHECK" != "yes" ] ; then
+	    monitor_ganesha_nfsd
 	fi
 
-	service_name="nfs-ganesha-$CTDB_CLUSTER_FILESYSTEM_TYPE"_service
-	# check that NFS is posting forward progress
-	if [ $CUR_STATE = "active" -a "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" != "yes" ] ; then
-	    MAXREDS=2
-	    MAXSTALL=120
-	    RESTART=0
-
-	    NUMREDS=`ls $GANRECDIR3 | grep "red" | wc -l`
-	    LASTONE=`ls -t $GANRECDIR3 | sed 's/_/ /' | awk 'NR > 1 {next} {printf $1} '`
-	    # Beware of startup
-	    if [ -z $LASTONE ] ; then
-		LASTONE=`date +"%s"`
-	    fi
-	    TNOW=$(date +"%s")
-	    TSTALL=$(($TNOW - $LASTONE))
-	    if [ $NUMREDS -ge $MAXREDS ] ; then
-		echo restarting because of $NUMREDS red conditions
-		RESTART=1
-		ctdb_counter_incr "$service_name"
-		ctdb_check_counter "error" "-ge" "6" "$service_name"
-	    fi
-	    if [ $TSTALL -ge $MAXSTALL ] ; then
-		echo restarting because of $TSTALL second stall
-		RESTART=1
-		ctdb_counter_incr "$service_name"
-		ctdb_check_counter "error" "-ge" "6" "$service_name"
-	    fi
-	    if [ $RESTART -gt 0 ] ; then
-		startstop_ganesha restart
-	    else
-		ctdb_counter_init "$service_name"
-	    fi
-	fi
-
-
 	# rquotad is sometimes not started correctly on RHEL5
 	# not a critical service so we dont flag the node as unhealthy
-	nfs_check_rpc_service "rquotad" 1 \
+	nfs_check_rpc_service "rquotad" \
 	    -gt 0 "verbose restart:b"
 
 	# Check that directories for shares actually exist.
diff --git a/config/events.d/60.nfs b/config/events.d/60.nfs
index eb98ee1..53f78df 100755
--- a/config/events.d/60.nfs
+++ b/config/events.d/60.nfs
@@ -26,6 +26,29 @@ service_reconfigure ()
     } >/dev/null 2>&1
 }
 
+nfs_check_thread_count ()
+{
+    # Thread count monitoring is opt-in.
+    if [ "$CTDB_MONITOR_NFS_THREAD_COUNT" != "yes" ] ; then
+	return 0
+    fi
+
+    # Without $RPCNFSDCOUNT/$USE_KERNEL_NFSD_NUMBER there is no target to
+    # enforce.  Rather than guess the initscript default, assume the
+    # feature was switched on in error and do nothing.
+    _configured_threads="${RPCNFSDCOUNT:-${USE_KERNEL_NFSD_NUMBER}}"
+    if [ -z "$_configured_threads" ] ; then
+	return 0
+    fi
+
+    # String comparison on purpose: if get_proc() fails an arithmetic
+    # test would only add more errors.  Nothing to do when they match.
+    _running_threads=$(get_proc "fs/nfsd/threads")
+    [ "$_running_threads" != "$_configured_threads" ] || return 0
+    echo "Attempting to correct number of nfsd threads from ${_running_threads} to ${_configured_threads}"
+    set_proc "fs/nfsd/threads" "$_configured_threads"
+}
+
 loadconfig
 
 [ "$NFS_SERVER_MODE" != "ganesha" ] || exit 0
@@ -71,6 +94,8 @@ case "$1" in
 
 	nfs_check_rpc_services
 
+	nfs_check_thread_count
+
 	# Every 10 minutes, update the statd state database for which
 	# clients need notifications
 	nfs_statd_update 600
diff --git a/config/functions b/config/functions
index f4707a7..0679938 100755
--- a/config/functions
+++ b/config/functions
@@ -779,6 +779,7 @@ startstop_nfs() {
 			set_proc "fs/nfsd/threads" 0
 			service nfsserver stop > /dev/null 2>&1
 			pkill -9 nfsd
+			nfs_dump_some_threads
 			service nfsserver start
 			;;
 		esac
@@ -798,6 +799,7 @@ startstop_nfs() {
 			service nfs stop > /dev/null 2>&1
 			service nfslock stop > /dev/null 2>&1
 			pkill -9 nfsd
+			nfs_dump_some_threads
 			service nfslock start
 			service nfs start
 			;;
@@ -810,6 +812,28 @@ startstop_nfs() {
 	esac
 }
 
+# Dump up to $CTDB_NFS_DUMP_STUCK_THREADS nfsd thread backtraces to stdout.
+nfs_dump_some_threads ()
+{
+    [ -n "$CTDB_NFS_DUMP_STUCK_THREADS" ] || return 0
+    # A limit of 0 means there is nothing to dump.  Checking this here is
+    # an optimisation to avoid running an unnecessary pidof.
+    [ "$CTDB_NFS_DUMP_STUCK_THREADS" -gt 0 ] || return 0
+
+    _count=0
+    for _pid in $(pidof nfsd) ; do
+	# _count is the number of traces already printed: stop at the limit
+	[ "$_count" -lt "$CTDB_NFS_DUMP_STUCK_THREADS" ] || break
+	# Do this first to avoid racing with thread exit
+	_stack=$(get_proc "${_pid}/stack" 2>/dev/null)
+	if [ -n "$_stack" ] ; then
+	    echo "Stack trace for stuck nfsd thread [${_pid}]:"
+	    echo "$_stack"
+	    _count=$(($_count + 1))
+	fi
+    done
+}
+
 ########################################################
 # start/stop the nfs lockmanager service on different platforms
 ########################################################
@@ -977,26 +1001,20 @@ ip_maskbits_iface ()
 drop_ip ()
 {
     _addr="${1%/*}"  # Remove optional maskbits
-    _log_tag="$2"
 
     set -- $(ip_maskbits_iface $_addr)
     if [ -n "$1" ] ; then
 	_maskbits="$1"
 	_iface="$2"
-	if [ -n "$_log_tag" ] ; then
-	    script_log "$_log_tag" \
-		"Removing public address $_addr/$_maskbits from device $_iface"
-	fi
-	ip addr del $_addr/$_maskbits dev $_iface >/dev/null 2>&1
+	echo "Removing public address $_addr/$_maskbits from device $_iface"
+	delete_ip_from_iface $_iface $_addr $_maskbits >/dev/null 2>&1
     fi
 }
 
 drop_all_public_ips ()
 {
-    _log_tag="$1"
-
     while read _ip _x ; do
-	drop_ip "$_ip" "$_log_tag"
+	drop_ip "$_ip"
     done <"${CTDB_PUBLIC_ADDRESSES:-/dev/null}"
 }
 
@@ -1223,7 +1241,9 @@ ctdb_replay_monitor_status ()
 	    ;;
 	*) : ;;  # Must be ERROR, do nothing special.
     esac
-    echo "$_err_out"
+    if [ -n "$_err_out" ] ; then
+	echo "$_err_out"
+    fi
     exit $_code
 }
 
diff --git a/configure b/configure
index db5674d..006589f 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for ctdb 2.2.
+# Generated by GNU Autoconf 2.69 for ctdb 2.3.
 #
 #
 # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='ctdb'
 PACKAGE_TARNAME='ctdb'
-PACKAGE_VERSION='2.2'
-PACKAGE_STRING='ctdb 2.2'
+PACKAGE_VERSION='2.3'
+PACKAGE_STRING='ctdb 2.3'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''
 
@@ -1283,7 +1283,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures ctdb 2.2 to adapt to many kinds of systems.
+\`configure' configures ctdb 2.3 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1349,7 +1349,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of ctdb 2.2:";;
+     short | recursive ) echo "Configuration of ctdb 2.3:";;
    esac
   cat <<\_ACEOF
 
@@ -1448,7 +1448,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-ctdb configure 2.2
+ctdb configure 2.3
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2153,7 +2153,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by ctdb $as_me 2.2, which was
+It was created by ctdb $as_me 2.3, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -13042,7 +13042,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by ctdb $as_me 2.2, which was
+This file was extended by ctdb $as_me 2.3, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -13108,7 +13108,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-ctdb config.status 2.2
+ctdb config.status 2.3
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
diff --git a/doc/ctdb.1 b/doc/ctdb.1
index 45d981e..b561dce 100644
--- a/doc/ctdb.1
+++ b/doc/ctdb.1
@@ -2,12 +2,12 @@
 .\"     Title: ctdb
 .\"    Author: [FIXME: author] [see http://docbook.sf.net/el/author]
 .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
-.\"      Date: 05/30/2013
+.\"      Date: 07/11/2013
 .\"    Manual: CTDB - clustered TDB database
 .\"    Source: ctdb
 .\"  Language: English
 .\"
-.TH "CTDB" "1" "05/30/2013" "ctdb" "CTDB \- clustered TDB database"
+.TH "CTDB" "1" "07/11/2013" "ctdb" "CTDB \- clustered TDB database"
 .\" -----------------------------------------------------------------
 .\" * Define some portability stuff
 .\" -----------------------------------------------------------------
@@ -226,6 +226,84 @@ Recovery master:0
 .if n \{\
 .RE
 .\}
+.SS "nodestatus [<nodespec>]"
+.PP
+This command is similar to the
+\fBstatus\fR
+command\&. It displays the "node status" subset of output\&. The main differences are:
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+The exit code is the bitwise\-OR of the flags for each specified node, while
+\fBctdb status\fR
+exits with 0 if it was able to retrieve status for all nodes\&.
+.RE
+.sp
+.RS 4
+.ie n \{\
+\h'-04'\(bu\h'+03'\c
+.\}
+.el \{\
+.sp -1
+.IP \(bu 2.3
+.\}
+\fBctdb status\fR
+provides status information for all nodes\&.
+\fBctdb nodestatus\fR
+defaults to providing status for only the current node\&. If <nodespec> is provided then status is given for the indicated node(s)\&.
+.sp
+By default,
+\fBctdb nodestatus\fR
+gathers status from the local node\&. However, if invoked with "\-n all" (or similar) then status is gathered from the given node(s)\&. In particular
+\fBctdb nodestatus all\fR
+and
+\fBctdb nodestatus \-n all\fR
+will produce different output\&. It is possible to provide 2 different nodespecs (with and without "\-n") but the output is usually confusing!
+.RE
+.PP
+A common invocation in scripts is
+\fBctdb nodestatus all\fR
+to check whether all nodes in a cluster are healthy\&.
+.PP
+Example: ctdb nodestatus
+.PP
+Example output:
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+# ctdb nodestatus
+pnn:0 10\&.0\&.0\&.30        OK (THIS NODE)
+      
+.fi
+.if n \{\
+.RE
+.\}
+.PP
+Example: ctdb nodestatus all
+.PP
+Example output:
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+# ctdb nodestatus all
+Number of nodes:2
+pnn:0 10\&.0\&.0\&.30        OK (THIS NODE)
+pnn:1 10\&.0\&.0\&.31        OK
+      
+.fi
+.if n \{\
+.RE
+.\}
 .SS "recmaster"
 .PP
 This command shows the pnn of the node which is currently the recmaster\&.
@@ -273,9 +351,9 @@ Example output:
 .\}
 .SS "ping"
 .PP
-This command will "ping" all CTDB daemons in the cluster to verify that they are processing commands correctly\&.
+This command will "ping" the specified CTDB nodes in the cluster to verify that they are running\&.
 .PP
-Example: ctdb ping
+Example: ctdb ping \-n all
 .PP
 Example output:
 .sp
diff --git a/doc/ctdb.1.html b/doc/ctdb.1.html
index a05f2d0..418b580 100644
--- a/doc/ctdb.1.html
+++ b/doc/ctdb.1.html
@@ -1,6 +1,6 @@
-<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>ctdb</title><meta name="generator" content="DocBook XSL Stylesheets V1.78.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry"><a name="ctdb.1"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>ctdb — clustered tdb database management utility</p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><div class="cm [...]
+<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>ctdb</title><meta name="generator" content="DocBook XSL Stylesheets V1.78.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry"><a name="ctdb.1"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>ctdb — clustered tdb database management utility</p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><div class="cm [...]
       ctdb is a utility to view and manage a ctdb cluster.
-    </p></div><div class="refsect1"><a name="idm264423590512"></a><h2>OPTIONS</h2><div class="variablelist"><dl class="variablelist"><dt><span class="term">-n <pnn></span></dt><dd><p>
+    </p></div><div class="refsect1"><a name="idm257365557808"></a><h2>OPTIONS</h2><div class="variablelist"><dl class="variablelist"><dt><span class="term">-n <pnn></span></dt><dd><p>
             This specifies the physical node number on which to execute the 
 	    command. Default is to run the command on the daemon running on 
 	    the local host.
@@ -46,13 +46,13 @@
             This lets catdb and dumpdbbackup print the
             record flags for each record. Note that cattdb always
             prints the flags.
-          </p></dd></dl></div></div><div class="refsect1"><a name="idm264423567536"></a><h2>Administrative Commands</h2><p>
+          </p></dd></dl></div></div><div class="refsect1"><a name="idm257365534880"></a><h2>Administrative Commands</h2><p>
       These are commands used to monitor and administrate a CTDB cluster.
-    </p><div class="refsect2"><a name="idm264423566512"></a><h3>pnn</h3><p>
+    </p><div class="refsect2"><a name="idm257365533952"></a><h3>pnn</h3><p>
         This command displays the pnn of the current node.
-      </p></div><div class="refsect2"><a name="idm264423565440"></a><h3>status</h3><p>
+      </p></div><div class="refsect2"><a name="idm257365532880"></a><h3>status</h3><p>
         This command shows the current status of the ctdb node.
-      </p><div class="refsect3"><a name="idm264423564512"></a><h4>node status</h4><p>
+      </p><div class="refsect3"><a name="idm257365531952"></a><h4>node status</h4><p>
           Node status reflects the current status of the node. There are five possible states:
         </p><p>
           OK - This node is fully functional.
@@ -75,7 +75,7 @@
 	in a cluster like a node that is ok. Some interfaces to serve
 	public ip addresses are down, but at least one interface is up.
 	See also "ctdb ifaces".
-        </p></div><div class="refsect3"><a name="idm264423558592"></a><h4>generation</h4><p>
+        </p></div><div class="refsect3"><a name="idm257365526032"></a><h4>generation</h4><p>
           The generation id is a number that indicates the current generation 
           of a cluster instance. Each time a cluster goes through a 
           reconfiguration or a recovery its generation id will be changed.
@@ -96,10 +96,10 @@
 	  All nodes start with generation "INVALID" and are not assigned a real
 	  generation id until they have successfully been merged with a cluster
 	  through a recovery.
-        </p></div><div class="refsect3"><a name="idm264423555536"></a><h4>VNNMAP</h4><p>
+        </p></div><div class="refsect3"><a name="idm257365522976"></a><h4>VNNMAP</h4><p>
           The list of Virtual Node Numbers. This is a list of all nodes that actively participates in the cluster and that share the workload of hosting the Clustered TDB database records.
           Only nodes that are participating in the vnnmap can become lmaster or dmaster for a database record.
-        </p></div><div class="refsect3"><a name="idm264423554160"></a><h4>Recovery mode</h4><p>
+        </p></div><div class="refsect3"><a name="idm257365521600"></a><h4>Recovery mode</h4><p>
           This is the current recovery mode of the cluster. There are two possible modes:
         </p><p>
           NORMAL - The cluster is fully operational.
@@ -119,7 +119,7 @@
 	have been recovered, the node mode will change into NORMAL mode
 	and the databases will be "thawed", allowing samba to access the
 	databases again.
-	</p></div><div class="refsect3"><a name="idm264423550384"></a><h4>Recovery master</h4><p>
+	</p></div><div class="refsect3"><a name="idm257365517824"></a><h4>Recovery master</h4><p>
           This is the cluster node that is currently designated as the recovery master. This node is responsible of monitoring the consistency of the cluster and to perform the actual recovery process when reqired.
         </p><p>
 	Only one node at a time can be the designated recovery master. Which
@@ -141,9 +141,48 @@ hash:2 lmaster:2
 hash:3 lmaster:3
 Recovery mode:NORMAL (0)
 Recovery master:0
-      </pre></div><div class="refsect2"><a name="idm264423546544"></a><h3>recmaster</h3><p>
+      </pre></div><div class="refsect2"><a name="idm257365513984"></a><h3>nodestatus [<nodespec>]</h3><p>
+        This command is similar to the <span class="command"><strong>status</strong></span>
+        command.  It displays the "node status" subset of output.  The
+        main differences are:
+      </p><div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; "><li class="listitem"><p>
+	    The exit code is the bitwise-OR of the flags for each
+	    specified node, while <span class="command"><strong>ctdb status</strong></span> exits
+	    with 0 if it was able to retrieve status for all nodes.
+	  </p></li><li class="listitem"><p>
+	    <span class="command"><strong>ctdb status</strong></span> provides status information
+	    for all nodes.  <span class="command"><strong>ctdb nodestatus</strong></span>
+	    defaults to providing status for only the current node.
+	    If <nodespec> is provided then status is given for
+	    the indicated node(s).
+	  </p><p>
+	    By default, <span class="command"><strong>ctdb nodestatus</strong></span> gathers
+	    status from the local node.  However, if invoked with "-n
+	    all" (or similar) then status is gathered from the given
+	    node(s).  In particular <span class="command"><strong>ctdb nodestatus
+	    all</strong></span> and <span class="command"><strong>ctdb nodestatus -n
+	    all</strong></span> will produce different output.  It is
+	    possible to provide 2 different nodespecs (with and
+	    without "-n") but the output is usually confusing!
+	  </p></li></ul></div><p>
+	A common invocation in scripts is <span class="command"><strong>ctdb nodestatus
+	all</strong></span> to check whether all nodes in a cluster are
+	healthy.
+      </p><p>
+	Example: ctdb nodestatus
+      </p><p>Example output:</p><pre class="screen">
+# ctdb nodestatus
+pnn:0 10.0.0.30        OK (THIS NODE)
+      </pre><p>
+	Example: ctdb nodestatus all
+      </p><p>Example output:</p><pre class="screen">
+# ctdb nodestatus all
+Number of nodes:2
+pnn:0 10.0.0.30        OK (THIS NODE)
+pnn:1 10.0.0.31        OK
+      </pre></div><div class="refsect2"><a name="idm257365500320"></a><h3>recmaster</h3><p>
         This command shows the pnn of the node which is currently the recmaster.
-      </p></div><div class="refsect2"><a name="idm264423545376"></a><h3>uptime</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360456880"></a><h3>uptime</h3><p>
         This command shows the uptime for the ctdb daemon. When the last recovery or ip-failover completed and how long it took. If the "duration" is shown as a negative number, this indicates that there is a recovery/failover in progress and it started that many seconds ago.
       </p><p>
 	Example: ctdb uptime
@@ -152,7 +191,7 @@ Current time of node          :                Thu Oct 29 10:38:54 2009
 Ctdbd start time              : (000 16:54:28) Wed Oct 28 17:44:26 2009
 Time of last recovery/failover: (000 16:53:31) Wed Oct 28 17:45:23 2009
 Duration of last recovery/failover: 2.248552 seconds
-      </pre></div><div class="refsect2"><a name="idm264423542224"></a><h3>listnodes</h3><p>
+      </pre></div><div class="refsect2"><a name="idm257360453872"></a><h3>listnodes</h3><p>
         This command shows lists the ip addresses of all the nodes in the cluster.
       </p><p>
 	Example: ctdb listnodes
@@ -161,10 +200,11 @@ Duration of last recovery/failover: 2.248552 seconds
 10.0.0.72
 10.0.0.73
 10.0.0.74
-      </pre></div><div class="refsect2"><a name="idm264423539552"></a><h3>ping</h3><p>
-        This command will "ping" all CTDB daemons in the cluster to verify that they are processing commands correctly.
+      </pre></div><div class="refsect2"><a name="idm257360451344"></a><h3>ping</h3><p>
+        This command will "ping" the specified CTDB nodes in the cluster to
+        verify that they are running.
       </p><p>
-	Example: ctdb ping
+	Example: ctdb ping -n all
       </p><p>
 	Example output:
       </p><pre class="screen">
@@ -172,7 +212,7 @@ response from 0 time=0.000054 sec  (3 clients)
 response from 1 time=0.000144 sec  (2 clients)
 response from 2 time=0.000105 sec  (2 clients)
 response from 3 time=0.000114 sec  (2 clients)
-      </pre></div><div class="refsect2"><a name="idm264423536624"></a><h3>runstate [setup|first_recovery|startup|running]</h3><p>
+      </pre></div><div class="refsect2"><a name="idm257360448592"></a><h3>runstate [setup|first_recovery|startup|running]</h3><p>
         Print the runstate of the specified node.  Runstates are used
         to serialise important state transitions in CTDB, particularly
         during startup.
@@ -186,7 +226,7 @@ response from 3 time=0.000114 sec  (2 clients)
 	Example output:
       </p><pre class="screen">
 RUNNING
-      </pre></div><div class="refsect2"><a name="idm264418543872"></a><h3>ifaces</h3><p>
+      </pre></div><div class="refsect2"><a name="idm257360445424"></a><h3>ifaces</h3><p>
 	This command will display the list of network interfaces, which could
 	host public addresses, along with their status.
       </p><p>
@@ -209,13 +249,13 @@ name:eth2 link:up references:1
 :eth4:0:0
 :eth3:1:1
 :eth2:1:1
-      </pre></div><div class="refsect2"><a name="idm264418539648"></a><h3>setifacelink <iface> <status></h3><p>
+      </pre></div><div class="refsect2"><a name="idm257360441200"></a><h3>setifacelink <iface> <status></h3><p>
 	This command will set the status of a network interface.
 	The status needs to be "up" or "down". This is typically
 	used in the 10.interfaces script in the "monitor" event.
       </p><p>
 	Example: ctdb setifacelink eth0 up
-      </p></div><div class="refsect2"><a name="idm264418538048"></a><h3>ip</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360439600"></a><h3>ip</h3><p>
         This command will display the list of public addresses that are provided by the cluster and which physical node is currently serving this ip. By default this command will ONLY show those public addresses that are known to the node itself. To see the full list of all public ips across the cluster you must use "ctdb ip -n all".
       </p><p>
 	Example: ctdb ip
@@ -245,7 +285,7 @@ Public IPs on node 0
 :172.31.92.83:0:eth5:eth5:eth4,eth5:
 :172.31.92.84:1::eth5:eth4,eth5:
 :172.31.92.85:0:eth5:eth5:eth4,eth5:
-      </pre></div><div class="refsect2"><a name="idm264418532864"></a><h3>ipinfo <ip></h3><p>
+      </pre></div><div class="refsect2"><a name="idm257360434416"></a><h3>ipinfo <ip></h3><p>
         This command will display details about the specified public addresses.
       </p><p>
 	Example: ctdb ipinfo 172.31.92.85
@@ -258,7 +298,7 @@ CurrentNode:0
 NumInterfaces:2
 Interface[1]: Name:eth4 Link:down References:0
 Interface[2]: Name:eth5 Link:up References:2 (active)
-      </pre></div><div class="refsect2"><a name="idm264418530144"></a><h3>scriptstatus</h3><p>
+      </pre></div><div class="refsect2"><a name="idm257360431696"></a><h3>scriptstatus</h3><p>
         This command displays which scripts where run in the previous monitoring cycle and the result of each script. If a script failed with an error, causing the node to become unhealthy, the output from that script is also shown.
       </p><p>
 	Example: ctdb scriptstatus
@@ -275,15 +315,15 @@ Interface[2]: Name:eth5 Link:up References:2 (active)
 41.httpd             Status:OK    Duration:0.039 Tue Mar 24 18:56:57 2009
 50.samba             Status:ERROR    Duration:0.082 Tue Mar 24 18:56:57 2009
    OUTPUT:ERROR: Samba tcp port 445 is not responding
-      </pre></div><div class="refsect2"><a name="idm264418526784"></a><h3>disablescript <script></h3><p>
+      </pre></div><div class="refsect2"><a name="idm257360428336"></a><h3>disablescript <script></h3><p>
 	This command is used to disable an eventscript.
       </p><p>
 	This will take effect the next time the eventscripts are being executed so it can take a short while until this is reflected in 'scriptstatus'.
-      </p></div><div class="refsect2"><a name="idm264418525200"></a><h3>enablescript <script></h3><p>
+      </p></div><div class="refsect2"><a name="idm257360426752"></a><h3>enablescript <script></h3><p>
 	This command is used to enable an eventscript.
       </p><p>
 	This will take effect the next time the eventscripts are being executed so it can take a short while until this is reflected in 'scriptstatus'.
-      </p></div><div class="refsect2"><a name="idm264418523616"></a><h3>getvar <name></h3><p>
+      </p></div><div class="refsect2"><a name="idm257360425168"></a><h3>getvar <name></h3><p>
         Get the runtime value of a tuneable variable.
       </p><p>
 	Example: ctdb getvar MaxRedirectCount
@@ -291,11 +331,11 @@ Interface[2]: Name:eth5 Link:up References:2 (active)
 	Example output:
       </p><pre class="screen">
 MaxRedirectCount    = 3
-      </pre></div><div class="refsect2"><a name="idm264418521200"></a><h3>setvar <name> <value></h3><p>
+      </pre></div><div class="refsect2"><a name="idm257360422752"></a><h3>setvar <name> <value></h3><p>
         Set the runtime value of a tuneable variable.
       </p><p>
 	Example: ctdb setvar MaxRedirectCount 5
-      </p></div><div class="refsect2"><a name="idm264418519712"></a><h3>listvars</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360421344"></a><h3>listvars</h3><p>
       List all tuneable variables, except the values of the obsolete tunables
       like VacuumMinInterval. The obsolete tunables can be retrieved only
       explicitly with the "ctdb getvar" command.
@@ -348,7 +388,7 @@ StatHistoryInterval     = 1
 DeferredAttachTO        = 120
 AllowClientDBAttach     = 1
 RecoverPDBBySeqNum      = 0
-      </pre></div><div class="refsect2"><a name="idm264418515776"></a><h3>lvsmaster</h3><p>
+      </pre></div><div class="refsect2"><a name="idm257360417408"></a><h3>lvsmaster</h3><p>
       This command shows which node is currently the LVSMASTER. The
       LVSMASTER is the node in the cluster which drives the LVS system and
       which receives all incoming traffic from clients.
@@ -359,7 +399,7 @@ RecoverPDBBySeqNum      = 0
       evenly onto the other nodes in the cluster. This is an alternative to using
       public ip addresses. See the manpage for ctdbd for more information
       about LVS.
-      </p></div><div class="refsect2"><a name="idm264418513728"></a><h3>lvs</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360415360"></a><h3>lvs</h3><p>
       This command shows which nodes in the cluster are currently active in the
       LVS configuration. I.e. which nodes we are currently loadbalancing
       the single ip address across.
@@ -374,7 +414,7 @@ RecoverPDBBySeqNum      = 0
       </p><pre class="screen">
 2:10.0.0.13
 3:10.0.0.14
-      </pre></div><div class="refsect2"><a name="idm264418510768"></a><h3>getcapabilities</h3><p>
+      </pre></div><div class="refsect2"><a name="idm257360412400"></a><h3>getcapabilities</h3><p>
       This command shows the capabilities of the current node.
       Please see manpage for ctdbd for a full list of all capabilities and
       more detailed description.
@@ -393,7 +433,7 @@ RecoverPDBBySeqNum      = 0
 RECMASTER: YES
 LMASTER: YES
 LVS: NO
-      </pre></div><div class="refsect2"><a name="idm264418507264"></a><h3>statistics</h3><p>
+      </pre></div><div class="refsect2"><a name="idm257360408896"></a><h3>statistics</h3><p>
         Collect statistics from the CTDB daemon about how many calls it has served.
       </p><p>
 	Example: ctdb statistics
@@ -435,23 +475,23 @@ CTDB version 1
  max_hop_count                      0
  max_call_latency                   4.948321 sec
  max_lockwait_latency               0.000000 sec
-      </pre></div><div class="refsect2"><a name="idm264418503472"></a><h3>statisticsreset</h3><p>
+      </pre></div><div class="refsect2"><a name="idm257365633344"></a><h3>statisticsreset</h3><p>
         This command is used to clear all statistics counters in a node.
       </p><p>
 	Example: ctdb statisticsreset
-      </p></div><div class="refsect2"><a name="idm264418501968"></a><h3>getreclock</h3><p>
+      </p></div><div class="refsect2"><a name="idm257365631792"></a><h3>getreclock</h3><p>
 	This command is used to show the filename of the reclock file that is used.
       </p><p>
 	Example output:
       </p><pre class="screen">
 Reclock file:/gpfs/.ctdb/shared
-      </pre></div><div class="refsect2"><a name="idm264418499840"></a><h3>setreclock [filename]</h3><p>
+      </pre></div><div class="refsect2"><a name="idm257365629504"></a><h3>setreclock [filename]</h3><p>
 	This command is used to modify, or clear, the file that is used as the reclock file at runtime. When this command is used, the reclock file checks are disabled. To re-enable the checks the administrator needs to activate the "VerifyRecoveryLock" tunable using "ctdb setvar".
       </p><p>
 	If run with no parameter this will remove the reclock file completely. If run with a parameter the parameter specifies the new filename to use for the recovery lock.
       </p><p>
 	This command only affects the runtime settings of a ctdb node and will be lost when ctdb is restarted. For persistent changes to the reclock file setting you must edit /etc/sysconfig/ctdb.
-      </p></div><div class="refsect2"><a name="idm264418497344"></a><h3>getdebug</h3><p>
+      </p></div><div class="refsect2"><a name="idm257365626976"></a><h3>getdebug</h3><p>
         Get the current debug level for the node. the debug level controls what information is written to the log file.
       </p><p>
 	The debug levels are mapped to the corresponding syslog levels.
@@ -461,42 +501,42 @@ Reclock file:/gpfs/.ctdb/shared
 	The list of debug levels from highest to lowest are :
       </p><p>
 	EMERG ALERT CRIT ERR WARNING NOTICE INFO DEBUG
-      </p></div><div class="refsect2"><a name="idm264418494752"></a><h3>setdebug <debuglevel></h3><p>
+      </p></div><div class="refsect2"><a name="idm257365624432"></a><h3>setdebug <debuglevel></h3><p>
         Set the debug level of a node. This controls what information will be logged.
       </p><p>
 	The debuglevel is one of EMERG ALERT CRIT ERR WARNING NOTICE INFO DEBUG
-      </p></div><div class="refsect2"><a name="idm264418493136"></a><h3>getpid</h3><p>
+      </p></div><div class="refsect2"><a name="idm257365622784"></a><h3>getpid</h3><p>
         This command will return the process id of the ctdb daemon.
-      </p></div><div class="refsect2"><a name="idm264418492016"></a><h3>disable</h3><p>
+      </p></div><div class="refsect2"><a name="idm257365621632"></a><h3>disable</h3><p>
         This command is used to administratively disable a node in the cluster.
         A disabled node will still participate in the cluster and host
         clustered TDB records but its public ip address has been taken over by
         a different node and it no longer hosts any services.
-      </p></div><div class="refsect2"><a name="idm264418490672"></a><h3>enable</h3><p>
+      </p></div><div class="refsect2"><a name="idm257365620256"></a><h3>enable</h3><p>
         Re-enable a node that has been administratively disabled.
-      </p></div><div class="refsect2"><a name="idm264418489552"></a><h3>stop</h3><p>
+      </p></div><div class="refsect2"><a name="idm257365619104"></a><h3>stop</h3><p>
         This command is used to administratively STOP a node in the cluster.
         A STOPPED node is connected to the cluster but will not host any
 	public ip addresses, nor does it participate in the VNNMAP.
 	The difference between a DISABLED node and a STOPPED node is that
 	a STOPPED node does not host any parts of the database which means
 	that a recovery is required to stop/continue nodes.
-      </p></div><div class="refsect2"><a name="idm264418488112"></a><h3>continue</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360373536"></a><h3>continue</h3><p>
         Re-start a node that has been administratively stopped.
-      </p></div><div class="refsect2"><a name="idm264418487008"></a><h3>addip <public_ip/mask> <iface></h3><p>
+      </p></div><div class="refsect2"><a name="idm257360372512"></a><h3>addip <public_ip/mask> <iface></h3><p>
 	This command is used to add a new public ip to a node during runtime.
 	This allows public addresses to be added to a cluster without having
 	to restart the ctdb daemons.
       </p><p>
 	Note that this only updates the runtime instance of ctdb. Any changes will be lost next time ctdb is restarted and the public addresses file is re-read.
  If you want this change to be permanent you must also update the public addresses file manually.
-      </p></div><div class="refsect2"><a name="idm264418485056"></a><h3>delip <public_ip></h3><p>
+      </p></div><div class="refsect2"><a name="idm257360370560"></a><h3>delip <public_ip></h3><p>
 	This command is used to remove a public ip from a node during runtime.
 	If this public ip is currently hosted by the node it is being removed from, the ip will first be failed over to another node, if possible, before it is removed.
       </p><p>
 	Note that this only updates the runtime instance of ctdb. Any changes will be lost next time ctdb is restarted and the public addresses file is re-read.
  If you want this change to be permanent you must also update the public addresses file manually.
-      </p></div><div class="refsect2"><a name="idm264418483120"></a><h3>moveip <public_ip> <node></h3><p>
+      </p></div><div class="refsect2"><a name="idm257360368624"></a><h3>moveip <public_ip> <node></h3><p>
       This command can be used to manually fail a public ip address to a
       specific node.
       </p><p>
@@ -507,14 +547,14 @@ Reclock file:/gpfs/.ctdb/shared
       DeterministicIPs = 0
       </p><p>
       NoIPFailback = 1
-      </p></div><div class="refsect2"><a name="idm264418480608"></a><h3>shutdown</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360366112"></a><h3>shutdown</h3><p>
         This command will shutdown a specific CTDB daemon.
-      </p></div><div class="refsect2"><a name="idm264418479504"></a><h3>recover</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360365008"></a><h3>recover</h3><p>
         This command will trigger the recovery daemon to do a cluster
         recovery.
-      </p></div><div class="refsect2"><a name="idm264418478368"></a><h3>ipreallocate</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360363872"></a><h3>ipreallocate</h3><p>
         This command will force the recovery master to perform a full ip reallocation process and redistribute all ip addresses. This is useful to "reset" the allocations back to its default state if they have been changed using the "moveip" command. While a "recover" will also perform this reallocation, a recovery is much more heavyweight since it will also rebuild all the databases.
-      </p></div><div class="refsect2"><a name="idm264418476928"></a><h3>setlmasterrole <on|off></h3><p>
+      </p></div><div class="refsect2"><a name="idm257360362432"></a><h3>setlmasterrole <on|off></h3><p>
 	This command is used to enable/disable the LMASTER capability for a node at runtime. This capability determines whether or not a node can be used as an LMASTER for records in the database. A node that does not have the LMASTER capability will not show up in the vnnmap.
       </p><p>
 	Nodes will by default have this capability, but it can be stripped off nodes by the setting in the sysconfig file or by using this command.
@@ -522,21 +562,21 @@ Reclock file:/gpfs/.ctdb/shared
 	Once this setting has been enabled/disabled, you need to perform a recovery for it to take effect.
       </p><p>
 	See also "ctdb getcapabilities"
-      </p></div><div class="refsect2"><a name="idm264418474176"></a><h3>setrecmasterrole <on|off></h3><p>
+      </p></div><div class="refsect2"><a name="idm257360359680"></a><h3>setrecmasterrole <on|off></h3><p>
 	This command is used to enable/disable the RECMASTER capability for a node at runtime. This capability determines whether or not a node can be used as a RECMASTER for the cluster. A node that does not have the RECMASTER capability can not win a recmaster election. A node that already is the recmaster for the cluster when the capability is stripped off the node will remain the recmaster until the next cluster election.
       </p><p>
 	Nodes will by default have this capability, but it can be stripped off nodes by the setting in the sysconfig file or by using this command.
       </p><p>
 	See also "ctdb getcapabilities"
-      </p></div><div class="refsect2"><a name="idm264418471776"></a><h3>killtcp <srcip:port> <dstip:port></h3><p>
+      </p></div><div class="refsect2"><a name="idm257360357280"></a><h3>killtcp <srcip:port> <dstip:port></h3><p>
         This command will kill the specified TCP connection by issuing a
         TCP RST to the srcip:port endpoint. This is a command used by the 
 	ctdb eventscripts.
-      </p></div><div class="refsect2"><a name="idm264418470560"></a><h3>gratiousarp <ip> <interface></h3><p>
+      </p></div><div class="refsect2"><a name="idm257360356064"></a><h3>gratiousarp <ip> <interface></h3><p>
 	This command will send out a gratuitous ARP for the specified ip
 	through the specified interface. This command is mainly used by the
 	ctdb eventscripts.
-      </p></div><div class="refsect2"><a name="idm264418469280"></a><h3>reloadnodes</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360354784"></a><h3>reloadnodes</h3><p>
       This command is used when adding new nodes, or removing existing nodes from an existing cluster.
       </p><p>
       Procedure to add a node:
@@ -570,7 +610,7 @@ Reclock file:/gpfs/.ctdb/shared
       </p><p>
       5, Use 'ctdb status' on all nodes and verify that the deleted node no longer shows up in the list.
       </p><p>
-      </p></div><div class="refsect2"><a name="idm264418461056"></a><h3>reloadips</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360346560"></a><h3>reloadips</h3><p>
       This command is used to reload the public addresses file and update the
 	ip configuration of the running daemon.
       </p><p>
@@ -597,7 +637,7 @@ Reclock file:/gpfs/.ctdb/shared
 	master to ensure that all addresses are added to all nodes as one
 	single operation, after which any required ip node rebalancing may
 	take place.
-      </p></div><div class="refsect2"><a name="idm264418455472"></a><h3>tickle <srcip:port> <dstip:port></h3><p>
+      </p></div><div class="refsect2"><a name="idm257360340976"></a><h3>tickle <srcip:port> <dstip:port></h3><p>
         This command will send a TCP tickle to the source host for the
         specified TCP connection.
 	A TCP tickle is a TCP ACK packet with an invalid sequence and 
@@ -609,10 +649,10 @@ Reclock file:/gpfs/.ctdb/shared
         TCP connection has been disrupted and that the client will need
         to reestablish. This greatly speeds up the time it takes for a client
         to detect and reestablish after an IP failover in the ctdb cluster.
-      </p></div><div class="refsect2"><a name="idm264418453328"></a><h3>gettickles <ip></h3><p>
+      </p></div><div class="refsect2"><a name="idm257360338448"></a><h3>gettickles <ip></h3><p>
 	This command is used to show which TCP connections are registered with
 	CTDB to be "tickled" if there is a failover.
-      </p></div><div class="refsect2"><a name="idm264418452160"></a><h3>repack [max_freelist]</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360337280"></a><h3>repack [max_freelist]</h3><p>
 	Over time, when records are created and deleted in a TDB, the TDB list of free space will become fragmented. This can lead to a slowdown in accessing TDB records.
 	This command is used to defragment a TDB database and pruning the freelist.
       </p><p>
@@ -627,7 +667,7 @@ Reclock file:/gpfs/.ctdb/shared
 	Example: ctdb repack 1000
       </p><p>
 	 By default, this operation is issued from the 00.ctdb event script every 5 minutes.
-      </p></div><div class="refsect2"><a name="idm264418447664"></a><h3>vacuum [max_records]</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360332784"></a><h3>vacuum [max_records]</h3><p>
 	Over time CTDB databases will fill up with empty deleted records which will lead to a progressive slow down of CTDB database access.
 	This command is used to prune all databases and delete all empty records from the cluster.
       </p><p>
@@ -643,17 +683,17 @@ Reclock file:/gpfs/.ctdb/shared
 	Example: ctdb vacuum
       </p><p>
 	 By default, this operation is issued from the 00.ctdb event script every 5 minutes.
-      </p></div><div class="refsect2"><a name="idm264418444208"></a><h3>backupdb <dbname> <file></h3><p>
+      </p></div><div class="refsect2"><a name="idm257360329328"></a><h3>backupdb <dbname> <file></h3><p>
         This command can be used to copy the entire content of a database out to a file. This file can later be read back into ctdb using the restoredb command.
 This is mainly useful for backing up persistent databases such as secrets.tdb and similar.
-      </p></div><div class="refsect2"><a name="idm264418442912"></a><h3>restoredb <file> [<dbname>]</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360328032"></a><h3>restoredb <file> [<dbname>]</h3><p>
         This command restores a persistent database that was previously backed up using backupdb.
 	By default the data will be restored back into the same database as
 	it was created from. By specifying dbname you can restore the data
 	into a different database.
-      </p></div><div class="refsect2"><a name="idm264418441520"></a><h3>wipedb <dbname></h3><p>
+      </p></div><div class="refsect2"><a name="idm257360326640"></a><h3>wipedb <dbname></h3><p>
         This command can be used to remove all content of a database.
-      </p></div></div><div class="refsect2"><a name="idm264418440272"></a><h3>getlog [<level>] [recoverd]</h3><p>
+      </p></div></div><div class="refsect2"><a name="idm257360325392"></a><h3>getlog [<level>] [recoverd]</h3><p>
 	In addition to the normal logging to a log file,
 	CTDBD also keeps a in-memory ringbuffer containing the most recent
 	log entries for all log levels (except DEBUG).
@@ -671,25 +711,25 @@ This is mainly useful for backing up persistent databases such as secrets.tdb an
 	By default, logs are extracted from the main CTDB daemon.  If
 	the recoverd option is given then logs are extracted from the
 	recovery daemon.
-      </p></div><div class="refsect2"><a name="idm264418437152"></a><h3>clearlog [recoverd]</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360322272"></a><h3>clearlog [recoverd]</h3><p>
 	This command clears the in-memory logging ringbuffer.
       </p><p>
 	By default, logs are cleared in the main CTDB daemon.  If the
 	recoverd option is given then logs are cleared in the recovery
 	daemon.
-      </p></div><div class="refsect2"><a name="idm264418435504"></a><h3>setdbreadonly <dbname|hash></h3><p>
+      </p></div><div class="refsect2"><a name="idm257360320624"></a><h3>setdbreadonly <dbname|hash></h3><p>
         This command will enable the ReadOnly record support for a database.
 	This is an experimental feature to improve performance for contended
 	records primarily in locking.tdb and brlock.tdb.
 	When enabling this feature you must set it on all nodes in the cluster.
 	For now, this feature requires a special patch to samba in order to 
 	use it.
-      </p></div><div class="refsect1"><a name="idm264418434112"></a><h2>Debugging Commands</h2><p>
+      </p></div><div class="refsect1"><a name="idm257360319232"></a><h2>Debugging Commands</h2><p>
       These commands are primarily used for CTDB development and testing and
       should not be used for normal administration.
-    </p><div class="refsect2"><a name="idm264418433072"></a><h3>process-exists <pid></h3><p>
+    </p><div class="refsect2"><a name="idm257360318192"></a><h3>process-exists <pid></h3><p>
         This command checks if a specific process exists on the CTDB host. This is mainly used by Samba to check if remote instances of samba are still running or not.
-      </p></div><div class="refsect2"><a name="idm264418431856"></a><h3>getdbmap</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360316976"></a><h3>getdbmap</h3><p>
         This command lists all clustered TDB databases that the CTDB daemon has attached to. Some databases are flagged as PERSISTENT, this means that the database stores data persistently and the data will remain across reboots. One example of such a database is secrets.tdb where information about how the cluster was joined to the domain is stored.
       </p><p>
 	If a PERSISTENT database is not in a healthy state the database is
@@ -729,7 +769,7 @@ dbid:0xb775fff6 name:secrets.tdb path:/var/ctdb/persistent/secrets.tdb.0 PERSIST
       </p><pre class="screen">
 :ID:Name:Path:Persistent:Unhealthy:
 :0x7bbbd26c:passdb.tdb:/var/ctdb/persistent/passdb.tdb.0:1:0:
-      </pre></div><div class="refsect2"><a name="idm264418424016"></a><h3>getdbstatus <dbname></h3><p>
+      </pre></div><div class="refsect2"><a name="idm257360309136"></a><h3>getdbstatus <dbname></h3><p>
 	This command displays more details about a database.
       </p><p>
 	Example: ctdb getdbstatus test.tdb.0
@@ -751,35 +791,35 @@ name: registry.tdb
 path: /var/ctdb/persistent/registry.tdb.0
 PERSISTENT: yes
 HEALTH: NO-HEALTHY-NODES - ERROR - Backup of corrupted TDB in '/var/ctdb/persistent/registry.tdb.0.corrupted.20091208091949.0Z'
-      </pre></div><div class="refsect2"><a name="idm264418419696"></a><h3>catdb <dbname></h3><p>
+      </pre></div><div class="refsect2"><a name="idm257360304816"></a><h3>catdb <dbname></h3><p>
         This command will dump a clustered TDB database to the screen. This is a debugging command.
-      </p></div><div class="refsect2"><a name="idm264418418544"></a><h3>cattdb <dbname></h3><p>
+      </p></div><div class="refsect2"><a name="idm257360303664"></a><h3>cattdb <dbname></h3><p>
         This command will dump the content of the local TDB database to the screen. This is a debugging command.
-      </p></div><div class="refsect2"><a name="idm264418417376"></a><h3>dumpdbbackup <backup-file></h3><p>
+      </p></div><div class="refsect2"><a name="idm257360302496"></a><h3>dumpdbbackup <backup-file></h3><p>
 	This command will dump the content of database backup to the screen
 	(similar to ctdb catdb). This is a debugging command.
-      </p></div><div class="refsect2"><a name="idm264418416208"></a><h3>getmonmode</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360301328"></a><h3>getmonmode</h3><p>
         This command returns the monitoring mode of a node. The monitoring mode is either ACTIVE or DISABLED. Normally a node will continuously monitor that all other nodes that are expected are in fact connected and that they respond to commands.
       </p><p>
         ACTIVE - This is the normal mode. The node is actively monitoring all other nodes, both that the transport is connected and also that the node responds to commands. If a node becomes unavailable, it will be marked as DISCONNECTED and a recovery is initiated to restore the cluster.
       </p><p>
         DISABLED - This node is not monitoring that other nodes are available. In this mode a node failure will not be detected and no recovery will be performed. This mode is useful when for debugging purposes one wants to attach GDB to a ctdb process but wants to prevent the rest of the cluster from marking this node as DISCONNECTED and do a recovery.
-      </p></div><div class="refsect2"><a name="idm264418413440"></a><h3>setmonmode <0|1></h3><p>
+      </p></div><div class="refsect2"><a name="idm257360298560"></a><h3>setmonmode <0|1></h3><p>
         This command can be used to explicitly disable/enable monitoring mode on a node. The main purpose is if one wants to attach GDB to a running ctdb daemon but wants to prevent the other nodes from marking it as DISCONNECTED and issuing a recovery. To do this, set monitoring mode to 0 on all nodes before attaching with GDB. Remember to set monitoring mode back to 1 afterwards.
-      </p></div><div class="refsect2"><a name="idm264418412000"></a><h3>attach <dbname> [persistent]</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360297120"></a><h3>attach <dbname> [persistent]</h3><p>
         This is a debugging command. This command will make the CTDB daemon create a new CTDB database and attach to it.
-      </p></div><div class="refsect2"><a name="idm264418410752"></a><h3>dumpmemory</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360295872"></a><h3>dumpmemory</h3><p>
         This is a debugging command. This command will make the ctdb
         daemon write a full memory allocation map to standard output.
-      </p></div><div class="refsect2"><a name="idm264418409568"></a><h3>rddumpmemory</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360294688"></a><h3>rddumpmemory</h3><p>
         This is a debugging command. This command will dump the talloc memory
 	allocation tree for the recovery daemon to standard output.
-      </p></div><div class="refsect2"><a name="idm264418408384"></a><h3>thaw</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360293504"></a><h3>thaw</h3><p>
         Thaw a previously frozen node.
-      </p></div><div class="refsect2"><a name="idm264418407360"></a><h3>eventscript <arguments></h3><p>
+      </p></div><div class="refsect2"><a name="idm257360292480"></a><h3>eventscript <arguments></h3><p>
         This is a debugging command. This command can be used to manually
 	invoke and run the eventscripts with arbitrary arguments.
-      </p></div><div class="refsect2"><a name="idm264418406176"></a><h3>ban <bantime|0></h3><p>
+      </p></div><div class="refsect2"><a name="idm257360291296"></a><h3>ban <bantime|0></h3><p>
         Administratively ban a node for bantime seconds. A bantime of 0 means that the node should be permanently banned. 
       </p><p>
         A banned node does not participate in the cluster and does not host any records for the clustered TDB. Its ip address has been taken over by another node and no services are hosted.
@@ -788,11 +828,11 @@ HEALTH: NO-HEALTHY-NODES - ERROR - Backup of corrupted TDB in '/var/ctdb/persist
         cluster recoveries.
       </p><p>
 	This is primarily a testing command. Note that the recovery daemon controls the overall ban state and it may automatically unban nodes at will, meaning that a node that has been banned by the administrator can be, and often is, unbanned before the admin-specified timeout triggers. If you want to "drop" a node out of the cluster for maintenance or other reasons, use the "stop" / "continue" commands instead of "ban" / "unban".
-      </p></div><div class="refsect2"><a name="idm264418403072"></a><h3>unban</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360288192"></a><h3>unban</h3><p>
         This command is used to unban a node that has either been 
         administratively banned using the ban command or has been automatically
         banned by the recovery daemon.
-      </p></div><div class="refsect2"><a name="idm264418401840"></a><h3>check_srvids <srvid> ...</h3><p>
+      </p></div><div class="refsect2"><a name="idm257360286960"></a><h3>check_srvids <srvid> ...</h3><p>
 	This command checks whether a set of srvid message ports are registered on the
 	node or not. The command takes a list of values to check.
       </p><p>
@@ -804,10 +844,10 @@ Server id 0:1 does not exist
 Server id 0:2 does not exist
 Server id 0:3 does not exist
 Server id 0:14765 exists
-      </pre></div></div><div class="refsect1"><a name="idm264418399008"></a><h2>SEE ALSO</h2><p>
+      </pre></div></div><div class="refsect1"><a name="idm257360284128"></a><h2>SEE ALSO</h2><p>
       ctdbd(1), onnode(1)
       <a class="ulink" href="http://ctdb.samba.org/" target="_top">http://ctdb.samba.org/</a>
-    </p></div><div class="refsect1"><a name="idm264418397488"></a><h2>COPYRIGHT/LICENSE</h2><div class="literallayout"><p><br>
+    </p></div><div class="refsect1"><a name="idm257360282608"></a><h2>COPYRIGHT/LICENSE</h2><div class="literallayout"><p><br>
 Copyright (C) Andrew Tridgell 2007<br>
 Copyright (C) Ronnie sahlberg 2007<br>
 <br>
diff --git a/doc/ctdb.1.xml b/doc/ctdb.1.xml
index ce83a3e..c854619 100644
--- a/doc/ctdb.1.xml
+++ b/doc/ctdb.1.xml
@@ -324,6 +324,71 @@ Recovery master:0
       </screen>
     </refsect2>
 
+    <refsect2><title>nodestatus [<nodespec>]</title>
+      <para>
+        This command is similar to the <command>status</command>
+        command.  It displays the "node status" subset of output.  The
+        main differences are:
+      </para>
+
+      <itemizedlist>
+	<listitem>
+	  <para>
+	    The exit code is the bitwise-OR of the flags for each
+	    specified node, while <command>ctdb status</command> exits
+	    with 0 if it was able to retrieve status for all nodes.
+	  </para>
+	</listitem>
+
+	<listitem>
+	  <para>
+	    <command>ctdb status</command> provides status information
+	    for all nodes.  <command>ctdb nodestatus</command>
+	    defaults to providing status for only the current node.
+	    If <nodespec> is provided then status is given for
+	    the indicated node(s).
+	  </para>
+
+	  <para>
+	    By default, <command>ctdb nodestatus</command> gathers
+	    status from the local node.  However, if invoked with "-n
+	    all" (or similar) then status is gathered from the given
+	    node(s).  In particular <command>ctdb nodestatus
+	    all</command> and <command>ctdb nodestatus -n
+	    all</command> will produce different output.  It is
+	    possible to provide 2 different nodespecs (with and
+	    without "-n") but the output is usually confusing!
+	  </para>
+	</listitem>
+      </itemizedlist>
+
+      <para>
+	A common invocation in scripts is <command>ctdb nodestatus
+	all</command> to check whether all nodes in a cluster are
+	healthy.
+      </para>
+
+      <para>
+	Example: ctdb nodestatus
+      </para>
+      <para>Example output:</para>
+      <screen format="linespecific">
+# ctdb nodestatus
+pnn:0 10.0.0.30        OK (THIS NODE)
+      </screen>
+
+      <para>
+	Example: ctdb nodestatus all
+      </para>
+      <para>Example output:</para>
+      <screen format="linespecific">
+# ctdb nodestatus all
+Number of nodes:2
+pnn:0 10.0.0.30        OK (THIS NODE)
+pnn:1 10.0.0.31        OK
+      </screen>
+    </refsect2>
+
     <refsect2><title>recmaster</title>
       <para>
         This command shows the pnn of the node which is currently the recmaster.
@@ -366,10 +431,11 @@ Duration of last recovery/failover: 2.248552 seconds
 
     <refsect2><title>ping</title>
       <para>
-        This command will "ping" all CTDB daemons in the cluster to verify that they are processing commands correctly.
+        This command will "ping" the specified CTDB nodes in the cluster to
+        verify that they are running.
       </para>
       <para>
-	Example: ctdb ping
+	Example: ctdb ping -n all
       </para>
       <para>
 	Example output:
diff --git a/doc/ctdbd.1 b/doc/ctdbd.1
index 0964f01..aec597d 100644
--- a/doc/ctdbd.1
+++ b/doc/ctdbd.1
@@ -2,12 +2,12 @@
 .\"     Title: ctdbd
 .\"    Author: [FIXME: author] [see http://docbook.sf.net/el/author]
 .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
-.\"      Date: 05/30/2013
+.\"      Date: 07/11/2013
 .\"    Manual: CTDB - clustered TDB database
 .\"    Source: ctdb
 .\"  Language: English
 .\"
-.TH "CTDBD" "1" "05/30/2013" "ctdb" "CTDB \- clustered TDB database"
+.TH "CTDBD" "1" "07/11/2013" "ctdb" "CTDB \- clustered TDB database"
 .\" -----------------------------------------------------------------
 .\" * Define some portability stuff
 .\" -----------------------------------------------------------------
@@ -444,7 +444,7 @@ Once a recovery has completed, no additional recoveries are permitted until this
 .PP
 Default: 1
 .PP
-When set to 0, this disables BANNING completely in the cluster and thus nodes can not get banned, even it they break\&. Don\*(Aqt set to 0 unless you know what you are doing\&.
+When set to 0, this disables BANNING completely in the cluster and thus nodes can not get banned, even if they break\&. Don\*(Aqt set to 0 unless you know what you are doing\&. You should set this to the same value on all nodes to avoid unexpected behaviour\&.
 .SS "DeterministicIPs"
 .PP
 Default: 0
@@ -482,7 +482,7 @@ When you enable this tunable, CTDB will no longer attempt to recover the cluster
 .PP
 Default: 0
 .PP
-When set to 1, ctdb will allow ip addresses to be failed over onto this node\&. Any ip addresses that the node currently hosts will remain on the node but no new ip addresses can be failed over onto the node\&.
+When set to 1, ctdb will not allow IP addresses to be failed over onto this node\&. Any IP addresses that the node currently hosts will remain on the node but no new IP addresses can be failed over to the node\&.
 .SS "NoIPHostOnAllDisabled"
 .PP
 Default: 0
@@ -593,13 +593,6 @@ When many clients across many nodes try to access the same record at the same ti
 This parameter is used to activate a fetch\-collapse\&. A fetch\-collapse is when we track which records we have requests in flight so that we only keep one request in flight from a certain node, even if multiple smbd processes are attempting to fetch the record at the same time\&. This can improve performance and reduce CPU utilization for certain workloads\&.
 .PP
 This timeout controls if we should collapse multiple fetch operations of the same record into a single request and defer all duplicates or not\&.
-.SS "DeadlockTimeout"
-.PP
-Default: 60
-.PP
-Number of seconds to determine if ctdb is in deadlock with samba\&.
-.PP
-When ctdb daemon is blocked waiting for a lock on a database which is blocked by some other process, ctdb logs a warning every 10 seconds\&. Most often this is caused by samba locking databases and waiting on ctdb and result in a deadlock\&. If the lock is not obtained by ctdb before deadlock timeout expires, ctdb will detect it as a deadlock and terminate the blocking samba process\&. Setting this value to 0 disables deadlock detection\&.
 .SS "Samba3AvoidDeadlocks"
 .PP
 Default: 0
@@ -1022,7 +1015,10 @@ Notification scripts are used with ctdb to have a call\-out from ctdb to a user\
 .PP
 This is activated by setting CTDB_NOTIFY_SCRIPT=<your script> in the sysconfig file, or by adding \-\-notification\-script=<your script>\&.
 .PP
-See /etc/ctdb/notify\&.sh for an example script\&.
+See /etc/ctdb/notify\&.sh for an example script\&. This script executes files in
+/etc/ctdb/notify\&.d/, so it is recommended that you handle notifications using the example script and by placing executable scripts in
+/etc/ctdb/notify\&.d/
+to handle the desired notifications\&.
 .PP
 CTDB currently generates notifications on these state changes:
 .SS "unhealthy"
diff --git a/doc/ctdbd.1.html b/doc/ctdbd.1.html
index afb8201..855c9a7 100644
--- a/doc/ctdbd.1.html
+++ b/doc/ctdbd.1.html
@@ -1,4 +1,4 @@
-<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>ctdbd</title><meta name="generator" content="DocBook XSL Stylesheets V1.78.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry"><a name="ctdbd.1"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>ctdbd — The CTDB cluster daemon</p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><div class="cmdsynopsis"><p>< [...]
+<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>ctdbd</title><meta name="generator" content="DocBook XSL Stylesheets V1.78.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry"><a name="ctdbd.1"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>ctdbd — The CTDB cluster daemon</p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><div class="cmdsynopsis"><p>< [...]
       ctdbd is the main ctdb daemon.
     </p><p>
       ctdbd provides a clustered version of the TDB database with automatic rebuild/recovery of the databases upon nodefailures.
@@ -8,7 +8,7 @@
       ctdbd provides monitoring of all nodes in the cluster and automatically reconfigures the cluster and recovers upon node failures.
     </p><p>
       ctdbd is the main component in clustered Samba that provides a high-availability load-sharing CIFS server cluster.
-    </p></div><div class="refsect1"><a name="idm264408581808"></a><h2>OPTIONS</h2><div class="variablelist"><dl class="variablelist"><dt><span class="term">-? --help</span></dt><dd><p>
+    </p></div><div class="refsect1"><a name="idm257361817184"></a><h2>OPTIONS</h2><div class="variablelist"><dl class="variablelist"><dt><span class="term">-? --help</span></dt><dd><p>
             Print some help text to the screen.
           </p></dd><dt><span class="term">-d --debug=<DEBUGLEVEL></span></dt><dd><p>
             This option sets the debuglevel on the ctdbd daemon which controls what will be written to the logfile. The default is 0 which will only log important events and errors. A larger number will provide additional logging.
@@ -154,10 +154,10 @@
 	    implemented in the future.
           </p></dd><dt><span class="term">--usage</span></dt><dd><p>
             Print usage information to the screen.
-          </p></dd></dl></div></div><div class="refsect1"><a name="idm264403537648"></a><h2>Private vs Public addresses</h2><p>
+          </p></dd></dl></div></div><div class="refsect1"><a name="idm257361856656"></a><h2>Private vs Public addresses</h2><p>
       When used for ip takeover in a HA environment, each node in a ctdb 
       cluster has multiple ip addresses assigned to it. One private and one or more public.
-    </p><div class="refsect2"><a name="idm264403536560"></a><h3>Private address</h3><p>
+    </p><div class="refsect2"><a name="idm257363129360"></a><h3>Private address</h3><p>
         This is the physical ip address of the node which is configured in 
         linux and attached to a physical interface. This address uniquely
         identifies a physical node in the cluster and is the ip addresses
@@ -187,7 +187,7 @@
         10.1.1.2
         10.1.1.3
         10.1.1.4
-      </pre></div><div class="refsect2"><a name="idm264403532336"></a><h3>Public address</h3><p>
+      </pre></div><div class="refsect2"><a name="idm257362702048"></a><h3>Public address</h3><p>
         A public address on the other hand is not attached to an interface.
         This address is managed by ctdbd itself and is attached/detached to
         a physical node at runtime.
@@ -248,7 +248,7 @@
 	unavailable. 10.1.1.1 can not be failed over to node 2 or node 3 since
 	these nodes do not have this ip address listed in their public
 	addresses file.
-	</p></div></div><div class="refsect1"><a name="idm264403525136"></a><h2>Node status</h2><p>
+	</p></div></div><div class="refsect1"><a name="idm257361021296"></a><h2>Node status</h2><p>
       The current status of each node in the cluster can be viewed by the 
       'ctdb status' command.
     </p><p>
@@ -285,9 +285,9 @@
       RECMASTER or NATGW.
       This node does not participate in the CTDB cluster but can still be
       communicated with. I.e. ctdb commands can be sent to it.
-    </p></div><div class="refsect1"><a name="idm264403519424"></a><h2>PUBLIC TUNABLES</h2><p>
+    </p></div><div class="refsect1"><a name="idm257361191168"></a><h2>PUBLIC TUNABLES</h2><p>
     These are the public tuneables that can be used to control how ctdb behaves.
-    </p><div class="refsect2"><a name="idm264403518304"></a><h3>MaxRedirectCount</h3><p>Default: 3</p><p>
+    </p><div class="refsect2"><a name="idm257363314944"></a><h3>MaxRedirectCount</h3><p>Default: 3</p><p>
     If we are not the DMASTER and need to fetch a record across the network
     we first send the request to the LMASTER after which the record
     is passed onto the current DMASTER. If the DMASTER changes before
@@ -301,7 +301,7 @@
     </p><p>
     When chasing a record, this is how many hops we will chase the record
     for before going back to the LMASTER to ask for new guidance.
-    </p></div><div class="refsect2"><a name="idm264403515712"></a><h3>SeqnumInterval</h3><p>Default: 1000</p><p>
+    </p></div><div class="refsect2"><a name="idm257360791184"></a><h3>SeqnumInterval</h3><p>Default: 1000</p><p>
     Some databases have seqnum tracking enabled, so that samba will be able
     to detect asynchronously when there has been updates to the database.
     Everytime a database is updated its sequence number is increased.
@@ -309,17 +309,17 @@
     This tunable is used to specify in 'ms' how frequently ctdb will
     send out updates to remote nodes to inform them that the sequence
     number is increased.
-    </p></div><div class="refsect2"><a name="idm264403513488"></a><h3>ControlTimeout</h3><p>Default: 60</p><p>
+    </p></div><div class="refsect2"><a name="idm257360214608"></a><h3>ControlTimeout</h3><p>Default: 60</p><p>
     This is the default
     setting for timeout for when sending a control message to either the
     local or a remote ctdb daemon.
-    </p></div><div class="refsect2"><a name="idm264403511920"></a><h3>TraverseTimeout</h3><p>Default: 20</p><p>
+    </p></div><div class="refsect2"><a name="idm257360971392"></a><h3>TraverseTimeout</h3><p>Default: 20</p><p>
     This setting controls how long we allow a traverse process to run.
     After this timeout triggers, the main ctdb daemon will abort the
     traverse if it has not yet finished.
-    </p></div><div class="refsect2"><a name="idm264403510304"></a><h3>KeepaliveInterval</h3><p>Default: 5</p><p>
+    </p></div><div class="refsect2"><a name="idm257360248432"></a><h3>KeepaliveInterval</h3><p>Default: 5</p><p>
     How often in seconds should the nodes send keepalives to each other.
-    </p></div><div class="refsect2"><a name="idm264403508800"></a><h3>KeepaliveLimit</h3><p>Default: 5</p><p>
+    </p></div><div class="refsect2"><a name="idm257360436288"></a><h3>KeepaliveLimit</h3><p>Default: 5</p><p>
     After how many keepalive intervals without any traffic should a node
     wait until marking the peer as DISCONNECTED.
     </p><p>
@@ -328,60 +328,61 @@
     require a recovery. This limit should not be set too high since we want
     a hung node to be detected, and expunged from the cluster well before
     common CIFS timeouts (45-90 seconds) kick in.
-    </p></div><div class="refsect2"><a name="idm264403506496"></a><h3>RecoverTimeout</h3><p>Default: 20</p><p>
+    </p></div><div class="refsect2"><a name="idm257360365280"></a><h3>RecoverTimeout</h3><p>Default: 20</p><p>
     This is the default setting for timeouts for controls when sent from the
     recovery daemon. We allow longer control timeouts from the recovery daemon
     than from normal use since the recovery daemon often uses controls that 
     can take a lot longer than normal controls.
-    </p></div><div class="refsect2"><a name="idm264403504784"></a><h3>RecoverInterval</h3><p>Default: 1</p><p>
+    </p></div><div class="refsect2"><a name="idm257361786848"></a><h3>RecoverInterval</h3><p>Default: 1</p><p>
     How frequently in seconds should the recovery daemon perform the
     consistency checks that determine if we need to perform a recovery or not.
-    </p></div><div class="refsect2"><a name="idm264403503200"></a><h3>ElectionTimeout</h3><p>Default: 3</p><p>
+    </p></div><div class="refsect2"><a name="idm257361496176"></a><h3>ElectionTimeout</h3><p>Default: 3</p><p>
     When electing a new recovery master, this is how many seconds we allow
     the election to take before we either deem the election finished
     or we fail the election and start a new one.
-    </p></div><div class="refsect2"><a name="idm264403501584"></a><h3>TakeoverTimeout</h3><p>Default: 9</p><p>
+    </p></div><div class="refsect2"><a name="idm257362904800"></a><h3>TakeoverTimeout</h3><p>Default: 9</p><p>
     This is how many seconds we allow controls to take for IP failover events.
-    </p></div><div class="refsect2"><a name="idm264403500080"></a><h3>MonitorInterval</h3><p>Default: 15</p><p>
+    </p></div><div class="refsect2"><a name="idm257361934432"></a><h3>MonitorInterval</h3><p>Default: 15</p><p>
     How often should ctdb run the event scripts to check for a nodes health.
-    </p></div><div class="refsect2"><a name="idm264403498576"></a><h3>TickleUpdateInterval</h3><p>Default: 20</p><p>
+    </p></div><div class="refsect2"><a name="idm257362181680"></a><h3>TickleUpdateInterval</h3><p>Default: 20</p><p>
     How often will ctdb record and store the "tickle" information used to
     kickstart stalled tcp connections after a recovery.
-    </p></div><div class="refsect2"><a name="idm264403497024"></a><h3>EventScriptTimeout</h3><p>Default: 20</p><p>
+    </p></div><div class="refsect2"><a name="idm257361380608"></a><h3>EventScriptTimeout</h3><p>Default: 20</p><p>
     How long should ctdb let an event script run before aborting it and
     marking the node unhealthy.
-    </p></div><div class="refsect2"><a name="idm264403495488"></a><h3>EventScriptTimeoutCount</h3><p>Default: 1</p><p>
+    </p></div><div class="refsect2"><a name="idm257361927888"></a><h3>EventScriptTimeoutCount</h3><p>Default: 1</p><p>
     How many events in a row needs to timeout before we flag the node UNHEALTHY.
     This setting is useful if your scripts can not be written so that they
     do not hang for benign reasons.
-    </p></div><div class="refsect2"><a name="idm264403493872"></a><h3>EventScriptUnhealthyOnTimeout</h3><p>Default: 0</p><p>
+    </p></div><div class="refsect2"><a name="idm257361543136"></a><h3>EventScriptUnhealthyOnTimeout</h3><p>Default: 0</p><p>
     This setting can be used to make ctdb never become UNHEALTHY if your
     eventscripts keep hanging/timing out.
-    </p></div><div class="refsect2"><a name="idm264403492320"></a><h3>RecoveryGracePeriod</h3><p>Default: 120</p><p>
+    </p></div><div class="refsect2"><a name="idm257360345648"></a><h3>RecoveryGracePeriod</h3><p>Default: 120</p><p>
     During recoveries, if a node has not caused recovery failures during the
     last grace period, any records of transgressions that the node has caused
     recovery failures will be forgiven. This resets the ban-counter back to 
     zero for that node.
-    </p></div><div class="refsect2"><a name="idm264403490640"></a><h3>RecoveryBanPeriod</h3><p>Default: 300</p><p>
+    </p></div><div class="refsect2"><a name="idm257360354816"></a><h3>RecoveryBanPeriod</h3><p>Default: 300</p><p>
     If a node keeps causing repetitive recovery failures, the node will
     eventually become banned from the cluster.
     This controls how long the culprit node will be banned from the cluster
     before it is allowed to try to join the cluster again.
     Don't set this too small. A node gets banned for a reason and it is usually due
     to real problems with the node.
-    </p></div><div class="refsect2"><a name="idm264403488448"></a><h3>DatabaseHashSize</h3><p>Default: 100001</p><p>
+    </p></div><div class="refsect2"><a name="idm257361543648"></a><h3>DatabaseHashSize</h3><p>Default: 100001</p><p>
     Size of the hash chains for the local store of the tdbs that ctdb manages.
-    </p></div><div class="refsect2"><a name="idm264403486944"></a><h3>DatabaseMaxDead</h3><p>Default: 5</p><p>
+    </p></div><div class="refsect2"><a name="idm257360177760"></a><h3>DatabaseMaxDead</h3><p>Default: 5</p><p>
     How many dead records per hashchain in the TDB database do we allow before
     the freelist needs to be processed.
-    </p></div><div class="refsect2"><a name="idm264403485392"></a><h3>RerecoveryTimeout</h3><p>Default: 10</p><p>
+    </p></div><div class="refsect2"><a name="idm257361278144"></a><h3>RerecoveryTimeout</h3><p>Default: 10</p><p>
     Once a recovery has completed, no additional recoveries are permitted
     until this timeout has expired.
-    </p></div><div class="refsect2"><a name="idm264403483856"></a><h3>EnableBans</h3><p>Default: 1</p><p>
+    </p></div><div class="refsect2"><a name="idm257360366752"></a><h3>EnableBans</h3><p>Default: 1</p><p>
     When set to 0, this disables BANNING completely in the cluster and thus
     nodes can not get banned, even if they break. Don't set to 0 unless you
-    know what you are doing.
-    </p></div><div class="refsect2"><a name="idm264403482240"></a><h3>DeterministicIPs</h3><p>Default: 0</p><p>
+    know what you are doing.  You should set this to the same value on
+    all nodes to avoid unexpected behaviour.
+    </p></div><div class="refsect2"><a name="idm257361709216"></a><h3>DeterministicIPs</h3><p>Default: 0</p><p>
     When enabled, this tunable makes ctdb try to keep public IP addresses
     locked to specific nodes as far as possible. This makes it easier for
     debugging since you can know that as long as all nodes are healthy
@@ -392,12 +393,12 @@
     public IP assignment changes in the cluster. This tunable may increase
     the number of IP failover/failbacks that are performed on the cluster
     by a small margin.
-    </p></div><div class="refsect2"><a name="idm264403479824"></a><h3>LCP2PublicIPs</h3><p>Default: 1</p><p>
+    </p></div><div class="refsect2"><a name="idm257360989040"></a><h3>LCP2PublicIPs</h3><p>Default: 1</p><p>
     When enabled this switches ctdb to use the LCP2 ip allocation
     algorithm.
-    </p></div><div class="refsect2"><a name="idm264403478320"></a><h3>ReclockPingPeriod</h3><p>Default: x</p><p>
+    </p></div><div class="refsect2"><a name="idm257361296128"></a><h3>ReclockPingPeriod</h3><p>Default: x</p><p>
     Obsolete
-    </p></div><div class="refsect2"><a name="idm264403476912"></a><h3>NoIPFailback</h3><p>Default: 0</p><p>
+    </p></div><div class="refsect2"><a name="idm257361685920"></a><h3>NoIPFailback</h3><p>Default: 0</p><p>
     When set to 1, ctdb will not perform failback of IP addresses when a node
     becomes healthy. Ctdb WILL perform failover of public IP addresses when a
     node becomes UNHEALTHY, but when the node becomes HEALTHY again, ctdb
@@ -415,7 +416,7 @@
     intervention from the administrator. When this parameter is set, you can
     manually fail public IP addresses over to the new node(s) using the
     'ctdb moveip' command.
-    </p></div><div class="refsect2"><a name="idm264403473680"></a><h3>DisableIPFailover</h3><p>Default: 0</p><p>
+    </p></div><div class="refsect2"><a name="idm257362173872"></a><h3>DisableIPFailover</h3><p>Default: 0</p><p>
     When enabled, ctdb will not perform failover or failback. Even if a
     node fails while holding public IPs, ctdb will not recover the IPs or
     assign them to another node.
@@ -424,11 +425,12 @@
     the cluster by failing IP addresses over to other nodes. This leads to
     a service outage until the administrator has manually performed failover
     to replacement nodes using the 'ctdb moveip' command.
-    </p></div><div class="refsect2"><a name="idm264403471376"></a><h3>NoIPTakeover</h3><p>Default: 0</p><p>
-    When set to 1, ctdb will allow ip addresses to be failed over onto this
-    node. Any ip addresses that the node currently hosts will remain on the
-    node but no new ip addresses can be failed over onto the node.
-    </p></div><div class="refsect2"><a name="idm264403469728"></a><h3>NoIPHostOnAllDisabled</h3><p>Default: 0</p><p>
+    </p></div><div class="refsect2"><a name="idm257363058912"></a><h3>NoIPTakeover</h3><p>Default: 0</p><p>
+    When set to 1, ctdb will not allow IP addresses to be failed over
+    onto this node. Any IP addresses that the node currently hosts
+    will remain on the node but no new IP addresses can be failed over
+    to the node.
+    </p></div><div class="refsect2"><a name="idm257360148896"></a><h3>NoIPHostOnAllDisabled</h3><p>Default: 0</p><p>
     If no nodes are healthy then by default ctdb will happily host
     public IPs on disabled (unhealthy or administratively disabled)
     nodes.  This can cause problems, for example if the underlying
@@ -436,48 +438,48 @@
     that node is disabled, any IPs hosted by this node will be
     released and the node will not takeover any IPs until it is no
     longer disabled.
-    </p></div><div class="refsect2"><a name="idm264403467872"></a><h3>DBRecordCountWarn</h3><p>Default: 100000</p><p>
+    </p></div><div class="refsect2"><a name="idm257360621776"></a><h3>DBRecordCountWarn</h3><p>Default: 100000</p><p>
     When set to non-zero, ctdb will log a warning when we try to recover a
     database with more than this many records. This will produce a warning
     if a database grows uncontrollably with orphaned records.
-    </p></div><div class="refsect2"><a name="idm264403466224"></a><h3>DBRecordSizeWarn</h3><p>Default: 10000000</p><p>
+    </p></div><div class="refsect2"><a name="idm257361339168"></a><h3>DBRecordSizeWarn</h3><p>Default: 10000000</p><p>
     When set to non-zero, ctdb will log a warning when we try to recover a
     database where a single record is bigger than this. This will produce
     a warning if a database record grows uncontrollably with orphaned
     sub-records.
-    </p></div><div class="refsect2"><a name="idm264403464560"></a><h3>DBSizeWarn</h3><p>Default: 1000000000</p><p>
+    </p></div><div class="refsect2"><a name="idm257361392320"></a><h3>DBSizeWarn</h3><p>Default: 1000000000</p><p>
     When set to non-zero, ctdb will log a warning when we try to recover a
     database bigger than this. This will produce
     a warning if a database grows uncontrollably.
-    </p></div><div class="refsect2"><a name="idm264403462960"></a><h3>VerboseMemoryNames</h3><p>Default: 0</p><p>
+    </p></div><div class="refsect2"><a name="idm257361206560"></a><h3>VerboseMemoryNames</h3><p>Default: 0</p><p>
     This feature consumes additional memory. When used, the talloc library
     will create more verbose names for all talloc allocated objects.
-    </p></div><div class="refsect2"><a name="idm264403461392"></a><h3>RecdPingTimeout</h3><p>Default: 60</p><p>
+    </p></div><div class="refsect2"><a name="idm257362408304"></a><h3>RecdPingTimeout</h3><p>Default: 60</p><p>
     If the main daemon has not heard a "ping" from the recovery daemon for
     this many seconds, the main daemon will log a message that the recovery
     daemon is potentially hung.
-    </p></div><div class="refsect2"><a name="idm264403459776"></a><h3>RecdFailCount</h3><p>Default: 10</p><p>
+    </p></div><div class="refsect2"><a name="idm257362658992"></a><h3>RecdFailCount</h3><p>Default: 10</p><p>
     If the recovery daemon has failed to ping the main daemon for this many
     consecutive intervals, the main daemon will consider the recovery daemon
     as hung and will try to restart it to recover.
-    </p></div><div class="refsect2"><a name="idm264403458144"></a><h3>LogLatencyMs</h3><p>Default: 0</p><p>
+    </p></div><div class="refsect2"><a name="idm257362070960"></a><h3>LogLatencyMs</h3><p>Default: 0</p><p>
     When set to non-zero, this will make the main daemon log any operation that
     took longer than this value, in 'ms', to complete.
     These include "how long time a lockwait child process needed", 
     "how long time to write to a persistent database" but also
     "how long did it take to get a response to a CALL from a remote node".
-    </p></div><div class="refsect2"><a name="idm264403456368"></a><h3>RecLockLatencyMs</h3><p>Default: 1000</p><p>
+    </p></div><div class="refsect2"><a name="idm257361840560"></a><h3>RecLockLatencyMs</h3><p>Default: 1000</p><p>
     When using a reclock file for split brain prevention, if set to non-zero
     this tunable will make the recovery daemon log a message if the fcntl()
     call to lock/testlock the recovery file takes longer than this number of 
     ms.
-    </p></div><div class="refsect2"><a name="idm264403454704"></a><h3>RecoveryDropAllIPs</h3><p>Default: 120</p><p>
+    </p></div><div class="refsect2"><a name="idm257363402896"></a><h3>RecoveryDropAllIPs</h3><p>Default: 120</p><p>
     If we have been stuck in recovery, or stopped, or banned, mode for
     this many seconds we will force drop all held public addresses.
-    </p></div><div class="refsect2"><a name="idm264403453136"></a><h3>verifyRecoveryLock</h3><p>Default: 1</p><p>
+    </p></div><div class="refsect2"><a name="idm257362993248"></a><h3>verifyRecoveryLock</h3><p>Default: 1</p><p>
     Should we take a fcntl() lock on the reclock file to verify that we are the
     sole recovery master node on the cluster or not.
-    </p></div><div class="refsect2"><a name="idm264403451568"></a><h3>DeferredAttachTO</h3><p>Default: 120</p><p>
+    </p></div><div class="refsect2"><a name="idm257363031120"></a><h3>DeferredAttachTO</h3><p>Default: 120</p><p>
     When databases are frozen we do not allow clients to attach to the
     databases. Instead of returning an error immediately to the application
     the attach request from the client is deferred until the database
@@ -485,7 +487,7 @@
     </p><p>
     This timeout controls how long we will defer the request from the client
     before timing it out and returning an error to the client.
-    </p></div><div class="refsect2"><a name="idm264403449312"></a><h3>HopcountMakeSticky</h3><p>Default: 50</p><p>
+    </p></div><div class="refsect2"><a name="idm257364122288"></a><h3>HopcountMakeSticky</h3><p>Default: 50</p><p>
     If the database is set to 'STICKY' mode, using the 'ctdb setdbsticky' 
     command, any record that is seen as very hot and migrating so fast that
     hopcount surpasses 50 is set to become a STICKY record for StickyDuration
@@ -496,15 +498,15 @@
     migrating across the cluster so fast. This will improve performance for
     certain workloads, such as locking.tdb if many clients are opening/closing
     the same file concurrently.
-    </p></div><div class="refsect2"><a name="idm264403446848"></a><h3>StickyDuration</h3><p>Default: 600</p><p>
+    </p></div><div class="refsect2"><a name="idm257362165808"></a><h3>StickyDuration</h3><p>Default: 600</p><p>
     Once a record has been found to be fetch-lock hot and has been flagged to
     become STICKY, this is for how long, in seconds, the record will be 
     flagged as a STICKY record.
-    </p></div><div class="refsect2"><a name="idm264403445232"></a><h3>StickyPindown</h3><p>Default: 200</p><p>
+    </p></div><div class="refsect2"><a name="idm257360088736"></a><h3>StickyPindown</h3><p>Default: 200</p><p>
     Once a STICKY record has been migrated onto a node, it will be pinned down
     on that node for this number of ms. Any request from other nodes to migrate
     the record off the node will be deferred until the pindown timer expires.
-    </p></div><div class="refsect2"><a name="idm264403443568"></a><h3>MaxLACount</h3><p>Default: 20</p><p>
+    </p></div><div class="refsect2"><a name="idm257361087456"></a><h3>MaxLACount</h3><p>Default: 20</p><p>
     When record content is fetched from a remote node, if it is only for 
     reading the record, pass back the content of the record but do not yet 
     migrate the record. Once MaxLACount identical requests from the 
@@ -512,13 +514,13 @@
     onto the requesting node. This reduces the amount of migration for a 
     database read-mostly workload at the expense of more frequent network
     roundtrips.
-    </p></div><div class="refsect2"><a name="idm264403441680"></a><h3>StatHistoryInterval</h3><p>Default: 1</p><p>
+    </p></div><div class="refsect2"><a name="idm257360353792"></a><h3>StatHistoryInterval</h3><p>Default: 1</p><p>
     Granularity of the statistics collected in the statistics history.
-    </p></div><div class="refsect2"><a name="idm264403440176"></a><h3>AllowClientDBAttach</h3><p>Default: 1</p><p>
+    </p></div><div class="refsect2"><a name="idm257361670160"></a><h3>AllowClientDBAttach</h3><p>Default: 1</p><p>
     When set to 0, clients are not allowed to attach to any databases.
     This can be used to temporarily block any new processes from attaching
     to and accessing the databases.
-    </p></div><div class="refsect2"><a name="idm264403438560"></a><h3>RecoverPDBBySeqNum</h3><p>Default: 0</p><p>
+    </p></div><div class="refsect2"><a name="idm257365182688"></a><h3>RecoverPDBBySeqNum</h3><p>Default: 0</p><p>
     When set to non-zero, this will change how the recovery process for
     persistent databases is performed. By default, when performing a database
     recovery, for normal as for persistent databases, recovery is
@@ -529,7 +531,7 @@
     a whole db and not by individual records. The node that contains the
     highest value stored in the record "__db_sequence_number__" is selected
     and the copy of that nodes database is used as the recovered database.
-    </p></div><div class="refsect2"><a name="idm264403436096"></a><h3>FetchCollapse</h3><p>Default: 1</p><p>
+    </p></div><div class="refsect2"><a name="idm257365180160"></a><h3>FetchCollapse</h3><p>Default: 1</p><p>
     When many clients across many nodes try to access the same record at the
     same time this can lead to a fetch storm where the record becomes very
     active and bounces between nodes very fast. This leads to high CPU
@@ -545,24 +547,14 @@
     </p><p>
     This timeout controls if we should collapse multiple fetch operations
     of the same record into a single request and defer all duplicates or not.
-    </p></div><div class="refsect2"><a name="idm264403432992"></a><h3>DeadlockTimeout</h3><p>Default: 60</p><p>
-    Number of seconds to determine if ctdb is in deadlock with samba.
-    </p><p>
-    When ctdb daemon is blocked waiting for a lock on a database which is
-    blocked by some other process, ctdb logs a warning every 10 seconds. Most
-    often this is caused by samba locking databases and waiting on ctdb and
-    result in a deadlock. If the lock is not obtained by ctdb before deadlock
-    timeout expires, ctdb will detect it as a deadlock and terminate the
-    blocking samba process. Setting this value to 0 disables deadlock
-    detection.
-    </p></div><div class="refsect2"><a name="idm264403430624"></a><h3>Samba3AvoidDeadlocks</h3><p>Default: 0</p><p>
+    </p></div><div class="refsect2"><a name="idm257365176992"></a><h3>Samba3AvoidDeadlocks</h3><p>Default: 0</p><p>
     Enable code that prevents deadlocks with Samba (only for Samba 3.x).
     </p><p>
     This should be set to 1 when using Samba version 3.x to enable special
     code in CTDB to avoid deadlock with Samba version 3.x.  This code
     is not required for Samba version 4.x and must not be enabled for
     Samba 4.x.
-    </p></div></div><div class="refsect1"><a name="idm264403428352"></a><h2>LVS</h2><p>
+    </p></div></div><div class="refsect1"><a name="idm257365174656"></a><h2>LVS</h2><p>
     LVS is a mode where CTDB presents one single IP address for the entire
     cluster. This is an alternative to using public IP addresses and round-robin
     DNS to loadbalance clients across the cluster.
@@ -603,7 +595,7 @@
     the processing node back to the clients. For read-intensive i/o patterns you can achieve very high throughput rates in this mode.
     </p><p>
     Note: you can use LVS and public addresses at the same time.
-    </p><div class="refsect2"><a name="idm264403421456"></a><h3>Configuration</h3><p>
+    </p><div class="refsect2"><a name="idm257365168432"></a><h3>Configuration</h3><p>
     To activate LVS on a CTDB node you must specify CTDB_PUBLIC_INTERFACE and 
     CTDB_LVS_PUBLIC_ADDRESS in /etc/sysconfig/ctdb.
 	</p><p>
@@ -626,7 +618,7 @@ You must also specify the "--lvs" command line argument to ctdbd to activate LVS
     all of the clients from the node BEFORE you enable LVS. Also make sure
     that when you ping these hosts that the traffic is routed out through the
     eth0 interface.
-    </p></div><div class="refsect1"><a name="idm264403417056"></a><h2>REMOTE CLUSTER NODES</h2><p>
+    </p></div><div class="refsect1"><a name="idm257365163936"></a><h2>REMOTE CLUSTER NODES</h2><p>
 It is possible to have a CTDB cluster that spans across a WAN link. 
 For example where you have a CTDB cluster in your datacentre but you also
 want to have one additional CTDB node located at a remote branch site.
@@ -655,7 +647,7 @@ CTDB_CAPABILITY_RECMASTER=no
     </p><p>
 	Verify with the command "ctdb getcapabilities" that that node no longer
 	has the recmaster or the lmaster capabilities.
-    </p></div><div class="refsect1"><a name="idm264403412480"></a><h2>NAT-GW</h2><p>
+    </p></div><div class="refsect1"><a name="idm257365159296"></a><h2>NAT-GW</h2><p>
       Sometimes it is desirable to run services on the CTDB node which will
       need to originate outgoing traffic to external servers. This might
       be contacting NIS servers, LDAP servers etc. etc.
@@ -678,7 +670,7 @@ CTDB_CAPABILITY_RECMASTER=no
       if there are no public addresses assigned to the node.
       This is the simplest way but it uses up a lot of ip addresses since you
       have to assign both static and also public addresses to each node.
-    </p><div class="refsect2"><a name="idm264403409024"></a><h3>NAT-GW</h3><p>
+    </p><div class="refsect2"><a name="idm257365155360"></a><h3>NAT-GW</h3><p>
       A second way is to use the built in NAT-GW feature in CTDB.
       With NAT-GW you assign one public NATGW address for each natgw group.
       Each NATGW group is a set of nodes in the cluster that shares the same
@@ -693,7 +685,7 @@ CTDB_CAPABILITY_RECMASTER=no
       In each NATGW group, one of the nodes is designated the NAT Gateway
       through which all traffic that is originated by nodes in this group
       will be routed through if public addresses are not available. 
-    </p></div><div class="refsect2"><a name="idm264403406336"></a><h3>Configuration</h3><p>
+    </p></div><div class="refsect2"><a name="idm257365152640"></a><h3>Configuration</h3><p>
       NAT-GW is configured in /etc/sysconfig/ctdb by setting the following
       variables:
     </p><pre class="screen">
@@ -741,31 +733,31 @@ CTDB_CAPABILITY_RECMASTER=no
 # become natgw master.
 #
 # CTDB_NATGW_SLAVE_ONLY=yes
-    </pre></div><div class="refsect2"><a name="idm264403402512"></a><h3>CTDB_NATGW_PUBLIC_IP</h3><p>
+    </pre></div><div class="refsect2"><a name="idm257365148752"></a><h3>CTDB_NATGW_PUBLIC_IP</h3><p>
       This is an ip address in the public network that is used for all outgoing
       traffic when the public addresses are not assigned.
       This address will be assigned to one of the nodes in the cluster which
       will masquerade all traffic for the other nodes.
     </p><p>
       Format of this parameter is IPADDRESS/NETMASK
-    </p></div><div class="refsect2"><a name="idm264403400816"></a><h3>CTDB_NATGW_PUBLIC_IFACE</h3><p>
+    </p></div><div class="refsect2"><a name="idm257365147024"></a><h3>CTDB_NATGW_PUBLIC_IFACE</h3><p>
       This is the physical interface where the CTDB_NATGW_PUBLIC_IP will be
       assigned to. This should be an interface connected to the public network.
     </p><p>
       Format of this parameter is INTERFACE
-    </p></div><div class="refsect2"><a name="idm264403399232"></a><h3>CTDB_NATGW_DEFAULT_GATEWAY</h3><p>
+    </p></div><div class="refsect2"><a name="idm257365145408"></a><h3>CTDB_NATGW_DEFAULT_GATEWAY</h3><p>
       This is the default gateway to use on the node that is elected to host
       the CTDB_NATGW_PUBLIC_IP. This is the default gateway on the public network.
     </p><p>
       Format of this parameter is IPADDRESS
-    </p></div><div class="refsect2"><a name="idm264403397648"></a><h3>CTDB_NATGW_PRIVATE_NETWORK</h3><p>
+    </p></div><div class="refsect2"><a name="idm257365143776"></a><h3>CTDB_NATGW_PRIVATE_NETWORK</h3><p>
       This is the network/netmask used for the internal private network.
     </p><p>
       Format of this parameter is IPADDRESS/NETMASK
-    </p></div><div class="refsect2"><a name="idm264403396064"></a><h3>CTDB_NATGW_NODES</h3><p>
+    </p></div><div class="refsect2"><a name="idm257365142224"></a><h3>CTDB_NATGW_NODES</h3><p>
       This is the list of all nodes that belong to the same NATGW group
       as this node. The default is /etc/ctdb/natgw_nodes.
-    </p></div><div class="refsect2"><a name="idm264403394896"></a><h3>Operation</h3><p>
+    </p></div><div class="refsect2"><a name="idm257365141024"></a><h3>Operation</h3><p>
       When the NAT-GW functionality is used, one of the nodes is elected
       to act as a NAT router for all the other nodes in the group when
       they need to originate traffic to the external public network.
@@ -784,7 +776,7 @@ CTDB_CAPABILITY_RECMASTER=no
     </p><p>
       This is implemented in the 11.natgw eventscript. Please see the
       eventscript for further information.
-    </p></div><div class="refsect2"><a name="idm264403391696"></a><h3>Removing/Changing NATGW at runtime</h3><p>
+    </p></div><div class="refsect2"><a name="idm257365137792"></a><h3>Removing/Changing NATGW at runtime</h3><p>
       The following are the procedures to change/remove a NATGW configuration 
       at runtime, without having to restart ctdbd.
     </p><p>
@@ -798,7 +790,7 @@ CTDB_CAPABILITY_RECMASTER=no
 1, Run 'CTDB_BASE=/etc/ctdb /etc/ctdb/events.d/11.natgw removenatgw'
 2, Then change the configuration in /etc/sysconfig/ctdb
 3, Run 'CTDB_BASE=/etc/ctdb /etc/ctdb/events.d/11.natgw updatenatgw'
-    </pre></div></div><div class="refsect1"><a name="idm264403387856"></a><h2>POLICY ROUTING</h2><p>
+    </pre></div></div><div class="refsect1"><a name="idm257365133840"></a><h2>POLICY ROUTING</h2><p>
       A node running CTDB may be a component of a complex network
       topology.  In particular, public addresses may be spread across
       several different networks (or VLANs) and it may not be possible
@@ -808,7 +800,7 @@ CTDB_CAPABILITY_RECMASTER=no
       be specified for packets sourced from each public address.  The
       routes are added and removed as CTDB moves public addresses
       between nodes.
-    </p><div class="refsect2"><a name="idm264403385664"></a><h3>Configuration variables</h3><p>
+    </p><div class="refsect2"><a name="idm257365132192"></a><h3>Configuration variables</h3><p>
 	There are 4 configuration variables related to policy routing:
       </p><div class="variablelist"><dl class="variablelist"><dt><span class="term"><code class="varname">CTDB_PER_IP_ROUTING_CONF</code></span></dt><dd><p>
 	    The name of a configuration file that specifies the
@@ -849,7 +841,7 @@ CTDB_CAPABILITY_RECMASTER=no
 	      The label for a public address <addr> will look
 	      like ctdb.<addr>.  This means that the associated
 	      rules and routes are easy to read (and manipulate).
-	    </p></dd></dl></div></div><div class="refsect2"><a name="idm264403373520"></a><h3>Configuration file</h3><p>
+	    </p></dd></dl></div></div><div class="refsect2"><a name="idm257363756240"></a><h3>Configuration file</h3><p>
 	The format of each line is:
       </p><pre class="screen">
     <public_address> <network> [ <gateway> ]
@@ -910,7 +902,7 @@ CTDB_CAPABILITY_RECMASTER=no
       </p><pre class="screen">
   192.168.1.0/24 dev eth2 scope link 
   default via 192.168.1.1 dev eth2 
-      </pre></div><div class="refsect2"><a name="idm264403358720"></a><h3>Example configuration</h3><p>
+      </pre></div><div class="refsect2"><a name="idm257363740496"></a><h3>Example configuration</h3><p>
 	Here is a more complete example configuration.
       </p><pre class="screen">
 /etc/ctdb/public_addresses:
@@ -930,7 +922,7 @@ CTDB_CAPABILITY_RECMASTER=no
 	The routes local packets as expected, the default route is as
 	previously discussed, but packets to 192.168.200.0/24 are
 	routed via the alternate gateway 192.168.1.254.
-      </p></div></div><div class="refsect1"><a name="idm264403355888"></a><h2>NOTIFICATION SCRIPT</h2><p>
+      </p></div></div><div class="refsect1"><a name="idm257363737600"></a><h2>NOTIFICATION SCRIPT</h2><p>
       Notification scripts are used with ctdb to have a call-out from ctdb
       to a user-specified script when certain state changes occur in ctdb.
       This is commonly to set up either sending SNMP traps or emails
@@ -939,20 +931,25 @@ CTDB_CAPABILITY_RECMASTER=no
       This is activated by setting CTDB_NOTIFY_SCRIPT=<your script> in the
 	sysconfig file, or by adding --notification-script=<your script>.
     </p><p>
-      See /etc/ctdb/notify.sh for an example script.
+      See /etc/ctdb/notify.sh for an example script.  This script
+      executes files in <code class="filename">/etc/ctdb/notify.d/</code>, so
+      it is recommended that you handle notifications using the
+      example script and by placing executable scripts in
+      <code class="filename">/etc/ctdb/notify.d/</code> to handle the desired
+      notifications.
     </p><p>
       CTDB currently generates notifications on these state changes:
-    </p><div class="refsect2"><a name="idm264403353008"></a><h3>unhealthy</h3><p>
+    </p><div class="refsect2"><a name="idm257363733136"></a><h3>unhealthy</h3><p>
       This call-out is triggered when the node changes to UNHEALTHY state.
-    </p></div><div class="refsect2"><a name="idm264403351888"></a><h3>healthy</h3><p>
+    </p></div><div class="refsect2"><a name="idm257363731984"></a><h3>healthy</h3><p>
       This call-out is triggered when the node changes to HEALTHY state.
-    </p></div><div class="refsect2"><a name="idm264403350768"></a><h3>startup</h3><p>
+    </p></div><div class="refsect2"><a name="idm257363730832"></a><h3>startup</h3><p>
       This call-out is triggered when ctdb has started up and all managed services are up and running.
-    </p></div></div><div class="refsect1"><a name="idm264403349488"></a><h2>ClamAV Daemon</h2><p>
+    </p></div></div><div class="refsect1"><a name="idm257363729520"></a><h2>ClamAV Daemon</h2><p>
 CTDB has support to manage the popular anti-virus daemon ClamAV.
 This support is implemented through the
 eventscript : /etc/ctdb/events.d/31.clamd.
-</p><div class="refsect2"><a name="idm264403348432"></a><h3>Configuration</h3><p>
+</p><div class="refsect2"><a name="idm257363728384"></a><h3>Configuration</h3><p>
 Start by configuring CLAMAV normally and test that it works. Once this is
 done, copy the configuration files over to all the nodes so that all nodes
 share identical CLAMAV configurations.
@@ -981,10 +978,10 @@ Once you have restarted CTDBD, use
 ctdb scriptstatus
 </pre><p>
 and verify that the 31.clamd eventscript is listed and that it was executed successfully.
-</p></div></div><div class="refsect1"><a name="idm264403342368"></a><h2>SEE ALSO</h2><p>
+</p></div></div><div class="refsect1"><a name="idm257363721952"></a><h2>SEE ALSO</h2><p>
       ctdb(1), onnode(1)
       <a class="ulink" href="http://ctdb.samba.org/" target="_top">http://ctdb.samba.org/</a>
-    </p></div><div class="refsect1"><a name="idm264403340848"></a><h2>COPYRIGHT/LICENSE</h2><div class="literallayout"><p><br>
+    </p></div><div class="refsect1"><a name="idm257363720320"></a><h2>COPYRIGHT/LICENSE</h2><div class="literallayout"><p><br>
 Copyright (C) Andrew Tridgell 2007<br>
 Copyright (C) Ronnie sahlberg 2007<br>
 <br>
diff --git a/doc/ctdbd.1.xml b/doc/ctdbd.1.xml
index 9516164..1053d9b 100644
--- a/doc/ctdbd.1.xml
+++ b/doc/ctdbd.1.xml
@@ -777,7 +777,8 @@
     <para>
     When set to 0, this disables BANNING completely in the cluster and thus
     nodes can not get banned, even if they break. Don't set to 0 unless you
-    know what you are doing.
+    know what you are doing.  You should set this to the same value on
+    all nodes to avoid unexpected behaviour.
     </para>
     </refsect2>
 
@@ -856,9 +857,10 @@
     <refsect2><title>NoIPTakeover</title>
     <para>Default: 0</para>
     <para>
-    When set to 1, ctdb will allow ip addresses to be failed over onto this
-    node. Any ip addresses that the node currently hosts will remain on the
-    node but no new ip addresses can be failed over onto the node.
+    When set to 1, ctdb will not allow IP addresses to be failed over
+    onto this node. Any IP addresses that the node currently hosts
+    will remain on the node but no new IP addresses can be failed over
+    to the node.
     </para>
     </refsect2>
 
@@ -1084,22 +1086,6 @@
     </para>
     </refsect2>
 
-    <refsect2><title>DeadlockTimeout</title>
-    <para>Default: 60</para>
-    <para>
-    Number of seconds to determine if ctdb is in deadlock with samba.
-    </para>
-    <para>
-    When ctdb daemon is blocked waiting for a lock on a database which is
-    blocked by some other process, ctdb logs a warning every 10 seconds. Most
-    often this is caused by samba locking databases and waiting on ctdb and
-    result in a deadlock. If the lock is not obtained by ctdb before deadlock
-    timeout expires, ctdb will detect it as a deadlock and terminate the
-    blocking samba process. Setting this value to 0 disables deadlock
-    detection.
-    </para>
-    </refsect2>
-
     <refsect2><title>Samba3AvoidDeadlocks</title>
     <para>Default: 0</para>
     <para>
@@ -1696,7 +1682,12 @@ CTDB_CAPABILITY_RECMASTER=no
 	sysconfig file, or by adding --notification-script=<your script>.
     </para>
     <para>
-      See /etc/ctdb/notify.sh for an example script.
+      See /etc/ctdb/notify.sh for an example script.  This script
+      executes files in <filename>/etc/ctdb/notify.d/</filename>, so
+      it is recommended that you handle notifications using the
+      example script and by placing executable scripts in
+      <filename>/etc/ctdb/notify.d/</filename> to handle the desired
+      notifications.
     </para>
     <para>
       CTDB currently generates notifications on these state changes:
diff --git a/doc/ltdbtool.1 b/doc/ltdbtool.1
index 9c80835..c78ccfb 100644
--- a/doc/ltdbtool.1
+++ b/doc/ltdbtool.1
@@ -2,12 +2,12 @@
 .\"     Title: ltdbtool
 .\"    Author: [FIXME: author] [see http://docbook.sf.net/el/author]
 .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
-.\"      Date: 05/30/2013
+.\"      Date: 07/11/2013
 .\"    Manual:  
 .\"    Source:  
 .\"  Language: English
 .\"
-.TH "LTDBTOOL" "1" "05/30/2013" "" ""
+.TH "LTDBTOOL" "1" "07/11/2013" "" ""
 .\" -----------------------------------------------------------------
 .\" * Define some portability stuff
 .\" -----------------------------------------------------------------
diff --git a/doc/ltdbtool.1.html b/doc/ltdbtool.1.html
index e4300d3..b79f214 100644
--- a/doc/ltdbtool.1.html
+++ b/doc/ltdbtool.1.html
@@ -1,4 +1,4 @@
-<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>ltdbtool</title><meta name="generator" content="DocBook XSL Stylesheets V1.78.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry"><a name="ltdbtool.1"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>ltdbtool — handle ctdb's local tdb copies </p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><div class=" [...]
+<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>ltdbtool</title><meta name="generator" content="DocBook XSL Stylesheets V1.78.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry"><a name="ltdbtool.1"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>ltdbtool — handle ctdb's local tdb copies </p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><div class=" [...]
       ltdbtool is a utility to cope with ctdb's local tdb copies (LTDBs)
       without connecting to a ctdb daemon.
     </p><p>It can be used to
@@ -11,14 +11,14 @@
 	</p></li><li class="listitem"><p>convert between 64 and 32 bit LTDBs where the ctdb record
 	headers differ by 4 bytes of padding.
 	</p></li></ul></div><p>
-    </p></div><div class="refsect1"><a name="idm264395060864"></a><h2>COMMANDS</h2><div class="variablelist"><dl class="variablelist"><dt><span class="term">help</span></dt><dd><p>
+    </p></div><div class="refsect1"><a name="idm257376351600"></a><h2>COMMANDS</h2><div class="variablelist"><dl class="variablelist"><dt><span class="term">help</span></dt><dd><p>
 	    Print a help text.
 	  </p></dd><dt><span class="term">dump <IDB></span></dt><dd><p>
 	    Dump the contents of a LTDB file to standard output in a
 	    human-readable format.
 	  </p></dd><dt><span class="term">convert <IDB> <ODB></span></dt><dd><p>
 	    Make a copy of a LTDB optionally adding or removing ctdb headers.
-	  </p></dd></dl></div></div><div class="refsect1"><a name="idm264395088960"></a><h2>OPTIONS</h2><div class="variablelist"><dl class="variablelist"><dt><span class="term">-p</span></dt><dd><p>
+	  </p></dd></dl></div></div><div class="refsect1"><a name="idm257376318944"></a><h2>OPTIONS</h2><div class="variablelist"><dl class="variablelist"><dt><span class="term">-p</span></dt><dd><p>
 	    Dump with header information, similar to "ctdb catdb".
 	  </p></dd><dt><span class="term">-s {0|32|64}</span></dt><dd><p>
 	    Specify how to determine the ctdb record header size
@@ -34,7 +34,7 @@
 	    Explicitly specify the ctdb record header size for the output database in bytes.
 	  </p></dd><dt><span class="term">-h</span></dt><dd><p>
             Print a help text.
-	  </p></dd></dl></div></div><div class="refsect1"><a name="idm264393451168"></a><h2>EXAMPLES</h2><p>
+	  </p></dd></dl></div></div><div class="refsect1"><a name="idm257376003680"></a><h2>EXAMPLES</h2><p>
       Print a local tdb in "tdbdump" style:
     </p><pre class="screen">
       ltdbtool dump idmap2.tdb.0
@@ -62,10 +62,10 @@
       Add a default header:
     </p><pre class="screen">
       ltdbtool convert -s0 idmap.tdb idmap2.tdb.0
-    </pre></div><div class="refsect1"><a name="idm264395258096"></a><h2>SEE ALSO</h2><p>
+    </pre></div><div class="refsect1"><a name="idm257375005568"></a><h2>SEE ALSO</h2><p>
       ctdbd(1), ctdb(1), tdbdump(1), tdbrestore(1),
       <a class="ulink" href="http://ctdb.samba.org/" target="_top">http://ctdb.samba.org/</a>
-    </p></div><div class="refsect1"><a name="idm264393419200"></a><h2>COPYRIGHT/LICENSE</h2><div class="literallayout"><p><br>
+    </p></div><div class="refsect1"><a name="idm257376991056"></a><h2>COPYRIGHT/LICENSE</h2><div class="literallayout"><p><br>
 Copyright (C) Gregor Beck 2011<br>
 Copyright (C) Michael Adam 2011<br>
 <br>
diff --git a/doc/onnode.1 b/doc/onnode.1
index 40733eb..fe8c363 100644
--- a/doc/onnode.1
+++ b/doc/onnode.1
@@ -2,12 +2,12 @@
 .\"     Title: onnode
 .\"    Author: [FIXME: author] [see http://docbook.sf.net/el/author]
 .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
-.\"      Date: 05/30/2013
+.\"      Date: 07/11/2013
 .\"    Manual:  
 .\"    Source:  
 .\"  Language: English
 .\"
-.TH "ONNODE" "1" "05/30/2013" "" ""
+.TH "ONNODE" "1" "07/11/2013" "" ""
 .\" -----------------------------------------------------------------
 .\" * Define some portability stuff
 .\" -----------------------------------------------------------------
diff --git a/doc/onnode.1.html b/doc/onnode.1.html
index 706ddad..4a8d9bd 100644
--- a/doc/onnode.1.html
+++ b/doc/onnode.1.html
@@ -1,4 +1,4 @@
-<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>onnode</title><meta name="generator" content="DocBook XSL Stylesheets V1.78.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry"><a name="onnode.1"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>onnode — run commands on ctdb nodes</p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><div class="cmdsynopsis [...]
+<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>onnode</title><meta name="generator" content="DocBook XSL Stylesheets V1.78.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry"><a name="onnode.1"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>onnode — run commands on ctdb nodes</p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><div class="cmdsynopsis [...]
       onnode is a utility to run commands on a specific node of a CTDB
       cluster, or on all nodes.
     </p><p>
@@ -13,7 +13,7 @@
     </p><p>
       The COMMAND can be any shell command. The onnode utility uses
       ssh or rsh to connect to the remote nodes and run the command.
-    </p></div><div class="refsect1"><a name="idm264402864768"></a><h2>DESCRIPTIVE NODE SPECIFICATIONS</h2><p>
+    </p></div><div class="refsect1"><a name="idm257378511600"></a><h2>DESCRIPTIVE NODE SPECIFICATIONS</h2><p>
       The following descriptive node specification can be used in
       place of numeric node numbers:
     </p><div class="variablelist"><dl class="variablelist"><dt><span class="term">all</span></dt><dd><p>
@@ -32,7 +32,7 @@
             The current NAT gateway.
 	  </p></dd><dt><span class="term">rm | recmaster</span></dt><dd><p>
             The current recovery master.
-	  </p></dd></dl></div></div><div class="refsect1"><a name="idm264402957984"></a><h2>OPTIONS</h2><div class="variablelist"><dl class="variablelist"><dt><span class="term">-c</span></dt><dd><p>
+	  </p></dd></dl></div></div><div class="refsect1"><a name="idm257378378688"></a><h2>OPTIONS</h2><div class="variablelist"><dl class="variablelist"><dt><span class="term">-c</span></dt><dd><p>
             Execute COMMAND in the current working directory on the
             specified nodes.
 	  </p></dd><dt><span class="term">-o <prefix></span></dt><dd><p>
@@ -66,7 +66,7 @@
             - filenames with whitespace in them are not supported.
 	  </p></dd><dt><span class="term">-h, --help</span></dt><dd><p>
             Show a short usage guide.
-	  </p></dd></dl></div></div><div class="refsect1"><a name="idm264402941280"></a><h2>EXAMPLES</h2><p>
+	  </p></dd></dl></div></div><div class="refsect1"><a name="idm257378460336"></a><h2>EXAMPLES</h2><p>
       The following command would show the process ID of ctdb on all nodes
     </p><pre class="screen">
       onnode all pidof ctdbd
@@ -84,12 +84,12 @@
       directory, in parallel, on nodes 0, 2, 3 and 4.
     </p><pre class="screen">
       onnode -c -p 0,2-4 ./foo
-    </pre></div><div class="refsect1"><a name="idm264402936064"></a><h2>ENVIRONMENT</h2><div class="variablelist"><dl class="variablelist"><dt><span class="term"><code class="envar">CTDB_NODES_FILE</code></span></dt><dd><p>
+    </pre></div><div class="refsect1"><a name="idm257378455120"></a><h2>ENVIRONMENT</h2><div class="variablelist"><dl class="variablelist"><dt><span class="term"><code class="envar">CTDB_NODES_FILE</code></span></dt><dd><p>
 	    Name of alternative nodes file to use instead of the
             default.  See the discussion of
             <code class="filename">/etc/ctdb/nodes</code> in the FILES section
             for more details.
-	  </p></dd></dl></div></div><div class="refsect1"><a name="idm264402932624"></a><h2>FILES</h2><div class="variablelist"><dl class="variablelist"><dt><span class="term"><code class="filename">/etc/ctdb/nodes</code></span></dt><dd><p>
+	  </p></dd></dl></div></div><div class="refsect1"><a name="idm257378451680"></a><h2>FILES</h2><div class="variablelist"><dl class="variablelist"><dt><span class="term"><code class="filename">/etc/ctdb/nodes</code></span></dt><dd><p>
             Default file containing a list of each node's IP address
             or hostname.
 	  </p><p>
@@ -107,9 +107,9 @@
             something other than "ssh".  In this case the -t option is
             ignored.  For example, the administrator may choose to use
             rsh instead of ssh.
-	  </p></dd></dl></div></div><div class="refsect1"><a name="idm264402923216"></a><h2>SEE ALSO</h2><p>
+	  </p></dd></dl></div></div><div class="refsect1"><a name="idm257378442272"></a><h2>SEE ALSO</h2><p>
       ctdbd(1), ctdb(1), <a class="ulink" href="http://ctdb.samba.org/" target="_top">http://ctdb.samba.org/</a>
-    </p></div><div class="refsect1"><a name="idm264402921584"></a><h2>COPYRIGHT/LICENSE</h2><div class="literallayout"><p><br>
+    </p></div><div class="refsect1"><a name="idm257378440640"></a><h2>COPYRIGHT/LICENSE</h2><div class="literallayout"><p><br>
 Copyright (C) Andrew Tridgell 2007<br>
 Copyright (C) Ronnie sahlberg 2007<br>
 Copyright (C) Martin Schwenke 2008<br>
diff --git a/doc/ping_pong.1 b/doc/ping_pong.1
index c8a57f0..f7e5efb 100644
--- a/doc/ping_pong.1
+++ b/doc/ping_pong.1
@@ -2,12 +2,12 @@
 .\"     Title: ping_pong
 .\"    Author: [FIXME: author] [see http://docbook.sf.net/el/author]
 .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
-.\"      Date: 05/30/2013
+.\"      Date: 07/11/2013
 .\"    Manual:  
 .\"    Source:  
 .\"  Language: English
 .\"
-.TH "PING_PONG" "1" "05/30/2013" "" ""
+.TH "PING_PONG" "1" "07/11/2013" "" ""
 .\" -----------------------------------------------------------------
 .\" * Define some portability stuff
 .\" -----------------------------------------------------------------
diff --git a/doc/ping_pong.1.html b/doc/ping_pong.1.html
index f158338..ca93c95 100644
--- a/doc/ping_pong.1.html
+++ b/doc/ping_pong.1.html
@@ -1,9 +1,9 @@
-<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>ping_pong</title><meta name="generator" content="DocBook XSL Stylesheets V1.78.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry"><a name="ping_pong.1"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>ping_pong — measures the ping-pong byte range lock latency</p></div><div class="refsynopsisdiv"><h2>Synopsi [...]
+<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>ping_pong</title><meta name="generator" content="DocBook XSL Stylesheets V1.78.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry"><a name="ping_pong.1"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>ping_pong — measures the ping-pong byte range lock latency</p></div><div class="refsynopsisdiv"><h2>Synopsi [...]
       This measures the ping-pong byte range lock latency. It is
       especially useful on a cluster of nodes sharing a common lock
       manager as it will give some indication of the lock managers
       performance under stress.
-    </p></div><div class="refsect1"><a name="idm264401737536"></a><h2>OPTIONS</h2><div class="variablelist"><dl class="variablelist"><dt><span class="term">-r</span></dt><dd><p>
+    </p></div><div class="refsect1"><a name="idm257374113088"></a><h2>OPTIONS</h2><div class="variablelist"><dl class="variablelist"><dt><span class="term">-r</span></dt><dd><p>
 	    do reads
 	  </p></dd><dt><span class="term">-w</span></dt><dd><p>
 	    do writes
@@ -11,7 +11,7 @@
 	    use mmap
 	  </p></dd><dt><span class="term">-c</span></dt><dd><p>
 	    validate the locks
-	  </p></dd></dl></div></div><div class="refsect1"><a name="idm264401881872"></a><h2>EXAMPLES</h2><p>
+	  </p></dd></dl></div></div><div class="refsect1"><a name="idm257374067488"></a><h2>EXAMPLES</h2><p>
       Testing lock coherence
     </p><pre class="screen">
       ping_pong test.dat N
@@ -23,9 +23,9 @@
       Testing IO coherence
     </p><pre class="screen">
       ping_pong -rw test.dat N
-    </pre></div><div class="refsect1"><a name="idm264401839360"></a><h2>SEE ALSO</h2><p>
+    </pre></div><div class="refsect1"><a name="idm257374063456"></a><h2>SEE ALSO</h2><p>
       <a class="ulink" href="https://wiki.samba.org/index.php/Ping_pong" target="_top">https://wiki.samba.org/index.php/Ping_pong</a>, ctdb(1), ctdbd(1)
-    </p></div><div class="refsect1"><a name="idm264401837760"></a><h2>COPYRIGHT/LICENSE</h2><div class="literallayout"><p><br>
+    </p></div><div class="refsect1"><a name="idm257374061856"></a><h2>COPYRIGHT/LICENSE</h2><div class="literallayout"><p><br>
 Copyright (C) Andrew Tridgell 2002<br>
 <br>
 This program is free software; you can redistribute it and/or modify<br>
diff --git a/doc/recovery-process.txt b/doc/recovery-process.txt
index 7780d84..333eeb2 100644
--- a/doc/recovery-process.txt
+++ b/doc/recovery-process.txt
@@ -112,8 +112,8 @@ These tests are performed on all nodes in the cluster which is why it is optimiz
 as few network calls to other nodes as possible.
 Each node only performs 1 call to the recovery master in each loop and to no other nodes.
 
-NORMAL NODE CLUSTER MONITORING
-------------------------------
+RECOVERY MASTER CLUSTER MONITORING
+-----------------------------------
 The recovery master performs a much more extensive test. In addition to tests 1-9 above
 the recovery master also performs the following tests:
 
@@ -151,7 +151,7 @@ the recovery master also performs the following tests:
 16, Verify that all CONNECTED nodes in the cluster are in recovery mode NORMAL.
     If one of the nodes were in recovery mode ACTIVE, force a new recovery and restart
     monitoring from 1.
-    "Node:%u was in recovery mode. Restart recovery process"
+    "Node:%u was in recovery mode. Start recovery process"
 
 17, Verify that the filehandle to the recovery lock file is valid.
     If it is not, this may mean a split brain and is a critical error.
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index eadd963..cbaff97 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -136,7 +136,6 @@ struct ctdb_tunable {
 	uint32_t db_size_warn;
 	uint32_t pulldb_preallocation_size;
 	uint32_t no_ip_host_on_all_disabled;
-	uint32_t deadlock_timeout;
 	uint32_t samba3_hack;
 };
 
@@ -450,6 +449,8 @@ const char *runstate_to_string(enum ctdb_runstate runstate);
 enum ctdb_runstate runstate_from_string(const char *label);
 void ctdb_set_runstate(struct ctdb_context *ctdb, enum ctdb_runstate runstate);
 
+void ctdb_shutdown_sequence(struct ctdb_context *ctdb, int exit_code);
+
 #define CTDB_MONITORING_ACTIVE		0
 #define CTDB_MONITORING_DISABLED	1
 
@@ -723,6 +724,7 @@ struct ctdb_fetch_handle {
 /* internal prototypes */
 void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
 void ctdb_fatal(struct ctdb_context *ctdb, const char *msg);
+void ctdb_die(struct ctdb_context *ctdb, const char *msg);
 void ctdb_external_trace(void);
 bool ctdb_same_address(struct ctdb_address *a1, struct ctdb_address *a2);
 int ctdb_parse_address(struct ctdb_context *ctdb,
@@ -1220,6 +1222,7 @@ struct ctdb_lock_info {
 };
 
 char *ctdb_get_process_name(pid_t pid);
+int ctdb_set_process_name(const char *name);
 bool ctdb_get_lock_info(pid_t req_pid, struct ctdb_lock_info *lock_info);
 bool ctdb_get_blocker_pid(struct ctdb_lock_info *reqlock, pid_t *blocker_pid);
 
@@ -1283,7 +1286,7 @@ int ctdb_ctrl_get_all_tunables(struct ctdb_context *ctdb,
 			       uint32_t destnode,
 			       struct ctdb_tunable *tunables);
 
-int ctdb_start_freeze(struct ctdb_context *ctdb, uint32_t priority);
+void ctdb_start_freeze(struct ctdb_context *ctdb, uint32_t priority);
 
 bool parse_ip_mask(const char *s, const char *iface, ctdb_sock_addr *addr, unsigned *mask);
 bool parse_ip_port(const char *s, ctdb_sock_addr *addr);
@@ -1438,7 +1441,7 @@ int ctdb_vacuum_init(struct ctdb_db_context *ctdb_db);
 int32_t ctdb_control_enable_script(struct ctdb_context *ctdb, TDB_DATA indata);
 int32_t ctdb_control_disable_script(struct ctdb_context *ctdb, TDB_DATA indata);
 
-int32_t ctdb_local_node_got_banned(struct ctdb_context *ctdb);
+void ctdb_local_node_got_banned(struct ctdb_context *ctdb);
 int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata);
 int32_t ctdb_control_get_ban_state(struct ctdb_context *ctdb, TDB_DATA *outdata);
 int32_t ctdb_control_set_db_priority(struct ctdb_context *ctdb, TDB_DATA indata);
@@ -1491,7 +1494,8 @@ void ctdb_run_notification_script(struct ctdb_context *ctdb, const char *event);
 void ctdb_fault_setup(void);
 
 int verify_remote_ip_allocation(struct ctdb_context *ctdb, 
-				struct ctdb_all_public_ips *ips);
+				struct ctdb_all_public_ips *ips,
+				uint32_t pnn);
 int update_ip_assignment_tree(struct ctdb_context *ctdb,
 				struct ctdb_public_ip *ip);
 
@@ -1557,10 +1561,6 @@ int ctdb_fetch_func(struct ctdb_call_info *call);
 
 int ctdb_fetch_with_header_func(struct ctdb_call_info *call);
 
-int32_t ctdb_control_get_db_statistics(struct ctdb_context *ctdb,
-				uint32_t db_id,
-				TDB_DATA *outdata);
-
 int ctdb_set_db_sticky(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb_db);
 
 /*
diff --git a/include/ctdb_protocol.h b/include/ctdb_protocol.h
index 10f643b..9e95f4d 100644
--- a/include/ctdb_protocol.h
+++ b/include/ctdb_protocol.h
@@ -733,13 +733,6 @@ struct ctdb_db_statistics {
 	uint32_t num_hot_keys;
 	struct ctdb_db_hot_key hot_keys[MAX_HOT_KEYS];
 };
-struct ctdb_db_statistics_wire {
-	uint32_t db_ro_delegations;
-	uint32_t db_ro_revokes;
-	uint32_t hop_count_bucket[MAX_COUNT_BUCKETS];
-	uint32_t num_hot_keys;
-	char hot_keys[1];
-};
 
 /*
  * wire format for interface list
diff --git a/include/ctdb_version.h b/include/ctdb_version.h
index a867913..98b5858 100644
--- a/include/ctdb_version.h
+++ b/include/ctdb_version.h
@@ -1,4 +1,4 @@
 /* This file is auto-generated by packaging/mkversion.sh */
 
-#define CTDB_VERSION_STRING "2.2"
+#define CTDB_VERSION_STRING "2.3"
 
diff --git a/lib/util/db_wrap.c b/lib/util/db_wrap.c
index 7e7e17f..07b066c 100644
--- a/lib/util/db_wrap.c
+++ b/lib/util/db_wrap.c
@@ -28,7 +28,7 @@
 
 #include "includes.h"
 #include "lib/util/dlinklist.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "db_wrap.h"
 
 static struct tdb_wrap *tdb_list;
diff --git a/libctdb/control.c b/libctdb/control.c
index 64cc80e..2a7db95 100644
--- a/libctdb/control.c
+++ b/libctdb/control.c
@@ -124,10 +124,6 @@ bool ctdb_getdbstat_recv(struct ctdb_connection *ctdb,
 			 struct ctdb_db_statistics **stat)
 {
 	struct ctdb_reply_control *reply;
-	struct ctdb_db_statistics *s;
-	struct ctdb_db_statistics_wire *wire;
-	int i;
-	char *ptr;
 
 	reply = unpack_reply_control(req, CTDB_CONTROL_GET_DB_STATISTICS);
 	if (!reply) {
@@ -137,37 +133,16 @@ bool ctdb_getdbstat_recv(struct ctdb_connection *ctdb,
 		DEBUG(ctdb, LOG_ERR, "ctdb_getpnn_recv: status -1");
 		return false;
 	}
-	if (reply->datalen < offsetof(struct ctdb_db_statistics_wire, hot_keys)) {
+	if (reply->datalen < offsetof(struct ctdb_db_statistics, hot_keys)) {
 		DEBUG(ctdb, LOG_ERR, "ctdb_getdbstat_recv: returned data is %d bytes but should be >= %d", reply->datalen, (int)sizeof(struct ctdb_db_statistics));
 		return false;
 	}
 
-	wire = (struct ctdb_db_statistics_wire *)reply->data;
-
-	s = malloc(offsetof(struct ctdb_db_statistics, hot_keys) + sizeof(struct ctdb_db_hot_key) * wire->num_hot_keys);
-	if (!s) {
+	*stat = malloc(reply->datalen);
+	if (*stat == NULL) {
 		return false;
 	}
-	s->db_ro_delegations = wire->db_ro_delegations;
-	s->db_ro_revokes     = wire->db_ro_revokes;
-	for (i = 0; i < MAX_COUNT_BUCKETS; i++) {
-		s->hop_count_bucket[i] = wire->hop_count_bucket[i];
-	}
-	s->num_hot_keys      = wire->num_hot_keys;
-	ptr = &wire->hot_keys[0];
-	for (i = 0; i < wire->num_hot_keys; i++) {
-		s->hot_keys[i].count = *(uint32_t *)ptr;
-		ptr += 4;
-
-		s->hot_keys[i].key.dsize = *(uint32_t *)ptr;
-		ptr += 4;
-
-		s->hot_keys[i].key.dptr = malloc(s->hot_keys[i].key.dsize);
-		memcpy(s->hot_keys[i].key.dptr, ptr, s->hot_keys[i].key.dsize);
-		ptr += s->hot_keys[i].key.dsize;
-	}
-
-	*stat = s;
+	memcpy(*stat, reply->data, reply->datalen);
 
 	return true;
 }
diff --git a/libctdb/ctdb.c b/libctdb/ctdb.c
index dd1b572..4462fe4 100644
--- a/libctdb/ctdb.c
+++ b/libctdb/ctdb.c
@@ -17,6 +17,7 @@
    You should have received a copy of the GNU General Public License
    along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
+#include "config.h"
 #include <sys/time.h>
 #include <sys/socket.h>
 #include <string.h>
diff --git a/packaging/RPM/ctdb.spec b/packaging/RPM/ctdb.spec
index 2a31fe9..e30f33b 100644
--- a/packaging/RPM/ctdb.spec
+++ b/packaging/RPM/ctdb.spec
@@ -1,9 +1,10 @@
+%define with_systemd  %{?_with_systemd: 1} %{?!_with_systemd: 0}
 %define initdir %{_sysconfdir}/init.d
 Name: ctdb
 Summary: Clustered TDB
 Vendor: Samba Team
 Packager: Samba Team <samba at samba.org>
-Version: 2.2
+Version: 2.3
 Release: 1
 Epoch: 0
 License: GNU GPL version 3
@@ -16,33 +17,37 @@ Source: ctdb-%{version}.tar.gz
 Requires: coreutils, sed, gawk, iptables, iproute, procps, ethtool, sudo
 # Commands - package name might vary
 Requires: /usr/bin/killall, /bin/kill, /bin/netstat
-# Directories
-Requires: /etc/init.d
 
 Provides: ctdb = %{version}
 
 Prefix: /usr
 BuildRoot: %{_tmppath}/%{name}-%{version}-root
 
-# Always use the bundled versions of these libraries.
-%define with_included_talloc 1
-%define with_included_tdb 1
-%define with_included_tevent 1
+# Allow build with system libraries
+# To enable, run rpmbuild with,
+#      "--with system_talloc"
+#      "--with system_tdb"
+#      "--with system_tevent"
+%define with_included_talloc %{?_with_system_talloc: 0} %{?!_with_system_talloc: 1}
+%define with_included_tdb %{?_with_system_tdb: 0} %{?!_with_system_tdb: 1}
+%define with_included_tevent %{?_with_system_tevent: 0} %{?!_with_system_tevent: 1}
 
-# If the above options are changed then mandate minimum system
-# versions.
-%define libtalloc_version 2.0.6
-%define libtdb_version 1.2.9
-%define libtevent_version 0.9.13
+# Required minimum library versions when building with system libraries
+%define libtalloc_version 2.0.8
+%define libtdb_version 1.2.11
+%define libtevent_version 0.9.18
 
 %if ! %with_included_talloc
 BuildRequires: libtalloc-devel >= %{libtalloc_version}
+Requires: libtalloc >= %{libtalloc_version}
 %endif
 %if ! %with_included_tdb
 BuildRequires: libtdb-devel >= %{libtdb_version}
+Requires: libtdb >= %{libtdb_version}
 %endif
 %if ! %with_included_tevent
 BuildRequires: libtevent-devel >= %{libtevent_version}
+Requires: libtevent >= %{libtevent_version}
 %endif
 
 # To build the ctdb-pcp-pmda package, run rpmbuild with "--with pmda"
@@ -51,6 +56,10 @@ BuildRequires: libtevent-devel >= %{libtevent_version}
 BuildRequires: pcp-libs-devel
 %endif
 
+%if %{with_systemd}
+BuildRequires: systemd-units
+%endif
+
 %description
 ctdb is the clustered database used by samba
 
@@ -77,7 +86,7 @@ export CC
 ## always run autogen.sh
 ./autogen.sh
 
-CFLAGS="$RPM_OPT_FLAGS $EXTRA -O0 -D_GNU_SOURCE" ./configure \
+CFLAGS="$RPM_OPT_FLAGS $EXTRA -D_GNU_SOURCE" ./configure \
 %if %with_included_talloc
 	--with-included-talloc \
 %endif
@@ -104,16 +113,22 @@ rm -rf $RPM_BUILD_ROOT
 
 # Create the target build directory hierarchy
 mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig
-mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/init.d
 mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/sudoers.d
 
 make DESTDIR=$RPM_BUILD_ROOT docdir=%{_docdir} install install_tests
 
 install -m644 config/ctdb.sysconfig $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig/ctdb
+
+%if %{with_systemd}
+mkdir -p $RPM_BUILD_ROOT%{_unitdir}
+install -m 755 config/ctdb.service $RPM_BUILD_ROOT%{_unitdir}
+%else
+mkdir -p $RPM_BUILD_ROOT%{initdir}
 install -m755 config/ctdb.init $RPM_BUILD_ROOT%{initdir}/ctdb
+%endif
 
-mkdir -p $RPM_BUILD_ROOT%{_docdir}/ctdb/tests/bin
-install -m755 tests/bin/ctdb_transaction $RPM_BUILD_ROOT%{_docdir}/ctdb/tests/bin
+cp config/events.d/README README.eventscripts
+cp config/notify.d.README README.notify.d
 
 # Remove "*.old" files
 find $RPM_BUILD_ROOT -name "*.old" -exec rm -f {} \;
@@ -134,20 +149,21 @@ rm -rf $RPM_BUILD_ROOT
 %config(noreplace) %{_sysconfdir}/ctdb/debug-hung-script.sh
 %config(noreplace) %{_sysconfdir}/ctdb/ctdb-crash-cleanup.sh
 %config(noreplace) %{_sysconfdir}/ctdb/gcore_trace.sh
-%config(noreplace) %{_sysconfdir}/ctdb/functions
+
+%if %{with_systemd}
+%{_unitdir}/ctdb.service
+%else
 %attr(755,root,root) %{initdir}/ctdb
+%endif
+
 %attr(755,root,root) %{_sysconfdir}/ctdb/notify.d
 
-%{_docdir}/ctdb/README
-%{_docdir}/ctdb/COPYING
-%{_docdir}/ctdb/README.eventscripts
-%{_docdir}/ctdb/recovery-process.txt
-%{_docdir}/ctdb/ctdb.1.html
-%{_docdir}/ctdb/ctdbd.1.html
-%{_docdir}/ctdb/onnode.1.html
-%{_docdir}/ctdb/ltdbtool.1.html
-%{_docdir}/ctdb/ping_pong.1.html
+%doc README COPYING NEWS
+%doc README.eventscripts README.notify.d
+%doc doc/recovery-process.txt
+%doc doc/*.html
 %{_sysconfdir}/sudoers.d/ctdb
+%{_sysconfdir}/ctdb/functions
 %{_sysconfdir}/ctdb/events.d/00.ctdb
 %{_sysconfdir}/ctdb/events.d/01.reclock
 %{_sysconfdir}/ctdb/events.d/10.interface
@@ -172,8 +188,8 @@ rm -rf $RPM_BUILD_ROOT
 %config(noreplace) %{_sysconfdir}/ctdb/nfs-rpc-checks.d/40.mountd.check
 %config(noreplace) %{_sysconfdir}/ctdb/nfs-rpc-checks.d/50.rquotad.check
 %{_sysconfdir}/ctdb/statd-callout
-%{_sysconfdir}/ctdb/notify.d/README
 %{_sbindir}/ctdbd
+%{_sbindir}/ctdbd_wrapper
 %{_bindir}/ctdb
 %{_bindir}/ctdb_lock_helper
 %{_bindir}/smnotify
@@ -188,7 +204,6 @@ rm -rf $RPM_BUILD_ROOT
 %{_mandir}/man1/ping_pong.1.gz
 %{_libdir}/pkgconfig/ctdb.pc
 
-%{_docdir}/ctdb/tests/bin/ctdb_transaction
 
 %package devel
 Summary: CTDB development libraries
diff --git a/packaging/RPM/ctdb.spec.in b/packaging/RPM/ctdb.spec.in
index 44bd46f..b87ba0b 100644
--- a/packaging/RPM/ctdb.spec.in
+++ b/packaging/RPM/ctdb.spec.in
@@ -1,3 +1,4 @@
+%define with_systemd  %{?_with_systemd: 1} %{?!_with_systemd: 0}
 %define initdir %{_sysconfdir}/init.d
 Name: ctdb
 Summary: Clustered TDB
@@ -16,33 +17,37 @@ Source: ctdb-%{version}.tar.gz
 Requires: coreutils, sed, gawk, iptables, iproute, procps, ethtool, sudo
 # Commands - package name might vary
 Requires: /usr/bin/killall, /bin/kill, /bin/netstat
-# Directories
-Requires: /etc/init.d
 
 Provides: ctdb = %{version}
 
 Prefix: /usr
 BuildRoot: %{_tmppath}/%{name}-%{version}-root
 
-# Always use the bundled versions of these libraries.
-%define with_included_talloc 1
-%define with_included_tdb 1
-%define with_included_tevent 1
+# Allow build with system libraries
+# To enable, run rpmbuild with,
+#      "--with system_talloc"
+#      "--with system_tdb"
+#      "--with system_tevent"
+%define with_included_talloc %{?_with_system_talloc: 0} %{?!_with_system_talloc: 1}
+%define with_included_tdb %{?_with_system_tdb: 0} %{?!_with_system_tdb: 1}
+%define with_included_tevent %{?_with_system_tevent: 0} %{?!_with_system_tevent: 1}
 
-# If the above options are changed then mandate minimum system
-# versions.
-%define libtalloc_version 2.0.6
-%define libtdb_version 1.2.9
-%define libtevent_version 0.9.13
+# Required minimum library versions when building with system libraries
+%define libtalloc_version 2.0.8
+%define libtdb_version 1.2.11
+%define libtevent_version 0.9.18
 
 %if ! %with_included_talloc
 BuildRequires: libtalloc-devel >= %{libtalloc_version}
+Requires: libtalloc >= %{libtalloc_version}
 %endif
 %if ! %with_included_tdb
 BuildRequires: libtdb-devel >= %{libtdb_version}
+Requires: libtdb >= %{libtdb_version}
 %endif
 %if ! %with_included_tevent
 BuildRequires: libtevent-devel >= %{libtevent_version}
+Requires: libtevent >= %{libtevent_version}
 %endif
 
 # To build the ctdb-pcp-pmda package, run rpmbuild with "--with pmda"
@@ -51,6 +56,10 @@ BuildRequires: libtevent-devel >= %{libtevent_version}
 BuildRequires: pcp-libs-devel
 %endif
 
+%if %{with_systemd}
+BuildRequires: systemd-units
+%endif
+
 %description
 ctdb is the clustered database used by samba
 
@@ -77,7 +86,7 @@ export CC
 ## always run autogen.sh
 ./autogen.sh
 
-CFLAGS="$RPM_OPT_FLAGS $EXTRA -O0 -D_GNU_SOURCE" ./configure \
+CFLAGS="$RPM_OPT_FLAGS $EXTRA -D_GNU_SOURCE" ./configure \
 %if %with_included_talloc
 	--with-included-talloc \
 %endif
@@ -104,16 +113,22 @@ rm -rf $RPM_BUILD_ROOT
 
 # Create the target build directory hierarchy
 mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig
-mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/init.d
 mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/sudoers.d
 
 make DESTDIR=$RPM_BUILD_ROOT docdir=%{_docdir} install install_tests
 
 install -m644 config/ctdb.sysconfig $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig/ctdb
+
+%if %{with_systemd}
+mkdir -p $RPM_BUILD_ROOT%{_unitdir}
+install -m 755 config/ctdb.service $RPM_BUILD_ROOT%{_unitdir}
+%else
+mkdir -p $RPM_BUILD_ROOT%{initdir}
 install -m755 config/ctdb.init $RPM_BUILD_ROOT%{initdir}/ctdb
+%endif
 
-mkdir -p $RPM_BUILD_ROOT%{_docdir}/ctdb/tests/bin
-install -m755 tests/bin/ctdb_transaction $RPM_BUILD_ROOT%{_docdir}/ctdb/tests/bin
+cp config/events.d/README README.eventscripts
+cp config/notify.d.README README.notify.d
 
 # Remove "*.old" files
 find $RPM_BUILD_ROOT -name "*.old" -exec rm -f {} \;
@@ -134,20 +149,21 @@ rm -rf $RPM_BUILD_ROOT
 %config(noreplace) %{_sysconfdir}/ctdb/debug-hung-script.sh
 %config(noreplace) %{_sysconfdir}/ctdb/ctdb-crash-cleanup.sh
 %config(noreplace) %{_sysconfdir}/ctdb/gcore_trace.sh
-%config(noreplace) %{_sysconfdir}/ctdb/functions
+
+%if %{with_systemd}
+%{_unitdir}/ctdb.service
+%else
 %attr(755,root,root) %{initdir}/ctdb
+%endif
+
 %attr(755,root,root) %{_sysconfdir}/ctdb/notify.d
 
-%{_docdir}/ctdb/README
-%{_docdir}/ctdb/COPYING
-%{_docdir}/ctdb/README.eventscripts
-%{_docdir}/ctdb/recovery-process.txt
-%{_docdir}/ctdb/ctdb.1.html
-%{_docdir}/ctdb/ctdbd.1.html
-%{_docdir}/ctdb/onnode.1.html
-%{_docdir}/ctdb/ltdbtool.1.html
-%{_docdir}/ctdb/ping_pong.1.html
+%doc README COPYING NEWS
+%doc README.eventscripts README.notify.d
+%doc doc/recovery-process.txt
+%doc doc/*.html
 %{_sysconfdir}/sudoers.d/ctdb
+%{_sysconfdir}/ctdb/functions
 %{_sysconfdir}/ctdb/events.d/00.ctdb
 %{_sysconfdir}/ctdb/events.d/01.reclock
 %{_sysconfdir}/ctdb/events.d/10.interface
@@ -172,8 +188,8 @@ rm -rf $RPM_BUILD_ROOT
 %config(noreplace) %{_sysconfdir}/ctdb/nfs-rpc-checks.d/40.mountd.check
 %config(noreplace) %{_sysconfdir}/ctdb/nfs-rpc-checks.d/50.rquotad.check
 %{_sysconfdir}/ctdb/statd-callout
-%{_sysconfdir}/ctdb/notify.d/README
 %{_sbindir}/ctdbd
+%{_sbindir}/ctdbd_wrapper
 %{_bindir}/ctdb
 %{_bindir}/ctdb_lock_helper
 %{_bindir}/smnotify
@@ -188,7 +204,6 @@ rm -rf $RPM_BUILD_ROOT
 %{_mandir}/man1/ping_pong.1.gz
 %{_libdir}/pkgconfig/ctdb.pc
 
-%{_docdir}/ctdb/tests/bin/ctdb_transaction
 
 %package devel
 Summary: CTDB development libraries
diff --git a/server/ctdb_banning.c b/server/ctdb_banning.c
index 4e6db31..e6df4b9 100644
--- a/server/ctdb_banning.c
+++ b/server/ctdb_banning.c
@@ -17,7 +17,7 @@
    along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
 #include "includes.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "system/time.h"
 #include "system/network.h"
 #include "system/filesys.h"
@@ -31,6 +31,21 @@ ctdb_ban_node_event(struct event_context *ev, struct timed_event *te,
 			       struct timeval t, void *private_data)
 {
 	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+	bool freeze_failed = false;
+	int i;
+
+	/* Make sure we were able to freeze databases during banning */
+	for (i=1; i<=NUM_DB_PRIORITIES; i++) {
+		if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
+			freeze_failed = true;
+			break;
+		}
+	}
+	if (freeze_failed) {
+		DEBUG(DEBUG_ERR, ("Banning timedout, but still unable to freeze databases\n"));
+		ctdb_ban_self(ctdb);
+		return;
+	}
 
 	DEBUG(DEBUG_ERR,("Banning timedout\n"));
 	ctdb->nodes[ctdb->pnn]->flags &= ~NODE_FLAGS_BANNED;
@@ -41,7 +56,7 @@ ctdb_ban_node_event(struct event_context *ev, struct timed_event *te,
 	}
 }
 
-int32_t ctdb_local_node_got_banned(struct ctdb_context *ctdb)
+void ctdb_local_node_got_banned(struct ctdb_context *ctdb)
 {
 	uint32_t i;
 
@@ -56,14 +71,10 @@ int32_t ctdb_local_node_got_banned(struct ctdb_context *ctdb)
 	ctdb->vnn_map->generation = INVALID_GENERATION;
 
 	for (i=1; i<=NUM_DB_PRIORITIES; i++) {
-		if (ctdb_start_freeze(ctdb, i) != 0) {
-			DEBUG(DEBUG_ERR,(__location__ " Failed to freeze db priority %u\n", i));
-		}
+		ctdb_start_freeze(ctdb, i);
 	}
 	ctdb_release_all_ips(ctdb);
 	ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
-
-	return 0;
 }
 
 int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata)
@@ -78,12 +89,16 @@ int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata)
 			return -1;
 		}
 		if (bantime->time == 0) {
-			DEBUG(DEBUG_INFO,("unbanning node %d\n", bantime->pnn));
+			DEBUG(DEBUG_NOTICE,("unbanning node %d\n", bantime->pnn));
 			ctdb->nodes[bantime->pnn]->flags &= ~NODE_FLAGS_BANNED;
 		} else {
-			DEBUG(DEBUG_INFO,("banning node %d\n", bantime->pnn));
+			DEBUG(DEBUG_NOTICE,("banning node %d\n", bantime->pnn));
 			if (ctdb->tunable.enable_bans == 0) {
-				DEBUG(DEBUG_INFO,("Bans are disabled - ignoring ban of node %u\n", bantime->pnn));
+				/* FIXME: This is bogus. We really should be
+				 * taking decision based on the tunables on
+				 * the banned node and not local node.
+				 */
+				DEBUG(DEBUG_WARNING,("Bans are disabled - ignoring ban of node %u\n", bantime->pnn));
 				return 0;
 			}
 
@@ -120,10 +135,8 @@ int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata)
 	ctdb->nodes[bantime->pnn]->flags |= NODE_FLAGS_BANNED;
 
 	event_add_timed(ctdb->ev, ctdb->banning_ctx, timeval_current_ofs(bantime->time,0), ctdb_ban_node_event, ctdb);
-	if (bantime->pnn == ctdb->pnn) {
-		return ctdb_local_node_got_banned(ctdb);
-	}
 
+	ctdb_local_node_got_banned(ctdb);
 	return 0;
 }
 
diff --git a/server/ctdb_call.c b/server/ctdb_call.c
index a98903d..aa69f93 100644
--- a/server/ctdb_call.c
+++ b/server/ctdb_call.c
@@ -21,7 +21,7 @@
   protocol design and packet details
 */
 #include "includes.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "lib/util/dlinklist.h"
 #include "system/network.h"
 #include "system/filesys.h"
@@ -1645,6 +1645,7 @@ int ctdb_start_revoke_ro_record(struct ctdb_context *ctdb, struct ctdb_db_contex
 		close(rc->fd[0]);
 		debug_extra = talloc_asprintf(NULL, "revokechild-%s:", ctdb_db->db_name);
 
+		ctdb_set_process_name("ctdb_revokechild");
 		if (switch_from_server_to_client(ctdb, "revokechild-%s", ctdb_db->db_name) != 0) {
 			DEBUG(DEBUG_ERR,("Failed to switch from server to client for revokechild process\n"));
 			c = 1;
diff --git a/server/ctdb_control.c b/server/ctdb_control.c
index bf4a20d..690608e 100644
--- a/server/ctdb_control.c
+++ b/server/ctdb_control.c
@@ -17,7 +17,7 @@
    along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
 #include "includes.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "system/network.h"
 #include "system/filesys.h"
 #include "system/wait.h"
@@ -331,17 +331,10 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 		return 0;
 
 	case CTDB_CONTROL_SHUTDOWN:
-		DEBUG(DEBUG_NOTICE,("Received SHUTDOWN command. Stopping CTDB daemon.\n"));
-		ctdb_set_runstate(ctdb, CTDB_RUNSTATE_SHUTDOWN);
-		ctdb_stop_recoverd(ctdb);
-		ctdb_stop_keepalive(ctdb);
-		ctdb_stop_monitoring(ctdb);
-		ctdb_release_all_ips(ctdb);
-		ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN);
-		if (ctdb->methods != NULL) {
-			ctdb->methods->shutdown(ctdb);
-		}
-		exit(0);
+		DEBUG(DEBUG_NOTICE,("Received SHUTDOWN command.\n"));
+		ctdb_shutdown_sequence(ctdb, 0);
+		/* In case above returns due to duplicate shutdown */
+		return 0;
 
 	case CTDB_CONTROL_TAKEOVER_IPv4:
 		CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_public_ipv4));
@@ -658,9 +651,18 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 		CHECK_CONTROL_DATA_SIZE(size);
 		return ctdb_control_schedule_for_deletion(ctdb, indata);
 	}
-	case CTDB_CONTROL_GET_DB_STATISTICS:
-		CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
-		return ctdb_control_get_db_statistics(ctdb, *(uint32_t *)indata.dptr, outdata);
+	case CTDB_CONTROL_GET_DB_STATISTICS: {
+		uint32_t db_id;
+		struct ctdb_db_context *ctdb_db;
+
+		CHECK_CONTROL_DATA_SIZE(sizeof(db_id));
+		db_id = *(uint32_t *)indata.dptr;
+		ctdb_db = find_ctdb_db(ctdb, db_id);
+		if (ctdb_db == NULL) return -1;
+		outdata->dptr = (uint8_t *)&ctdb_db->statistics;
+		outdata->dsize = sizeof(ctdb_db->statistics);
+		return 0;
+	}
 
 	case CTDB_CONTROL_RELOAD_PUBLIC_IPS:
 		CHECK_CONTROL_DATA_SIZE(0);
diff --git a/server/ctdb_daemon.c b/server/ctdb_daemon.c
index cedee09..0932157 100644
--- a/server/ctdb_daemon.c
+++ b/server/ctdb_daemon.c
@@ -19,7 +19,7 @@
 
 #include "includes.h"
 #include "db_wrap.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "lib/util/dlinklist.h"
 #include "system/network.h"
 #include "system/filesys.h"
@@ -1032,8 +1032,7 @@ static void ctdb_setup_event_callback(struct ctdb_context *ctdb, int status,
 				      void *private_data)
 {
 	if (status != 0) {
-		DEBUG(DEBUG_ALERT,("Failed to run setup event - exiting\n"));
-		exit(1);
+		ctdb_die(ctdb, "Failed to run setup event");
 	}
 	ctdb_run_notification_script(ctdb, "setup");
 
@@ -1213,6 +1212,13 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog,
 	/* force initial recovery for election */
 	ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
 
+	ctdb_set_runstate(ctdb, CTDB_RUNSTATE_INIT);
+	ret = ctdb_event_script(ctdb, CTDB_EVENT_INIT);
+	if (ret != 0) {
+		ctdb_die(ctdb, "Failed to run init event\n");
+	}
+	ctdb_run_notification_script(ctdb, "init");
+
 	if (strcmp(ctdb->transport, "tcp") == 0) {
 		int ctdb_tcp_init(struct ctdb_context *);
 		ret = ctdb_tcp_init(ctdb);
@@ -1258,13 +1264,6 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog,
 		ctdb_fatal(ctdb, "Failed to attach to databases\n");
 	}
 
-	ctdb_set_runstate(ctdb, CTDB_RUNSTATE_INIT);
-	ret = ctdb_event_script(ctdb, CTDB_EVENT_INIT);
-	if (ret != 0) {
-		ctdb_fatal(ctdb, "Failed to run init event\n");
-	}
-	ctdb_run_notification_script(ctdb, "init");
-
 	/* start frozen, then let the first election sort things out */
 	if (!ctdb_blocking_freeze(ctdb)) {
 		ctdb_fatal(ctdb, "Failed to get initial freeze\n");
@@ -1274,6 +1273,9 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog,
 	fde = event_add_fd(ctdb->ev, ctdb, ctdb->daemon.sd, 
 			   EVENT_FD_READ,
 			   ctdb_accept_client, ctdb);
+	if (fde == NULL) {
+		ctdb_fatal(ctdb, "Failed to add daemon socket to event loop");
+	}
 	tevent_fd_set_auto_close(fde);
 
 	/* release any IPs we hold from previous runs of the daemon */
@@ -1725,3 +1727,25 @@ int32_t ctdb_control_process_exists(struct ctdb_context *ctdb, pid_t pid)
 
 	return kill(pid, 0);
 }
+
+void ctdb_shutdown_sequence(struct ctdb_context *ctdb, int exit_code)
+{
+	if (ctdb->runstate == CTDB_RUNSTATE_SHUTDOWN) {
+		DEBUG(DEBUG_NOTICE,("Already shutting down so will not proceed.\n"));
+		return;
+	}
+
+	DEBUG(DEBUG_NOTICE,("Shutdown sequence commencing.\n"));
+	ctdb_set_runstate(ctdb, CTDB_RUNSTATE_SHUTDOWN);
+	ctdb_stop_recoverd(ctdb);
+	ctdb_stop_keepalive(ctdb);
+	ctdb_stop_monitoring(ctdb);
+	ctdb_release_all_ips(ctdb);
+	ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN);
+	if (ctdb->methods != NULL) {
+		ctdb->methods->shutdown(ctdb);
+	}
+
+	DEBUG(DEBUG_NOTICE,("Shutdown sequence complete, exiting.\n"));
+	exit(exit_code);
+}
diff --git a/server/ctdb_freeze.c b/server/ctdb_freeze.c
index 88384f9..fee44d4 100644
--- a/server/ctdb_freeze.c
+++ b/server/ctdb_freeze.c
@@ -17,7 +17,7 @@
    along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
 #include "includes.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "system/network.h"
 #include "system/filesys.h"
 #include "system/wait.h"
@@ -126,43 +126,38 @@ static int ctdb_freeze_waiter_destructor(struct ctdb_freeze_waiter *w)
 /*
   start the freeze process for a certain priority
  */
-int ctdb_start_freeze(struct ctdb_context *ctdb, uint32_t priority)
+void ctdb_start_freeze(struct ctdb_context *ctdb, uint32_t priority)
 {
 	struct ctdb_freeze_handle *h;
 
-	if (priority == 0) {
-		DEBUG(DEBUG_ERR,("Freeze priority 0 requested, remapping to priority 1\n"));
-		priority = 1;
-	}
-
 	if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
 		DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
-		return -1;
+		ctdb_fatal(ctdb, "Internal error");
 	}
 
 	if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
 		/* we're already frozen */
-		return 0;
+		return;
 	}
 
+	DEBUG(DEBUG_ERR, ("Freeze priority %u\n", priority));
+
 	/* Stop any vacuuming going on: we don't want to wait. */
 	ctdb_stop_vacuuming(ctdb);
 
 	/* if there isn't a freeze lock child then create one */
 	if (ctdb->freeze_handles[priority] == NULL) {
 		h = talloc_zero(ctdb, struct ctdb_freeze_handle);
-		CTDB_NO_MEMORY(ctdb, h);
+		CTDB_NO_MEMORY_FATAL(ctdb, h);
 		h->ctdb = ctdb;
 		h->priority = priority;
 		talloc_set_destructor(h, ctdb_freeze_handle_destructor);
 
 		h->lreq = ctdb_lock_alldb_prio(ctdb, priority, false, ctdb_freeze_lock_handler, h);
-		CTDB_NO_MEMORY(ctdb, h->lreq);
+		CTDB_NO_MEMORY_FATAL(ctdb, h->lreq);
 		ctdb->freeze_handles[priority] = h;
 		ctdb->freeze_mode[priority] = CTDB_FREEZE_PENDING;
 	}
-
-	return 0;
 }
 
 /*
@@ -175,8 +170,6 @@ int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *
 
 	priority = (uint32_t)c->srvid;
 
-	DEBUG(DEBUG_ERR, ("Freeze priority %u\n", priority));
-
 	if (priority == 0) {
 		DEBUG(DEBUG_ERR,("Freeze priority 0 requested, remapping to priority 1\n"));
 		priority = 1;
@@ -188,14 +181,12 @@ int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *
 	}
 
 	if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
+		DEBUG(DEBUG_ERR, ("Freeze priority %u\n", priority));
 		/* we're already frozen */
 		return 0;
 	}
 
-	if (ctdb_start_freeze(ctdb, priority) != 0) {
-		DEBUG(DEBUG_ERR,(__location__ " Failed to start freezing databases with priority %u\n", priority));
-		return -1;
-	}
+	ctdb_start_freeze(ctdb, priority);
 
 	/* add ourselves to list of waiters */
 	if (ctdb->freeze_handles[priority] == NULL) {
@@ -226,10 +217,7 @@ bool ctdb_blocking_freeze(struct ctdb_context *ctdb)
 	int i;
 
 	for (i=1; i<=NUM_DB_PRIORITIES; i++) {
-		if (ctdb_start_freeze(ctdb, i)) {
-			DEBUG(DEBUG_ERR,(__location__ " Failed to freeze databases of prio %u\n", i));
-			continue;
-		}
+		ctdb_start_freeze(ctdb, i);
 
 		/* block until frozen */
 		while (ctdb->freeze_mode[i] == CTDB_FREEZE_PENDING) {
diff --git a/server/ctdb_lock.c b/server/ctdb_lock.c
index 77b4da8..8886ed0 100644
--- a/server/ctdb_lock.c
+++ b/server/ctdb_lock.c
@@ -386,27 +386,31 @@ static void process_callbacks(struct lock_context *lock_ctx, bool locked)
 
 static int lock_bucket_id(double t)
 {
-	double us = 1.e-6, ms = 1.e-3, s = 1;
+	double ms = 1.e-3, s = 1;
 	int id;
 
-	if (t < 1*us) {
+	if (t < 1*ms) {
 		id = 0;
-	} else if (t < 10*us) {
+	} else if (t < 10*ms) {
 		id = 1;
-	} else if (t < 100*us) {
+	} else if (t < 100*ms) {
 		id = 2;
-	} else if (t < 1*ms) {
+	} else if (t < 1*s) {
 		id = 3;
-	} else if (t < 10*ms) {
+	} else if (t < 2*s) {
 		id = 4;
-	} else if (t < 100*ms) {
+	} else if (t < 4*s) {
 		id = 5;
-	} else if (t < 1*s) {
+	} else if (t < 8*s) {
 		id = 6;
-	} else if (t < 10*s) {
+	} else if (t < 16*s) {
 		id = 7;
-	} else {
+	} else if (t < 32*s) {
 		id = 8;
+	} else if (t < 64*s) {
+		id = 9;
+	} else {
+		id = 10;
 	}
 
 	return id;
@@ -422,7 +426,7 @@ static void ctdb_lock_handler(struct tevent_context *ev,
 			    void *private_data)
 {
 	struct lock_context *lock_ctx;
-	TALLOC_CTX *tmp_ctx;
+	TALLOC_CTX *tmp_ctx = NULL;
 	char c;
 	bool locked;
 	double t;
@@ -456,9 +460,13 @@ static void ctdb_lock_handler(struct tevent_context *ev,
 	}
 
 	if (locked) {
-		CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.num_current);
-		CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.buckets[id]);
 		if (lock_ctx->ctdb_db) {
+			CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.num_current);
+			CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.buckets[id]);
+			CTDB_UPDATE_LATENCY(lock_ctx->ctdb, lock_ctx->ctdb_db,
+					    lock_type_str[lock_ctx->type], locks.latency,
+					    lock_ctx->start_time);
+
 			CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_current);
 			CTDB_UPDATE_DB_LATENCY(lock_ctx->ctdb_db, lock_type_str[lock_ctx->type], locks.latency, t);
 			CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.buckets[id]);
@@ -478,8 +486,6 @@ static void ctdb_lock_handler(struct tevent_context *ev,
 }
 
 
-static void ctdb_lock_find_blocker(struct lock_context *lock_ctx);
-
 /*
  * Callback routine when required locks are not obtained within timeout
  * Called from parent context
@@ -489,15 +495,32 @@ static void ctdb_lock_timeout_handler(struct tevent_context *ev,
 				    struct timeval current_time,
 				    void *private_data)
 {
+	const char *cmd = getenv("CTDB_DEBUG_LOCKS");
 	struct lock_context *lock_ctx;
 	struct ctdb_context *ctdb;
+	pid_t pid;
 
 	lock_ctx = talloc_get_type_abort(private_data, struct lock_context);
 	ctdb = lock_ctx->ctdb;
 
+	if (lock_ctx->type == LOCK_RECORD || lock_ctx->type == LOCK_DB) {
+		DEBUG(DEBUG_WARNING,
+		      ("Unable to get %s lock on database %s for %.0lf seconds\n",
+		       (lock_ctx->type == LOCK_RECORD ? "RECORD" : "DB"),
+		       lock_ctx->ctdb_db->db_name,
+		       timeval_elapsed(&lock_ctx->start_time)));
+	} else {
+		DEBUG(DEBUG_WARNING,
+		      ("Unable to get ALLDB locks for %.0lf seconds\n",
+		       timeval_elapsed(&lock_ctx->start_time)));
+	}
+
 	/* fire a child process to find the blocking process */
-	if (lock_ctx->block_child == -1) {
-		ctdb_lock_find_blocker(lock_ctx);
+	if (cmd != NULL) {
+		pid = fork();
+		if (pid == 0) {
+			execl(cmd, cmd, NULL);
+		}
 	}
 
 	/* reset the timeout timer */
@@ -988,151 +1011,3 @@ struct lock_request *ctdb_lock_alldb(struct ctdb_context *ctdb,
 				  auto_mark);
 }
 
-/*
- * Callback routine to read the PID of blocking process from the child and log
- *
- */
-void ctdb_lock_blocked_handler(struct tevent_context *ev,
-				struct tevent_fd *tfd,
-				uint16_t flags,
-				void *private_data)
-{
-	struct lock_context *lock_ctx;
-	pid_t blocker_pid = -1;
-	char *process_name = NULL;
-	const char *db_name = NULL;
-	ino_t inode;
-	struct ctdb_db_context *ctdb_db;
-	int fd;
-	struct stat stat_buf;
-
-	lock_ctx = talloc_get_type_abort(private_data, struct lock_context);
-
-	if (read(lock_ctx->block_fd[0], &blocker_pid, sizeof(blocker_pid)) != sizeof(blocker_pid)) {
-		DEBUG(DEBUG_ERR, ("Error reading blocker process pid from child\n"));
-		goto failed;
-	}
-	if (read(lock_ctx->block_fd[0], &inode, sizeof(inode)) != sizeof(inode)) {
-		DEBUG(DEBUG_ERR, ("Error reading blocked inode from child\n"));
-		goto failed;
-	}
-
-	if (blocker_pid < 0) {
-		goto failed;
-	}
-
-	process_name = ctdb_get_process_name(blocker_pid);
-
-	if (lock_ctx->type == LOCK_RECORD || lock_ctx->type == LOCK_DB) {
-		db_name = lock_ctx->ctdb_db->ltdb->name;
-	} else {
-		for (ctdb_db = lock_ctx->ctdb->db_list; ctdb_db; ctdb_db = ctdb_db->next) {
-			fd = tdb_fd(ctdb_db->ltdb->tdb);
-			if (fstat(fd, &stat_buf) == 0) {
-				if (stat_buf.st_ino == inode) {
-					db_name = ctdb_db->ltdb->name;
-					break;
-				}
-			}
-		}
-	}
-
-	if (db_name) {
-		DEBUG(DEBUG_WARNING,
-		      ("Process (pid=%d) blocked in locking\n", lock_ctx->child));
-		DEBUG(DEBUG_WARNING,
-		      ("Process %s (pid=%d) locked database %s (inode %lu) for %.0lf seconds\n",
-		       (process_name ? process_name : "unknown"),
-		       blocker_pid, db_name, (unsigned long)inode,
-		       timeval_elapsed(&lock_ctx->start_time)));
-	} else {
-		DEBUG(DEBUG_WARNING,
-		      ("Process %s (pid=%d) locked database (inode %lu) for %.0lf seconds\n",
-		       (process_name ? process_name : "unknown"),
-		       blocker_pid, (unsigned long)inode,
-		       timeval_elapsed(&lock_ctx->start_time)));
-	}
-
-	/*
-	 * If ctdb is blocked by smbd for deadlock_interval, detect it as a deadlock
-	 * and kill smbd process.
-	 */
-	if (lock_ctx->ctdb->tunable.deadlock_timeout > 0 &&
-	    timeval_elapsed(&lock_ctx->start_time) > lock_ctx->ctdb->tunable.deadlock_timeout &&
-	    process_name && strstr(process_name, "smbd")) {
-		DEBUG(DEBUG_WARNING,
-		      ("Deadlock detected. Killing smbd process (pid=%d)", blocker_pid));
-		kill(blocker_pid, SIGKILL);
-	}
-
-	free(process_name);
-
-failed:
-	if (lock_ctx->block_child > 0) {
-		ctdb_kill(lock_ctx->ctdb, lock_ctx->block_child, SIGKILL);
-	}
-	lock_ctx->block_child = -1;
-	talloc_free(tfd);
-}
-
-
-/*
- * Find processes that holds lock we are interested in
- */
-void ctdb_lock_find_blocker(struct lock_context *lock_ctx)
-{
-	struct tevent_fd *tfd;
-	pid_t parent;
-
-	if (pipe(lock_ctx->block_fd) < 0) {
-		return;
-	}
-
-	parent = getpid();
-
-	lock_ctx->block_child = ctdb_fork(lock_ctx->ctdb);
-	if (lock_ctx->block_child == -1) {
-		close(lock_ctx->block_fd[0]);
-		close(lock_ctx->block_fd[1]);
-		return;
-	}
-
-	/* Child process */
-	if (lock_ctx->block_child == 0) {
-		struct ctdb_lock_info reqlock;
-		pid_t blocker_pid = -1;
-		bool status;
-
-		close(lock_ctx->block_fd[0]);
-		if (ctdb_get_lock_info(lock_ctx->child, &reqlock)) {
-			status = ctdb_get_blocker_pid(&reqlock, &blocker_pid);
-			if (!status) {
-				/* Could not find blocker pid */
-				blocker_pid = -2;
-			}
-		}
-		write(lock_ctx->block_fd[1], &blocker_pid, sizeof(blocker_pid));
-		write(lock_ctx->block_fd[1], &reqlock.inode, sizeof(reqlock.inode));
-
-		/* Hang around till parent dies */
-		while (kill(parent, 0) == 0 || errno != ESRCH) {
-			sleep(5);
-		}
-		_exit(0);
-	}
-
-	/* Parent process */
-	close(lock_ctx->block_fd[1]);
-	set_close_on_exec(lock_ctx->block_fd[0]);
-
-	tfd = tevent_add_fd(lock_ctx->ctdb->ev,
-				lock_ctx,
-				lock_ctx->block_fd[0],
-				EVENT_FD_READ,
-				ctdb_lock_blocked_handler,
-				(void *)lock_ctx);
-	if (tfd == NULL) {
-		ctdb_kill(lock_ctx->ctdb, lock_ctx->block_child, SIGKILL);
-		close(lock_ctx->block_fd[0]);
-	}
-}
diff --git a/server/ctdb_logging.c b/server/ctdb_logging.c
index 0b6ac12..218186e 100644
--- a/server/ctdb_logging.c
+++ b/server/ctdb_logging.c
@@ -85,6 +85,8 @@ int start_syslog_daemon(struct ctdb_context *ctdb)
 	struct sockaddr_in syslog_sin;
 	struct ctdb_syslog_state *state;
 	struct tevent_fd *fde;
+	int startup_fd[2];
+	int ret = -1;
 
 	state = talloc(ctdb, struct ctdb_syslog_state);
 	CTDB_NO_MEMORY(ctdb, state);
@@ -95,23 +97,42 @@ int start_syslog_daemon(struct ctdb_context *ctdb)
 		return -1;
 	}
 	
+	if (pipe(startup_fd) != 0) {
+		printf("Failed to create syslog startup pipe\n");
+		close(state->fd[0]);
+		close(state->fd[1]);
+		talloc_free(state);
+		return -1;
+	}
+	
 	ctdb->syslogd_pid = ctdb_fork(ctdb);
 	if (ctdb->syslogd_pid == (pid_t)-1) {
 		printf("Failed to create syslog child process\n");
 		close(state->fd[0]);
 		close(state->fd[1]);
+		close(startup_fd[0]);
+		close(startup_fd[1]);
 		talloc_free(state);
 		return -1;
 	}
 
-	syslogd_is_started = 1;
-
 	if (ctdb->syslogd_pid != 0) {
+		ssize_t n;
+		int dummy;
+
 		DEBUG(DEBUG_ERR,("Starting SYSLOG child process with pid:%d\n", (int)ctdb->syslogd_pid));
 
 		close(state->fd[1]);
 		set_close_on_exec(state->fd[0]);
 
+		close(startup_fd[1]);
+		n = read(startup_fd[0], &dummy, sizeof(dummy));
+		close(startup_fd[0]);
+		if (n < sizeof(dummy)) {
+			return -1;
+		}
+
+		syslogd_is_started = 1;
 		return 0;
 	}
 
@@ -120,9 +141,12 @@ int start_syslog_daemon(struct ctdb_context *ctdb)
 	ctdb->ev = event_context_init(NULL);
 
 	syslog(LOG_ERR, "Starting SYSLOG daemon with pid:%d", (int)getpid());
+	ctdb_set_process_name("ctdb_syslogd");
 
 	close(state->fd[0]);
+	close(startup_fd[0]);
 	set_close_on_exec(state->fd[1]);
+	set_close_on_exec(startup_fd[1]);
 	fde = event_add_fd(ctdb->ev, state, state->fd[1], EVENT_FD_READ,
 		     ctdb_syslog_terminate_handler, state);
 	tevent_fd_set_auto_close(fde);
@@ -130,7 +154,8 @@ int start_syslog_daemon(struct ctdb_context *ctdb)
 	state->syslog_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
 	if (state->syslog_fd == -1) {
 		printf("Failed to create syslog socket\n");
-		return -1;
+		close(startup_fd[1]);
+		return ret;
 	}
 
 	set_close_on_exec(state->syslog_fd);
@@ -142,11 +167,8 @@ int start_syslog_daemon(struct ctdb_context *ctdb)
 	if (bind(state->syslog_fd, (struct sockaddr *)&syslog_sin,
 		 sizeof(syslog_sin)) == -1)
 	{
-		if (errno == EADDRINUSE) {
-			/* this is ok, we already have a syslog daemon */
-			_exit(0);
-		}
 		printf("syslog daemon failed to bind to socket. errno:%d(%s)\n", errno, strerror(errno));
+		close(startup_fd[1]);
 		_exit(10);
 	}
 
@@ -155,6 +177,11 @@ int start_syslog_daemon(struct ctdb_context *ctdb)
 		     ctdb_syslog_handler, state);
 	tevent_fd_set_auto_close(fde);
 
+	/* Tell parent that we're up */
+	ret = 0;
+	write(startup_fd[1], &ret, sizeof(ret));
+	close(startup_fd[1]);
+
 	event_loop_wait(ctdb->ev);
 
 	/* this should not happen */
@@ -520,6 +547,10 @@ int ctdb_set_child_logging(struct ctdb_context *ctdb)
 	/* We'll fail if stderr/stdout not already open; it's simpler. */
 	old_stdout = dup(STDOUT_FILENO);
 	old_stderr = dup(STDERR_FILENO);
+	if (old_stdout < 0 || old_stderr < 0) {
+		DEBUG(DEBUG_ERR, ("Failed to dup stdout/stderr for child logging\n"));
+		return -1;
+	}
 	if (dup2(p[1], STDOUT_FILENO) < 0 || dup2(p[1], STDERR_FILENO) < 0) {
 		int saved_errno = errno;
 		dup2(old_stdout, STDOUT_FILENO);
diff --git a/server/ctdb_ltdb_server.c b/server/ctdb_ltdb_server.c
index 0426d96..57e0d68 100644
--- a/server/ctdb_ltdb_server.c
+++ b/server/ctdb_ltdb_server.c
@@ -18,7 +18,7 @@
 */
 
 #include "includes.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "system/network.h"
 #include "system/filesys.h"
 #include "system/dir.h"
@@ -706,7 +706,7 @@ int ctdb_set_db_readonly(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb
 	}
 
 	if (ctdb_db->persistent) {
-		DEBUG(DEBUG_ERR,("Trying to set persistent database with readonly property\n"));
+		DEBUG(DEBUG_ERR,("Persistent databases do not support readonly property\n"));
 		return -1;
 	}
 
@@ -728,6 +728,9 @@ int ctdb_set_db_readonly(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb
 	DEBUG(DEBUG_NOTICE,("OPENED tracking database : '%s'\n", ropath));
 
 	ctdb_db->readonly = true;
+
+	DEBUG(DEBUG_NOTICE, ("Readonly property set on DB %s\n", ctdb_db->db_name));
+
 	talloc_free(ropath);
 	return 0;
 }
@@ -993,8 +996,9 @@ again:
 	}
 
 
-	DEBUG(DEBUG_INFO,("Attached to database '%s'\n", ctdb_db->db_path));
-	
+	DEBUG(DEBUG_NOTICE,("Attached to database '%s' with flags 0x%x\n",
+			    ctdb_db->db_path, tdb_flags));
+
 	/* success */
 	return 0;
 }
@@ -1183,7 +1187,10 @@ static int ctdb_attach_persistent(struct ctdb_context *ctdb,
 		int invalid_name = 0;
 		
 		s = talloc_strdup(ctdb, de->d_name);
-		CTDB_NO_MEMORY(ctdb, s);
+		if (s == NULL) {
+			closedir(d);
+			CTDB_NO_MEMORY(ctdb, s);
+		}
 
 		/* only accept names ending in .tdb */
 		p = strstr(s, ".tdb.");
@@ -1495,55 +1502,3 @@ int ctdb_set_db_sticky(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb_d
 
 	return 0;
 }
-
-int32_t ctdb_control_get_db_statistics(struct ctdb_context *ctdb,
-				uint32_t db_id,
-				TDB_DATA *outdata)
-{
-	struct ctdb_db_context *ctdb_db;
-	struct ctdb_db_statistics_wire *stats;
-	int i;
-	int len;
-	char *ptr;
-
-	ctdb_db = find_ctdb_db(ctdb, db_id);
-	if (!ctdb_db) {
-		DEBUG(DEBUG_ERR,("Unknown db_id 0x%x in get_db_statistics\n", db_id));
-		return -1;
-	}
-
-	len = offsetof(struct ctdb_db_statistics_wire, hot_keys);
-	for (i = 0; i < MAX_HOT_KEYS; i++) {
-		len += 8 + ctdb_db->statistics.hot_keys[i].key.dsize;
-	}
-
-	stats = talloc_size(outdata, len);
-	if (stats == NULL) {
-		DEBUG(DEBUG_ERR,("Failed to allocate db statistics wire structure\n"));
-		return -1;
-	}
-
-	stats->db_ro_delegations = ctdb_db->statistics.db_ro_delegations;
-	stats->db_ro_revokes     = ctdb_db->statistics.db_ro_revokes;
-	for (i = 0; i < MAX_COUNT_BUCKETS; i++) {
-		stats->hop_count_bucket[i] = ctdb_db->statistics.hop_count_bucket[i];
-	}
-	stats->num_hot_keys = MAX_HOT_KEYS;
-
-	ptr = &stats->hot_keys[0];
-	for (i = 0; i < MAX_HOT_KEYS; i++) {
-		*(uint32_t *)ptr = ctdb_db->statistics.hot_keys[i].count;
-		ptr += 4;
-
-		*(uint32_t *)ptr = ctdb_db->statistics.hot_keys[i].key.dsize;
-		ptr += 4;
-
-		memcpy(ptr, ctdb_db->statistics.hot_keys[i].key.dptr, ctdb_db->statistics.hot_keys[i].key.dsize);
-		ptr += ctdb_db->statistics.hot_keys[i].key.dsize;
-	}
-
-	outdata->dptr  = (uint8_t *)stats;
-	outdata->dsize = len;
-
-	return 0;
-}
diff --git a/server/ctdb_monitor.c b/server/ctdb_monitor.c
index 1608804..63eb9df 100644
--- a/server/ctdb_monitor.c
+++ b/server/ctdb_monitor.c
@@ -90,6 +90,7 @@ void ctdb_run_notification_script(struct ctdb_context *ctdb, const char *event)
 	if (child == 0) {
 		int ret;
 
+		ctdb_set_process_name("ctdb_notification");
 		debug_extra = talloc_asprintf(NULL, "notification-%s:", event);
 		ret = ctdb_run_notification_script_child(ctdb, event);
 		if (ret != 0) {
@@ -294,16 +295,9 @@ static void ctdb_wait_until_recovered(struct event_context *ev, struct timed_eve
 		DEBUG(DEBUG_ALERT,(__location__
 				  "ctdb_recheck_persistent_health() failed (%llu times) - prepare shutdown\n",
 				  (unsigned long long)ctdb->db_persistent_check_errors));
-		ctdb_stop_recoverd(ctdb);
-		ctdb_stop_keepalive(ctdb);
-		ctdb_stop_monitoring(ctdb);
-		ctdb_release_all_ips(ctdb);
-		if (ctdb->methods != NULL) {
-			ctdb->methods->shutdown(ctdb);
-		}
-		ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN);
-		DEBUG(DEBUG_ALERT,("ctdb_recheck_persistent_health() failed - Stopping CTDB daemon\n"));
-		exit(11);
+		ctdb_shutdown_sequence(ctdb, 11);
+		/* In case above returns due to duplicate shutdown */
+		return;
 	}
 	ctdb->db_persistent_check_errors = 0;
 
@@ -498,7 +492,7 @@ int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
 
 	/* if we have become banned, we should go into recovery mode */
 	if ((node->flags & NODE_FLAGS_BANNED) && !(c->old_flags & NODE_FLAGS_BANNED) && (node->pnn == ctdb->pnn)) {
-		return ctdb_local_node_got_banned(ctdb);
+		ctdb_local_node_got_banned(ctdb);
 	}
 	
 	return 0;
diff --git a/server/ctdb_persistent.c b/server/ctdb_persistent.c
index 2b3da32..dcbb6e0 100644
--- a/server/ctdb_persistent.c
+++ b/server/ctdb_persistent.c
@@ -22,7 +22,7 @@
 #include "system/filesys.h"
 #include "system/wait.h"
 #include "db_wrap.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "../include/ctdb_private.h"
 
 struct ctdb_persistent_state {
diff --git a/server/ctdb_recover.c b/server/ctdb_recover.c
index d7741ab..0bec03e 100644
--- a/server/ctdb_recover.c
+++ b/server/ctdb_recover.c
@@ -18,7 +18,7 @@
    along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
 #include "includes.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "system/time.h"
 #include "system/network.h"
 #include "system/filesys.h"
@@ -669,6 +669,7 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
 		char cc = 0;
 		close(state->fd[0]);
 
+		ctdb_set_process_name("ctdb_recmode");
 		debug_extra = talloc_asprintf(NULL, "set_recmode:");
 		/* we should not be able to get the lock on the reclock file, 
 		  as it should  be held by the recovery master 
@@ -823,7 +824,9 @@ static int delete_tdb_record(struct ctdb_context *ctdb, struct ctdb_db_context *
 
 	if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
 		if (tdb_lock_nonblock(ctdb_db->ltdb->tdb, -1, F_WRLCK) == 0) {
-			tdb_delete(ctdb_db->ltdb->tdb, key);
+			if (tdb_delete(ctdb_db->ltdb->tdb, key) != 0) {
+				DEBUG(DEBUG_CRIT,(__location__ " Failed to delete corrupt record\n"));
+			}
 			tdb_unlock(ctdb_db->ltdb->tdb, -1, F_WRLCK);
 			DEBUG(DEBUG_CRIT,(__location__ " Deleted corrupt record\n"));
 		}
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index c3a1852..bf8d8cc 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -86,13 +86,13 @@ static void ctdb_ban_node(struct ctdb_recoverd *rec, uint32_t pnn, uint32_t ban_
 	struct ctdb_context *ctdb = rec->ctdb;
 	struct ctdb_ban_time bantime;
        
-	DEBUG(DEBUG_NOTICE,("Banning node %u for %u seconds\n", pnn, ban_time));
-
 	if (!ctdb_validate_pnn(ctdb, pnn)) {
 		DEBUG(DEBUG_ERR,("Bad pnn %u in ctdb_ban_node\n", pnn));
 		return;
 	}
 
+	DEBUG(DEBUG_NOTICE,("Banning node %u for %u seconds\n", pnn, ban_time));
+
 	bantime.pnn  = pnn;
 	bantime.time = ban_time;
 
@@ -120,6 +120,12 @@ static void ctdb_set_culprit_count(struct ctdb_recoverd *rec, uint32_t culprit,
 		return;
 	}
 
+	/* If we are banned or stopped, do not set other nodes as culprits */
+	if (rec->node_flags & NODE_FLAGS_INACTIVE) {
+		DEBUG(DEBUG_NOTICE, ("This node is INACTIVE, cannot set culprit node %d\n", culprit));
+		return;
+	}
+
 	if (ctdb->nodes[culprit]->ban_state == NULL) {
 		ctdb->nodes[culprit]->ban_state = talloc_zero(ctdb->nodes[culprit], struct ctdb_banning_state);
 		CTDB_NO_MEMORY_VOID(ctdb, ctdb->nodes[culprit]->ban_state);
@@ -1108,7 +1114,7 @@ static int update_local_flags(struct ctdb_recoverd *rec, struct ctdb_node_map *n
 			   Since we are the recovery master we can just as
 			   well update the flags on all nodes.
 			*/
-			ret = ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, nodemap->nodes[j].flags, ~nodemap->nodes[j].flags);
+			ret = ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, remote_nodemap->nodes[j].flags, ~remote_nodemap->nodes[j].flags);
 			if (ret != 0) {
 				DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
 				return -1;
@@ -1254,8 +1260,8 @@ static int traverse_recdb(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
 		params->recdata = talloc_realloc_size(NULL, params->recdata, params->allocated_len);
 	}
 	if (params->recdata == NULL) {
-		DEBUG(DEBUG_CRIT,(__location__ " Failed to expand recdata to %u (%u records)\n", 
-			 rec->length + params->len, params->recdata->count));
+		DEBUG(DEBUG_CRIT,(__location__ " Failed to expand recdata to %u\n",
+			 rec->length + params->len));
 		params->failed = true;
 		return -1;
 	}
@@ -1427,57 +1433,62 @@ static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
 	}
 
 	for (j=0; j<nodemap->num; j++) {
+		/* For readability */
+		struct ctdb_node *node = ctdb->nodes[j];
+
 		/* release any existing data */
-		if (ctdb->nodes[j]->known_public_ips) {
-			talloc_free(ctdb->nodes[j]->known_public_ips);
-			ctdb->nodes[j]->known_public_ips = NULL;
+		if (node->known_public_ips) {
+			talloc_free(node->known_public_ips);
+			node->known_public_ips = NULL;
 		}
-		if (ctdb->nodes[j]->available_public_ips) {
-			talloc_free(ctdb->nodes[j]->available_public_ips);
-			ctdb->nodes[j]->available_public_ips = NULL;
+		if (node->available_public_ips) {
+			talloc_free(node->available_public_ips);
+			node->available_public_ips = NULL;
 		}
 
 		if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
 			continue;
 		}
 
-		/* grab a new shiny list of public ips from the node */
+		/* Retrieve the list of known public IPs from the node */
 		ret = ctdb_ctrl_get_public_ips_flags(ctdb,
 					CONTROL_TIMEOUT(),
-					ctdb->nodes[j]->pnn,
+					node->pnn,
 					ctdb->nodes,
 					0,
-					&ctdb->nodes[j]->known_public_ips);
+					&node->known_public_ips);
 		if (ret != 0) {
-			DEBUG(DEBUG_ERR,("Failed to read known public ips from node : %u\n",
-				ctdb->nodes[j]->pnn));
+			DEBUG(DEBUG_ERR,
+			      ("Failed to read known public IPs from node: %u\n",
+			       node->pnn));
 			if (culprit) {
-				*culprit = ctdb->nodes[j]->pnn;
+				*culprit = node->pnn;
 			}
 			return -1;
 		}
 
-		if (ctdb->do_checkpublicip) {
-			if (rec->ip_check_disable_ctx == NULL) {
-				if (verify_remote_ip_allocation(ctdb, ctdb->nodes[j]->known_public_ips)) {
-					DEBUG(DEBUG_ERR,("Node %d has inconsistent public ip allocation and needs update.\n", ctdb->nodes[j]->pnn));
-					rec->need_takeover_run = true;
-				}
-			}
+		if (ctdb->do_checkpublicip &&
+		    (rec->ip_check_disable_ctx == NULL) &&
+		    verify_remote_ip_allocation(ctdb,
+						 node->known_public_ips,
+						 node->pnn)) {
+			DEBUG(DEBUG_ERR,("Trigger IP reallocation\n"));
+			rec->need_takeover_run = true;
 		}
 
-		/* grab a new shiny list of public ips from the node */
+		/* Retrieve the list of available public IPs from the node */
 		ret = ctdb_ctrl_get_public_ips_flags(ctdb,
 					CONTROL_TIMEOUT(),
-					ctdb->nodes[j]->pnn,
+					node->pnn,
 					ctdb->nodes,
 					CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
-					&ctdb->nodes[j]->available_public_ips);
+					&node->available_public_ips);
 		if (ret != 0) {
-			DEBUG(DEBUG_ERR,("Failed to read available public ips from node : %u\n",
-				ctdb->nodes[j]->pnn));
+			DEBUG(DEBUG_ERR,
+			      ("Failed to read available public IPs from node: %u\n",
+			       node->pnn));
 			if (culprit) {
-				*culprit = ctdb->nodes[j]->pnn;
+				*culprit = node->pnn;
 			}
 			return -1;
 		}
@@ -1527,12 +1538,46 @@ static int sync_recovery_lock_file_across_cluster(struct ctdb_recoverd *rec)
  */
 static void takeover_fail_callback(struct ctdb_context *ctdb, uint32_t node_pnn, int32_t res, TDB_DATA outdata, void *callback_data)
 {
-	struct ctdb_recoverd *rec = talloc_get_type(callback_data, struct ctdb_recoverd);
+	DEBUG(DEBUG_ERR, ("Node %u failed the takeover run\n", node_pnn));
 
-	DEBUG(DEBUG_ERR, (__location__ " Node %u failed the takeover run. Setting it as recovery fail culprit\n", node_pnn));
+	if (callback_data != NULL) {	/* with NULL callback_data the failure is only logged; no culprit accounting */
+		struct ctdb_recoverd *rec = talloc_get_type(callback_data, struct ctdb_recoverd);
 
-	ctdb_set_culprit(rec, node_pnn);
-	rec->need_takeover_run = true;
+		DEBUG(DEBUG_ERR, ("Setting node %u as recovery fail culprit\n", node_pnn));
+
+		ctdb_set_culprit(rec, node_pnn);
+		rec->need_takeover_run = true;
+	}
+}
+
+/* Ban every node whose ban_state->count has reached 2*ctdb->num_nodes, reset that count, and set *self_ban to true if this node's own pnn was one of them. */
+static void ban_misbehaving_nodes(struct ctdb_recoverd *rec, bool *self_ban)
+{
+	struct ctdb_context *ctdb = rec->ctdb;
+	int i;
+	struct ctdb_banning_state *ban_state;
+
+	*self_ban = false;	/* stays false unless the loop below bans this node itself */
+	for (i=0; i<ctdb->num_nodes; i++) {
+		if (ctdb->nodes[i]->ban_state == NULL) {	/* no banning state allocated for this node */
+			continue;
+		}
+		ban_state = (struct ctdb_banning_state *)ctdb->nodes[i]->ban_state;
+		if (ban_state->count < 2*ctdb->num_nodes) {	/* still below the banning threshold */
+			continue;
+		}
+
+		DEBUG(DEBUG_NOTICE,("Node %u reached %u banning credits - banning it for %u seconds\n",
+			ctdb->nodes[i]->pnn, ban_state->count,
+			ctdb->tunable.recovery_ban_period));
+		ctdb_ban_node(rec, ctdb->nodes[i]->pnn, ctdb->tunable.recovery_ban_period);
+		ban_state->count = 0;	/* counting starts afresh once the ban has been issued */
+
+		/* Banning ourself? */
+		if (ctdb->nodes[i]->pnn == rec->ctdb->pnn) {
+			*self_ban = true;
+		}
+	}
 }
 
 
@@ -1551,30 +1596,19 @@ static int do_recovery(struct ctdb_recoverd *rec,
 	uint32_t *nodes;
 	struct timeval start_time;
 	uint32_t culprit = (uint32_t)-1;
+	bool self_ban;
 
 	DEBUG(DEBUG_NOTICE, (__location__ " Starting do_recovery\n"));
 
 	/* if recovery fails, force it again */
 	rec->need_recovery = true;
 
-	for (i=0; i<ctdb->num_nodes; i++) {
-		struct ctdb_banning_state *ban_state;
-
-		if (ctdb->nodes[i]->ban_state == NULL) {
-			continue;
-		}
-		ban_state = (struct ctdb_banning_state *)ctdb->nodes[i]->ban_state;
-		if (ban_state->count < 2*ctdb->num_nodes) {
-			continue;
-		}
-		DEBUG(DEBUG_NOTICE,("Node %u has caused %u recoveries recently - banning it for %u seconds\n",
-			ctdb->nodes[i]->pnn, ban_state->count,
-			ctdb->tunable.recovery_ban_period));
-		ctdb_ban_node(rec, ctdb->nodes[i]->pnn, ctdb->tunable.recovery_ban_period);
-		ban_state->count = 0;
+	ban_misbehaving_nodes(rec, &self_ban);
+	if (self_ban) {
+		DEBUG(DEBUG_NOTICE, ("This node was banned, aborting recovery\n"));
+		return -1;
 	}
 
-
         if (ctdb->tunable.verify_recovery_lock != 0) {
 		DEBUG(DEBUG_ERR,("Taking out recovery lock from recovery daemon\n"));
 		start_time = timeval_current();
@@ -1814,9 +1848,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
 
 	DEBUG(DEBUG_NOTICE, (__location__ " Recovery - disabled recovery mode\n"));
 
-	/*
-	  tell nodes to takeover their public IPs
-	 */
+	/* Fetch known/available public IPs from each active node */
 	ret = ctdb_reload_remote_public_ips(ctdb, rec, nodemap, &culprit);
 	if (ret != 0) {
 		DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
@@ -1825,7 +1857,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
 		return -1;
 	}
 	rec->need_takeover_run = false;
-	ret = ctdb_takeover_run(ctdb, nodemap, NULL, NULL);
+	ret = ctdb_takeover_run(ctdb, nodemap, takeover_fail_callback, NULL);
 	if (ret != 0) {
 		DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. ctdb_takeover_run() failed.\n"));
 		rec->need_takeover_run = true;
@@ -1948,12 +1980,12 @@ static bool ctdb_election_win(struct ctdb_recoverd *rec, struct election_message
 	/* we cant win if we are banned */
 	if (rec->node_flags & NODE_FLAGS_BANNED) {
 		return false;
-	}	
+	}
 
 	/* we cant win if we are stopped */
 	if (rec->node_flags & NODE_FLAGS_STOPPED) {
 		return false;
-	}	
+	}
 
 	/* we will automatically win if the other node is banned */
 	if (em->node_flags & NODE_FLAGS_BANNED) {
@@ -2133,6 +2165,7 @@ static void getlog_handler(struct ctdb_context *ctdb, uint64_t srvid,
 	}
 
 	if (child == 0) {
+		ctdb_set_process_name("ctdb_rec_log_collector");
 		if (switch_from_server_to_client(ctdb, "recoverd-log-collector") != 0) {
 			DEBUG(DEBUG_CRIT, (__location__ "ERROR: failed to switch log collector child into client mode.\n"));
 			_exit(1);
@@ -2184,7 +2217,7 @@ static void ctdb_rebalance_timeout(struct event_context *ev, struct timed_event
 
 	DEBUG(DEBUG_NOTICE,("Rebalance all nodes that have had ip assignment changes.\n"));
 
-	ret = ctdb_takeover_run(ctdb, rec->nodemap, NULL, NULL);
+	ret = ctdb_takeover_run(ctdb, rec->nodemap, takeover_fail_callback, NULL);
 	if (ret != 0) {
 		DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. ctdb_takeover_run() failed.\n"));
 		rec->need_takeover_run = true;
@@ -2410,7 +2443,7 @@ static void process_ipreallocate_requests(struct ctdb_context *ctdb, struct ctdb
 		rec->need_takeover_run = true;
 	}
 	if (ret == 0) {
-		ret = ctdb_takeover_run(ctdb, rec->nodemap, NULL, NULL);
+		ret = ctdb_takeover_run(ctdb, rec->nodemap, takeover_fail_callback, NULL);
 		if (ret != 0) {
 			DEBUG(DEBUG_ERR,("Failed to reallocate addresses: ctdb_takeover_run() failed.\n"));
 			rec->need_takeover_run = true;
@@ -2586,8 +2619,8 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid,
 		return;
 	}
 
-	if (nodemap->nodes[i].flags != c->new_flags) {
-		DEBUG(DEBUG_NOTICE,("Node %u has changed flags - now 0x%x  was 0x%x\n", c->pnn, c->new_flags, nodemap->nodes[i].flags));
+	if (c->old_flags != c->new_flags) {
+		DEBUG(DEBUG_NOTICE,("Node %u has changed flags - now 0x%x  was 0x%x\n", c->pnn, c->new_flags, c->old_flags));
 	}
 
 	disabled_flag_changed =  (nodemap->nodes[i].flags ^ c->new_flags) & NODE_FLAGS_DISABLED;
@@ -2699,7 +2732,7 @@ static void verify_recmode_normal_callback(struct ctdb_client_control_state *sta
 	   status field
 	*/
 	if (state->status != CTDB_RECOVERY_NORMAL) {
-		DEBUG(DEBUG_NOTICE, (__location__ " Node:%u was in recovery mode. Restart recovery process\n", state->c->hdr.destnode));
+		DEBUG(DEBUG_NOTICE, ("Node:%u was in recovery mode. Start recovery process\n", state->c->hdr.destnode));
 		rmdata->status = MONITOR_RECOVERY_NEEDED;
 	}
 
@@ -3192,6 +3225,7 @@ static int check_recovery_lock(struct ctdb_context *ctdb)
 		close(state->fd[0]);
 		state->fd[0] = -1;
 
+		ctdb_set_process_name("ctdb_rec_reclock");
 		debug_extra = talloc_asprintf(NULL, "recovery-lock:");
 		if (pread(ctdb->recovery_lock_fd, &cc, 1, 0) == -1) {
 			DEBUG(DEBUG_CRIT,("failed read from recovery_lock_fd - %s\n", strerror(errno)));
@@ -3202,7 +3236,6 @@ static int check_recovery_lock(struct ctdb_context *ctdb)
 		/* make sure we die when our parent dies */
 		while (ctdb_kill(ctdb, parent, 0) == 0 || errno != ESRCH) {
 			sleep(5);
-			write(state->fd[1], &cc, 1);
 		}
 		_exit(0);
 	}
@@ -3315,7 +3348,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 	struct ctdb_vnn_map *remote_vnnmap=NULL;
 	int32_t debug_level;
 	int i, j, ret;
-
+	bool self_ban;
 
 
 	/* verify that the main daemon is still running */
@@ -3340,28 +3373,6 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 	}
 	LogLevel = debug_level;
 
-
-	/* We must check if we need to ban a node here but we want to do this
-	   as early as possible so we dont wait until we have pulled the node
-	   map from the local node. thats why we have the hardcoded value 20
-	*/
-	for (i=0; i<ctdb->num_nodes; i++) {
-		struct ctdb_banning_state *ban_state;
-
-		if (ctdb->nodes[i]->ban_state == NULL) {
-			continue;
-		}
-		ban_state = (struct ctdb_banning_state *)ctdb->nodes[i]->ban_state;
-		if (ban_state->count < 20) {
-			continue;
-		}
-		DEBUG(DEBUG_NOTICE,("Node %u has caused %u recoveries recently - banning it for %u seconds\n",
-			ctdb->nodes[i]->pnn, ban_state->count,
-			ctdb->tunable.recovery_ban_period));
-		ctdb_ban_node(rec, ctdb->nodes[i]->pnn, ctdb->tunable.recovery_ban_period);
-		ban_state->count = 0;
-	}
-
 	/* get relevant tunables */
 	ret = ctdb_ctrl_get_all_tunables(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &ctdb->tunable);
 	if (ret != 0) {
@@ -3385,11 +3396,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 		}
 	}
 
-	pnn = ctdb_ctrl_getpnn(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE);
-	if (pnn == (uint32_t)-1) {
-		DEBUG(DEBUG_ERR,("Failed to get local pnn - retrying\n"));
-		return;
-	}
+	pnn = ctdb_get_pnn(ctdb);
 
 	/* get the vnnmap */
 	ret = ctdb_ctrl_getvnnmap(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, &vnnmap);
@@ -3412,10 +3419,43 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 	}
 	nodemap = rec->nodemap;
 
-	/* update the capabilities for all nodes */
-	ret = update_capabilities(ctdb, nodemap);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR, (__location__ " Unable to update node capabilities.\n"));
+	/* remember our own node flags */
+	rec->node_flags = nodemap->nodes[pnn].flags;
+
+	ban_misbehaving_nodes(rec, &self_ban);
+	if (self_ban) {
+		DEBUG(DEBUG_NOTICE, ("This node was banned, restart main_loop\n"));
+		return;
+	}
+
+	/* if the local daemon is STOPPED or BANNED, we verify that the databases are
+	   also frozen and that the recmode is set to active.
+	*/
+	if (rec->node_flags & (NODE_FLAGS_STOPPED | NODE_FLAGS_BANNED)) {
+		ret = ctdb_ctrl_getrecmode(ctdb, mem_ctx, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &ctdb->recovery_mode);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " Failed to read recmode from local node\n"));
+		}
+		if (ctdb->recovery_mode == CTDB_RECOVERY_NORMAL) {
+			DEBUG(DEBUG_ERR,("Node is stopped or banned but recovery mode is not active. Activate recovery mode and lock databases\n"));
+
+			ret = ctdb_ctrl_freeze_priority(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, 1);
+			if (ret != 0) {
+				DEBUG(DEBUG_ERR,(__location__ " Failed to freeze node in STOPPED or BANNED state\n"));
+				return;
+			}
+			ret = ctdb_ctrl_setrecmode(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, CTDB_RECOVERY_ACTIVE);
+			if (ret != 0) {
+				DEBUG(DEBUG_ERR,(__location__ " Failed to activate recovery mode in STOPPED or BANNED state\n"));
+
+				return;
+			}
+		}
+
+		/* If this node is stopped or banned then it is not the recovery
+		 * master, so don't do anything. This prevents stopped or banned
+		 * node from starting election and sending unnecessary controls.
+		 */
 		return;
 	}
 
@@ -3435,50 +3475,27 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 		}
 	}
 
+	/* This is a special case.  When recovery daemon is started, recmaster
+	 * is set to -1.  If a node is not started in stopped state, then
+	 * start election to decide recovery master
+	 */
 	if (rec->recmaster == (uint32_t)-1) {
 		DEBUG(DEBUG_NOTICE,(__location__ " Initial recovery master set - forcing election\n"));
 		force_election(rec, pnn, nodemap);
 		return;
 	}
 
-	/* if the local daemon is STOPPED, we verify that the databases are
-	   also frozen and thet the recmode is set to active 
-	*/
-	if (nodemap->nodes[pnn].flags & NODE_FLAGS_STOPPED) {
-		ret = ctdb_ctrl_getrecmode(ctdb, mem_ctx, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &ctdb->recovery_mode);
-		if (ret != 0) {
-			DEBUG(DEBUG_ERR,(__location__ " Failed to read recmode from local node\n"));
-		}
-		if (ctdb->recovery_mode == CTDB_RECOVERY_NORMAL) {
-			DEBUG(DEBUG_ERR,("Node is stopped but recovery mode is not active. Activate recovery mode and lock databases\n"));
-
-			ret = ctdb_ctrl_freeze_priority(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, 1);
-			if (ret != 0) {
-				DEBUG(DEBUG_ERR,(__location__ " Failed to freeze node in STOPPED state\n"));
-				return;
-			}
-			ret = ctdb_ctrl_setrecmode(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, CTDB_RECOVERY_ACTIVE);
-			if (ret != 0) {
-				DEBUG(DEBUG_ERR,(__location__ " Failed to activate recovery mode in STOPPED state\n"));
-
-				return;
-			}
-			return;
-		}
-	}
-	/* If the local node is stopped, verify we are not the recmaster 
-	   and yield this role if so
-	*/
-	if ((nodemap->nodes[pnn].flags & NODE_FLAGS_INACTIVE) && (rec->recmaster == pnn)) {
-		DEBUG(DEBUG_ERR,("Local node is INACTIVE. Yielding recmaster role\n"));
-		force_election(rec, pnn, nodemap);
+	/* update the capabilities for all nodes */
+	ret = update_capabilities(ctdb, nodemap);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Unable to update node capabilities.\n"));
 		return;
 	}
-	
+
 	/*
-	 * if the current recmaster do not have CTDB_CAP_RECMASTER,
-	 * but we have force an election and try to become the new
-	 * recmaster
+	 * If the current recmaster does not have CTDB_CAP_RECMASTER,
+	 * but we have, then force an election and try to become the new
+	 * recmaster.
 	 */
 	if ((rec->ctdb->nodes[rec->recmaster]->capabilities & CTDB_CAP_RECMASTER) == 0 &&
 	    (rec->ctdb->capabilities & CTDB_CAP_RECMASTER) &&
@@ -3490,13 +3507,6 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 		return;
 	}
 
-	/* check that we (recovery daemon) and the local ctdb daemon
-	   agrees on whether we are banned or not
-	*/
-
-	/* remember our own node flags */
-	rec->node_flags = nodemap->nodes[pnn].flags;
-
 	/* count how many active nodes there are */
 	rec->num_active    = 0;
 	rec->num_connected = 0;
@@ -3543,19 +3553,16 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 	if ((recmaster_nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) &&
 	    (rec->node_flags & NODE_FLAGS_INACTIVE) == 0) {
 		DEBUG(DEBUG_NOTICE, ("Recmaster node %u no longer available. Force reelection\n", nodemap->nodes[j].pnn));
+		/*
+		 * update our nodemap to carry the recmaster's notion of
+		 * its own flags, so that we don't keep freezing the
+		 * inactive recmaster node...
+		 */
+		nodemap->nodes[j].flags = recmaster_nodemap->nodes[j].flags;
 		force_election(rec, pnn, nodemap);
 		return;
 	}
 
-	/* If this node is stopped then it is not the recovery master
-	 * so the only remaining action is to potentially to verify
-	 * the local IP allocation below.  This won't accomplish
-	 * anything useful so skip it.
-	 */
-	if (rec->node_flags & NODE_FLAGS_STOPPED) {
-		return;
-	}
-
 	/* verify that we have all ip addresses we should have and we dont
 	 * have addresses we shouldnt have.
 	 */ 
@@ -4061,6 +4068,7 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb)
 	/* Clear the log ringbuffer */
 	ctdb_clear_log(ctdb);
 
+	ctdb_set_process_name("ctdb_recovered");
 	if (switch_from_server_to_client(ctdb, "recoverd") != 0) {
 		DEBUG(DEBUG_CRIT, (__location__ "ERROR: failed to switch recovery daemon into client mode. shutting down.\n"));
 		exit(1);
diff --git a/server/ctdb_server.c b/server/ctdb_server.c
index de3c690..d0c3461 100644
--- a/server/ctdb_server.c
+++ b/server/ctdb_server.c
@@ -18,7 +18,7 @@
 */
 
 #include "includes.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "lib/util/dlinklist.h"
 #include "system/network.h"
 #include "system/filesys.h"
diff --git a/server/ctdb_takeover.c b/server/ctdb_takeover.c
index fda7c56..be49b3f 100644
--- a/server/ctdb_takeover.c
+++ b/server/ctdb_takeover.c
@@ -19,7 +19,7 @@
    along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
 #include "includes.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "lib/util/dlinklist.h"
 #include "system/network.h"
 #include "system/filesys.h"
@@ -881,6 +881,14 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status,
 		ctdb_ban_self(ctdb);
 	}
 
+	if (ctdb->do_checkpublicip && ctdb_sys_have_ip(state->addr)) {
+		DEBUG(DEBUG_ERR, ("IP %s still hosted during release IP callback, failing\n",
+				  ctdb_addr_to_str(state->addr)));
+		ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
+		talloc_free(state);
+		return;
+	}
+
 	/* send a message to all clients of this node telling them
 	   that the cluster has been reconfigured and they should
 	   release any sockets on this IP */
@@ -977,6 +985,21 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
 			DEBUG(DEBUG_ERR, ("Could not find which interface the ip address is hosted on. can not release it\n"));
 			return 0;
 		}
+		if (vnn->iface == NULL) {
+			DEBUG(DEBUG_WARNING,
+			      ("Public IP %s is hosted on interface %s but we have no VNN\n",
+			       ctdb_addr_to_str(&pip->addr),
+			       iface));
+		} else if (strcmp(iface, ctdb_vnn_iface_string(vnn)) != 0) {
+			DEBUG(DEBUG_WARNING,
+			      ("Public IP %s is hosted on inteterface %s but VNN says %s\n",
+			       ctdb_addr_to_str(&pip->addr),
+			       iface,
+			       ctdb_vnn_iface_string(vnn)));
+			/* Should we fix vnn->iface?  If we do, what
+			 * happens to reference counts?
+			 */
+		}
 	} else {
 		iface = strdup(ctdb_vnn_iface_string(vnn));
 	}
@@ -1899,6 +1922,7 @@ static bool lcp2_failback_candidate(struct ctdb_context *ctdb,
 	struct ctdb_public_ip_list *tmp_ip;
 
 	/* Find an IP and destination node that best reduces imbalance. */
+	srcimbl = 0;
 	minip = NULL;
 	minsrcimbl = 0;
 	mindstnode = -1;
@@ -3187,6 +3211,7 @@ void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
 void ctdb_release_all_ips(struct ctdb_context *ctdb)
 {
 	struct ctdb_vnn *vnn;
+	int count = 0;
 
 	for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
 		if (!ctdb_sys_have_ip(&vnn->public_address)) {
@@ -3196,13 +3221,22 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb)
 		if (!vnn->iface) {
 			continue;
 		}
+
+		DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
+				    ctdb_addr_to_str(&vnn->public_address),
+				    vnn->public_netmask_bits,
+				    ctdb_vnn_iface_string(vnn)));
+
 		ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
 				  ctdb_vnn_iface_string(vnn),
 				  ctdb_addr_to_str(&vnn->public_address),
 				  vnn->public_netmask_bits);
 		release_kill_clients(ctdb, &vnn->public_address);
 		ctdb_vnn_unassign_iface(ctdb, vnn);
+		count++;
 	}
+
+	DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
 }
 
 
@@ -4233,7 +4267,9 @@ int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
    node has the expected ip allocation.
    This is verified against ctdb->ip_tree
 */
-int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
+int verify_remote_ip_allocation(struct ctdb_context *ctdb,
+				struct ctdb_all_public_ips *ips,
+				uint32_t pnn)
 {
 	struct ctdb_public_ip_list *tmp_ip; 
 	int i;
@@ -4251,7 +4287,7 @@ int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_publi
 	for (i=0; i<ips->num; i++) {
 		tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
 		if (tmp_ip == NULL) {
-			DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
+			DEBUG(DEBUG_ERR,("Node %u has new or unknown public IP %s\n", pnn, ctdb_addr_to_str(&ips->ips[i].addr)));
 			return -1;
 		}
 
@@ -4260,7 +4296,11 @@ int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_publi
 		}
 
 		if (tmp_ip->pnn != ips->ips[i].pnn) {
-			DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
+			DEBUG(DEBUG_ERR,
+			      ("Inconsistent IP allocation - node %u thinks %s is held by node %u while it is assigned to node %u\n",
+			       pnn,
+			       ctdb_addr_to_str(&ips->ips[i].addr),
+			       ips->ips[i].pnn, tmp_ip->pnn));
 			return -1;
 		}
 	}
@@ -4346,6 +4386,8 @@ static int ctdb_reloadips_child(struct ctdb_context *ctdb)
 	struct ctdb_vnn *vnn;
 	int i, ret;
 
+	CTDB_NO_MEMORY(ctdb, mem_ctx);
+
 	/* read the ip allocation from the local node */
 	ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, mem_ctx, &ips);
 	if (ret != 0) {
@@ -4360,7 +4402,7 @@ static int ctdb_reloadips_child(struct ctdb_context *ctdb)
 		DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
 		talloc_free(mem_ctx);
 		return -1;
-	}		
+	}
 
 
 	/* check the previous list of ips and scan for ips that have been
@@ -4384,6 +4426,7 @@ static int ctdb_reloadips_child(struct ctdb_context *ctdb)
 
 			ret = ctdb_ctrl_del_public_ip(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, &pub);
 			if (ret != 0) {
 				DEBUG(DEBUG_ERR, ("RELOADIPS: Unable to del public ip:%s from local node\n", ctdb_addr_to_str(&ips->ips[i].addr)));
+				talloc_free(mem_ctx);
 				return -1;
 			}
@@ -4399,15 +4442,15 @@ static int ctdb_reloadips_child(struct ctdb_context *ctdb)
 			}
 		}
 		if (i == ips->num) {
-			struct ctdb_control_ip_iface pub;
+			struct ctdb_control_ip_iface *pub;
 			const char *ifaces = NULL;
 			int iface = 0;
 
 			DEBUG(DEBUG_NOTICE,("RELOADIPS: New ip:%s found, adding it.\n", ctdb_addr_to_str(&vnn->public_address)));
 
-			pub.addr  = vnn->public_address;
-			pub.mask  = vnn->public_netmask_bits;
-
+			pub = talloc_zero(mem_ctx, struct ctdb_control_ip_iface);
+			pub->addr  = vnn->public_address;
+			pub->mask  = vnn->public_netmask_bits;
 
 			ifaces = vnn->ifaces[0];
 			iface = 1;
@@ -4415,17 +4458,27 @@ static int ctdb_reloadips_child(struct ctdb_context *ctdb)
 				ifaces = talloc_asprintf(vnn, "%s,%s", ifaces, vnn->ifaces[iface]);
 				iface++;
 			}
-			pub.len   = strlen(ifaces)+1;
-			memcpy(&pub.iface[0], ifaces, strlen(ifaces)+1);
+			pub->len   = strlen(ifaces)+1;
+			pub = talloc_realloc_size(mem_ctx, pub,
+				offsetof(struct ctdb_control_ip_iface, iface) + pub->len);
+			if (pub == NULL) {
+				DEBUG(DEBUG_ERR, (__location__ " Failed to allocate memory\n"));
+				talloc_free(mem_ctx);
+				return -1;
+			}
+			memcpy(&pub->iface[0], ifaces, pub->len);
 
-			ret = ctdb_ctrl_add_public_ip(ctdb, TAKEOVER_TIMEOUT(), CTDB_CURRENT_NODE, &pub);
+			ret = ctdb_ctrl_add_public_ip(ctdb, TAKEOVER_TIMEOUT(),
+						      CTDB_CURRENT_NODE, pub);
 			if (ret != 0) {
 				DEBUG(DEBUG_ERR, ("RELOADIPS: Unable to add public ip:%s to local node\n", ctdb_addr_to_str(&vnn->public_address)));
+				talloc_free(mem_ctx);
 				return -1;
 			}
 		}
 	}
 
+	talloc_free(mem_ctx);
 	return 0;
 }
 
@@ -4471,6 +4524,7 @@ int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_re
 		close(h->fd[0]);
 		debug_extra = talloc_asprintf(NULL, "reloadips:");
 
+		ctdb_set_process_name("ctdb_reloadips");
 		if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
 			DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
 			res = -1;
diff --git a/server/ctdb_traverse.c b/server/ctdb_traverse.c
index d9aed2b..ed6e8e4 100644
--- a/server/ctdb_traverse.c
+++ b/server/ctdb_traverse.c
@@ -21,7 +21,7 @@
 #include "system/filesys.h"
 #include "system/wait.h"
 #include "db_wrap.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "../include/ctdb_private.h"
 #include "lib/util/dlinklist.h"
 
@@ -38,38 +38,31 @@ struct ctdb_traverse_local_handle {
 	pid_t child;
 	uint64_t srvid;
 	uint32_t client_reqid;
+	uint32_t reqid;
+	int srcnode;
 	void *private_data;
 	ctdb_traverse_fn_t callback;
-	struct timeval start_time;
-	struct ctdb_queue *queue;
 	bool withemptyrecords;
+	struct tevent_fd *fde;
 };
 
 /*
-  called when data is available from the child
+ * called when traverse is completed by child or on error
  */
-static void ctdb_traverse_local_handler(uint8_t *rawdata, size_t length, void *private_data)
+static void ctdb_traverse_child_handler(struct tevent_context *ev, struct tevent_fd *fde,
+					uint16_t flags, void *private_data)
 {
-	struct ctdb_traverse_local_handle *h = talloc_get_type(private_data, 
-							       struct ctdb_traverse_local_handle);
-	TDB_DATA key, data;
+	struct ctdb_traverse_local_handle *h = talloc_get_type(private_data,
+							struct ctdb_traverse_local_handle);
 	ctdb_traverse_fn_t callback = h->callback;
 	void *p = h->private_data;
-	struct ctdb_rec_data *tdata = (struct ctdb_rec_data *)rawdata;
-
-	if (rawdata == NULL || length < 4 || length != tdata->length) {
-		/* end of traverse */
-		talloc_free(h);
-		callback(p, tdb_null, tdb_null);
-		return;
-	}
-
-	key.dsize = tdata->keylen;
-	key.dptr  = &tdata->data[0];
-	data.dsize = tdata->datalen;
-	data.dptr = &tdata->data[tdata->keylen];
+	char res;
 
-	callback(p, key, data);	
+	/* FIXME: There is no way to distinguish between failed traverse and
+	 * successful traverse.  The only way to signal the end is by sending
+	 * tdb_null for key and data. */
+	read(h->fd[0], &res, 1);
+	callback(p, tdb_null, tdb_null);
 }
 
 /*
@@ -87,10 +80,12 @@ static int traverse_local_destructor(struct ctdb_traverse_local_handle *h)
  */
 static int ctdb_traverse_local_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
 {
-	struct ctdb_traverse_local_handle *h = talloc_get_type(p, 
+	struct ctdb_traverse_local_handle *h = talloc_get_type(p,
 							       struct ctdb_traverse_local_handle);
 	struct ctdb_rec_data *d;
 	struct ctdb_ltdb_header *hdr;
+	int res, status;
+	TDB_DATA outdata;
 
 	hdr = (struct ctdb_ltdb_header *)data.dptr;
 
@@ -108,15 +103,21 @@ static int ctdb_traverse_local_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DAT
 		}
 	}
 
-	d = ctdb_marshall_record(h, 0, key, NULL, data);
+	d = ctdb_marshall_record(h, h->reqid, key, NULL, data);
 	if (d == NULL) {
 		/* error handling is tricky in this child code .... */
 		return -1;
 	}
 
-	if (write(h->fd[1], (uint8_t *)d, d->length) != d->length) {
+	outdata.dptr = (uint8_t *)d;
+	outdata.dsize = d->length;
+
+	res = ctdb_control(h->ctdb_db->ctdb, h->srcnode, 0, CTDB_CONTROL_TRAVERSE_DATA,
+			   CTDB_CTRL_FLAG_NOREPLY, outdata, NULL, NULL, &status, NULL, NULL);
+	if (res != 0 || status != 0) {
 		return -1;
 	}
+
 	return 0;
 }
 
@@ -169,15 +170,34 @@ static struct ctdb_traverse_local_handle *ctdb_traverse_local(struct ctdb_db_con
 	h->private_data = all_state;
 	h->ctdb_db = ctdb_db;
 	h->client_reqid = all_state->client_reqid;
+	h->reqid = all_state->reqid;
 	h->srvid = all_state->srvid;
+	h->srcnode = all_state->srcnode;
 	h->withemptyrecords = all_state->withemptyrecords;
 
 	if (h->child == 0) {
 		/* start the traverse in the child */
+		char res = 0;
+		pid_t parent = getpid();
+
 		close(h->fd[0]);
-		debug_extra = talloc_asprintf(NULL, "traverse_local-%s:",
-					      ctdb_db->db_name);
-		tdb_traverse_read(ctdb_db->ltdb->tdb, ctdb_traverse_local_fn, h);
+
+		ctdb_set_process_name("ctdb_traverse");
+		if (switch_from_server_to_client(ctdb_db->ctdb,
+						 "traverse_local-%s:",
+						 ctdb_db->db_name) != 0) {
+			DEBUG(DEBUG_CRIT, ("Failed to switch traverse child into client mode\n"));
+			res = -1;
+		}
+
+		if (tdb_traverse_read(ctdb_db->ltdb->tdb, ctdb_traverse_local_fn, h) != 0) {
+			res = -1;
+		}
+		write(h->fd[1], &res, 1);
+
+		while (ctdb_kill(ctdb_db->ctdb, parent, 0) == 0 || errno != ESRCH) {
+			sleep(5);
+		}
 		_exit(0);
 	}
 
@@ -188,20 +208,14 @@ static struct ctdb_traverse_local_handle *ctdb_traverse_local(struct ctdb_db_con
 
 	DLIST_ADD(ctdb_db->traverse, h);
 
-	/*
-	  setup a packet queue between the child and the parent. This
-	  copes with all the async and packet boundary issues
-	 */
-	DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d to child traverse\n", h->fd[0]));
-
-	h->queue = ctdb_queue_setup(ctdb_db->ctdb, h, h->fd[0], 0, ctdb_traverse_local_handler, h,
-				    "to-ctdbd");
-	if (h->queue == NULL) {
+	h->fde = tevent_add_fd(ctdb_db->ctdb->ev, h, h->fd[0], EVENT_FD_READ,
+			       ctdb_traverse_child_handler, h);
+	if (h->fde == NULL) {
+		close(h->fd[0]);
 		talloc_free(h);
 		return NULL;
 	}
-
-	h->start_time = timeval_current();
+	tevent_fd_set_auto_close(h->fde);
 
 	return h;
 }
diff --git a/server/ctdb_tunables.c b/server/ctdb_tunables.c
index 365c6ba..7d06d83 100644
--- a/server/ctdb_tunables.c
+++ b/server/ctdb_tunables.c
@@ -85,7 +85,6 @@ static const struct {
 	{ "DBSizeWarn",        100000000,  offsetof(struct ctdb_tunable, db_size_warn), false },
 	{ "PullDBPreallocation", 10*1024*1024,  offsetof(struct ctdb_tunable, pulldb_preallocation_size), false },
 	{ "NoIPHostOnAllDisabled",    0,  offsetof(struct ctdb_tunable, no_ip_host_on_all_disabled), false },
-	{ "DeadlockTimeout",	300, offsetof(struct ctdb_tunable, deadlock_timeout), false },
 	{ "Samba3AvoidDeadlocks", 0, offsetof(struct ctdb_tunable, samba3_hack), false },
 };
 
diff --git a/server/ctdb_update_record.c b/server/ctdb_update_record.c
index 1543b46..7bfa08a 100644
--- a/server/ctdb_update_record.c
+++ b/server/ctdb_update_record.c
@@ -20,7 +20,7 @@
 
 #include "includes.h"
 #include "db_wrap.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "ctdb_private.h"
 
 struct ctdb_persistent_write_state {
@@ -250,6 +250,7 @@ static struct childwrite_handle *ctdb_childwrite(
 		char c = 0;
 
 		close(result->fd[0]);
+		ctdb_set_process_name("ctdb_write_persistent");
 		debug_extra = talloc_asprintf(NULL, "childwrite-%s:", ctdb_db->db_name);
 		ret = ctdb_persistent_store(state);
 		if (ret != 0) {
diff --git a/server/ctdb_vacuum.c b/server/ctdb_vacuum.c
index d7527d4..d07afd4 100644
--- a/server/ctdb_vacuum.c
+++ b/server/ctdb_vacuum.c
@@ -20,7 +20,7 @@
 */
 
 #include "includes.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "system/network.h"
 #include "system/filesys.h"
 #include "system/dir.h"
@@ -1627,7 +1627,7 @@ ctdb_vacuum_event(struct event_context *ev, struct timed_event *te,
 		close(child_ctx->fd[0]);
 
 		DEBUG(DEBUG_INFO,("Vacuuming child process %d for db %s started\n", getpid(), ctdb_db->db_name));
-	
+		ctdb_set_process_name("ctdb_vacuum");
 		if (switch_from_server_to_client(ctdb, "vacuum-%s", ctdb_db->db_name) != 0) {
 			DEBUG(DEBUG_CRIT, (__location__ "ERROR: failed to switch vacuum daemon into client mode. Shutting down.\n"));
 			_exit(1);
diff --git a/server/eventscript.c b/server/eventscript.c
index 5c448c7..10d426f 100644
--- a/server/eventscript.c
+++ b/server/eventscript.c
@@ -34,8 +34,15 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
  */
 static void sigterm(int sig)
 {
+	pid_t pid;
+
 	/* all the child processes will be running in the same process group */
-	kill(-getpgrp(), SIGKILL);
+	pid = getpgrp();
+	if (pid == -1) {
+		kill(-getpid(), SIGKILL);
+	} else {
+		kill(-pid, SIGKILL);
+	}
 	_exit(1);
 }
 
@@ -211,6 +218,7 @@ static struct ctdb_scripts_wire *ctdb_get_script_list(struct ctdb_context *ctdb,
 		tree_item = talloc(tree, struct ctdb_script_tree_item);
 		if (tree_item == NULL) {
 			DEBUG(DEBUG_ERR, (__location__ " Failed to allocate new tree item\n"));
+			closedir(dir);
 			talloc_free(tmp_ctx);
 			return NULL;
 		}
@@ -223,6 +231,7 @@ static struct ctdb_scripts_wire *ctdb_get_script_list(struct ctdb_context *ctdb,
 		tree_item->name = talloc_strdup(tree_item, de->d_name);
 		if (tree_item->name == NULL) {
 			DEBUG(DEBUG_ERR,(__location__ " Failed to allocate script name.\n"));
+			closedir(dir);
 			talloc_free(tmp_ctx);
 			return NULL;
 		}
@@ -400,6 +409,7 @@ static int fork_child_for_script(struct ctdb_context *ctdb,
 					      ctdb_eventscript_call_names[state->call]);
 		close(state->fd[0]);
 		set_close_on_exec(state->fd[1]);
+		ctdb_set_process_name("ctdb_eventscript");
 
 		rt = child_run_script(ctdb, state->from_user, state->call, state->options, current);
 		/* We must be able to write PIPEBUF bytes at least; if this
@@ -533,6 +543,7 @@ static void ctdb_run_debug_hung_script(struct ctdb_context *ctdb, struct ctdb_ev
 	if (pid == 0) {
 		char *buf;
 
+		ctdb_set_process_name("ctdb_debug_hung_script");
 		if (getenv("CTDB_DEBUG_HUNG_SCRIPT") != NULL) {
 			debug_hung_script = getenv("CTDB_DEBUG_HUNG_SCRIPT");
 		}
@@ -871,10 +882,10 @@ int ctdb_event_script_args(struct ctdb_context *ctdb, enum ctdb_eventscript_call
 	va_start(ap, fmt);
 	ret = ctdb_event_script_callback_v(ctdb, ctdb,
 			event_script_callback, &status, false, call, fmt, ap);
+	va_end(ap);
 	if (ret != 0) {
 		return ret;
 	}
-	va_end(ap);
 
 	status.status = -1;
 	status.done = false;
@@ -886,7 +897,11 @@ int ctdb_event_script_args(struct ctdb_context *ctdb, enum ctdb_eventscript_call
 				  " Immediately banning ourself for %d seconds\n",
 				  ctdb_eventscript_call_names[call],
 				  ctdb->tunable.recovery_ban_period));
-		ctdb_ban_self(ctdb);
+
+		/* Don't ban self if CTDB is starting up or shutting down */
+		if (call != CTDB_EVENT_INIT && call != CTDB_EVENT_SHUTDOWN) {
+			ctdb_ban_self(ctdb);
+		}
 	}
 
 	return status.status;
diff --git a/tcp/tcp_connect.c b/tcp/tcp_connect.c
index 93111f3..383d726 100644
--- a/tcp/tcp_connect.c
+++ b/tcp/tcp_connect.c
@@ -19,7 +19,7 @@
 */
 
 #include "includes.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "system/network.h"
 #include "system/filesys.h"
 #include "../include/ctdb_private.h"
@@ -154,6 +154,10 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te,
 	}
 
 	tnode->fd = socket(sock_out.sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
+	if (tnode->fd == -1) {
+		DEBUG(DEBUG_ERR, (__location__ " Failed to create socket\n"));
+		return;
+	}
 	set_nonblocking(tnode->fd);
 	set_close_on_exec(tnode->fd);
 
@@ -196,7 +200,12 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te,
 	sock_in.ip.sin_len = sockin_size;
 	sock_out.ip.sin_len = sockout_size;
 #endif
-	bind(tnode->fd, (struct sockaddr *)&sock_in, sockin_size);
+	if (bind(tnode->fd, (struct sockaddr *)&sock_in, sockin_size) == -1) {
+		DEBUG(DEBUG_ERR, (__location__ " Failed to bind socket %s(%d)\n",
+				  strerror(errno), errno));
+		close(tnode->fd);
+		return;
+	}
 
 	if (connect(tnode->fd, (struct sockaddr *)&sock_out, sockout_size) != 0 &&
 	    errno != EINPROGRESS) {
diff --git a/tcp/tcp_init.c b/tcp/tcp_init.c
index 3fec599..a65e732 100644
--- a/tcp/tcp_init.c
+++ b/tcp/tcp_init.c
@@ -18,7 +18,7 @@
 */
 
 #include "includes.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "system/network.h"
 #include "system/filesys.h"
 #include "../include/ctdb_private.h"
diff --git a/tcp/tcp_io.c b/tcp/tcp_io.c
index f8904af..5111195 100644
--- a/tcp/tcp_io.c
+++ b/tcp/tcp_io.c
@@ -19,7 +19,7 @@
 
 #include "includes.h"
 #include "lib/util/dlinklist.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "system/network.h"
 #include "system/filesys.h"
 #include "../include/ctdb_private.h"
diff --git a/tests/eventscripts/00.ctdb.init.001.sh b/tests/eventscripts/00.ctdb.init.001.sh
new file mode 100755
index 0000000..320025a
--- /dev/null
+++ b/tests/eventscripts/00.ctdb.init.001.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "TDB check, tdbtool supports check"
+
+setup_ctdb
+
+FAKE_TDBTOOL_SUPPORTS_CHECK="yes"
+
+ok_null
+
+simple_test
diff --git a/tests/eventscripts/00.ctdb.init.002.sh b/tests/eventscripts/00.ctdb.init.002.sh
new file mode 100755
index 0000000..2777cc5
--- /dev/null
+++ b/tests/eventscripts/00.ctdb.init.002.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "TDB check, tdbtool does not support check"
+
+setup_ctdb
+
+FAKE_TDBTOOL_SUPPORTS_CHECK="no"
+
+ok <<EOF
+WARNING: The installed 'tdbtool' does not offer the 'check' subcommand.
+ Using 'tdbdump' for database checks.
+ Consider updating 'tdbtool' for better checks!
+EOF
+
+simple_test
diff --git a/tests/eventscripts/00.ctdb.init.003.sh b/tests/eventscripts/00.ctdb.init.003.sh
new file mode 100755
index 0000000..2770210
--- /dev/null
+++ b/tests/eventscripts/00.ctdb.init.003.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "TDB check, tdbtool supports check, good TDB"
+
+setup_ctdb
+
+FAKE_TDBTOOL_SUPPORTS_CHECK="yes"
+
+touch "${CTDB_DBDIR}/foo.tdb.0"
+FAKE_TDB_IS_OK="yes"
+
+ok_null
+
+simple_test
diff --git a/tests/eventscripts/00.ctdb.init.004.sh b/tests/eventscripts/00.ctdb.init.004.sh
new file mode 100755
index 0000000..b504d08
--- /dev/null
+++ b/tests/eventscripts/00.ctdb.init.004.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "TDB check, tdbtool supports check, bad TDB"
+
+setup_ctdb
+
+FAKE_TDBTOOL_SUPPORTS_CHECK="yes"
+
+db="${CTDB_DBDIR}/foo.tdb.0"
+touch "$db"
+FAKE_TDB_IS_OK="no"
+
+FAKE_DATE_OUTPUT="19690818.103000.000000001"
+
+ok <<EOF
+WARNING: database ${db} is corrupted.
+ Moving to backup ${db}.${FAKE_DATE_OUTPUT}.corrupt for later analysis.
+EOF
+
+simple_test
diff --git a/tests/eventscripts/00.ctdb.init.005.sh b/tests/eventscripts/00.ctdb.init.005.sh
new file mode 100755
index 0000000..d11ab94
--- /dev/null
+++ b/tests/eventscripts/00.ctdb.init.005.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "TDB check, tdbtool does not support check, good TDB"
+
+setup_ctdb
+
+FAKE_TDBTOOL_SUPPORTS_CHECK="no"
+
+touch "${CTDB_DBDIR}/foo.tdb.0"
+FAKE_TDB_IS_OK="yes"
+
+ok <<EOF
+WARNING: The installed 'tdbtool' does not offer the 'check' subcommand.
+ Using 'tdbdump' for database checks.
+ Consider updating 'tdbtool' for better checks!
+EOF
+
+simple_test
diff --git a/tests/eventscripts/00.ctdb.init.006.sh b/tests/eventscripts/00.ctdb.init.006.sh
new file mode 100755
index 0000000..745bca0
--- /dev/null
+++ b/tests/eventscripts/00.ctdb.init.006.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "TDB check, tdbtool does not support check, bad TDB"
+
+setup_ctdb
+
+FAKE_TDBTOOL_SUPPORTS_CHECK="no"
+
+db="${CTDB_DBDIR}/foo.tdb.0"
+touch "$db"
+FAKE_TDB_IS_OK="no"
+
+FAKE_DATE_OUTPUT="19690818.103000.000000001"
+
+ok <<EOF
+WARNING: The installed 'tdbtool' does not offer the 'check' subcommand.
+ Using 'tdbdump' for database checks.
+ Consider updating 'tdbtool' for better checks!
+WARNING: database ${db} is corrupted.
+ Moving to backup ${db}.${FAKE_DATE_OUTPUT}.corrupt for later analysis.
+EOF
+
+simple_test
diff --git a/tests/eventscripts/00.ctdb.init.007.sh b/tests/eventscripts/00.ctdb.init.007.sh
new file mode 100755
index 0000000..1c954d7
--- /dev/null
+++ b/tests/eventscripts/00.ctdb.init.007.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "TDB check, tdbtool supports check, good persistent TDB"
+
+setup_ctdb
+
+FAKE_TDBTOOL_SUPPORTS_CHECK="yes"
+
+touch "${CTDB_DBDIR}/persistent/foo.tdb.0"
+FAKE_TDB_IS_OK="yes"
+
+ok_null
+
+simple_test
diff --git a/tests/eventscripts/00.ctdb.init.008.sh b/tests/eventscripts/00.ctdb.init.008.sh
new file mode 100755
index 0000000..a6afdd8
--- /dev/null
+++ b/tests/eventscripts/00.ctdb.init.008.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "TDB check, tdbtool supports check, bad persistent TDB"
+
+setup_ctdb
+
+FAKE_TDBTOOL_SUPPORTS_CHECK="yes"
+
+db="${CTDB_DBDIR}/persistent/foo.tdb.0"
+touch "$db"
+FAKE_TDB_IS_OK="no"
+
+required_result 1 <<EOF
+Persistent database ${db} is corrupted! CTDB will not start.
+EOF
+
+simple_test
diff --git a/tests/eventscripts/00.ctdb.init.021.sh b/tests/eventscripts/00.ctdb.init.021.sh
new file mode 100755
index 0000000..87dfa4d
--- /dev/null
+++ b/tests/eventscripts/00.ctdb.init.021.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Check public IP dropping, none assigned"
+
+setup_ctdb
+
+ok_null
+
+simple_test
diff --git a/tests/eventscripts/00.ctdb.init.022.sh b/tests/eventscripts/00.ctdb.init.022.sh
new file mode 100755
index 0000000..6e59428
--- /dev/null
+++ b/tests/eventscripts/00.ctdb.init.022.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Check public IP dropping, 1 assigned"
+
+setup_ctdb
+
+ctdb_get_1_public_address |
+while read dev ip bits ; do
+    ip addr add "${ip}/${bits}" dev "$dev"
+
+    ok <<EOF
+Removing public address ${ip}/${bits} from device ${dev}
+EOF
+
+    simple_test
+done
diff --git a/tests/eventscripts/00.ctdb.init.023.sh b/tests/eventscripts/00.ctdb.init.023.sh
new file mode 100755
index 0000000..9b97e82
--- /dev/null
+++ b/tests/eventscripts/00.ctdb.init.023.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "Check public IP dropping, all assigned"
+
+setup_ctdb
+
+nl="
+"
+ctdb_get_my_public_addresses | {
+    out=""
+    while read dev ip bits ; do
+	ip addr add "${ip}/${bits}" dev "$dev"
+
+	msg="Removing public address ${ip}/${bits} from device ${dev}"
+	out="${out}${out:+${nl}}${msg}"
+    done
+
+    ok "$out"
+
+    simple_test
+}
diff --git a/tests/eventscripts/60.ganesha.monitor.101.sh b/tests/eventscripts/60.ganesha.monitor.101.sh
new file mode 100755
index 0000000..d68ad6a
--- /dev/null
+++ b/tests/eventscripts/60.ganesha.monitor.101.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all services available"
+
+setup_nfs_ganesha
+
+ok_null
+
+simple_test
diff --git a/tests/eventscripts/60.ganesha.monitor.131.sh b/tests/eventscripts/60.ganesha.monitor.131.sh
new file mode 100755
index 0000000..95ce450
--- /dev/null
+++ b/tests/eventscripts/60.ganesha.monitor.131.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "rquotad down"
+
+setup_nfs_ganesha
+rpc_services_down "rquotad"
+
+ok<<EOF
+ERROR: rquotad failed RPC check:
+rpcinfo: RPC: Program not registered
+program rquotad version 1 is not available
+Trying to restart rquotad [rpc.rquotad]
+EOF
+
+simple_test
diff --git a/tests/eventscripts/60.ganesha.monitor.141.sh b/tests/eventscripts/60.ganesha.monitor.141.sh
new file mode 100755
index 0000000..656a530
--- /dev/null
+++ b/tests/eventscripts/60.ganesha.monitor.141.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "statd down, 6 iterations"
+
+# statd fails and attempts to restart it fail.
+
+setup_nfs_ganesha
+rpc_services_down "status"
+
+ok_null
+simple_test || exit $?
+
+ok<<EOF
+Trying to restart statd [rpc.statd -n cluster1 -H /etc/ctdb/statd-callout ]
+EOF
+simple_test || exit $?
+
+ok_null
+simple_test || exit $?
+
+ok<<EOF
+ERROR: status failed RPC check:
+rpcinfo: RPC: Program not registered
+program status version 1 is not available
+Trying to restart statd [rpc.statd -n cluster1 -H /etc/ctdb/statd-callout ]
+EOF
+simple_test || exit $?
+
+ok_null
+simple_test || exit $?
+
+required_result 1 <<EOF
+ERROR: status failed RPC check:
+rpcinfo: RPC: Program not registered
+program status version 1 is not available
+EOF
+simple_test || exit $?
diff --git a/tests/eventscripts/60.nfs.monitor.102.sh b/tests/eventscripts/60.nfs.monitor.102.sh
new file mode 100755
index 0000000..bb988aa
--- /dev/null
+++ b/tests/eventscripts/60.nfs.monitor.102.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all services available, check nfsd thread count, count matches"
+
+setup_nfs
+
+CTDB_MONITOR_NFS_THREAD_COUNT="yes"
+RPCNFSDCOUNT=8
+FAKE_NFSD_THREAD_PIDS="1 2 3 4 5 6 7 8"
+
+ok_null
+
+simple_test
diff --git a/tests/eventscripts/60.nfs.monitor.103.sh b/tests/eventscripts/60.nfs.monitor.103.sh
new file mode 100755
index 0000000..75d7291
--- /dev/null
+++ b/tests/eventscripts/60.nfs.monitor.103.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all services available, check nfsd thread count, not enough threads"
+
+setup_nfs
+
+CTDB_MONITOR_NFS_THREAD_COUNT="yes"
+RPCNFSDCOUNT=8
+FAKE_NFSD_THREAD_PIDS="1 2 3 4 5"
+
+ok "Attempting to correct number of nfsd threads from 5 to 8"
+
+simple_test
diff --git a/tests/eventscripts/60.nfs.monitor.104.sh b/tests/eventscripts/60.nfs.monitor.104.sh
new file mode 100755
index 0000000..a052be8
--- /dev/null
+++ b/tests/eventscripts/60.nfs.monitor.104.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+# Add this extra test to catch a design change where we only ever
+# increase the number of threads.  That is, this test would need to be
+# consciously removed.
+define_test "all services available, check nfsd thread count, too many threads"
+
+setup_nfs
+
+CTDB_MONITOR_NFS_THREAD_COUNT="yes"
+RPCNFSDCOUNT=4
+FAKE_NFSD_THREAD_PIDS="1 2 3 4 5 6"
+
+ok "Attempting to correct number of nfsd threads from 6 to 4"
+
+simple_test
diff --git a/tests/eventscripts/60.nfs.monitor.113.sh b/tests/eventscripts/60.nfs.monitor.113.sh
new file mode 100755
index 0000000..caa4989
--- /dev/null
+++ b/tests/eventscripts/60.nfs.monitor.113.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "knfsd down, 6 iterations, dump 5 threads, none hung"
+
+# knfsd fails and attempts to restart it fail.
+setup_nfs
+rpc_services_down "nfs"
+
+# Additionally, any hung threads should have stack traces dumped.
+CTDB_NFS_DUMP_STUCK_THREADS=5
+FAKE_NFSD_THREAD_PIDS=""
+
+iterate_test 6 'ok_null' \
+    2 'rpc_set_service_failure_response "nfsd"' \
+    4 'rpc_set_service_failure_response "nfsd"' \
+    6 'rpc_set_service_failure_response "nfsd"'
diff --git a/tests/eventscripts/60.nfs.monitor.114.sh b/tests/eventscripts/60.nfs.monitor.114.sh
new file mode 100755
index 0000000..8279395
--- /dev/null
+++ b/tests/eventscripts/60.nfs.monitor.114.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "knfsd down, 6 iterations, dump 5 threads, 3 hung"
+
+# knfsd fails and attempts to restart it fail.
+setup_nfs
+rpc_services_down "nfs"
+
+# Additionally, any hung threads should have stack traces dumped.
+CTDB_NFS_DUMP_STUCK_THREADS=5
+FAKE_NFSD_THREAD_PIDS="1001 1002 1003"
+
+iterate_test 6 'ok_null' \
+    2 'rpc_set_service_failure_response "nfsd"' \
+    4 'rpc_set_service_failure_response "nfsd"' \
+    6 'rpc_set_service_failure_response "nfsd"'
diff --git a/tests/eventscripts/etc-ctdb/rc.local b/tests/eventscripts/etc-ctdb/rc.local
index ae93ae5..9cd4d55 100755
--- a/tests/eventscripts/etc-ctdb/rc.local
+++ b/tests/eventscripts/etc-ctdb/rc.local
@@ -33,6 +33,12 @@ get_proc ()
 	sys/net/ipv4/conf/all/arp_filter)
 	    echo 1
 	    ;;
+	fs/nfsd/threads)
+	    echo "$FAKE_NFSD_THREAD_PIDS" | wc -w
+	    ;;
+	*/stack)
+	    echo "[<ffffffff87654321>] fake_stack_trace_for_pid_${1}+0x0/0xff"
+	    ;;
 	*)
 	    echo "get_proc: \"$1\" not implemented"
 	    exit 1
diff --git a/tests/eventscripts/scripts/local.sh b/tests/eventscripts/scripts/local.sh
index 3f55830..6d1d527 100644
--- a/tests/eventscripts/scripts/local.sh
+++ b/tests/eventscripts/scripts/local.sh
@@ -115,6 +115,14 @@ setup_generic ()
     rm -f "$FAKE_IP_STATE"/*/*
     rm -f "$FAKE_IP_STATE"/* 2>/dev/null || true
     rmdir "$FAKE_IP_STATE"/* 2>/dev/null || true
+
+
+    export CTDB_DBDIR="${EVENTSCRIPTS_TESTS_VAR_DIR}/db"
+    mkdir -p "${CTDB_DBDIR}/persistent"
+
+    export FAKE_TDBTOOL_SUPPORTS_CHECK="yes"
+    export FAKE_TDB_IS_OK
+    export FAKE_DATE_OUTPUT
 }
 
 tcp_port_down ()
@@ -275,9 +283,10 @@ setup_ctdb ()
     export FAKE_CTDB_PNN="${2:-0}"
     echo "Setting up CTDB with PNN ${FAKE_CTDB_PNN}"
 
+    export CTDB_PUBLIC_ADDRESSES="${CTDB_BASE}/public_addresses"
     if [ -n "$3" ] ; then
 	echo "Setting up CTDB_PUBLIC_ADDRESSES: $3"
-	export CTDB_PUBLIC_ADDRESSES=$(mktemp)
+	CTDB_PUBLIC_ADDRESSES=$(mktemp)
 	for _i in $3 ; do
 	    _ip="${_i%@*}"
 	    _ifaces="${_i#*@}"
@@ -555,6 +564,9 @@ setup_nfs ()
 
     export CTDB_NFS_SKIP_SHARE_CHECK="no"
 
+    export CTDB_MONITOR_NFS_THREAD_COUNT RPCNFSDCOUNT FAKE_NFSD_THREAD_PIDS
+    export CTDB_NFS_DUMP_STUCK_THREADS
+
     # Reset the failcounts for nfs services.
     eventscript_call eval rm -f '$ctdb_fail_dir/nfs_*'
 
@@ -579,6 +591,20 @@ setup_nfs ()
     fi
 }
 
+setup_nfs_ganesha ()
+{
+    setup_nfs "$@"
+    export NFS_SERVER_MODE="ganesha"
+    if [ "$1" != "down" ] ; then
+	export CTDB_MANAGES_NFS="yes"
+    fi
+
+    # We do not support testing the Ganesha-nfsd-specific part of the
+    # eventscript.
+    export CTDB_SKIP_GANESHA_NFSD_CHECK="yes"
+    export CTDB_NFS_SKIP_SHARE_CHECK="yes"
+}
+
 rpc_services_down ()
 {
     for _i ; do
@@ -673,7 +699,19 @@ program $_pn version $_ver is not available"
 			case "${_progname}${_action#restart}" in
 			    nfsd)
 				_t="\
-Trying to restart NFS service
+Trying to restart NFS service"
+
+				if [ -n "$CTDB_NFS_DUMP_STUCK_THREADS" ] ; then
+				    for _pid in $FAKE_NFSD_THREAD_PIDS ; do
+					_t="\
+$_t
+Stack trace for stuck nfsd thread [${_pid}]:
+[<ffffffff87654321>] fake_stack_trace_for_pid_${_pid}/stack+0x0/0xff"
+				    done
+				fi
+
+				_t="\
+${_t}
 Starting nfslock: OK
 Starting nfs: OK"
 				;;
@@ -790,11 +828,6 @@ EOF
 
 # Any args are passed to the eventscript.
 
-# Eventscript tracing can be done by setting:
-#   EVENTSCRIPTS_TESTS_TRACE="sh -x"
-
-# or similar.  This will almost certainly make a test fail but is
-# useful for debugging.
 simple_test ()
 {
     [ -n "$event" ] || die 'simple_test: $event not set'
@@ -899,7 +932,11 @@ iterate_test ()
 	    shift 2
 	fi
 
-	_out=$($EVENTSCRIPTS_TESTS_TRACE "${CTDB_BASE}/events.d/$script" "$event" $args 2>&1)
+	_trace=""
+	if $TEST_COMMAND_TRACE ; then
+	    _trace="sh -x"
+	fi
+	_out=$($_trace "${CTDB_BASE}/events.d/$script" "$event" $args 2>&1)
 	_rc=$?
 
     if [ -n "$OUT_FILTER" ] ; then
diff --git a/tests/eventscripts/stubs/date b/tests/eventscripts/stubs/date
new file mode 100755
index 0000000..2f470a8
--- /dev/null
+++ b/tests/eventscripts/stubs/date
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+if [ "$FAKE_DATE_OUTPUT" ] ; then
+    echo "$FAKE_DATE_OUTPUT"
+else	
+    /bin/date "$@"
+fi
diff --git a/tests/eventscripts/stubs/ip b/tests/eventscripts/stubs/ip
index 709c379..fb7e8b6 100755
--- a/tests/eventscripts/stubs/ip
+++ b/tests/eventscripts/stubs/ip
@@ -87,6 +87,7 @@ ip_addr_show ()
     dev=""
     primary=true
     secondary=true
+    _to=""
     while [ -n "$1" ] ; do
 	case "$1" in
 	    dev)
@@ -99,6 +100,9 @@ ip_addr_show ()
 	    secondary)
 		secondary=true ; primary=false ; shift
 		;;
+	    to)
+		_to="$2" ; shift 2
+		;;
 	    *)
 	        # Assume an interface name
 		dev="$1" ; shift 1
@@ -131,26 +135,35 @@ ${n}: ${dev}: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state
 EOF
 	if $primary && [ -r "$pf" ] ; then
 	    read local <"$pf"
-	    calc_brd
-	    cat <<EOF
+	    if [ -z "$_to" -o "${_to%/*}" = "${local%/*}" ] ; then
+		calc_brd
+		cat <<EOF
     inet ${local} brd ${brd} scope global ${dev}
 EOF
+	    fi
 	fi
 	if $secondary && [ -r "$sf" ] ; then
 	    while read local ; do
-		calc_brd
-		cat <<EOF
+		if [ -z "$_to" -o "${_to%/*}" = "${local%/*}" ] ; then
+		    calc_brd
+		    cat <<EOF
     inet ${local} brd ${brd} scope global secondary ${dev}
 EOF
+		fi
 	    done <"$sf"
 	fi
-	cat <<EOF
+	if [ -z "$_to" ] ; then
+	    cat <<EOF
        valid_lft forever preferred_lft forever
 EOF
+	fi
     }
     n=1
     for dev in $devices ; do
-	show_iface
+	if [ -z "$_to" ] || \
+	    grep -F "${_to%/*}/" "${FAKE_IP_STATE}/addresses/${dev}-"* >/dev/null ; then
+	    show_iface
+	fi
 	n=$(($n + 1))
     done
 }
diff --git a/tests/eventscripts/stubs/pidof b/tests/eventscripts/stubs/pidof
new file mode 100755
index 0000000..b6ad6d8
--- /dev/null
+++ b/tests/eventscripts/stubs/pidof
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+case "$1" in
+    nfsd)
+	echo "$FAKE_NFSD_THREAD_PIDS"
+	;;
+    *)
+	echo "pidof: \"$1\" not implemented"
+	exit 1
+esac
diff --git a/tests/eventscripts/stubs/tdbdump b/tests/eventscripts/stubs/tdbdump
new file mode 100755
index 0000000..986c5c5
--- /dev/null
+++ b/tests/eventscripts/stubs/tdbdump
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+if [ "$FAKE_TDB_IS_OK" = "yes" ] ; then
+    echo "TDB good"
+    exit 0
+else
+    echo "TDB busted"
+    exit 1
+fi
diff --git a/tests/eventscripts/stubs/tdbtool b/tests/eventscripts/stubs/tdbtool
new file mode 100755
index 0000000..c6c0a16
--- /dev/null
+++ b/tests/eventscripts/stubs/tdbtool
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+if [ -z "$1" ] ; then
+    if [ "$FAKE_TDBTOOL_SUPPORTS_CHECK" = "yes" ] ; then
+	echo "check"
+    fi
+fi
+
+if [ "$FAKE_TDB_IS_OK" = "yes" ] ; then
+    echo "Database integrity is OK"
+else
+    echo "Database is busted"
+fi
+
+exit 0
diff --git a/tests/scripts/integration.bash b/tests/scripts/integration.bash
index 45e0b99..95d67c3 100644
--- a/tests/scripts/integration.bash
+++ b/tests/scripts/integration.bash
@@ -320,21 +320,8 @@ sleep_for ()
 
 _cluster_is_healthy ()
 {
-    local out x count line
-
-    out=$($CTDB -Y status 2>/dev/null) || return 1
-
-    {
-        read x
-	count=0
-        while read line ; do
-	    # We need to see valid lines if we're going to be healthy.
-	    [ "${line#:[0-9]}" != "$line" ] && count=$(($count + 1))
-	    # A line indicating a node is unhealthy causes failure.
-	    [ "${line##:*:*:*1:}" != "$line" ] && return 1
-        done
-	[ $count -gt 0 ] && return $?
-    } <<<"$out" # Yay bash!
+    $CTDB nodestatus all >/dev/null && \
+	node_has_status 0 recovered
 }
 
 cluster_is_healthy ()
@@ -688,8 +675,6 @@ _ctdb_start_post ()
     echo "Forcing a recovery..."
     onnode -q 0 $CTDB recover
     sleep_for 1
-    echo "Forcing a recovery..."
-    onnode -q 0 $CTDB recover
 
     echo "ctdb is ready"
 }
@@ -749,8 +734,6 @@ restart_ctdb ()
 	echo "Forcing a recovery..."
 	onnode -q 0 $CTDB recover
 	sleep_for 1
-	echo "Forcing a recovery..."
-	onnode -q 0 $CTDB recover
 
 	# Cluster is still healthy.  Good, we're done!
 	if ! onnode 0 $CTDB_TEST_WRAPPER _cluster_is_healthy ; then
diff --git a/tests/src/ctdb_bench.c b/tests/src/ctdb_bench.c
index 8463200..3323589 100644
--- a/tests/src/ctdb_bench.c
+++ b/tests/src/ctdb_bench.c
@@ -222,6 +222,9 @@ int main(int argc, const char *argv[])
 
 	/* initialise ctdb */
 	ctdb = ctdb_cmdline_client(ev, timeval_current_ofs(3, 0));
+	if (ctdb == NULL) {
+		exit(1);
+	}
 
 	/* attach to a specific database */
 	ctdb_db = ctdb_attach(ctdb, timeval_current_ofs(2, 0), "test.tdb",
diff --git a/tests/src/ctdb_fetch.c b/tests/src/ctdb_fetch.c
index db84f5d..b900efa 100644
--- a/tests/src/ctdb_fetch.c
+++ b/tests/src/ctdb_fetch.c
@@ -83,6 +83,11 @@ static void bench_fetch_1node(struct ctdb_context *ctdb)
 	data.dptr = (uint8_t *)talloc_asprintf_append((char *)data.dptr, 
 						      "msg_count=%d on node %d\n",
 						      msg_count, ctdb_get_pnn(ctdb));
+	if (data.dptr == NULL) {
+		printf("Failed to create record\n");
+		talloc_free(tmp_ctx);
+		return;
+	}
 	data.dsize = strlen((const char *)data.dptr)+1;
 
 	ret = ctdb_record_store(h, data);
diff --git a/tests/src/ctdb_fetch_readonly_loop.c b/tests/src/ctdb_fetch_readonly_loop.c
index d3cc72c..5944fb7 100644
--- a/tests/src/ctdb_fetch_readonly_loop.c
+++ b/tests/src/ctdb_fetch_readonly_loop.c
@@ -107,6 +107,9 @@ int main(int argc, const char *argv[])
 	ev = event_context_init(NULL);
 
 	ctdb = ctdb_cmdline_client(ev, timeval_current_ofs(5, 0));
+	if (ctdb == NULL) {
+		exit(1);
+	}
 
 	key.dptr  = discard_const(TESTKEY);
 	key.dsize = strlen(TESTKEY);
diff --git a/tests/src/ctdb_trackingdb_test.c b/tests/src/ctdb_trackingdb_test.c
index d8525d5..ee473c0 100644
--- a/tests/src/ctdb_trackingdb_test.c
+++ b/tests/src/ctdb_trackingdb_test.c
@@ -125,6 +125,9 @@ int main(int argc, const char *argv[])
 	ev = event_context_init(NULL);
 
 	ctdb = ctdb_cmdline_client(ev, timeval_current_ofs(5, 0));
+	if (ctdb == NULL) {
+		exit(1);
+	}
 
 	trackdb_test(ctdb);
 
diff --git a/tests/src/ctdb_traverse.c b/tests/src/ctdb_traverse.c
index d5eb304..5b37ed9 100644
--- a/tests/src/ctdb_traverse.c
+++ b/tests/src/ctdb_traverse.c
@@ -89,6 +89,9 @@ int main(int argc, const char *argv[])
 	ev = event_context_init(NULL);
 
 	ctdb = ctdb_cmdline_client(ev, timeval_current_ofs(3, 0));
+	if (ctdb == NULL) {
+		exit(1);
+	}
 
 	/* attach to a specific database */
 	ctdb_db = ctdb_attach(ctdb, timeval_current_ofs(2, 0), dbname, false, 0);
diff --git a/tests/src/ctdb_update_record.c b/tests/src/ctdb_update_record.c
index 260f86e..6eff1d0 100644
--- a/tests/src/ctdb_update_record.c
+++ b/tests/src/ctdb_update_record.c
@@ -128,6 +128,9 @@ int main(int argc, const char *argv[])
 	ev = event_context_init(NULL);
 
 	ctdb = ctdb_cmdline_client(ev, timeval_current_ofs(5, 0));
+	if (ctdb == NULL) {
+		exit(1);
+	}
 
 	/* attach to a specific database */
 	ctdb_db = ctdb_attach(ctdb, timeval_current_ofs(5, 0), "test.tdb", false, 0);
diff --git a/tests/src/ctdb_update_record_persistent.c b/tests/src/ctdb_update_record_persistent.c
index 07b2c48..a0bb383 100644
--- a/tests/src/ctdb_update_record_persistent.c
+++ b/tests/src/ctdb_update_record_persistent.c
@@ -31,7 +31,7 @@ static void update_once(struct ctdb_context *ctdb, struct event_context *ev, str
 	TDB_DATA key, data, olddata;
 	struct ctdb_ltdb_header header;
 
-	memset(&header, sizeof(header), 0);
+	memset(&header, 0, sizeof(header));
 
 	key.dptr  = (uint8_t *)record;
 	key.dsize = strlen(record);
@@ -98,6 +98,9 @@ int main(int argc, const char *argv[])
 	ev = event_context_init(NULL);
 
 	ctdb = ctdb_cmdline_client(ev, timeval_current_ofs(5, 0));
+	if (ctdb == NULL) {
+		exit(1);
+	}
 
 	if (test_db == NULL) {
 		fprintf(stderr, "You must specify the database\n");
diff --git a/tests/src/ctdbd_test.c b/tests/src/ctdbd_test.c
index 6f045a3..bf95e0b 100644
--- a/tests/src/ctdbd_test.c
+++ b/tests/src/ctdbd_test.c
@@ -21,7 +21,7 @@
 #define _CTDBD_TEST_C
 
 #include "includes.h"
-#include "lib/tdb/include/tdb.h"
+#include "tdb.h"
 #include "ctdb_private.h"
 
 /*
diff --git a/tools/ctdb.c b/tools/ctdb.c
index b3cbade..313ec3c 100644
--- a/tools/ctdb.c
+++ b/tools/ctdb.c
@@ -619,7 +619,7 @@ static int control_dbstatistics(struct ctdb_context *ctdb, int argc, const char
 		return -1;
 	}
 
-	printf("DB Statistics:\n");
+	printf("DB Statistics: %s\n", argv[0]);
 	printf(" %*s%-22s%*s%10u\n", 0, "", "ro_delegations", 4, "",
 		dbstat->db_ro_delegations);
 	printf(" %*s%-22s%*s%10u\n", 0, "", "ro_revokes", 4, "",
@@ -633,23 +633,28 @@ static int control_dbstatistics(struct ctdb_context *ctdb, int argc, const char
 		dbstat->locks.num_current);
 	printf(" %*s%-22s%*s%10u\n", 4, "", "pending", 0, "",
 		dbstat->locks.num_pending);
+	printf(" %s", "hop_count_buckets:");
+	for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+		printf(" %d", dbstat->hop_count_bucket[i]);
+	}
+	printf("\n");
+	printf(" %s", "lock_buckets:");
+	for (i=0; i<MAX_COUNT_BUCKETS; i++) {
+		printf(" %d", dbstat->locks.buckets[i]);
+	}
+	printf("\n");
 	printf(" %-30s     %.6f/%.6f/%.6f sec out of %d\n",
-		"    latency_ctdbd  MIN/AVG/MAX",
+		"locks_latency      MIN/AVG/MAX",
 		dbstat->locks.latency.min,
 		(dbstat->locks.latency.num ?
 		 dbstat->locks.latency.total /dbstat->locks.latency.num :
 		 0.0),
 		dbstat->locks.latency.max,
 		dbstat->locks.latency.num);
-	printf(" %s", "    buckets:");
-	for (i=0; i<MAX_COUNT_BUCKETS; i++) {
-		printf(" %d", dbstat->hop_count_bucket[i]);
-	}
-	printf("\n");
-	printf("Num Hot Keys:     %d\n", dbstat->num_hot_keys);
+	printf(" Num Hot Keys:     %d\n", dbstat->num_hot_keys);
 	for (i = 0; i < dbstat->num_hot_keys; i++) {
 		int j;
-		printf("Count:%d Key:", dbstat->hot_keys[i].count);
+		printf("     Count:%d Key:", dbstat->hot_keys[i].count);
 		for (j = 0; j < dbstat->hot_keys[i].key.dsize; j++) {
 			printf("%02x", dbstat->hot_keys[i].key.dptr[j]&0xff);
 		}
@@ -904,6 +909,7 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv
 	struct ctdb_vnn_map *vnnmap=NULL;
 	struct ctdb_node_map *nodemap=NULL;
 	uint32_t recmode, recmaster, mypnn;
+	int num_deleted_nodes = 0;
 
 	if (!ctdb_getpnn(ctdb_connection, options.pnn, &mypnn)) {
 		return -1;
@@ -926,7 +932,17 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv
 		return 0;
 	}
 
-	printf("Number of nodes:%d\n", nodemap->num);
+	for (i=0; i<nodemap->num; i++) {
+		if (nodemap->nodes[i].flags & NODE_FLAGS_DELETED) {
+			num_deleted_nodes++;
+		}
+	}
+	if (num_deleted_nodes == 0) {
+		printf("Number of nodes:%d\n", nodemap->num);
+	} else {
+		printf("Number of nodes:%d (including %d deleted nodes)\n",
+		       nodemap->num, num_deleted_nodes);
+	}
 	for(i=0;i<nodemap->num;i++){
 		if (nodemap->nodes[i].flags & NODE_FLAGS_DELETED) {
 			continue;
@@ -1986,7 +2002,8 @@ again:
 	/* get the number of nodes and node flags */
 	if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, &nodemap) != 0) {
 		DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
-		return -1;
+		sleep(1);
+		goto again;
 	}
 
 	ipreallocate_finished = false;
@@ -2879,10 +2896,9 @@ static int control_stop(struct ctdb_context *ctdb, int argc, const char **argv)
 		/* read the nodemap and verify the change took effect */
 		if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap) != 0) {
 			DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
-			exit(10);
 		}
 
-	} while (!(nodemap->nodes[options.pnn].flags & NODE_FLAGS_STOPPED));
+	} while (nodemap == NULL || !(nodemap->nodes[options.pnn].flags & NODE_FLAGS_STOPPED));
 	ret = control_ipreallocate(ctdb, argc, argv);
 	if (ret != 0) {
 		DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
@@ -2913,10 +2929,9 @@ static int control_continue(struct ctdb_context *ctdb, int argc, const char **ar
 		/* read the nodemap and verify the change took effect */
 		if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap) != 0) {
 			DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
-			exit(10);
 		}
 
-	} while (nodemap->nodes[options.pnn].flags & NODE_FLAGS_STOPPED);
+	} while (nodemap == NULL || nodemap->nodes[options.pnn].flags & NODE_FLAGS_STOPPED);
 	ret = control_ipreallocate(ctdb, argc, argv);
 	if (ret != 0) {
 		DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
@@ -3105,14 +3120,21 @@ static int control_recover(struct ctdb_context *ctdb, int argc, const char **arg
 {
 	int ret;
 	uint32_t generation, next_generation;
+	bool force;
+
+	/* "force" option ignores freeze failure and forces recovery */
+	force = (argc == 1) && (strcasecmp(argv[0], "force") == 0);
 
 	/* record the current generation number */
 	generation = get_generation(ctdb);
 
 	ret = ctdb_ctrl_freeze_priority(ctdb, TIMELIMIT(), options.pnn, 1);
 	if (ret != 0) {
-		DEBUG(DEBUG_ERR, ("Unable to freeze node\n"));
-		return ret;
+		if (!force) {
+			DEBUG(DEBUG_ERR, ("Unable to freeze node\n"));
+			return ret;
+		}
+		DEBUG(DEBUG_WARNING, ("Unable to freeze node but proceeding because \"force\" option given\n"));
 	}
 
 	ret = ctdb_ctrl_setrecmode(ctdb, TIMELIMIT(), options.pnn, CTDB_RECOVERY_ACTIVE);
diff --git a/utils/nagios/check_ctdb b/utils/nagios/check_ctdb
index 9430333..cc0c222 100644
--- a/utils/nagios/check_ctdb
+++ b/utils/nagios/check_ctdb
@@ -178,7 +178,11 @@ if ($info eq "scriptstatus") {
             next if $. == 1; # Header
             $script_count++;
             chop;
-            my ($type, $name, $code, $status, $start, $end, @error) = split(":");
+            my ($col0, $type, $name, $code, $status, $start, $end, @error) = split(":");
+            if ($col0 ne '') {
+              # Old version, before 30 Aug 2011 and commit a779d83a6213
+              ($type, $name, $code, $status, $start, $end, @error) = ($col0, $type, $name, $code, $status, $start, $end, @error);
+            }
             my $error = join(':', @error);
             if ($error ne "") {
                 $output = "$output ;; " if $output;
diff --git a/utils/ping_pong/ping_pong.c b/utils/ping_pong/ping_pong.c
index 0a49d66..16f58d8 100644
--- a/utils/ping_pong/ping_pong.c
+++ b/utils/ping_pong/ping_pong.c
@@ -237,6 +237,10 @@ int main(int argc, char *argv[])
 
 	fname = argv[0];
 	num_locks = atoi(argv[1]);
+	if (num_locks <= 0) {
+		printf("num_locks should be > 0\n");
+		exit(1);
+	}
 
 	fd = open(fname, O_CREAT|O_RDWR, 0600);
 	if (fd == -1) exit(1);
diff --git a/web/samba.html b/web/samba.html
index c210f4b..fb17d0f 100644
--- a/web/samba.html
+++ b/web/samba.html
@@ -23,11 +23,8 @@ A clustered Samba install must set some specific configuration parameters
 <pre>
   clustering = yes
   idmap backend = tdb2
-  private dir = /a/directory/on/your/cluster/filesystem
 </pre>
 
-It is vital that the private directory is on shared storage. 
-
 <h2>Using smbcontrol</h2>
 
 You can check for connectivity to the smbd daemons on each node using smbcontrol
@@ -37,12 +34,9 @@ You can check for connectivity to the smbd daemons on each node using smbcontrol
 
 <h2>Using Samba4 smbtorture</h2>
 
-The Samba4 version of smbtorture has several tests that can be used to benchmark a CIFS cluster.<br>
-You can download Samba4 like this:
-<pre>
-  svn co svn://svnanon.samba.org/samba/branches/SAMBA_4_0
-</pre>
-Then configure and compile it as usual.<br>
+The Samba4 version of smbtorture has several tests that can be used to
+benchmark a CIFS cluster. You can download Samba 4 from Samba website.
+
 The particular tests that are helpful for cluster benchmarking are the RAW-BENCH-OPEN, RAW-BENCH-LOCK and BENCH-NBENCH tests.<br>
 These tests take a unclist that allows you to spread the workload out over more than one node. For example:
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-samba/ctdb.git