Mailing List Archive

[PATCH 2 of 2] build: split C sources and agent script into separate subdirs
# HG changeset patch
# User Florian Haas <florian@hastexo.com>
# Date 1338274073 -7200
# Branch autotools
# Node ID 31225c028d4f93d2a9828ac15c92d12e8aabf8c7
# Parent 9888c2e4353b08599e6977e5e61dd6d34ce6151e
build: split C sources and agent script into separate subdirs

diff -r 9888c2e4353b -r 31225c028d4f Makefile.am
--- a/Makefile.am Mon May 28 20:49:01 2012 +0200
+++ b/Makefile.am Tue May 29 08:47:53 2012 +0200
@@ -1,17 +1,1 @@
-
-AM_CFLAGS = -D_GNU_SOURCE
-AM_CPPFLAGS = -I$(includedir)/pacemaker -I$(includedir)/clplumbing \
- -I$(includedir)/heartbeat \
- $(glib_CFLAGS) \
- $(libxml_CFLAGS)
-
-agentdir = $(libdir)/stonith/plugins/external
-agent_SCRIPTS = sbd.agent
-
-sbin_PROGRAMS = sbd
-
-sbd_SOURCES = sbd-common.c sbd-md.c sbd-pacemaker.c
-
-sbd_LDFLAGS = $(glib_LIBS) $(libcoroipcc_LIBS)
-
-
+SUBDIRS = src agent
diff -r 9888c2e4353b -r 31225c028d4f agent/Makefile.am
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/agent/Makefile.am Tue May 29 08:47:53 2012 +0200
@@ -0,0 +1,2 @@
+agentdir = $(libdir)/stonith/plugins/external
+agent_SCRIPTS = sbd
diff -r 9888c2e4353b -r 31225c028d4f agent/sbd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/agent/sbd Tue May 29 08:47:53 2012 +0200
@@ -0,0 +1,116 @@
+#!/bin/bash
+#
+# This STONITH script drives the shared-storage stonith plugin.
+#
+# Author: Lars Marowsky-Bree
+# Copyright: 2008 Lars Marowsky-Bree
+# License: GNU General Public License (GPL)
+#
+
+# Main code
+
+if [ x$sbd_device = x ]; then
+ if [ -f /etc/sysconfig/sbd ]; then
+ source /etc/sysconfig/sbd
+ sbd_device=$SBD_DEVICE
+ fi
+fi
+
+SBD_DEVS=${sbd_device%;}
+
+sbd_device=${SBD_DEVS//;/ -d }
+
+case $1 in
+gethosts)
+ echo `sbd -d $sbd_device list | cut -f2 | sort | uniq`
+ exit 0
+ ;;
+off|reset)
+ message=$1
+ case "$crashdump" in
+ yes|true|1|YES|TRUE|ja|on|ON) message="crashdump" ;;
+ esac
+ sbd -d $sbd_device message $2 $message
+ exit $?
+ ;;
+status)
+ if ! sbd -d $sbd_device list >/dev/null 2>&1 ; then
+ ha_log.sh err "sbd could not list nodes from $sbd_device"
+ exit 1
+ fi
+ exit 0
+ ;;
+on)
+ exit 1
+ ;;
+getconfignames)
+ echo "sbd_device crashdump"
+ exit 0
+ ;;
+getinfo-devid)
+ echo "Shared storage STONITH device"
+ exit 0
+ ;;
+getinfo-devname)
+ echo "Shared storage STONITH device"
+ exit 0
+ ;;
+getinfo-devdescr)
+ cat << DESC
+sbd uses a shared storage device as a medium to communicate
+fencing requests. This allows clusters without network power
+switches; the downside is that access to the shared storage
+device becomes a Single Point of Failure.
+
+It requires sbd to be configured on all nodes.
+
+Please read http://linux-ha.org/wiki/SBD_Fencing!
+
+DESC
+ exit 0
+ ;;
+getinfo-devurl)
+ echo "http://linux-ha.org/wiki/SBD_Fencing"
+ exit 0
+ ;;
+getinfo-xml)
+ cat << SSHXML
+<parameters>
+
+<parameter name="crashdump">
+<content type="string" />
+<shortdesc lang="en">
+Crashdump instead of regular fence
+</shortdesc>
+<longdesc lang="en">
+If SBD is given a fence command, this option will perform a kernel crash
+instead of a reboot or power-off, which on a properly configured
+system can lead to a crashdump for analysis.
+
+This is less safe for production environments. Please use with caution
+and for debugging purposes only.
+</longdesc>
+</parameter>
+
+<parameter name="sbd_device" unique="1">
+<content type="string" />
+<shortdesc lang="en">
+SBD device(s)
+</shortdesc>
+<longdesc lang="en">
+The block device used for the SBD partition. Up to three
+can be specified if separated by a semicolon. (Please check
+the documentation if specifying two.)
+
+If not specified, will default to the value from /etc/sysconfig/sbd.
+
+</longdesc>
+</parameter>
+</parameters>
+SSHXML
+ exit 0
+ ;;
+*)
+ exit 1
+ ;;
+esac
diff -r 9888c2e4353b -r 31225c028d4f configure.ac
--- a/configure.ac Mon May 28 20:49:01 2012 +0200
+++ b/configure.ac Tue May 29 08:47:53 2012 +0200
@@ -50,7 +50,7 @@


dnl The Makefiles and shell scripts we output
-AC_CONFIG_FILES([Makefile])
+AC_CONFIG_FILES([Makefile src/Makefile agent/Makefile])

dnl Now process the entire list of files added by previous
dnl calls to AC_CONFIG_FILES()
diff -r 9888c2e4353b -r 31225c028d4f sbd-common.c
--- a/sbd-common.c Mon May 28 20:49:01 2012 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,983 +0,0 @@
-
-#include "sbd.h"
-
-/* These have to match the values in the header of the partition */
-static char sbd_magic[8] = "SBD_SBD_";
-static char sbd_version = 0x02;
-
-/* Tunable defaults: */
-unsigned long timeout_watchdog = 5;
-unsigned long timeout_watchdog_warn = 3;
-int timeout_allocate = 2;
-int timeout_loop = 1;
-int timeout_msgwait = 10;
-int timeout_io = 3;
-
-int watchdog_use = 0;
-int watchdog_set_timeout = 1;
-unsigned long timeout_watchdog_crashdump = 240;
-int skip_rt = 0;
-int check_pcmk = 0;
-int debug = 0;
-int debug_mode = 0;
-const char *watchdogdev = "/dev/watchdog";
-char * local_uname;
-
-/* Global, non-tunable variables: */
-int sector_size = 0;
-int watchdogfd = -1;
-
-/*const char *devname;*/
-const char *cmdname;
-
-void
-usage(void)
-{
- fprintf(stderr,
-"Shared storage fencing tool.\n"
-"Syntax:\n"
-" %s <options> <command> <cmdarguments>\n"
-"Options:\n"
-"-d <devname> Block device to use (mandatory; can be specified up to 3 times)\n"
-"-h Display this help.\n"
-"-n <node> Set local node name; defaults to uname -n (optional)\n"
-"\n"
-"-R Do NOT enable realtime priority (debugging only)\n"
-"-W Use watchdog (recommended) (watch only)\n"
-"-w <dev> Specify watchdog device (optional) (watch only)\n"
-"-T Do NOT initialize the watchdog timeout (watch only)\n"
-"-v Enable some verbose debug logging (optional)\n"
-"\n"
-"-1 <N> Set watchdog timeout to N seconds (optional, create only)\n"
-"-2 <N> Set slot allocation timeout to N seconds (optional, create only)\n"
-"-3 <N> Set daemon loop timeout to N seconds (optional, create only)\n"
-"-4 <N> Set msgwait timeout to N seconds (optional, create only)\n"
-"-5 <N> Warn if loop latency exceeds threshold (optional, watch only)\n"
-" (default is 3, set to 0 to disable)\n"
-"-C <N> Watchdog timeout to set before crashdumping (def: 240s, optional)\n"
-"-I <N> Async IO read timeout (defaults to 3 * loop timeout, optional)\n"
-"-t <N> Dampening delay before faulty servants are restarted (optional)\n"
-" (default is 60, set to 0 to disable)\n"
-"-F <N> # of failures before a servant is considered faulty (optional)\n"
-" (default is 10, set to 0 to disable)\n"
-"-P Check Pacemaker quorum and node health (optional, watch only)\n"
-"-Z Enable trace mode. WARNING: UNSAFE FOR PRODUCTION!\n"
-"Commands:\n"
-"create initialize N slots on <dev> - OVERWRITES DEVICE!\n"
-"list List all allocated slots on device, and messages.\n"
-"dump Dump meta-data header from device.\n"
-"watch Loop forever, monitoring own slot\n"
-"allocate <node>\n"
-" Allocate a slot for node (optional)\n"
-"message <node> (test|reset|off|clear|exit)\n"
-" Writes the specified message to node's slot.\n"
-, cmdname);
-}
-
-int
-watchdog_init_interval(void)
-{
- int timeout = timeout_watchdog;
-
- if (watchdogfd < 0) {
- return 0;
- }
-
-
- if (watchdog_set_timeout == 0) {
- cl_log(LOG_INFO, "NOT setting watchdog timeout on explicit user request!");
- return 0;
- }
-
- if (ioctl(watchdogfd, WDIOC_SETTIMEOUT, &timeout) < 0) {
- cl_perror( "WDIOC_SETTIMEOUT"
- ": Failed to set watchdog timer to %u seconds.",
- timeout);
- cl_log(LOG_CRIT, "Please validate your watchdog configuration!");
- cl_log(LOG_CRIT, "Choose a different watchdog driver or specify -T to silence this check if you are sure.");
- /* return -1; */
- } else {
- cl_log(LOG_INFO, "Set watchdog timeout to %u seconds.",
- timeout);
- }
- return 0;
-}
-
-int
-watchdog_tickle(void)
-{
- if (watchdogfd >= 0) {
- if (write(watchdogfd, "", 1) != 1) {
- cl_perror("Watchdog write failure: %s!",
- watchdogdev);
- return -1;
- }
- }
- return 0;
-}
-
-int
-watchdog_init(void)
-{
- if (watchdogfd < 0 && watchdogdev != NULL) {
- watchdogfd = open(watchdogdev, O_WRONLY);
- if (watchdogfd >= 0) {
- cl_log(LOG_NOTICE, "Using watchdog device: %s",
- watchdogdev);
- if ((watchdog_init_interval() < 0)
- || (watchdog_tickle() < 0)) {
- return -1;
- }
- }else{
- cl_perror("Cannot open watchdog device: %s",
- watchdogdev);
- return -1;
- }
- }
- return 0;
-}
-
-void
-watchdog_close(void)
-{
- if (watchdogfd >= 0) {
- if (write(watchdogfd, "V", 1) != 1) {
- cl_perror(
- "Watchdog write magic character failure: closing %s!",
- watchdogdev);
- }
- if (close(watchdogfd) < 0) {
- cl_perror("Watchdog close(2) failed.");
- }
- watchdogfd = -1;
- }
-}
-
-/* This duplicates some code from linux/ioprio.h since these are not included
- * even in linux-kernel-headers. Sucks. See also
- * /usr/src/linux/Documentation/block/ioprio.txt and ioprio_set(2) */
-extern int sys_ioprio_set(int, int, int);
-int ioprio_set(int which, int who, int ioprio);
-inline int ioprio_set(int which, int who, int ioprio)
-{
- return syscall(__NR_ioprio_set, which, who, ioprio);
-}
-
-enum {
- IOPRIO_CLASS_NONE,
- IOPRIO_CLASS_RT,
- IOPRIO_CLASS_BE,
- IOPRIO_CLASS_IDLE,
-};
-
-enum {
- IOPRIO_WHO_PROCESS = 1,
- IOPRIO_WHO_PGRP,
- IOPRIO_WHO_USER,
-};
-
-#define IOPRIO_BITS (16)
-#define IOPRIO_CLASS_SHIFT (13)
-#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1)
-
-#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT)
-#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK)
-#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data)
-
-void
-maximize_priority(void)
-{
- if (skip_rt) {
- cl_log(LOG_INFO, "Not elevating to realtime (-R specified).");
- return;
- }
-
- cl_make_realtime(-1, -1, 256, 256);
-
- if (ioprio_set(IOPRIO_WHO_PROCESS, getpid(),
- IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 1)) != 0) {
- cl_perror("ioprio_set() call failed.");
- }
-}
-
-void
-close_device(struct sbd_context *st)
-{
- close(st->devfd);
- free(st);
-}
-
-struct sbd_context *
-open_device(const char* devname)
-{
- struct sbd_context *st;
-
- if (!devname)
- return NULL;
-
- st = malloc(sizeof(struct sbd_context));
- if (!st)
- return NULL;
- memset(st, 0, sizeof(struct sbd_context));
-
- if (io_setup(1, &st->ioctx) != 0) {
- cl_perror("io_setup failed");
- free(st);
- return NULL;
- }
-
- st->devfd = open(devname, O_SYNC|O_RDWR|O_DIRECT);
-
- if (st->devfd == -1) {
- cl_perror("Opening device %s failed.", devname);
- free(st);
- return NULL;
- }
-
- ioctl(st->devfd, BLKSSZGET, &sector_size);
-
- if (sector_size == 0) {
- cl_perror("Get sector size failed.\n");
- close_device(st);
- return NULL;
- }
-
- return st;
-}
-
-signed char
-cmd2char(const char *cmd)
-{
- if (strcmp("clear", cmd) == 0) {
- return SBD_MSG_EMPTY;
- } else if (strcmp("test", cmd) == 0) {
- return SBD_MSG_TEST;
- } else if (strcmp("reset", cmd) == 0) {
- return SBD_MSG_RESET;
- } else if (strcmp("off", cmd) == 0) {
- return SBD_MSG_OFF;
- } else if (strcmp("exit", cmd) == 0) {
- return SBD_MSG_EXIT;
- } else if (strcmp("crashdump", cmd) == 0) {
- return SBD_MSG_CRASHDUMP;
- }
- return -1;
-}
-
-void *
-sector_alloc(void)
-{
- void *x;
-
- x = valloc(sector_size);
- if (!x) {
- exit(1);
- }
- memset(x, 0, sector_size);
-
- return x;
-}
-
-const char*
-char2cmd(const char cmd)
-{
- switch (cmd) {
- case SBD_MSG_EMPTY:
- return "clear";
- break;
- case SBD_MSG_TEST:
- return "test";
- break;
- case SBD_MSG_RESET:
- return "reset";
- break;
- case SBD_MSG_OFF:
- return "off";
- break;
- case SBD_MSG_EXIT:
- return "exit";
- break;
- case SBD_MSG_CRASHDUMP:
- return "crashdump";
- break;
- default:
- return "undefined";
- break;
- }
-}
-
-int
-sector_write(struct sbd_context *st, int sector, const void *data)
-{
- if (lseek(st->devfd, sector_size*sector, 0) < 0) {
- cl_perror("sector_write: lseek() failed");
- return -1;
- }
-
- if (write(st->devfd, data, sector_size) <= 0) {
- cl_perror("sector_write: write_sector() failed");
- return -1;
- }
- return(0);
-}
-
-int
-sector_read(struct sbd_context *st, int sector, void *data)
-{
- struct timespec timeout;
- struct io_event event;
- struct iocb *ios[1] = { &st->io };
- long r;
-
- timeout.tv_sec = timeout_io;
- timeout.tv_nsec = 0;
-
- memset(&st->io, 0, sizeof(struct iocb));
- io_prep_pread(&st->io, st->devfd, data, sector_size, sector_size * sector);
- if (io_submit(st->ioctx, 1, ios) != 1) {
- cl_log(LOG_ERR, "Failed to submit IO request!");
- return -1;
- }
-
- errno = 0;
- r = io_getevents(st->ioctx, 1L, 1L, &event, &timeout);
-
- if (r < 0 ) {
- cl_log(LOG_ERR, "Failed to retrieve IO events");
- return -1;
- } else if (r < 1L) {
- cl_log(LOG_INFO, "Cancelling IO request due to timeout");
- r = io_cancel(st->ioctx, ios[0], &event);
- if (r) {
- DBGLOG(LOG_INFO, "Could not cancel IO request.");
- /* Doesn't really matter, debugging information.
- */
- }
- return -1;
- }
-
- /* IO is happy */
- if (event.res == sector_size) {
- return 0;
- } else {
- cl_log(LOG_ERR, "Short read");
- return -1;
- }
-}
-
-int
-slot_read(struct sbd_context *st, int slot, struct sector_node_s *s_node)
-{
- return sector_read(st, SLOT_TO_SECTOR(slot), s_node);
-}
-
-int
-slot_write(struct sbd_context *st, int slot, const struct sector_node_s *s_node)
-{
- return sector_write(st, SLOT_TO_SECTOR(slot), s_node);
-}
-
-int
-mbox_write(struct sbd_context *st, int mbox, const struct sector_mbox_s *s_mbox)
-{
- return sector_write(st, MBOX_TO_SECTOR(mbox), s_mbox);
-}
-
-int
-mbox_read(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox)
-{
- return sector_read(st, MBOX_TO_SECTOR(mbox), s_mbox);
-}
-
-int
-mbox_write_verify(struct sbd_context *st, int mbox, const struct sector_mbox_s *s_mbox)
-{
- void *data;
- int rc = 0;
-
- if (sector_write(st, MBOX_TO_SECTOR(mbox), s_mbox) < 0)
- return -1;
-
- data = sector_alloc();
- if (sector_read(st, MBOX_TO_SECTOR(mbox), data) < 0) {
- rc = -1;
- goto out;
- }
-
-
- if (memcmp(s_mbox, data, sector_size) != 0) {
- cl_log(LOG_ERR, "Write verification failed!");
- rc = -1;
- goto out;
- }
- rc = 0;
-out:
- free(data);
- return rc;
-}
-
-int header_write(struct sbd_context *st, struct sector_header_s *s_header)
-{
- s_header->sector_size = htonl(s_header->sector_size);
- s_header->timeout_watchdog = htonl(s_header->timeout_watchdog);
- s_header->timeout_allocate = htonl(s_header->timeout_allocate);
- s_header->timeout_loop = htonl(s_header->timeout_loop);
- s_header->timeout_msgwait = htonl(s_header->timeout_msgwait);
- return sector_write(st, 0, s_header);
-}
-
-int
-header_read(struct sbd_context *st, struct sector_header_s *s_header)
-{
- if (sector_read(st, 0, s_header) < 0)
- return -1;
-
- s_header->sector_size = ntohl(s_header->sector_size);
- s_header->timeout_watchdog = ntohl(s_header->timeout_watchdog);
- s_header->timeout_allocate = ntohl(s_header->timeout_allocate);
- s_header->timeout_loop = ntohl(s_header->timeout_loop);
- s_header->timeout_msgwait = ntohl(s_header->timeout_msgwait);
- /* This sets the global defaults: */
- timeout_watchdog = s_header->timeout_watchdog;
- timeout_allocate = s_header->timeout_allocate;
- timeout_loop = s_header->timeout_loop;
- timeout_msgwait = s_header->timeout_msgwait;
-
- return 0;
-}
-
-int
-valid_header(const struct sector_header_s *s_header)
-{
- if (memcmp(s_header->magic, sbd_magic, sizeof(s_header->magic)) != 0) {
- cl_log(LOG_ERR, "Header magic does not match.");
- return -1;
- }
- if (s_header->version != sbd_version) {
- cl_log(LOG_ERR, "Header version does not match.");
- return -1;
- }
- if (s_header->sector_size != sector_size) {
- cl_log(LOG_ERR, "Header sector size does not match.");
- return -1;
- }
- return 0;
-}
-
-struct sector_header_s *
-header_get(struct sbd_context *st)
-{
- struct sector_header_s *s_header;
- s_header = sector_alloc();
-
- if (header_read(st, s_header) < 0) {
- cl_log(LOG_ERR, "Unable to read header from device %d", st->devfd);
- return NULL;
- }
-
- if (valid_header(s_header) < 0) {
- cl_log(LOG_ERR, "header on device %d is not valid.", st->devfd);
- return NULL;
- }
-
- /* cl_log(LOG_INFO, "Found version %d header with %d slots",
- s_header->version, s_header->slots); */
-
- return s_header;
-}
-
-int
-init_device(struct sbd_context *st)
-{
- struct sector_header_s *s_header;
- struct sector_node_s *s_node;
- struct sector_mbox_s *s_mbox;
- struct stat s;
- int i;
- int rc = 0;
-
- s_header = sector_alloc();
- s_node = sector_alloc();
- s_mbox = sector_alloc();
- memcpy(s_header->magic, sbd_magic, sizeof(s_header->magic));
- s_header->version = sbd_version;
- s_header->slots = 255;
- s_header->sector_size = sector_size;
- s_header->timeout_watchdog = timeout_watchdog;
- s_header->timeout_allocate = timeout_allocate;
- s_header->timeout_loop = timeout_loop;
- s_header->timeout_msgwait = timeout_msgwait;
-
- fstat(st->devfd, &s);
- /* printf("st_size = %ld, st_blksize = %ld, st_blocks = %ld\n",
- s.st_size, s.st_blksize, s.st_blocks); */
-
- cl_log(LOG_INFO, "Creating version %d header on device %d",
- s_header->version,
- st->devfd);
- fprintf(stdout, "Creating version %d header on device %d\n",
- s_header->version,
- st->devfd);
- if (header_write(st, s_header) < 0) {
- rc = -1; goto out;
- }
- cl_log(LOG_INFO, "Initializing %d slots on device %d",
- s_header->slots,
- st->devfd);
- fprintf(stdout, "Initializing %d slots on device %d\n",
- s_header->slots,
- st->devfd);
- for (i=0;i < s_header->slots;i++) {
- if (slot_write(st, i, s_node) < 0) {
- rc = -1; goto out;
- }
- if (mbox_write(st, i, s_mbox) < 0) {
- rc = -1; goto out;
- }
- }
-
-out: free(s_node);
- free(s_header);
- free(s_mbox);
- return(rc);
-}
-
-/* Check if there already is a slot allocated to said name; returns the
- * slot number. If not found, returns -1.
- * This is necessary because slots might not be continuous. */
-int
-slot_lookup(struct sbd_context *st, const struct sector_header_s *s_header, const char *name)
-{
- struct sector_node_s *s_node = NULL;
- int i;
- int rc = -1;
-
- if (!name) {
- cl_log(LOG_ERR, "slot_lookup(): No name specified.\n");
- goto out;
- }
-
- s_node = sector_alloc();
-
- for (i=0; i < s_header->slots; i++) {
- if (slot_read(st, i, s_node) < 0) {
- rc = -2; goto out;
- }
- if (s_node->in_use != 0) {
- if (strncasecmp(s_node->name, name,
- sizeof(s_node->name)) == 0) {
- DBGLOG(LOG_INFO, "%s owns slot %d", name, i);
- rc = i; goto out;
- }
- }
- }
-
-out: free(s_node);
- return rc;
-}
-
-int
-slot_unused(struct sbd_context *st, const struct sector_header_s *s_header)
-{
- struct sector_node_s *s_node;
- int i;
- int rc = -1;
-
- s_node = sector_alloc();
-
- for (i=0; i < s_header->slots; i++) {
- if (slot_read(st, i, s_node) < 0) {
- rc = -1; goto out;
- }
- if (s_node->in_use == 0) {
- rc = i; goto out;
- }
- }
-
-out: free(s_node);
- return rc;
-}
-
-
-int
-slot_allocate(struct sbd_context *st, const char *name)
-{
- struct sector_header_s *s_header = NULL;
- struct sector_node_s *s_node = NULL;
- struct sector_mbox_s *s_mbox = NULL;
- int i;
- int rc = 0;
-
- if (!name) {
- cl_log(LOG_ERR, "slot_allocate(): No name specified.\n");
- fprintf(stderr, "slot_allocate(): No name specified.\n");
- rc = -1; goto out;
- }
-
- s_header = header_get(st);
- if (!s_header) {
- rc = -1; goto out;
- }
-
- s_node = sector_alloc();
- s_mbox = sector_alloc();
-
- while (1) {
- i = slot_lookup(st, s_header, name);
- if ((i >= 0) || (i == -2)) {
- /* -1 is "no slot found", in which case we
- * proceed to allocate a new one.
- * -2 is "read error during lookup", in which
- * case we error out too
- * >= 0 is "slot already allocated" */
- rc = i; goto out;
- }
-
- i = slot_unused(st, s_header);
- if (i >= 0) {
- cl_log(LOG_INFO, "slot %d is unused - trying to own", i);
- fprintf(stdout, "slot %d is unused - trying to own\n", i);
- memset(s_node, 0, sizeof(*s_node));
- s_node->in_use = 1;
- strncpy(s_node->name, name, sizeof(s_node->name));
- if (slot_write(st, i, s_node) < 0) {
- rc = -1; goto out;
- }
- sleep(timeout_allocate);
- } else {
- cl_log(LOG_ERR, "No more free slots.");
- fprintf(stderr, "No more free slots.\n");
- rc = -1; goto out;
- }
- }
-
-out: free(s_node);
- free(s_header);
- free(s_mbox);
- return(rc);
-}
-
-int
-slot_list(struct sbd_context *st)
-{
- struct sector_header_s *s_header = NULL;
- struct sector_node_s *s_node = NULL;
- struct sector_mbox_s *s_mbox = NULL;
- int i;
- int rc = 0;
-
- s_header = header_get(st);
- if (!s_header) {
- rc = -1; goto out;
- }
-
- s_node = sector_alloc();
- s_mbox = sector_alloc();
-
- for (i=0; i < s_header->slots; i++) {
- if (slot_read(st, i, s_node) < 0) {
- rc = -1; goto out;
- }
- if (s_node->in_use > 0) {
- if (mbox_read(st, i, s_mbox) < 0) {
- rc = -1; goto out;
- }
- printf("%d\t%s\t%s\t%s\n",
- i, s_node->name, char2cmd(s_mbox->cmd),
- s_mbox->from);
- }
- }
-
-out: free(s_node);
- free(s_header);
- free(s_mbox);
- return rc;
-}
-
-int
-slot_msg(struct sbd_context *st, const char *name, const char *cmd)
-{
- struct sector_header_s *s_header = NULL;
- struct sector_mbox_s *s_mbox = NULL;
- int mbox;
- int rc = 0;
-
- if (!name || !cmd) {
- cl_log(LOG_ERR, "slot_msg(): No recipient / cmd specified.\n");
- rc = -1; goto out;
- }
-
- s_header = header_get(st);
- if (!s_header) {
- rc = -1; goto out;
- }
-
- if (strcmp(name, "LOCAL") == 0) {
- name = local_uname;
- }
-
- mbox = slot_lookup(st, s_header, name);
- if (mbox < 0) {
- cl_log(LOG_ERR, "slot_msg(): No slot found for %s.", name);
- rc = -1; goto out;
- }
-
- s_mbox = sector_alloc();
-
- s_mbox->cmd = cmd2char(cmd);
- if (s_mbox->cmd < 0) {
- cl_log(LOG_ERR, "slot_msg(): Invalid command %s.", cmd);
- rc = -1; goto out;
- }
-
- strncpy(s_mbox->from, local_uname, sizeof(s_mbox->from)-1);
-
- cl_log(LOG_INFO, "Writing %s to node slot %s",
- cmd, name);
- if (mbox_write_verify(st, mbox, s_mbox) < -1) {
- rc = -1; goto out;
- }
- if (strcasecmp(cmd, "exit") != 0) {
- cl_log(LOG_INFO, "Messaging delay: %d",
- (int)timeout_msgwait);
- sleep(timeout_msgwait);
- }
- cl_log(LOG_INFO, "%s successfully delivered to %s",
- cmd, name);
-
-out: free(s_mbox);
- free(s_header);
- return rc;
-}
-
-int
-slot_ping(struct sbd_context *st, const char *name)
-{
- struct sector_header_s *s_header = NULL;
- struct sector_mbox_s *s_mbox = NULL;
- int mbox;
- int waited = 0;
- int rc = 0;
-
- if (!name) {
- cl_log(LOG_ERR, "slot_ping(): No recipient specified.\n");
- rc = -1; goto out;
- }
-
- s_header = header_get(st);
- if (!s_header) {
- rc = -1; goto out;
- }
-
- if (strcmp(name, "LOCAL") == 0) {
- name = local_uname;
- }
-
- mbox = slot_lookup(st, s_header, name);
- if (mbox < 0) {
- cl_log(LOG_ERR, "slot_msg(): No slot found for %s.", name);
- rc = -1; goto out;
- }
-
- s_mbox = sector_alloc();
- s_mbox->cmd = SBD_MSG_TEST;
-
- strncpy(s_mbox->from, local_uname, sizeof(s_mbox->from)-1);
-
- DBGLOG(LOG_DEBUG, "Pinging node %s", name);
- if (mbox_write(st, mbox, s_mbox) < -1) {
- rc = -1; goto out;
- }
-
- rc = -1;
- while (waited <= timeout_msgwait) {
- if (mbox_read(st, mbox, s_mbox) < 0)
- break;
- if (s_mbox->cmd != SBD_MSG_TEST) {
- rc = 0;
- break;
- }
- sleep(1);
- waited++;
- }
-
- if (rc == 0) {
- cl_log(LOG_DEBUG, "%s successfully pinged.", name);
- } else {
- cl_log(LOG_ERR, "%s failed to ping.", name);
- }
-
-out: free(s_mbox);
- free(s_header);
- return rc;
-}
-
-void
-sysrq_init(void)
-{
- FILE* procf;
- int c;
- procf = fopen("/proc/sys/kernel/sysrq", "r");
- if (!procf) {
- cl_perror("cannot open /proc/sys/kernel/sysrq for read.");
- return;
- }
- fscanf(procf, "%d", &c);
- fclose(procf);
- if (c == 1)
- return;
- /* 8 for debugging dumps of processes,
- 128 for reboot/poweroff */
- c |= 136;
- procf = fopen("/proc/sys/kernel/sysrq", "w");
- if (!procf) {
- printf("cannot open /proc/sys/kernel/sysrq for write\n");
- return;
- }
- fprintf(procf, "%d", c);
- fclose(procf);
- return;
-}
-
-void
-sysrq_trigger(char t)
-{
- FILE *procf;
-
- procf = fopen("/proc/sysrq-trigger", "a");
- if (!procf) {
- cl_perror("Opening sysrq-trigger failed.");
- return;
- }
- cl_log(LOG_INFO, "sysrq-trigger: %c\n", t);
- fprintf(procf, "%c\n", t);
- fclose(procf);
- return;
-}
-
-void
-do_crashdump(void)
-{
- if (timeout_watchdog_crashdump) {
- timeout_watchdog = timeout_watchdog_crashdump;
- watchdog_init_interval();
- watchdog_tickle();
- }
- sysrq_trigger('c');
- /* is it possible to reach the following line? */
- cl_reboot(5, "sbd is triggering crashdumping");
- exit(1);
-}
-
-void
-do_reset(void)
-{
- if (debug_mode == 2) {
- cl_log(LOG_ERR, "Skipping request to suicide due to DEBUG MODE!");
- watchdog_close();
- exit(0);
- }
- if (debug_mode == 1) {
- cl_log(LOG_ERR, "Request to suicide changed to kdump due to DEBUG MODE!");
- watchdog_close();
- sysrq_trigger('c');
- exit(0);
- }
- sysrq_trigger('b');
- cl_reboot(5, "sbd is self-fencing (reset)");
- sleep(timeout_watchdog * 2);
- exit(1);
-}
-
-void
-do_off(void)
-{
- if (debug_mode == 2) {
- cl_log(LOG_ERR, "Skipping request to power-off due to DEBUG MODE!");
- watchdog_close();
- exit(0);
- }
- if (debug_mode == 1) {
- cl_log(LOG_ERR, "Request to power-off changed to kdump due to DEBUG MODE!");
- watchdog_close();
- sysrq_trigger('c');
- exit(0);
- }
- sysrq_trigger('o');
- cl_reboot(5, "sbd is self-fencing (power-off)");
- sleep(timeout_watchdog * 2);
- exit(1);
-}
-
-pid_t
-make_daemon(void)
-{
- pid_t pid;
- const char * devnull = "/dev/null";
-
- pid = fork();
- if (pid < 0) {
- cl_log(LOG_ERR, "%s: could not start daemon\n",
- cmdname);
- cl_perror("fork");
- exit(1);
- }else if (pid > 0) {
- return pid;
- }
-
- cl_log_enable_stderr(FALSE);
-
- /* This is the child; ensure privileges have not been lost. */
- maximize_priority();
-
- umask(022);
- close(0);
- (void)open(devnull, O_RDONLY);
- close(1);
- (void)open(devnull, O_WRONLY);
- close(2);
- (void)open(devnull, O_WRONLY);
- cl_cdtocoredir();
- return 0;
-}
-
-int
-header_dump(struct sbd_context *st)
-{
- struct sector_header_s *s_header;
- s_header = header_get(st);
- if (s_header == NULL)
- return -1;
-
- printf("Header version : %u\n", s_header->version);
- printf("Number of slots : %u\n", s_header->slots);
- printf("Sector size : %lu\n",
- (unsigned long)s_header->sector_size);
- printf("Timeout (watchdog) : %lu\n",
- (unsigned long)s_header->timeout_watchdog);
- printf("Timeout (allocate) : %lu\n",
- (unsigned long)s_header->timeout_allocate);
- printf("Timeout (loop) : %lu\n",
- (unsigned long)s_header->timeout_loop);
- printf("Timeout (msgwait) : %lu\n",
- (unsigned long)s_header->timeout_msgwait);
- return 0;
-}
-
-void
-sbd_get_uname(void)
-{
- struct utsname uname_buf;
- int i;
-
- if (uname(&uname_buf) < 0) {
- cl_perror("uname() failed?");
- exit(1);
- }
-
- local_uname = strdup(uname_buf.nodename);
-
- for (i = 0; i < strlen(local_uname); i++)
- local_uname[i] = tolower(local_uname[i]);
-}
-
diff -r 9888c2e4353b -r 31225c028d4f sbd-md.c
--- a/sbd-md.c Mon May 28 20:49:01 2012 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1013 +0,0 @@
-/*
- * Copyright (C) 2008 Lars Marowsky-Bree <lmb@suse.de>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "sbd.h"
-
-struct servants_list_item *servants_leader = NULL;
-
-static int servant_count = 0;
-static int servant_restart_interval = 5;
-static int servant_restart_count = 1;
-static int servant_inform_parent = 0;
-static int check_pcmk = 0;
-
-int quorum_write(int good_servants)
-{
- return (good_servants > servant_count/2);
-}
-
-int quorum_read(int good_servants)
-{
- if (servant_count >= 3)
- return (good_servants > servant_count/2);
- else
- return (good_servants >= 1);
-}
-
-int assign_servant(const char* devname, functionp_t functionp, const void* argp)
-{
- pid_t pid = 0;
- int rc = 0;
-
- pid = fork();
- if (pid == 0) { /* child */
- maximize_priority();
- rc = (*functionp)(devname, argp);
- if (rc == -1)
- exit(1);
- else
- exit(0);
- } else if (pid != -1) { /* parent */
- return pid;
- } else {
- cl_log(LOG_ERR,"Failed to fork servant");
- exit(1);
- }
-}
-
-int init_devices()
-{
- int rc = 0;
- struct sbd_context *st;
- struct servants_list_item *s;
-
- for (s = servants_leader; s; s = s->next) {
- fprintf(stdout, "Initializing device %s\n",
- s->devname);
- st = open_device(s->devname);
- if (!st) {
- return -1;
- }
- rc = init_device(st);
- close_device(st);
- if (rc == -1) {
- fprintf(stderr, "Failed to init device %s\n", s->devname);
- return rc;
- }
- fprintf(stdout, "Device %s is initialized.\n", s->devname);
- }
- return 0;
-}
-
-int slot_msg_wrapper(const char* devname, const void* argp)
-{
- int rc = 0;
- struct sbd_context *st;
- const struct slot_msg_arg_t* arg = (const struct slot_msg_arg_t*)argp;
-
- st = open_device(devname);
- if (!st)
- return -1;
- cl_log(LOG_INFO, "Delivery process handling %s",
- devname);
- rc = slot_msg(st, arg->name, arg->msg);
- close_device(st);
- return rc;
-}
-
-int slot_ping_wrapper(const char* devname, const void* argp)
-{
- int rc = 0;
- const char* name = (const char*)argp;
- struct sbd_context *st;
-
- st = open_device(devname);
- if (!st)
- return -1;
- rc = slot_ping(st, name);
- close_device(st);
- return rc;
-}
-
-int allocate_slots(const char *name)
-{
- int rc = 0;
- struct sbd_context *st;
- struct servants_list_item *s;
-
- for (s = servants_leader; s; s = s->next) {
- fprintf(stdout, "Trying to allocate slot for %s on device %s.\n",
- name,
- s->devname);
- st = open_device(s->devname);
- if (!st) {
- return -1;
- }
- rc = slot_allocate(st, name);
- close_device(st);
- if (rc < 0)
- return rc;
- fprintf(stdout, "Slot for %s has been allocated on %s.\n",
- name,
- s->devname);
- }
- return 0;
-}
-
-int list_slots()
-{
- int rc = 0;
- struct servants_list_item *s;
- struct sbd_context *st;
-
- for (s = servants_leader; s; s = s->next) {
- st = open_device(s->devname);
- if (!st) {
- fprintf(stdout, "== disk %s unreadable!\n", s->devname);
- continue;
- }
- rc = slot_list(st);
- close_device(st);
- if (rc == -1) {
- fprintf(stdout, "== Slots on disk %s NOT dumped\n", s->devname);
- }
- }
- return 0;
-}
-
-int ping_via_slots(const char *name)
-{
- int sig = 0;
- pid_t pid = 0;
- int status = 0;
- int servants_finished = 0;
- sigset_t procmask;
- siginfo_t sinfo;
- struct servants_list_item *s;
-
- sigemptyset(&procmask);
- sigaddset(&procmask, SIGCHLD);
- sigprocmask(SIG_BLOCK, &procmask, NULL);
-
- for (s = servants_leader; s; s = s->next) {
- s->pid = assign_servant(s->devname, &slot_ping_wrapper, (const void*)name);
- }
-
- while (servants_finished < servant_count) {
- sig = sigwaitinfo(&procmask, &sinfo);
- if (sig == SIGCHLD) {
- while ((pid = wait(&status))) {
- if (pid == -1 && errno == ECHILD) {
- break;
- } else {
- s = lookup_servant_by_pid(pid);
- if (s) {
- servants_finished++;
- }
- }
- }
- }
- }
- return 0;
-}
-
-/* This is a bit hackish, but the easiest way to rewire all process
- * exits to send the desired signal to the parent. */
-void servant_exit(void)
-{
- pid_t ppid;
- union sigval signal_value;
-
- ppid = getppid();
- if (servant_inform_parent) {
- memset(&signal_value, 0, sizeof(signal_value));
- sigqueue(ppid, SIG_IO_FAIL, signal_value);
- }
-}
-
-int servant(const char *diskname, const void* argp)
-{
- struct sector_mbox_s *s_mbox = NULL;
- int mbox;
- int rc = 0;
- time_t t0, t1, latency;
- union sigval signal_value;
- sigset_t servant_masks;
- struct sbd_context *st;
- pid_t ppid;
-
- if (!diskname) {
- cl_log(LOG_ERR, "Empty disk name %s.", diskname);
- return -1;
- }
-
- cl_log(LOG_INFO, "Servant starting for device %s", diskname);
-
- /* Block most of the signals */
- sigfillset(&servant_masks);
- sigdelset(&servant_masks, SIGKILL);
- sigdelset(&servant_masks, SIGFPE);
- sigdelset(&servant_masks, SIGILL);
- sigdelset(&servant_masks, SIGSEGV);
- sigdelset(&servant_masks, SIGBUS);
- sigdelset(&servant_masks, SIGALRM);
- /* FIXME: check error */
- sigprocmask(SIG_SETMASK, &servant_masks, NULL);
-
- atexit(servant_exit);
- servant_inform_parent = 1;
-
- st = open_device(diskname);
- if (!st) {
- return -1;
- }
-
- mbox = slot_allocate(st, local_uname);
- if (mbox < 0) {
- cl_log(LOG_ERR,
- "No slot allocated, and automatic allocation failed for disk %s.",
- diskname);
- rc = -1;
- goto out;
- }
- DBGLOG(LOG_INFO, "Monitoring slot %d on disk %s", mbox, diskname);
- set_proc_title("sbd: watcher: %s - slot: %d", diskname, mbox);
-
- s_mbox = sector_alloc();
- if (mbox_write(st, mbox, s_mbox) < 0) {
- rc = -1;
- goto out;
- }
-
- memset(&signal_value, 0, sizeof(signal_value));
-
- while (1) {
- t0 = time(NULL);
- sleep(timeout_loop);
-
- ppid = getppid();
-
- if (ppid == 1) {
- /* Our parent died unexpectedly. Triggering
- * self-fence. */
- do_reset();
- }
-
- if (mbox_read(st, mbox, s_mbox) < 0) {
- cl_log(LOG_ERR, "mbox read failed in servant.");
- exit(1);
- }
-
- if (s_mbox->cmd > 0) {
- cl_log(LOG_INFO,
- "Received command %s from %s on disk %s",
- char2cmd(s_mbox->cmd), s_mbox->from, diskname);
-
- switch (s_mbox->cmd) {
- case SBD_MSG_TEST:
- memset(s_mbox, 0, sizeof(*s_mbox));
- mbox_write(st, mbox, s_mbox);
- sigqueue(ppid, SIG_TEST, signal_value);
- break;
- case SBD_MSG_RESET:
- do_reset();
- break;
- case SBD_MSG_OFF:
- do_off();
- break;
- case SBD_MSG_EXIT:
- sigqueue(ppid, SIG_EXITREQ, signal_value);
- break;
- case SBD_MSG_CRASHDUMP:
- do_crashdump();
- break;
- default:
- /* FIXME:
- An "unknown" message might result
- from a partial write.
- log it and clear the slot.
- */
- cl_log(LOG_ERR, "Unknown message on disk %s",
- diskname);
- memset(s_mbox, 0, sizeof(*s_mbox));
- mbox_write(st, mbox, s_mbox);
- break;
- }
- }
- sigqueue(ppid, SIG_LIVENESS, signal_value);
-
- t1 = time(NULL);
- latency = t1 - t0;
- if (timeout_watchdog_warn && (latency > timeout_watchdog_warn)) {
- cl_log(LOG_WARNING,
- "Latency: %d exceeded threshold %d on disk %s",
- (int)latency, (int)timeout_watchdog_warn,
- diskname);
- } else if (debug) {
- DBGLOG(LOG_INFO, "Latency: %d on disk %s", (int)latency,
- diskname);
- }
- }
- out:
- free(s_mbox);
- close_device(st);
- if (rc == 0) {
- servant_inform_parent = 0;
- }
- return rc;
-}
-
-void recruit_servant(const char *devname, pid_t pid)
-{
- struct servants_list_item *s = servants_leader;
- struct servants_list_item *newbie;
-
- newbie = malloc(sizeof(*newbie));
- if (!newbie) {
- fprintf(stderr, "malloc failed in recruit_servant.");
- exit(1);
- }
- memset(newbie, 0, sizeof(*newbie));
- newbie->devname = strdup(devname);
- newbie->pid = pid;
-
- if (!s) {
- servants_leader = newbie;
- } else {
- while (s->next)
- s = s->next;
- s->next = newbie;
- }
-
- servant_count++;
-}
-
-struct servants_list_item *lookup_servant_by_dev(const char *devname)
-{
- struct servants_list_item *s;
-
- for (s = servants_leader; s; s = s->next) {
- if (strncasecmp(s->devname, devname, strlen(s->devname)))
- break;
- }
- return s;
-}
-
-struct servants_list_item *lookup_servant_by_pid(pid_t pid)
-{
- struct servants_list_item *s;
-
- for (s = servants_leader; s; s = s->next) {
- if (s->pid == pid)
- break;
- }
- return s;
-}
-
-int check_all_dead(void)
-{
- struct servants_list_item *s;
- int r = 0;
- union sigval svalue;
-
- for (s = servants_leader; s; s = s->next) {
- if (s->pid != 0) {
- r = sigqueue(s->pid, 0, svalue);
- if (r == -1 && errno == ESRCH)
- continue;
- return 0;
- }
- }
- return 1;
-}
-
-
-void servant_start(struct servants_list_item *s)
-{
- int r = 0;
- union sigval svalue;
-
- if (s->pid != 0) {
- r = sigqueue(s->pid, 0, svalue);
- if ((r != -1 || errno != ESRCH))
- return;
- }
- s->restarts++;
- if (strcmp("pcmk",s->devname) == 0) {
- DBGLOG(LOG_INFO, "Starting Pacemaker servant");
- s->pid = assign_servant(s->devname, servant_pcmk, NULL);
- } else {
- DBGLOG(LOG_INFO, "Starting servant for device %s",
- s->devname);
- s->pid = assign_servant(s->devname, servant, NULL);
- }
-
- clock_gettime(CLOCK_MONOTONIC, &s->t_started);
- return;
-}
-
-void servants_start(void)
-{
- struct servants_list_item *s;
-
- for (s = servants_leader; s; s = s->next) {
- s->restarts = 0;
- servant_start(s);
- }
-}
-
-void servants_kill(void)
-{
- struct servants_list_item *s;
- union sigval svalue;
-
- for (s = servants_leader; s; s = s->next) {
- if (s->pid != 0)
- sigqueue(s->pid, SIGKILL, svalue);
- }
-}
-
-int check_timeout_inconsistent(void)
-{
- struct sbd_context *st;
- struct sector_header_s *hdr_cur = 0, *hdr_last = 0;
- struct servants_list_item* s;
- int inconsistent = 0;
-
- for (s = servants_leader; s; s = s->next) {
- st = open_device(s->devname);
- if (!st)
- continue;
- hdr_cur = header_get(st);
- close_device(st);
- if (!hdr_cur)
- continue;
- if (hdr_last) {
- if (hdr_last->timeout_watchdog != hdr_cur->timeout_watchdog
- || hdr_last->timeout_allocate != hdr_cur->timeout_allocate
- || hdr_last->timeout_loop != hdr_cur->timeout_loop
- || hdr_last->timeout_msgwait != hdr_cur->timeout_msgwait)
- inconsistent = 1;
- free(hdr_last);
- }
- hdr_last = hdr_cur;
- }
-
- if (hdr_last) {
- timeout_watchdog = hdr_last->timeout_watchdog;
- timeout_allocate = hdr_last->timeout_allocate;
- timeout_loop = hdr_last->timeout_loop;
- timeout_msgwait = hdr_last->timeout_msgwait;
- } else {
- cl_log(LOG_ERR, "No devices were available at start-up.");
- exit(1);
- }
-
- free(hdr_last);
- return inconsistent;
-}
-
-inline void cleanup_servant_by_pid(pid_t pid)
-{
- struct servants_list_item* s;
-
- s = lookup_servant_by_pid(pid);
- if (s) {
- cl_log(LOG_WARNING, "Servant for %s (pid: %i) has terminated",
- s->devname, s->pid);
- s->pid = 0;
- } else {
- /* This most likely is a stray signal from somewhere, or
- * a SIGCHLD for a process that has previously
- * explicitly disconnected. */
- DBGLOG(LOG_INFO, "cleanup_servant: Nothing known about pid %i",
- pid);
- }
-}
-
-int inquisitor_decouple(void)
-{
- pid_t ppid = getppid();
- union sigval signal_value;
-
- /* During start-up, we only arm the watchdog once we've got
- * quorum at least once. */
- if (watchdog_use) {
- if (watchdog_init() < 0) {
- return -1;
- }
- }
-
- if (ppid > 1) {
- sigqueue(ppid, SIG_LIVENESS, signal_value);
- }
- return 0;
-}
-
-void inquisitor_child(void)
-{
- int sig, pid;
- sigset_t procmask;
- siginfo_t sinfo;
- int status;
- struct timespec timeout;
- int exiting = 0;
- int decoupled = 0;
- int pcmk_healthy = 0;
- time_t latency;
- struct timespec t_last_tickle, t_now;
- struct servants_list_item* s;
-
- if (debug_mode) {
- cl_log(LOG_ERR, "DEBUG MODE IS ACTIVE - DO NOT RUN IN PRODUCTION!");
- }
-
- set_proc_title("sbd: inquisitor");
-
- sigemptyset(&procmask);
- sigaddset(&procmask, SIGCHLD);
- sigaddset(&procmask, SIG_LIVENESS);
- sigaddset(&procmask, SIG_EXITREQ);
- sigaddset(&procmask, SIG_TEST);
- sigaddset(&procmask, SIG_IO_FAIL);
- sigaddset(&procmask, SIG_PCMK_UNHEALTHY);
- sigaddset(&procmask, SIG_RESTART);
- sigaddset(&procmask, SIGUSR1);
- sigaddset(&procmask, SIGUSR2);
- sigprocmask(SIG_BLOCK, &procmask, NULL);
-
- /* We only want this to have an effect during watch right now;
- * pinging and fencing would be too confused */
- if (check_pcmk) {
- recruit_servant("pcmk", 0);
- servant_count--;
- }
-
- servants_start();
-
- timeout.tv_sec = timeout_loop;
- timeout.tv_nsec = 0;
- clock_gettime(CLOCK_MONOTONIC, &t_last_tickle);
-
- while (1) {
- int good_servants = 0;
-
- sig = sigtimedwait(&procmask, &sinfo, &timeout);
-
- clock_gettime(CLOCK_MONOTONIC, &t_now);
-
- if (sig == SIG_EXITREQ) {
- servants_kill();
- watchdog_close();
- exiting = 1;
- } else if (sig == SIGCHLD) {
- while ((pid = waitpid(-1, &status, WNOHANG))) {
- if (pid == -1 && errno == ECHILD) {
- break;
- } else {
- cleanup_servant_by_pid(pid);
- }
- }
- } else if (sig == SIG_PCMK_UNHEALTHY) {
- s = lookup_servant_by_pid(sinfo.si_pid);
- if (s && strcmp(s->devname, "pcmk") == 0) {
- if (pcmk_healthy != 0) {
- cl_log(LOG_WARNING, "Pacemaker health check: UNHEALTHY");
- }
- pcmk_healthy = 0;
- clock_gettime(CLOCK_MONOTONIC, &s->t_last);
- } else {
- cl_log(LOG_WARNING, "Ignoring SIG_PCMK_UNHEALTHY from unknown source");
- }
- } else if (sig == SIG_IO_FAIL) {
- s = lookup_servant_by_pid(sinfo.si_pid);
- if (s) {
- DBGLOG(LOG_INFO, "Servant for %s requests to be disowned",
- s->devname);
- cleanup_servant_by_pid(sinfo.si_pid);
- }
- } else if (sig == SIG_LIVENESS) {
- s = lookup_servant_by_pid(sinfo.si_pid);
- if (s) {
- if (strcmp(s->devname, "pcmk") == 0) {
- if (pcmk_healthy != 1) {
- cl_log(LOG_INFO, "Pacemaker health check: OK");
- }
- pcmk_healthy = 1;
- };
- clock_gettime(CLOCK_MONOTONIC, &s->t_last);
-
- }
- } else if (sig == SIG_TEST) {
- } else if (sig == SIGUSR1) {
- if (exiting)
- continue;
- servants_start();
- }
-
- if (exiting) {
- if (check_all_dead())
- exit(0);
- else
- continue;
- }
-
- good_servants = 0;
- for (s = servants_leader; s; s = s->next) {
- int age = t_now.tv_sec - s->t_last.tv_sec;
-
- if (!s->t_last.tv_sec)
- continue;
-
- if (age < (int)(timeout_io+timeout_loop)) {
- if (strcmp(s->devname, "pcmk") != 0) {
- good_servants++;
- }
- s->outdated = 0;
- } else if (!s->outdated) {
- if (strcmp(s->devname, "pcmk") == 0) {
- /* If the state is outdated, we
- * override the last reported
- * state */
- pcmk_healthy = 0;
- cl_log(LOG_WARNING, "Pacemaker state outdated (age: %d)",
- age);
- } else if (!s->restart_blocked) {
- cl_log(LOG_WARNING, "Servant for %s outdated (age: %d)",
- s->devname, age);
- }
- s->outdated = 1;
- }
- }
-
- if (quorum_read(good_servants) || pcmk_healthy) {
- if (!decoupled) {
- if (inquisitor_decouple() < 0) {
- servants_kill();
- exiting = 1;
- continue;
- } else {
- decoupled = 1;
- }
- }
-
- watchdog_tickle();
- clock_gettime(CLOCK_MONOTONIC, &t_last_tickle);
- }
-
- /* Note that this can actually be negative, since we set
- * last_tickle after we set now. */
- latency = t_now.tv_sec - t_last_tickle.tv_sec;
- if (timeout_watchdog && (latency > (int)timeout_watchdog)) {
- if (!decoupled) {
- /* We're still being watched by our
- * parent. We don't fence, but exit. */
- cl_log(LOG_ERR, "SBD: Not enough votes to proceed. Aborting start-up.");
- servants_kill();
- exiting = 1;
- continue;
- }
- if (debug_mode < 2) {
- /* At level 2, we do nothing, but expect
- * things to eventually return to
- * normal. */
- do_reset();
- } else {
- cl_log(LOG_ERR, "SBD: DEBUG MODE: Would have fenced due to timeout!");
- }
- }
- if (timeout_watchdog_warn && (latency > (int)timeout_watchdog_warn)) {
- cl_log(LOG_WARNING,
- "Latency: No liveness for %d s exceeds threshold of %d s (healthy servants: %d)",
- (int)latency, (int)timeout_watchdog_warn, good_servants);
- }
-
- for (s = servants_leader; s; s = s->next) {
- int age = t_now.tv_sec - s->t_started.tv_sec;
-
- if (age > servant_restart_interval) {
- s->restarts = 0;
- s->restart_blocked = 0;
- }
-
- if (servant_restart_count
- && (s->restarts >= servant_restart_count)
- && !s->restart_blocked) {
- if (servant_restart_count > 1) {
- cl_log(LOG_WARNING, "Max retry count (%d) reached: not restarting servant for %s",
- (int)servant_restart_count, s->devname);
- }
- s->restart_blocked = 1;
- }
-
- if (!s->restart_blocked) {
- servant_start(s);
- }
- }
- }
- /* not reached */
- exit(0);
-}
-
-int inquisitor(void)
-{
- int sig, pid, inquisitor_pid;
- int status;
- sigset_t procmask;
- siginfo_t sinfo;
-
- /* Where's the best place for sysrq init ?*/
- sysrq_init();
-
- sigemptyset(&procmask);
- sigaddset(&procmask, SIGCHLD);
- sigaddset(&procmask, SIG_LIVENESS);
- sigprocmask(SIG_BLOCK, &procmask, NULL);
-
- if (check_timeout_inconsistent() == 1) {
- fprintf(stderr, "Timeout settings are different across SBD devices!\n");
- fprintf(stderr, "You have to correct them and re-start SBD again.\n");
- return -1;
- }
-
- inquisitor_pid = make_daemon();
- if (inquisitor_pid == 0) {
- inquisitor_child();
- }
-
- /* We're the parent. Wait for a happy signal from our child
- * before we proceed - we either get "SIG_LIVENESS" when the
- * inquisitor has completed the first successful round, or
- * ECHLD when it exits with an error. */
-
- while (1) {
- sig = sigwaitinfo(&procmask, &sinfo);
- if (sig == SIGCHLD) {
- while ((pid = waitpid(-1, &status, WNOHANG))) {
- if (pid == -1 && errno == ECHILD) {
- break;
- }
- /* We got here because the inquisitor
- * did not succeed. */
- return -1;
- }
- } else if (sig == SIG_LIVENESS) {
- /* Inquisitor started up properly. */
- return 0;
- } else {
- fprintf(stderr, "Nobody expected the spanish inquisition!\n");
- continue;
- }
- }
- /* not reached */
- return -1;
-}
-
-int messenger(const char *name, const char *msg)
-{
- int sig = 0;
- pid_t pid = 0;
- int status = 0;
- int servants_finished = 0;
- int successful_delivery = 0;
- sigset_t procmask;
- siginfo_t sinfo;
- struct servants_list_item *s;
- struct slot_msg_arg_t slot_msg_arg = {name, msg};
-
- sigemptyset(&procmask);
- sigaddset(&procmask, SIGCHLD);
- sigprocmask(SIG_BLOCK, &procmask, NULL);
-
- for (s = servants_leader; s; s = s->next) {
- s->pid = assign_servant(s->devname, &slot_msg_wrapper, &slot_msg_arg);
- }
-
- while (!(quorum_write(successful_delivery) ||
- (servants_finished == servant_count))) {
- sig = sigwaitinfo(&procmask, &sinfo);
- if (sig == SIGCHLD) {
- while ((pid = waitpid(-1, &status, WNOHANG))) {
- if (pid == -1 && errno == ECHILD) {
- break;
- } else {
- servants_finished++;
- if (WIFEXITED(status)
- && WEXITSTATUS(status) == 0) {
- DBGLOG(LOG_INFO, "Process %d succeeded.",
- (int)pid);
- successful_delivery++;
- } else {
- cl_log(LOG_WARNING, "Process %d failed to deliver!",
- (int)pid);
- }
- }
- }
- }
- }
- if (quorum_write(successful_delivery)) {
- cl_log(LOG_INFO, "Message successfully delivered.");
- return 0;
- } else {
- cl_log(LOG_ERR, "Message is not delivered via more then a half of devices");
- return -1;
- }
-}
-
-int dump_headers(void)
-{
- int rc = 0;
- struct servants_list_item *s = servants_leader;
- struct sbd_context *st;
-
- for (s = servants_leader; s; s = s->next) {
- fprintf(stdout, "==Dumping header on disk %s\n", s->devname);
- st = open_device(s->devname);
- if (!st) {
- fprintf(stdout, "== disk %s unreadable!\n", s->devname);
- continue;
- }
-
- rc = header_dump(st);
- close_device(st);
-
- if (rc == -1) {
- fprintf(stdout, "==Header on disk %s NOT dumped\n", s->devname);
- } else {
- fprintf(stdout, "==Header on disk %s is dumped\n", s->devname);
- }
- }
- return rc;
-}
-
-int main(int argc, char **argv, char **envp)
-{
- int exit_status = 0;
- int c;
-
- if ((cmdname = strrchr(argv[0], '/')) == NULL) {
- cmdname = argv[0];
- } else {
- ++cmdname;
- }
-
- cl_log_set_entity(cmdname);
- cl_log_enable_stderr(0);
- cl_log_set_facility(LOG_DAEMON);
-
- sbd_get_uname();
-
- while ((c = getopt(argc, argv, "C:DPRTWZhvw:d:n:1:2:3:4:5:t:I:F:")) != -1) {
- switch (c) {
- case 'D':
- break;
- case 'Z':
- debug_mode++;
- cl_log(LOG_INFO, "Debug mode now at level %d", (int)debug_mode);
- break;
- case 'R':
- skip_rt = 1;
- cl_log(LOG_INFO, "Realtime mode deactivated.");
- break;
- case 'v':
- debug = 1;
- cl_log(LOG_INFO, "Verbose mode enabled.");
- break;
- case 'T':
- watchdog_set_timeout = 0;
- cl_log(LOG_INFO, "Setting watchdog timeout disabled; using defaults.");
- break;
- case 'W':
- watchdog_use = 1;
- cl_log(LOG_INFO, "Watchdog enabled.");
- break;
- case 'w':
- watchdogdev = optarg;
- break;
- case 'd':
- recruit_servant(optarg, 0);
- break;
- case 'P':
- check_pcmk = 1;
- break;
- case 'n':
- local_uname = optarg;
- cl_log(LOG_INFO, "Overriding local hostname to %s", local_uname);
- break;
- case 'C':
- timeout_watchdog_crashdump = atoi(optarg);
- cl_log(LOG_INFO, "Setting crashdump watchdog timeout to %d",
- (int)timeout_watchdog_crashdump);
- break;
- case '1':
- timeout_watchdog = atoi(optarg);
- break;
- case '2':
- timeout_allocate = atoi(optarg);
- break;
- case '3':
- timeout_loop = atoi(optarg);
- break;
- case '4':
- timeout_msgwait = atoi(optarg);
- break;
- case '5':
- timeout_watchdog_warn = atoi(optarg);
- cl_log(LOG_INFO, "Setting latency warning to %d",
- (int)timeout_watchdog_warn);
- break;
- case 't':
- servant_restart_interval = atoi(optarg);
- cl_log(LOG_INFO, "Setting servant restart interval to %d",
- (int)servant_restart_interval);
- break;
- case 'I':
- timeout_io = atoi(optarg);
- cl_log(LOG_INFO, "Setting IO timeout to %d",
- (int)timeout_io);
- break;
- case 'F':
- servant_restart_count = atoi(optarg);
- cl_log(LOG_INFO, "Servant restart count set to %d",
- (int)servant_restart_count);
- break;
- case 'h':
- usage();
- return (0);
- default:
- exit_status = -1;
- goto out;
- break;
- }
- }
-
- if (servant_count < 1 || servant_count > 3) {
- fprintf(stderr, "You must specify 1 to 3 devices via the -d option.\n");
- exit_status = -1;
- goto out;
- }
-
- /* There must at least be one command following the options: */
- if ((argc - optind) < 1) {
- fprintf(stderr, "Not enough arguments.\n");
- exit_status = -1;
- goto out;
- }
-
- if (init_set_proc_title(argc, argv, envp) < 0) {
- fprintf(stderr, "Allocation of proc title failed.");
- exit(1);
- }
-
- maximize_priority();
-
- if (strcmp(argv[optind], "create") == 0) {
- exit_status = init_devices();
- } else if (strcmp(argv[optind], "dump") == 0) {
- exit_status = dump_headers();
- } else if (strcmp(argv[optind], "allocate") == 0) {
- exit_status = allocate_slots(argv[optind + 1]);
- } else if (strcmp(argv[optind], "list") == 0) {
- exit_status = list_slots();
- } else if (strcmp(argv[optind], "message") == 0) {
- exit_status = messenger(argv[optind + 1], argv[optind + 2]);
- } else if (strcmp(argv[optind], "ping") == 0) {
- exit_status = ping_via_slots(argv[optind + 1]);
- } else if (strcmp(argv[optind], "watch") == 0) {
- exit_status = inquisitor();
- } else {
- exit_status = -1;
- }
-
-out:
- if (exit_status < 0) {
- usage();
- return (1);
- }
- return (0);
-}
diff -r 9888c2e4353b -r 31225c028d4f sbd-pacemaker.c
--- a/sbd-pacemaker.c Mon May 28 20:49:01 2012 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,500 +0,0 @@
-
-/*
- * Copyright (C) 2012 Lars Marowsky-Bree <lmb@suse.com>
- *
- * Based on crm_mon.c, which was:
- * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/* TODO list:
- *
- * - Trying to shutdown a node if no devices are up will fail, since SBD
- * currently uses a message via the disk to achieve this.
- *
- * - Shutting down cluster nodes while the majority of devices is down
- * will eventually take the cluster below the quorum threshold, at which
- * time the remaining cluster nodes will all immediately suicide.
- *
- * - With the CIB refreshed every timeout_loop seconds, do we still need
- * to watch for CIB update notifications or can that be removed?
- *
- */
-
-#include "sbd.h"
-
-#include <sys/param.h>
-
-#include <crm/crm.h>
-
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#include <stdlib.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <libgen.h>
-#include <sys/utsname.h>
-
-#include <crm_config.h>
-#include <crm/msg_xml.h>
-#include <crm/common/util.h>
-#include <crm/common/xml.h>
-#include <crm/common/ipc.h>
-#include <crm/common/mainloop.h>
-#ifdef CHECK_AIS
-#include <crm/cluster/stack.h>
-#include <crm/common/cluster.h>
-#endif
-#include <crm/cib.h>
-#include <crm/pengine/status.h>
-
-void clean_up(int rc);
-void crm_diff_update(const char *event, xmlNode * msg);
-gboolean mon_refresh_state(gpointer user_data);
-int cib_connect(gboolean full);
-void set_pcmk_health(int healthy);
-void notify_parent(void);
-
-int reconnect_msec = 5000;
-GMainLoop *mainloop = NULL;
-guint timer_id_reconnect = 0;
-guint timer_id_notify = 0;
-
-int pcmk_healthy = 0;
-
-#ifdef CHECK_AIS
-guint timer_id_ais = 0;
-enum cluster_type_e cluster_stack = pcmk_cluster_unknown;
-int local_id = 0;
-struct timespec t_last_quorum;
-#endif
-
-cib_t *cib = NULL;
-xmlNode *current_cib = NULL;
-
-long last_refresh = 0;
-crm_trigger_t *refresh_trigger = NULL;
-
-static gboolean
-mon_timer_popped(gpointer data)
-{
- int rc = cib_ok;
-
- if (timer_id_reconnect > 0) {
- g_source_remove(timer_id_reconnect);
- }
-
- rc = cib_connect(TRUE);
-
- if (rc != cib_ok) {
- timer_id_reconnect = g_timeout_add(reconnect_msec, mon_timer_popped, NULL);
- set_pcmk_health(0);
- }
- return FALSE;
-}
-
-static void
-mon_cib_connection_destroy(gpointer user_data)
-{
- if (cib) {
- set_pcmk_health(0);
- /* Reconnecting */
- cib->cmds->signoff(cib);
- timer_id_reconnect = g_timeout_add(reconnect_msec, mon_timer_popped, NULL);
- }
- return;
-}
-
-static gboolean
-mon_timer_notify(gpointer data)
-{
- if (timer_id_notify > 0) {
- g_source_remove(timer_id_notify);
- }
-
- /* TODO - do we really want to do this every loop interval? Lets
- * check how much CPU that takes ... */
- if (1) {
- free_xml(current_cib);
- current_cib = get_cib_copy(cib);
- mon_refresh_state(NULL);
- } else {
- notify_parent();
- }
-
- timer_id_notify = g_timeout_add(timeout_loop * 1000, mon_timer_notify, NULL);
- return FALSE;
-}
-
-/*
- * Mainloop signal handler.
- */
-static void
-mon_shutdown(int nsig)
-{
- clean_up(0);
-}
-
-int
-cib_connect(gboolean full)
-{
- int rc = cib_ok;
-
- CRM_CHECK(cib != NULL, return cib_missing);
-
- if (cib->state != cib_connected_query && cib->state != cib_connected_command) {
-
- rc = cib->cmds->signon(cib, crm_system_name, cib_query);
-
- if (rc != cib_ok) {
- return rc;
- }
-
- current_cib = get_cib_copy(cib);
- mon_refresh_state(NULL);
-
- if (full) {
- if (rc == cib_ok) {
- rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy);
- if (rc == cib_NOTSUPPORTED) {
- /* Notification setup failed, won't be able to reconnect after failure */
- rc = cib_ok;
- }
- }
-
- if (rc == cib_ok) {
- cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update);
- rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update);
- }
-
- if (rc != cib_ok) {
- /* Notification setup failed, could not monitor CIB actions */
- clean_up(-rc);
- }
- }
- }
- return rc;
-}
-
-#ifdef CHECK_AIS
-static gboolean
-mon_timer_ais(gpointer data)
-{
- if (timer_id_ais > 0) {
- g_source_remove(timer_id_ais);
- }
-
- send_ais_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais);
-
- /* The timer is set in the response processing */
- return FALSE;
-}
-
-static void
-ais_membership_destroy(gpointer user_data)
-{
- cl_log(LOG_ERR, "AIS connection terminated - corosync down?");
- ais_fd_sync = -1;
- /* TODO: Is recovery even worth it here? After all, this means
- * that corosync died ... */
- exit(1);
-}
-
-static gboolean
-ais_membership_dispatch(AIS_Message * wrapper, char *data, int sender)
-{
- switch (wrapper->header.id) {
- case crm_class_quorum:
- break;
- default:
- return TRUE;
- break;
- }
-
- DBGLOG(LOG_INFO, "AIS quorum state: %d", (int)crm_have_quorum);
- clock_gettime(CLOCK_MONOTONIC, &t_last_quorum);
-
- timer_id_ais = g_timeout_add(timeout_loop * 1000, mon_timer_ais, NULL);
-
- return TRUE;
-}
-#endif
-
-int
-servant_pcmk(const char *diskname, const void* argp)
-{
- int exit_code = 0;
-
- cl_log(LOG_INFO, "Monitoring Pacemaker health");
- set_proc_title("sbd: watcher: Pacemaker");
- reconnect_msec = 2000;
-
- /* We don't want any noisy crm messages */
- set_crm_log_level(LOG_ERR);
-
-#ifdef CHECK_AIS
- cluster_stack = get_cluster_type();
-
- if (cluster_stack != pcmk_cluster_classic_ais) {
- cl_log(LOG_ERR, "SBD currently only supports legacy AIS for quorum state poll");
- }
-
- while (!init_ais_connection_once
- (ais_membership_dispatch, ais_membership_destroy, NULL, NULL, &local_id)) {
- cl_log(LOG_INFO, "Waiting to sign in with AIS ...");
- sleep(reconnect_msec / 1000);
- }
-#endif
-
- if (current_cib == NULL) {
- cib = cib_new();
-
- do {
- exit_code = cib_connect(TRUE);
-
- if (exit_code != cib_ok) {
- sleep(reconnect_msec / 1000);
- }
- } while (exit_code == cib_connection);
-
- if (exit_code != cib_ok) {
- clean_up(-exit_code);
- }
- }
-
- mainloop = g_main_new(FALSE);
-
- mainloop_add_signal(SIGTERM, mon_shutdown);
- mainloop_add_signal(SIGINT, mon_shutdown);
- refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_state, NULL);
- timer_id_notify = g_timeout_add(timeout_loop * 1000, mon_timer_notify, NULL);
-#ifdef CHECK_AIS
- timer_id_ais = g_timeout_add(timeout_loop * 1000, mon_timer_ais, NULL);
-#endif
-
- g_main_run(mainloop);
- g_main_destroy(mainloop);
-
- clean_up(0);
- return 0; /* never reached */
-}
-
-#define LOGONCE(state, lvl, fmt, args...) do { \
- if (last_state != state) { \
- cl_log(lvl, fmt, ##args); \
- last_state = state; \
- } \
- } while(0)
-
-static int
-compute_status(pe_working_set_t * data_set)
-{
- static int updates = 0;
- static int last_state = 0;
- int healthy = 0;
- node_t *dc = NULL;
- struct timespec t_now;
-
- updates++;
- dc = data_set->dc_node;
- clock_gettime(CLOCK_MONOTONIC, &t_now);
-
- if (dc == NULL) {
- /* Means we don't know if we have quorum. Hrm. Probably needs to
- * allow for this state for a period of time and then decide
- * that we don't have quorum - TODO - should we skip
- * notifying the parent? */
- LOGONCE(1, LOG_INFO, "We don't have a DC right now.");
- goto out;
- } else {
- const char *cib_quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM);
-
- if (crm_is_true(cib_quorum)) {
- DBGLOG(LOG_INFO, "CIB: We have quorum!");
- } else {
- LOGONCE(3, LOG_WARNING, "CIB: We do NOT have quorum!");
- goto out;
- }
-
- }
-
-#ifdef CHECK_AIS
- int quorum_age = t_now.tv_sec - t_last_quorum.tv_sec;
-
- if (quorum_age > (int)(timeout_io+timeout_loop)) {
- if (t_last_quorum.tv_sec != 0)
- LOGONCE(2, LOG_WARNING, "AIS: Quorum outdated!");
- goto out;
- }
-
- if (crm_have_quorum) {
- DBGLOG(LOG_INFO, "AIS: We have quorum!");
- } else {
- LOGONCE(8, LOG_WARNING, "AIS: We do NOT have quorum!");
- goto out;
- }
-#endif
-
- node_t *node = pe_find_node(data_set->nodes, local_uname);
-
- if (node->details->unclean) {
- LOGONCE(4, LOG_WARNING, "Node state: UNCLEAN");
- goto out;
- } else if (node->details->pending) {
- LOGONCE(5, LOG_WARNING, "Node state: pending");
- /* TODO ? */
- } else if (node->details->online) {
- LOGONCE(6, LOG_INFO, "Node state: online");
- healthy = 1;
- } else {
- LOGONCE(7, LOG_WARNING, "Node state: UNKNOWN");
- goto out;
- }
-
-out:
- set_pcmk_health(healthy);
-
- return 0;
-}
-
-void
-set_pcmk_health(int healthy)
-{
- pcmk_healthy = healthy;
- notify_parent();
-}
-
-void
-notify_parent(void)
-{
- pid_t ppid;
- union sigval signal_value;
-
- memset(&signal_value, 0, sizeof(signal_value));
- ppid = getppid();
-
- if (ppid == 1) {
- /* Our parent died unexpectedly. Triggering
- * self-fence. */
- cl_log(LOG_WARNING, "Our parent is dead.");
- do_reset();
- }
-
- if (pcmk_healthy) {
- DBGLOG(LOG_INFO, "Notifying parent: healthy");
- sigqueue(ppid, SIG_LIVENESS, signal_value);
- } else {
- DBGLOG(LOG_WARNING, "Notifying parent: UNHEALTHY");
- sigqueue(ppid, SIG_PCMK_UNHEALTHY, signal_value);
- }
-}
-
-void
-crm_diff_update(const char *event, xmlNode * msg)
-{
- int rc = -1;
- long now = time(NULL);
- const char *op = NULL;
- unsigned int log_level = LOG_INFO;
-
- xmlNode *diff = NULL;
- xmlNode *cib_last = NULL;
-
- if (msg == NULL) {
- crm_err("NULL update");
- return;
- }
-
- crm_element_value_int(msg, F_CIB_RC, &rc);
- op = crm_element_value(msg, F_CIB_OPERATION);
- diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);
-
- if (rc < cib_ok) {
- log_level = LOG_WARNING;
- cl_log(log_level, "[%s] %s ABORTED: %s", event, op, cib_error2string(rc));
- return;
- }
-
- if (current_cib != NULL) {
- cib_last = current_cib;
- current_cib = NULL;
- rc = cib_process_diff(op, cib_force_diff, NULL, NULL, diff, cib_last, &current_cib, NULL);
-
- if (rc != cib_ok) {
- crm_debug("Update didn't apply, requesting full copy: %s", cib_error2string(rc));
- free_xml(current_cib);
- current_cib = NULL;
- }
- }
-
- if (current_cib == NULL) {
- current_cib = get_cib_copy(cib);
- }
-
- if ((now - last_refresh) > (reconnect_msec / 1000)) {
- /* Force a refresh */
- mon_refresh_state(NULL);
- } else {
- mainloop_set_trigger(refresh_trigger);
- }
- free_xml(cib_last);
-}
-
-gboolean
-mon_refresh_state(gpointer user_data)
-{
- xmlNode *cib_copy = copy_xml(current_cib);
- pe_working_set_t data_set;
-
- last_refresh = time(NULL);
-
- if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) {
- if (cib) {
- cib->cmds->signoff(cib);
- }
- /* TODO: Not good path, upgrade failed */
- clean_up(1);
- return FALSE;
- }
-
- set_working_set_defaults(&data_set);
- data_set.input = cib_copy;
- cluster_status(&data_set);
-
- compute_status(&data_set);
-
- cleanup_calculations(&data_set);
- return TRUE;
-}
-
-void
-clean_up(int rc)
-{
- if (cib != NULL) {
- cib->cmds->signoff(cib);
- cib_delete(cib);
- cib = NULL;
- }
-
- if (rc >= 0) {
- exit(rc);
- }
- return;
-}
-
diff -r 9888c2e4353b -r 31225c028d4f sbd.agent
--- a/sbd.agent Mon May 28 20:49:01 2012 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,116 +0,0 @@
-#!/bin/bash
-#
-# This STONITH script drives the shared-storage stonith plugin.
-#
-# Author: Lars Marowsky-Bree
-# Copyright: 2008 Lars Marowsky-Bree
-# License: GNU General Public License (GPL)
-#
-
-# Main code
-
-if [ x$sbd_device = x ]; then
- if [ -f /etc/sysconfig/sbd ]; then
- source /etc/sysconfig/sbd
- sbd_device=$SBD_DEVICE
- fi
-fi
-
-SBD_DEVS=${sbd_device%;}
-
-sbd_device=${SBD_DEVS//;/ -d }
-
-case $1 in
-gethosts)
- echo `sbd -d $sbd_device list | cut -f2 | sort | uniq`
- exit 0
- ;;
-off|reset)
- message=$1
- case "$crashdump" in
- yes|true|1|YES|TRUE|ja|on|ON) message="crashdump" ;;
- esac
- sbd -d $sbd_device message $2 $message
- exit $?
- ;;
-status)
- if ! sbd -d $sbd_device list >/dev/null 2>&1 ; then
- ha_log.sh err "sbd could not list nodes from $sbd_device"
- exit 1
- fi
- exit 0
- ;;
-on)
- exit 1
- ;;
-getconfignames)
- echo "sbd_device crashdump"
- exit 0
- ;;
-getinfo-devid)
- echo "Shared storage STONITH device"
- exit 0
- ;;
-getinfo-devname)
- echo "Shared storage STONITH device"
- exit 0
- ;;
-getinfo-devdescr)
- cat << DESC
-sbd uses a shared storage device as a medium to communicate
-fencing requests. This allows clusters without network power
-switches; the downside is that access to the shared storage
-device becomes a Single Point of Failure.
-
-It requires sbd to be configured on all nodes.
-
-Please read http://linux-ha.org/wiki/SBD_Fencing!
-
-DESC
- exit 0
- ;;
-getinfo-devurl)
- echo "http://linux-ha.org/wiki/SBD_Fencing"
- exit 0
- ;;
-getinfo-xml)
- cat << SSHXML
-<parameters>
-
-<parameter name="crashdump">
-<content type="string" />
-<shortdesc lang="en">
-Crashdump instead of regular fence
-</shortdesc>
-<longdesc lang="en">
-If SBD is given a fence command, this option will instead perform a
-kernel crash of a reboot or power-off, which on a properly configured
-system can lead to a crashdump for analysis.
-
-This is less safe for production environments. Please use with caution
-and for debugging purposes only.
-</longdesc>
-</parameter>
-
-<parameter name="sbd_device" unique="1">
-<content type="string" />
-<shortdesc lang="en">
-SBD device(s)
-</shortdesc>
-<longdesc lang="en">
-The block device used for the SBD partition. Up to three
-can be specified if separated by a semicolon. (Please check
-the documentation if specifying two.)
-
-If not specified, will default to the value from /etc/sysconfig/sbd.
-
-</longdesc>
-</parameter>
-</parameters>
-SSHXML
- exit 0
- ;;
-*)
- exit 1
- ;;
-esac
diff -r 9888c2e4353b -r 31225c028d4f sbd.h
--- a/sbd.h Mon May 28 20:49:01 2012 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,207 +0,0 @@
-/*
- * Copyright (C) 2008 Lars Marowsky-Bree <lmb@suse.de>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <arpa/inet.h>
-#include <asm/unistd.h>
-#include <clplumbing/cl_log.h>
-#include <clplumbing/cl_reboot.h>
-#include <clplumbing/coredumps.h>
-#include <clplumbing/realtime.h>
-#include <clplumbing/setproctitle.h>
-#include <ctype.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <libaio.h>
-#include <linux/fs.h>
-#include <linux/types.h>
-#include <linux/watchdog.h>
-#include <malloc.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/ptrace.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/utsname.h>
-#include <sys/wait.h>
-#include <syslog.h>
-#include <time.h>
-#include <unistd.h>
-
-/* signals reserved for multi-disk sbd */
-#define SIG_LIVENESS (SIGRTMIN + 1) /* report liveness of the disk */
-#define SIG_EXITREQ (SIGRTMIN + 2) /* exit request to inquisitor */
-#define SIG_TEST (SIGRTMIN + 3) /* trigger self test */
-#define SIG_RESTART (SIGRTMIN + 4) /* trigger restart of all failed disk */
-#define SIG_IO_FAIL (SIGRTMIN + 5) /* the IO child requests to be considered failed */
-#define SIG_PCMK_UNHEALTHY (SIGRTMIN + 6)
-/* FIXME: should add dynamic check of SIG_XX >= SIGRTMAX */
-
-/* Sector data types */
-struct sector_header_s {
- char magic[8];
- unsigned char version;
- unsigned char slots;
- /* Caveat: stored in network byte-order */
- uint32_t sector_size;
- uint32_t timeout_watchdog;
- uint32_t timeout_allocate;
- uint32_t timeout_loop;
- uint32_t timeout_msgwait;
-};
-
-struct sector_mbox_s {
- signed char cmd;
- char from[64];
-};
-
-struct sector_node_s {
- /* slots will be created with in_use == 0 */
- char in_use;
- char name[64];
-};
-
-struct servants_list_item {
- const char* devname;
- pid_t pid;
- int restarts;
- int restart_blocked;
- int outdated;
- struct timespec t_last, t_started;
- struct servants_list_item *next;
-};
-
-struct sbd_context {
- int devfd;
- io_context_t ioctx;
- struct iocb io;
-};
-
-#define SBD_MSG_EMPTY 0x00
-#define SBD_MSG_TEST 0x01
-#define SBD_MSG_RESET 0x02
-#define SBD_MSG_OFF 0x03
-#define SBD_MSG_EXIT 0x04
-#define SBD_MSG_CRASHDUMP 0x05
-
-#define SLOT_TO_SECTOR(slot) (1+slot*2)
-#define MBOX_TO_SECTOR(mbox) (2+mbox*2)
-
-void usage(void);
-int watchdog_init_interval(void);
-int watchdog_tickle(void);
-int watchdog_init(void);
-void sysrq_init(void);
-void watchdog_close(void);
-struct sbd_context *open_device(const char* devname);
-void close_device(struct sbd_context *st);
-signed char cmd2char(const char *cmd);
-void * sector_alloc(void);
-const char* char2cmd(const char cmd);
-int sector_write(struct sbd_context *st, int sector, const void *data);
-int sector_read(struct sbd_context *st, int sector, void *data);
-int slot_read(struct sbd_context *st, int slot, struct sector_node_s *s_node);
-int slot_write(struct sbd_context *st, int slot, const struct sector_node_s *s_node);
-int mbox_write(struct sbd_context *st, int mbox, const struct sector_mbox_s *s_mbox);
-int mbox_read(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox);
-int mbox_write_verify(struct sbd_context *st, int mbox, const struct sector_mbox_s *s_mbox);
-/* After a call to header_write(), certain data fields will have been
- * converted to on-disk byte-order; the header should not be accessed
- * afterwards anymore! */
-int header_write(struct sbd_context *st, struct sector_header_s *s_header);
-int header_read(struct sbd_context *st, struct sector_header_s *s_header);
-int valid_header(const struct sector_header_s *s_header);
-struct sector_header_s * header_get(struct sbd_context *st);
-int init_device(struct sbd_context *st);
-int slot_lookup(struct sbd_context *st, const struct sector_header_s *s_header, const char *name);
-int slot_unused(struct sbd_context *st, const struct sector_header_s *s_header);
-int slot_allocate(struct sbd_context *st, const char *name);
-int slot_list(struct sbd_context *st);
-int slot_ping(struct sbd_context *st, const char *name);
-int slot_msg(struct sbd_context *st, const char *name, const char *cmd);
-int header_dump(struct sbd_context *st);
-void sysrq_trigger(char t);
-void do_crashdump(void);
-void do_reset(void);
-void do_off(void);
-pid_t make_daemon(void);
-void maximize_priority(void);
-void sbd_get_uname(void);
-
-/* Tunable defaults: */
-extern unsigned long timeout_watchdog;
-extern unsigned long timeout_watchdog_warn;
-extern unsigned long timeout_watchdog_crashdump;
-extern int timeout_allocate;
-extern int timeout_loop;
-extern int timeout_msgwait;
-extern int timeout_io;
-extern int watchdog_use;
-extern int watchdog_set_timeout;
-extern int skip_rt;
-extern int debug;
-extern int debug_mode;
-extern const char *watchdogdev;
-extern char* local_uname;
-
-/* Global, non-tunable variables: */
-extern int sector_size;
-extern int watchdogfd;
-extern const char* cmdname;
-
-typedef int (*functionp_t)(const char* devname, const void* argp);
-
-int assign_servant(const char* devname, functionp_t functionp, const void* argp);
-int init_devices(void);
-struct slot_msg_arg_t {
- const char* name;
- const char* msg;
-};
-int slot_msg_wrapper(const char* devname, const void* argp);
-int slot_ping_wrapper(const char* devname, const void* argp);
-int allocate_slots(const char *name);
-int list_slots(void);
-int ping_via_slots(const char *name);
-int dump_headers(void);
-
-int check_all_dead(void);
-void servant_exit(void);
-int servant(const char *diskname, const void* argp);
-void recruit_servant(const char *devname, pid_t pid);
-struct servants_list_item *lookup_servant_by_dev(const char *devname);
-struct servants_list_item *lookup_servant_by_pid(pid_t pid);
-void servants_kill(void);
-void servants_start(void);
-void servant_start(struct servants_list_item *s);
-void inquisitor_child(void);
-int inquisitor(void);
-int inquisitor_decouple(void);
-int messenger(const char *name, const char *msg);
-int check_timeout_inconsistent(void);
-void cleanup_servant_by_pid(pid_t pid);
-int quorum_write(int good_servants);
-int quorum_read(int good_servants);
-
-int pcmk_have_quorum(void);
-int servant_pcmk(const char *diskname, const void* argp);
-
-#define DBGLOG(lvl, fmt, args...) do { \
- if (debug > 1) cl_log(lvl, fmt, ##args); \
- } while(0)
diff -r 9888c2e4353b -r 31225c028d4f src/Makefile.am
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Makefile.am Tue May 29 08:47:53 2012 +0200
@@ -0,0 +1,13 @@
+AM_CFLAGS = -D_GNU_SOURCE
+AM_CPPFLAGS = -I$(includedir)/pacemaker -I$(includedir)/clplumbing \
+ -I$(includedir)/heartbeat \
+ $(glib_CFLAGS) \
+ $(libxml_CFLAGS)
+
+sbin_PROGRAMS = sbd
+
+sbd_SOURCES = sbd-common.c sbd-md.c sbd-pacemaker.c
+
+sbd_LDFLAGS = $(glib_LIBS) $(libcoroipcc_LIBS)
+
+
diff -r 9888c2e4353b -r 31225c028d4f src/sbd-common.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/sbd-common.c Tue May 29 08:47:53 2012 +0200
@@ -0,0 +1,983 @@
+
+#include "sbd.h"
+
+/* These have to match the values in the header of the partition */
+static char sbd_magic[8] = "SBD_SBD_";
+static char sbd_version = 0x02;
+
+/* Tunable defaults: */
+unsigned long timeout_watchdog = 5;
+unsigned long timeout_watchdog_warn = 3;
+int timeout_allocate = 2;
+int timeout_loop = 1;
+int timeout_msgwait = 10;
+int timeout_io = 3;
+
+int watchdog_use = 0;
+int watchdog_set_timeout = 1;
+unsigned long timeout_watchdog_crashdump = 240;
+int skip_rt = 0;
+int check_pcmk = 0;
+int debug = 0;
+int debug_mode = 0;
+const char *watchdogdev = "/dev/watchdog";
+char * local_uname;
+
+/* Global, non-tunable variables: */
+int sector_size = 0;
+int watchdogfd = -1;
+
+/*const char *devname;*/
+const char *cmdname;
+
+void
+usage(void)
+{
+ fprintf(stderr,
+"Shared storage fencing tool.\n"
+"Syntax:\n"
+" %s <options> <command> <cmdarguments>\n"
+"Options:\n"
+"-d <devname> Block device to use (mandatory; can be specified up to 3 times)\n"
+"-h Display this help.\n"
+"-n <node> Set local node name; defaults to uname -n (optional)\n"
+"\n"
+"-R Do NOT enable realtime priority (debugging only)\n"
+"-W Use watchdog (recommended) (watch only)\n"
+"-w <dev> Specify watchdog device (optional) (watch only)\n"
+"-T Do NOT initialize the watchdog timeout (watch only)\n"
+"-v Enable some verbose debug logging (optional)\n"
+"\n"
+"-1 <N> Set watchdog timeout to N seconds (optional, create only)\n"
+"-2 <N> Set slot allocation timeout to N seconds (optional, create only)\n"
+"-3 <N> Set daemon loop timeout to N seconds (optional, create only)\n"
+"-4 <N> Set msgwait timeout to N seconds (optional, create only)\n"
+"-5 <N> Warn if loop latency exceeds threshold (optional, watch only)\n"
+" (default is 3, set to 0 to disable)\n"
+"-C <N> Watchdog timeout to set before crashdumping (def: 240s, optional)\n"
+"-I <N> Async IO read timeout (defaults to 3 * loop timeout, optional)\n"
+"-t <N> Dampening delay before faulty servants are restarted (optional)\n"
+" (default is 60, set to 0 to disable)\n"
+"-F <N> # of failures before a servant is considered faulty (optional)\n"
+" (default is 10, set to 0 to disable)\n"
+"-P Check Pacemaker quorum and node health (optional, watch only)\n"
+"-Z Enable trace mode. WARNING: UNSAFE FOR PRODUCTION!\n"
+"Commands:\n"
+"create initialize N slots on <dev> - OVERWRITES DEVICE!\n"
+"list List all allocated slots on device, and messages.\n"
+"dump Dump meta-data header from device.\n"
+"watch Loop forever, monitoring own slot\n"
+"allocate <node>\n"
+" Allocate a slot for node (optional)\n"
+"message <node> (test|reset|off|clear|exit)\n"
+" Writes the specified message to node's slot.\n"
+, cmdname);
+}
+
+int
+watchdog_init_interval(void)
+{
+ int timeout = timeout_watchdog;
+
+ if (watchdogfd < 0) {
+ return 0;
+ }
+
+
+ if (watchdog_set_timeout == 0) {
+ cl_log(LOG_INFO, "NOT setting watchdog timeout on explicit user request!");
+ return 0;
+ }
+
+ if (ioctl(watchdogfd, WDIOC_SETTIMEOUT, &timeout) < 0) {
+ cl_perror( "WDIOC_SETTIMEOUT"
+ ": Failed to set watchdog timer to %u seconds.",
+ timeout);
+ cl_log(LOG_CRIT, "Please validate your watchdog configuration!");
+ cl_log(LOG_CRIT, "Choose a different watchdog driver or specify -T to silence this check if you are sure.");
+ /* return -1; */
+ } else {
+ cl_log(LOG_INFO, "Set watchdog timeout to %u seconds.",
+ timeout);
+ }
+ return 0;
+}
+
+int
+watchdog_tickle(void) /* returns 0 if the write succeeded or no watchdog fd is open, -1 on write failure */
+{
+ if (watchdogfd >= 0) {
+ if (write(watchdogfd, "", 1) != 1) { /* writes a single NUL byte to the watchdog fd */
+ cl_perror("Watchdog write failure: %s!",
+ watchdogdev);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+int
+watchdog_init(void)
+{
+ if (watchdogfd < 0 && watchdogdev != NULL) {
+ watchdogfd = open(watchdogdev, O_WRONLY);
+ if (watchdogfd >= 0) {
+ cl_log(LOG_NOTICE, "Using watchdog device: %s",
+ watchdogdev);
+ if ((watchdog_init_interval() < 0)
+ || (watchdog_tickle() < 0)) {
+ return -1;
+ }
+ }else{
+ cl_perror("Cannot open watchdog device: %s",
+ watchdogdev);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+void
+watchdog_close(void)
+{
+ if (watchdogfd >= 0) {
+ if (write(watchdogfd, "V", 1) != 1) { /* presumably the watchdog driver's "magic close" byte - confirm against the kernel watchdog API */
+ cl_perror(
+ "Watchdog write magic character failure: closing %s!",
+ watchdogdev);
+ }
+ if (close(watchdogfd) < 0) {
+ cl_perror("Watchdog close(2) failed.");
+ }
+ watchdogfd = -1; /* reset even when the write or close above failed */
+ }
+}
+
+/* This duplicates some code from linux/ioprio.h since these are not included
+ * even in linux-kernel-headers. Sucks. See also
+ * /usr/src/linux/Documentation/block/ioprio.txt and ioprio_set(2) */
+extern int sys_ioprio_set(int, int, int);
+int ioprio_set(int which, int who, int ioprio);
+inline int ioprio_set(int which, int who, int ioprio)
+{
+ return syscall(__NR_ioprio_set, which, who, ioprio);
+}
+
+enum {
+ IOPRIO_CLASS_NONE,
+ IOPRIO_CLASS_RT,
+ IOPRIO_CLASS_BE,
+ IOPRIO_CLASS_IDLE,
+};
+
+enum {
+ IOPRIO_WHO_PROCESS = 1,
+ IOPRIO_WHO_PGRP,
+ IOPRIO_WHO_USER,
+};
+
+#define IOPRIO_BITS		(16)
+#define IOPRIO_CLASS_SHIFT	(13)
+#define IOPRIO_PRIO_MASK	((1UL << IOPRIO_CLASS_SHIFT) - 1)
+/* A priority value is (class << IOPRIO_CLASS_SHIFT) | data; every macro argument is parenthesized. */
+#define IOPRIO_PRIO_CLASS(mask)	((mask) >> IOPRIO_CLASS_SHIFT)
+#define IOPRIO_PRIO_DATA(mask)	((mask) & IOPRIO_PRIO_MASK)
+#define IOPRIO_PRIO_VALUE(class, data)	(((class) << IOPRIO_CLASS_SHIFT) | (data))
+
+void
+maximize_priority(void)
+{
+ if (skip_rt) {
+ cl_log(LOG_INFO, "Not elevating to realtime (-R specified).");
+ return;
+ }
+
+ cl_make_realtime(-1, -1, 256, 256);
+
+ if (ioprio_set(IOPRIO_WHO_PROCESS, getpid(),
+ IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 1)) != 0) {
+ cl_perror("ioprio_set() call failed.");
+ }
+}
+
+void
+close_device(struct sbd_context *st)
+{
+ close(st->devfd);
+ free(st); /* NOTE(review): st->ioctx, created by io_setup() in open_device(), is not io_destroy()ed here - confirm whether that is intended */
+}
+
+/* Open devname with O_SYNC|O_DIRECT, create an AIO context sized for one
+ * request and store the device's sector size in the global sector_size.
+ * Returns NULL on any failure, releasing everything acquired so far. */
+struct sbd_context *
+open_device(const char* devname)
+{
+	struct sbd_context *st;
+
+	if (!devname)
+		return NULL;
+	st = calloc(1, sizeof(*st));
+	if (!st)
+		return NULL;
+	if (io_setup(1, &st->ioctx) != 0) {
+		cl_perror("io_setup failed");
+		free(st);
+		return NULL;
+	}
+
+	st->devfd = open(devname, O_SYNC|O_RDWR|O_DIRECT);
+	if (st->devfd == -1) {
+		cl_perror("Opening device %s failed.", devname);
+		io_destroy(st->ioctx);	/* previously leaked on this path */
+		free(st);
+		return NULL;
+	}
+
+	/* A failed ioctl must not let a stale sector_size from an earlier device pass. */
+	if (ioctl(st->devfd, BLKSSZGET, &sector_size) < 0 || sector_size == 0) {
+		cl_perror("Get sector size failed.\n");
+		close(st->devfd);
+		io_destroy(st->ioctx);	/* no request was submitted yet, nothing to wait for */
+		free(st);
+		return NULL;
+	}
+	return st;
+}
+
+/* Map a command name to its SBD_MSG_* code; -1 if the name is unknown. */
+signed char
+cmd2char(const char *cmd)
+{
+	static const struct { const char *name; signed char code; } known[] = {
+		{ "clear", SBD_MSG_EMPTY },
+		{ "test", SBD_MSG_TEST },
+		{ "reset", SBD_MSG_RESET },
+		{ "off", SBD_MSG_OFF },
+		{ "exit", SBD_MSG_EXIT },
+		{ "crashdump", SBD_MSG_CRASHDUMP },
+	};
+	size_t k;
+	for (k = 0; k < sizeof(known) / sizeof(known[0]); k++)
+		if (strcmp(known[k].name, cmd) == 0)
+			return known[k].code;
+	return -1;
+}
+
+void *
+sector_alloc(void)
+{
+ void *x;
+
+ x = valloc(sector_size);
+ if (!x) {
+ exit(1);
+ }
+ memset(x, 0, sector_size);
+
+ return x;
+}
+
+const char*
+char2cmd(const char cmd)
+{
+ switch (cmd) {
+ case SBD_MSG_EMPTY:
+ return "clear";
+ break;
+ case SBD_MSG_TEST:
+ return "test";
+ break;
+ case SBD_MSG_RESET:
+ return "reset";
+ break;
+ case SBD_MSG_OFF:
+ return "off";
+ break;
+ case SBD_MSG_EXIT:
+ return "exit";
+ break;
+ case SBD_MSG_CRASHDUMP:
+ return "crashdump";
+ break;
+ default:
+ return "undefined";
+ break;
+ }
+}
+
+/* Seek to sector and write one sector_size-byte buffer there. Returns 0 on
+ * success, -1 if the seek fails or the write fails or comes up short. */
+int sector_write(struct sbd_context *st, int sector, const void *data)
+{
+	if (lseek(st->devfd, sector_size*sector, SEEK_SET) < 0) {
+		cl_perror("sector_write: lseek() failed");
+		return -1;
+	}
+	if (write(st->devfd, data, sector_size) != sector_size) {	/* "<= 0" let short writes pass */
+		cl_perror("sector_write: write_sector() failed");
+		return -1;
+	}
+	return(0);
+}
+
+int
+sector_read(struct sbd_context *st, int sector, void *data)
+{
+ struct timespec timeout;
+ struct io_event event;
+ struct iocb *ios[1] = { &st->io };
+ long r;
+
+ timeout.tv_sec = timeout_io;
+ timeout.tv_nsec = 0;
+
+ memset(&st->io, 0, sizeof(struct iocb));
+ io_prep_pread(&st->io, st->devfd, data, sector_size, sector_size * sector);
+ if (io_submit(st->ioctx, 1, ios) != 1) {
+ cl_log(LOG_ERR, "Failed to submit IO request!");
+ return -1;
+ }
+
+ errno = 0;
+ r = io_getevents(st->ioctx, 1L, 1L, &event, &timeout);
+
+ if (r < 0 ) {
+ cl_log(LOG_ERR, "Failed to retrieve IO events");
+ return -1;
+ } else if (r < 1L) {
+ cl_log(LOG_INFO, "Cancelling IO request due to timeout");
+ r = io_cancel(st->ioctx, ios[0], &event);
+ if (r) {
+ DBGLOG(LOG_INFO, "Could not cancel IO request.");
+ /* Doesn't really matter, debugging information.
+ */
+ }
+ return -1;
+ }
+
+ /* IO is happy */
+ if (event.res == sector_size) {
+ return 0;
+ } else {
+ cl_log(LOG_ERR, "Short read");
+ return -1;
+ }
+}
+
+int
+slot_read(struct sbd_context *st, int slot, struct sector_node_s *s_node)
+{
+ return sector_read(st, SLOT_TO_SECTOR(slot), s_node);
+}
+
+int
+slot_write(struct sbd_context *st, int slot, const struct sector_node_s *s_node)
+{
+ return sector_write(st, SLOT_TO_SECTOR(slot), s_node);
+}
+
+int
+mbox_write(struct sbd_context *st, int mbox, const struct sector_mbox_s *s_mbox)
+{
+ return sector_write(st, MBOX_TO_SECTOR(mbox), s_mbox);
+}
+
+int
+mbox_read(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox)
+{
+ return sector_read(st, MBOX_TO_SECTOR(mbox), s_mbox);
+}
+
+int
+mbox_write_verify(struct sbd_context *st, int mbox, const struct sector_mbox_s *s_mbox)
+{
+ void *data;
+ int rc = 0;
+
+ if (sector_write(st, MBOX_TO_SECTOR(mbox), s_mbox) < 0)
+ return -1;
+
+ data = sector_alloc();
+ if (sector_read(st, MBOX_TO_SECTOR(mbox), data) < 0) {
+ rc = -1;
+ goto out;
+ }
+
+
+ if (memcmp(s_mbox, data, sector_size) != 0) {
+ cl_log(LOG_ERR, "Write verification failed!");
+ rc = -1;
+ goto out;
+ }
+ rc = 0;
+out:
+ free(data);
+ return rc;
+}
+
+int header_write(struct sbd_context *st, struct sector_header_s *s_header)
+{
+ s_header->sector_size = htonl(s_header->sector_size); /* htonl() rewrites the caller's struct in place, here and below */
+ s_header->timeout_watchdog = htonl(s_header->timeout_watchdog);
+ s_header->timeout_allocate = htonl(s_header->timeout_allocate);
+ s_header->timeout_loop = htonl(s_header->timeout_loop);
+ s_header->timeout_msgwait = htonl(s_header->timeout_msgwait);
+ return sector_write(st, 0, s_header); /* the header is written to sector 0; result is sector_write()'s */
+}
+
+int
+header_read(struct sbd_context *st, struct sector_header_s *s_header)
+{
+ if (sector_read(st, 0, s_header) < 0)
+ return -1;
+
+ s_header->sector_size = ntohl(s_header->sector_size);
+ s_header->timeout_watchdog = ntohl(s_header->timeout_watchdog);
+ s_header->timeout_allocate = ntohl(s_header->timeout_allocate);
+ s_header->timeout_loop = ntohl(s_header->timeout_loop);
+ s_header->timeout_msgwait = ntohl(s_header->timeout_msgwait);
+ /* This sets the global defaults: */
+ timeout_watchdog = s_header->timeout_watchdog;
+ timeout_allocate = s_header->timeout_allocate;
+ timeout_loop = s_header->timeout_loop;
+ timeout_msgwait = s_header->timeout_msgwait;
+
+ return 0;
+}
+
+int
+valid_header(const struct sector_header_s *s_header)
+{
+ if (memcmp(s_header->magic, sbd_magic, sizeof(s_header->magic)) != 0) {
+ cl_log(LOG_ERR, "Header magic does not match.");
+ return -1;
+ }
+ if (s_header->version != sbd_version) {
+ cl_log(LOG_ERR, "Header version does not match.");
+ return -1;
+ }
+ if (s_header->sector_size != sector_size) {
+ cl_log(LOG_ERR, "Header sector size does not match.");
+ return -1;
+ }
+ return 0;
+}
+
+/* Read and validate the on-disk header. Returns a sector buffer the caller
+ * must free(), or NULL (buffer already released) if reading or validation fails. */
+struct sector_header_s *
+header_get(struct sbd_context *st)
+{
+	struct sector_header_s *s_header = sector_alloc();
+
+	if (header_read(st, s_header) < 0) {
+		cl_log(LOG_ERR, "Unable to read header from device %d", st->devfd);
+		free(s_header);	/* was leaked on every failed read */
+		return NULL;
+	}
+
+	if (valid_header(s_header) < 0) {
+		cl_log(LOG_ERR, "header on device %d is not valid.", st->devfd);
+		free(s_header);	/* was leaked on every invalid header */
+		return NULL;
+	}
+
+	return s_header;
+}
+
+int
+init_device(struct sbd_context *st)
+{
+ struct sector_header_s *s_header;
+ struct sector_node_s *s_node;
+ struct sector_mbox_s *s_mbox;
+ struct stat s;
+ int i;
+ int rc = 0;
+
+ s_header = sector_alloc();
+ s_node = sector_alloc();
+ s_mbox = sector_alloc();
+ memcpy(s_header->magic, sbd_magic, sizeof(s_header->magic));
+ s_header->version = sbd_version;
+ s_header->slots = 255;
+ s_header->sector_size = sector_size;
+ s_header->timeout_watchdog = timeout_watchdog;
+ s_header->timeout_allocate = timeout_allocate;
+ s_header->timeout_loop = timeout_loop;
+ s_header->timeout_msgwait = timeout_msgwait;
+
+ fstat(st->devfd, &s);
+ /* printf("st_size = %ld, st_blksize = %ld, st_blocks = %ld\n",
+ s.st_size, s.st_blksize, s.st_blocks); */
+
+ cl_log(LOG_INFO, "Creating version %d header on device %d",
+ s_header->version,
+ st->devfd);
+ fprintf(stdout, "Creating version %d header on device %d\n",
+ s_header->version,
+ st->devfd);
+ if (header_write(st, s_header) < 0) {
+ rc = -1; goto out;
+ }
+ cl_log(LOG_INFO, "Initializing %d slots on device %d",
+ s_header->slots,
+ st->devfd);
+ fprintf(stdout, "Initializing %d slots on device %d\n",
+ s_header->slots,
+ st->devfd);
+ for (i=0;i < s_header->slots;i++) {
+ if (slot_write(st, i, s_node) < 0) {
+ rc = -1; goto out;
+ }
+ if (mbox_write(st, i, s_mbox) < 0) {
+ rc = -1; goto out;
+ }
+ }
+
+out: free(s_node);
+ free(s_header);
+ free(s_mbox);
+ return(rc);
+}
+
+/* Check if there already is a slot allocated to said name; returns the
+ * slot number. If not found, returns -1.
+ * This is necessary because slots might not be continuous. */
+int
+slot_lookup(struct sbd_context *st, const struct sector_header_s *s_header, const char *name)
+{
+ struct sector_node_s *s_node = NULL;
+ int i;
+ int rc = -1;
+
+ if (!name) {
+ cl_log(LOG_ERR, "slot_lookup(): No name specified.\n");
+ goto out;
+ }
+
+ s_node = sector_alloc();
+
+ for (i=0; i < s_header->slots; i++) {
+ if (slot_read(st, i, s_node) < 0) {
+ rc = -2; goto out;
+ }
+ if (s_node->in_use != 0) {
+ if (strncasecmp(s_node->name, name,
+ sizeof(s_node->name)) == 0) {
+ DBGLOG(LOG_INFO, "%s owns slot %d", name, i);
+ rc = i; goto out;
+ }
+ }
+ }
+
+out: free(s_node);
+ return rc;
+}
+
+int
+slot_unused(struct sbd_context *st, const struct sector_header_s *s_header)
+{
+ struct sector_node_s *s_node;
+ int i;
+ int rc = -1;
+
+ s_node = sector_alloc();
+
+ for (i=0; i < s_header->slots; i++) {
+ if (slot_read(st, i, s_node) < 0) {
+ rc = -1; goto out;
+ }
+ if (s_node->in_use == 0) {
+ rc = i; goto out;
+ }
+ }
+
+out: free(s_node);
+ return rc;
+}
+
+
+/* Find the slot owned by name, claiming an unused one first if there is none.
+ * Returns the slot number (>= 0), -2 if the lookup hit a read error, or -1
+ * on any other failure (no name, unusable header, write error, no free slot). */
+int
+slot_allocate(struct sbd_context *st, const char *name)
+{
+	struct sector_header_s	*s_header = NULL;
+	struct sector_node_s	*s_node = NULL;
+	int			i;
+	int			rc = 0;
+
+	if (!name) {
+		cl_log(LOG_ERR, "slot_allocate(): No name specified.\n");
+		fprintf(stderr, "slot_allocate(): No name specified.\n");
+		rc = -1; goto out;
+	}
+
+	s_header = header_get(st);
+	if (!s_header) {
+		rc = -1; goto out;
+	}
+
+	s_node = sector_alloc();
+
+	while (1) {
+		i = slot_lookup(st, s_header, name);
+		if ((i >= 0) || (i == -2)) {
+			/* -1 is "no slot found", in which case we
+			 * proceed to allocate a new one.
+			 * -2 is "read error during lookup", in which
+			 * case we error out too
+			 * >= 0 is "slot already allocated" */
+			rc = i; goto out;
+		}
+
+		i = slot_unused(st, s_header);
+		if (i >= 0) {
+			cl_log(LOG_INFO, "slot %d is unused - trying to own", i);
+			fprintf(stdout, "slot %d is unused - trying to own\n", i);
+			memset(s_node, 0, sizeof(*s_node));
+			s_node->in_use = 1;
+			strncpy(s_node->name, name, sizeof(s_node->name));	/* full width on purpose: slot_lookup() compares up to sizeof(name) bytes */
+			if (slot_write(st, i, s_node) < 0) {
+				rc = -1; goto out;
+			}
+			sleep(timeout_allocate);	/* then loop: the next lookup confirms we still own the slot */
+		} else {
+			cl_log(LOG_ERR, "No more free slots.");
+			fprintf(stderr, "No more free slots.\n");
+			rc = -1; goto out;
+		}
+	}
+
+out:	free(s_node);
+	free(s_header);
+	return(rc);
+}
+
+int
+slot_list(struct sbd_context *st)
+{
+ struct sector_header_s *s_header = NULL;
+ struct sector_node_s *s_node = NULL;
+ struct sector_mbox_s *s_mbox = NULL;
+ int i;
+ int rc = 0;
+
+ s_header = header_get(st);
+ if (!s_header) {
+ rc = -1; goto out;
+ }
+
+ s_node = sector_alloc();
+ s_mbox = sector_alloc();
+
+ for (i=0; i < s_header->slots; i++) {
+ if (slot_read(st, i, s_node) < 0) {
+ rc = -1; goto out;
+ }
+ if (s_node->in_use > 0) {
+ if (mbox_read(st, i, s_mbox) < 0) {
+ rc = -1; goto out;
+ }
+ printf("%d\t%s\t%s\t%s\n",
+ i, s_node->name, char2cmd(s_mbox->cmd),
+ s_mbox->from);
+ }
+ }
+
+out: free(s_node);
+ free(s_header);
+ free(s_mbox);
+ return rc;
+}
+
+/* Write command cmd into the mailbox of node name ("LOCAL" means this host),
+ * read it back to verify, then wait timeout_msgwait seconds unless cmd is
+ * "exit". Returns 0 on success, -1 on any error. */
+int
+slot_msg(struct sbd_context *st, const char *name, const char *cmd)
+{
+	struct sector_header_s	*s_header = NULL;
+	struct sector_mbox_s	*s_mbox = NULL;
+	int			mbox;
+	int			rc = 0;
+
+	if (!name || !cmd) {
+		cl_log(LOG_ERR, "slot_msg(): No recipient / cmd specified.\n");
+		rc = -1; goto out;
+	}
+
+	s_header = header_get(st);
+	if (!s_header) {
+		rc = -1; goto out;
+	}
+
+	if (strcmp(name, "LOCAL") == 0) {
+		name = local_uname;
+	}
+
+	mbox = slot_lookup(st, s_header, name);
+	if (mbox < 0) {
+		cl_log(LOG_ERR, "slot_msg(): No slot found for %s.", name);
+		rc = -1; goto out;
+	}
+
+	s_mbox = sector_alloc();
+
+	s_mbox->cmd = cmd2char(cmd);
+	if (s_mbox->cmd < 0) {
+		cl_log(LOG_ERR, "slot_msg(): Invalid command %s.", cmd);
+		rc = -1; goto out;
+	}
+
+	strncpy(s_mbox->from, local_uname, sizeof(s_mbox->from)-1);	/* buffer is pre-zeroed, so this stays terminated */
+
+	cl_log(LOG_INFO, "Writing %s to node slot %s", cmd, name);
+	if (mbox_write_verify(st, mbox, s_mbox) < 0) {	/* was "< -1": a -1 failure was reported as delivered */
+		rc = -1; goto out;
+	}
+	if (strcasecmp(cmd, "exit") != 0) {
+		cl_log(LOG_INFO, "Messaging delay: %d", (int)timeout_msgwait);
+		sleep(timeout_msgwait);
+	}
+	cl_log(LOG_INFO, "%s successfully delivered to %s", cmd, name);
+
+out:	free(s_mbox);
+	free(s_header);
+	return rc;
+}
+
+/* Write a test message to the mailbox of node name ("LOCAL" means this host)
+ * and poll it once a second, for about timeout_msgwait seconds, until the
+ * command stored there changes. Returns 0 if it changed, -1 otherwise. */
+int
+slot_ping(struct sbd_context *st, const char *name)
+{
+	struct sector_header_s	*s_header = NULL;
+	struct sector_mbox_s	*s_mbox = NULL;
+	int			mbox;
+	int			waited = 0;
+	int			rc = 0;
+
+	if (!name) {
+		cl_log(LOG_ERR, "slot_ping(): No recipient specified.\n");
+		rc = -1; goto out;
+	}
+
+	s_header = header_get(st);
+	if (!s_header) {
+		rc = -1; goto out;
+	}
+
+	if (strcmp(name, "LOCAL") == 0) {
+		name = local_uname;
+	}
+
+	mbox = slot_lookup(st, s_header, name);
+	if (mbox < 0) {
+		cl_log(LOG_ERR, "slot_ping(): No slot found for %s.", name);	/* used to say slot_msg() */
+		rc = -1; goto out;
+	}
+
+	s_mbox = sector_alloc();
+	s_mbox->cmd = SBD_MSG_TEST;
+	strncpy(s_mbox->from, local_uname, sizeof(s_mbox->from)-1);
+	DBGLOG(LOG_DEBUG, "Pinging node %s", name);
+	if (mbox_write(st, mbox, s_mbox) < 0) {	/* was "< -1", which a -1 failure never satisfied */
+		rc = -1; goto out;
+	}
+
+	rc = -1;
+	while (waited <= timeout_msgwait) {
+		if (mbox_read(st, mbox, s_mbox) < 0)
+			break;
+		if (s_mbox->cmd != SBD_MSG_TEST) {
+			rc = 0;
+			break;
+		}
+		sleep(1);
+		waited++;
+	}
+	if (rc == 0) {
+		cl_log(LOG_DEBUG, "%s successfully pinged.", name);
+	} else {
+		cl_log(LOG_ERR, "%s failed to ping.", name);
+	}
+
+out:	free(s_mbox);
+	free(s_header);
+	return rc;
+}
+
+/* Turn on the sysrq bits sbd needs (8 and 128) unless the current setting already reads 1. */
+void
+sysrq_init(void)
+{
+	FILE* procf;
+	int c = 0;
+	procf = fopen("/proc/sys/kernel/sysrq", "r");
+	if (!procf) {
+		cl_perror("cannot open /proc/sys/kernel/sysrq for read.");
+		return;
+	}
+	if (fscanf(procf, "%d", &c) != 1)
+		c = 0;	/* unparsable: start from "nothing enabled"; c used to be read uninitialized */
+	fclose(procf);
+	if (c == 1)
+		return;
+	/* 8 for debugging dumps of processes, 128 for reboot/poweroff */
+	c |= 136;
+	procf = fopen("/proc/sys/kernel/sysrq", "w");
+	if (!procf) {
+		printf("cannot open /proc/sys/kernel/sysrq for write\n");
+		return;
+	}
+	fprintf(procf, "%d", c);
+	fclose(procf);
+}
+
+void
+sysrq_trigger(char t)
+{
+ FILE *procf;
+
+ procf = fopen("/proc/sysrq-trigger", "a");
+ if (!procf) {
+ cl_perror("Opening sysrq-trigger failed.");
+ return;
+ }
+ cl_log(LOG_INFO, "sysrq-trigger: %c\n", t);
+ fprintf(procf, "%c\n", t);
+ fclose(procf);
+ return;
+}
+
+void
+do_crashdump(void)
+{
+ if (timeout_watchdog_crashdump) {
+ timeout_watchdog = timeout_watchdog_crashdump;
+ watchdog_init_interval();
+ watchdog_tickle();
+ }
+ sysrq_trigger('c');
+ /* is it possible to reach the following line? */
+ cl_reboot(5, "sbd is triggering crashdumping");
+ exit(1);
+}
+
+void
+do_reset(void)
+{
+ if (debug_mode == 2) {
+ cl_log(LOG_ERR, "Skipping request to suicide due to DEBUG MODE!");
+ watchdog_close();
+ exit(0);
+ }
+ if (debug_mode == 1) {
+ cl_log(LOG_ERR, "Request to suicide changed to kdump due to DEBUG MODE!");
+ watchdog_close();
+ sysrq_trigger('c');
+ exit(0);
+ }
+ sysrq_trigger('b');
+ cl_reboot(5, "sbd is self-fencing (reset)");
+ sleep(timeout_watchdog * 2);
+ exit(1);
+}
+
+void
+do_off(void)
+{
+ if (debug_mode == 2) {
+ cl_log(LOG_ERR, "Skipping request to power-off due to DEBUG MODE!");
+ watchdog_close();
+ exit(0);
+ }
+ if (debug_mode == 1) {
+ cl_log(LOG_ERR, "Request to power-off changed to kdump due to DEBUG MODE!");
+ watchdog_close();
+ sysrq_trigger('c');
+ exit(0);
+ }
+ sysrq_trigger('o');
+ cl_reboot(5, "sbd is self-fencing (power-off)");
+ sleep(timeout_watchdog * 2);
+ exit(1);
+}
+
+pid_t
+make_daemon(void)
+{
+ pid_t pid;
+ const char * devnull = "/dev/null";
+
+ pid = fork();
+ if (pid < 0) {
+ cl_log(LOG_ERR, "%s: could not start daemon\n",
+ cmdname);
+ cl_perror("fork");
+ exit(1);
+ }else if (pid > 0) {
+ return pid;
+ }
+
+ cl_log_enable_stderr(FALSE);
+
+ /* This is the child; ensure privileges have not been lost. */
+ maximize_priority();
+
+ umask(022);
+ close(0);
+ (void)open(devnull, O_RDONLY);
+ close(1);
+ (void)open(devnull, O_WRONLY);
+ close(2);
+ (void)open(devnull, O_WRONLY);
+ cl_cdtocoredir();
+ return 0;
+}
+
+/* Print the fields of the device's validated header to stdout; 0 on success, -1 if header_get() fails. */
+int
+header_dump(struct sbd_context *st)
+{
+	struct sector_header_s *s_header;
+	s_header = header_get(st);
+	if (s_header == NULL)
+		return -1;
+
+	printf("Header version : %u\n", s_header->version);
+	printf("Number of slots : %u\n", s_header->slots);
+	printf("Sector size : %lu\n", (unsigned long)s_header->sector_size);
+	printf("Timeout (watchdog) : %lu\n",
+			(unsigned long)s_header->timeout_watchdog);
+	printf("Timeout (allocate) : %lu\n",
+			(unsigned long)s_header->timeout_allocate);
+	printf("Timeout (loop) : %lu\n", (unsigned long)s_header->timeout_loop);
+	printf("Timeout (msgwait) : %lu\n",
+			(unsigned long)s_header->timeout_msgwait);
+	free(s_header);	/* header_get() returns an allocated buffer; it was leaked here */
+	return 0;
+}
+
+/* Set local_uname to a heap copy of the lower-cased uname() node name; exits with status 1 on failure. */
+void
+sbd_get_uname(void)
+{
+	struct utsname	uname_buf;
+	char		*p;
+	if (uname(&uname_buf) < 0) {
+		cl_perror("uname() failed?");
+		exit(1);
+	}
+	for (p = uname_buf.nodename; *p; p++)
+		*p = tolower((unsigned char)*p);	/* plain char may be negative, which tolower() must not see */
+	local_uname = strdup(uname_buf.nodename);
+	if (!local_uname)	/* strdup() result used to be dereferenced unchecked */
+		exit(1);
+}
+
diff -r 9888c2e4353b -r 31225c028d4f src/sbd-md.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/sbd-md.c Tue May 29 08:47:53 2012 +0200
@@ -0,0 +1,1013 @@
+/*
+ * Copyright (C) 2008 Lars Marowsky-Bree <lmb@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "sbd.h"
+
+struct servants_list_item *servants_leader = NULL;
+
+static int servant_count = 0;
+static int servant_restart_interval = 5;
+static int servant_restart_count = 1;
+static int servant_inform_parent = 0;
+static int check_pcmk = 0;
+
+/* Write quorum: strictly more than half of all servants must be good. */
+int quorum_write(int good_servants)
+{
+ return (good_servants >= servant_count/2 + 1);
+}
+
+/* Read quorum: the same majority rule from three servants up; below that one good servant is enough. */
+int quorum_read(int good_servants)
+{
+ int needed = (servant_count >= 3) ? servant_count/2 + 1 : 1;
+ return (good_servants >= needed);
+}
+
+int assign_servant(const char* devname, functionp_t functionp, const void* argp)
+{
+ pid_t pid = 0;
+ int rc = 0;
+
+ pid = fork();
+ if (pid == 0) { /* child */
+ maximize_priority();
+ rc = (*functionp)(devname, argp);
+ if (rc == -1)
+ exit(1);
+ else
+ exit(0);
+ } else if (pid != -1) { /* parent */
+ return pid;
+ } else {
+ cl_log(LOG_ERR,"Failed to fork servant");
+ exit(1);
+ }
+}
+
+int init_devices()
+{
+ int rc = 0;
+ struct sbd_context *st;
+ struct servants_list_item *s;
+
+ for (s = servants_leader; s; s = s->next) {
+ fprintf(stdout, "Initializing device %s\n",
+ s->devname);
+ st = open_device(s->devname);
+ if (!st) {
+ return -1;
+ }
+ rc = init_device(st);
+ close_device(st);
+ if (rc == -1) {
+ fprintf(stderr, "Failed to init device %s\n", s->devname);
+ return rc;
+ }
+ fprintf(stdout, "Device %s is initialized.\n", s->devname);
+ }
+ return 0;
+}
+
+int slot_msg_wrapper(const char* devname, const void* argp)
+{
+ int rc = 0;
+ struct sbd_context *st;
+ const struct slot_msg_arg_t* arg = (const struct slot_msg_arg_t*)argp;
+
+ st = open_device(devname);
+ if (!st)
+ return -1;
+ cl_log(LOG_INFO, "Delivery process handling %s",
+ devname);
+ rc = slot_msg(st, arg->name, arg->msg);
+ close_device(st);
+ return rc;
+}
+
+int slot_ping_wrapper(const char* devname, const void* argp)
+{
+ int rc = 0;
+ const char* name = (const char*)argp;
+ struct sbd_context *st;
+
+ st = open_device(devname);
+ if (!st)
+ return -1;
+ rc = slot_ping(st, name);
+ close_device(st);
+ return rc;
+}
+
+int allocate_slots(const char *name)
+{
+ int rc = 0;
+ struct sbd_context *st;
+ struct servants_list_item *s;
+
+ for (s = servants_leader; s; s = s->next) {
+ fprintf(stdout, "Trying to allocate slot for %s on device %s.\n",
+ name,
+ s->devname);
+ st = open_device(s->devname);
+ if (!st) {
+ return -1;
+ }
+ rc = slot_allocate(st, name);
+ close_device(st);
+ if (rc < 0)
+ return rc;
+ fprintf(stdout, "Slot for %s has been allocated on %s.\n",
+ name,
+ s->devname);
+ }
+ return 0;
+}
+
+int list_slots()
+{
+ int rc = 0;
+ struct servants_list_item *s;
+ struct sbd_context *st;
+
+ for (s = servants_leader; s; s = s->next) {
+ st = open_device(s->devname);
+ if (!st) {
+ fprintf(stdout, "== disk %s unreadable!\n", s->devname);
+ continue;
+ }
+ rc = slot_list(st);
+ close_device(st);
+ if (rc == -1) {
+ fprintf(stdout, "== Slots on disk %s NOT dumped\n", s->devname);
+ }
+ }
+ return 0;
+}
+
+int ping_via_slots(const char *name)
+{
+ int sig = 0;
+ pid_t pid = 0;
+ int status = 0;
+ int servants_finished = 0;
+ sigset_t procmask;
+ siginfo_t sinfo;
+ struct servants_list_item *s;
+
+ sigemptyset(&procmask);
+ sigaddset(&procmask, SIGCHLD);
+ sigprocmask(SIG_BLOCK, &procmask, NULL);
+
+ for (s = servants_leader; s; s = s->next) {
+ s->pid = assign_servant(s->devname, &slot_ping_wrapper, (const void*)name);
+ }
+
+ while (servants_finished < servant_count) {
+ sig = sigwaitinfo(&procmask, &sinfo);
+ if (sig == SIGCHLD) {
+ while ((pid = wait(&status))) {
+ if (pid == -1 && errno == ECHILD) {
+ break;
+ } else {
+ s = lookup_servant_by_pid(pid);
+ if (s) {
+ servants_finished++;
+ }
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+/* This is a bit hackish, but the easiest way to rewire all process
+ * exits to send the desired signal to the parent. */
+void servant_exit(void)
+{
+ pid_t ppid;
+ union sigval signal_value;
+
+ ppid = getppid();
+ if (servant_inform_parent) {
+ memset(&signal_value, 0, sizeof(signal_value));
+ sigqueue(ppid, SIG_IO_FAIL, signal_value);
+ }
+}
+
+int servant(const char *diskname, const void* argp)
+{
+ struct sector_mbox_s *s_mbox = NULL;
+ int mbox;
+ int rc = 0;
+ time_t t0, t1, latency;
+ union sigval signal_value;
+ sigset_t servant_masks;
+ struct sbd_context *st;
+ pid_t ppid;
+
+ if (!diskname) {
+ cl_log(LOG_ERR, "Empty disk name %s.", diskname);
+ return -1;
+ }
+
+ cl_log(LOG_INFO, "Servant starting for device %s", diskname);
+
+ /* Block most of the signals */
+ sigfillset(&servant_masks);
+ sigdelset(&servant_masks, SIGKILL);
+ sigdelset(&servant_masks, SIGFPE);
+ sigdelset(&servant_masks, SIGILL);
+ sigdelset(&servant_masks, SIGSEGV);
+ sigdelset(&servant_masks, SIGBUS);
+ sigdelset(&servant_masks, SIGALRM);
+ /* FIXME: check error */
+ sigprocmask(SIG_SETMASK, &servant_masks, NULL);
+
+ atexit(servant_exit);
+ servant_inform_parent = 1;
+
+ st = open_device(diskname);
+ if (!st) {
+ return -1;
+ }
+
+ mbox = slot_allocate(st, local_uname);
+ if (mbox < 0) {
+ cl_log(LOG_ERR,
+ "No slot allocated, and automatic allocation failed for disk %s.",
+ diskname);
+ rc = -1;
+ goto out;
+ }
+ DBGLOG(LOG_INFO, "Monitoring slot %d on disk %s", mbox, diskname);
+ set_proc_title("sbd: watcher: %s - slot: %d", diskname, mbox);
+
+ s_mbox = sector_alloc();
+ if (mbox_write(st, mbox, s_mbox) < 0) {
+ rc = -1;
+ goto out;
+ }
+
+ memset(&signal_value, 0, sizeof(signal_value));
+
+ while (1) {
+ t0 = time(NULL);
+ sleep(timeout_loop);
+
+ ppid = getppid();
+
+ if (ppid == 1) {
+ /* Our parent died unexpectedly. Triggering
+ * self-fence. */
+ do_reset();
+ }
+
+ if (mbox_read(st, mbox, s_mbox) < 0) {
+ cl_log(LOG_ERR, "mbox read failed in servant.");
+ exit(1);
+ }
+
+ if (s_mbox->cmd > 0) {
+ cl_log(LOG_INFO,
+ "Received command %s from %s on disk %s",
+ char2cmd(s_mbox->cmd), s_mbox->from, diskname);
+
+ switch (s_mbox->cmd) {
+ case SBD_MSG_TEST:
+ memset(s_mbox, 0, sizeof(*s_mbox));
+ mbox_write(st, mbox, s_mbox);
+ sigqueue(ppid, SIG_TEST, signal_value);
+ break;
+ case SBD_MSG_RESET:
+ do_reset();
+ break;
+ case SBD_MSG_OFF:
+ do_off();
+ break;
+ case SBD_MSG_EXIT:
+ sigqueue(ppid, SIG_EXITREQ, signal_value);
+ break;
+ case SBD_MSG_CRASHDUMP:
+ do_crashdump();
+ break;
+ default:
+ /* FIXME:
+ An "unknown" message might result
+ from a partial write.
+ log it and clear the slot.
+ */
+ cl_log(LOG_ERR, "Unknown message on disk %s",
+ diskname);
+ memset(s_mbox, 0, sizeof(*s_mbox));
+ mbox_write(st, mbox, s_mbox);
+ break;
+ }
+ }
+ sigqueue(ppid, SIG_LIVENESS, signal_value);
+
+ t1 = time(NULL);
+ latency = t1 - t0;
+ if (timeout_watchdog_warn && (latency > timeout_watchdog_warn)) {
+ cl_log(LOG_WARNING,
+ "Latency: %d exceeded threshold %d on disk %s",
+ (int)latency, (int)timeout_watchdog_warn,
+ diskname);
+ } else if (debug) {
+ DBGLOG(LOG_INFO, "Latency: %d on disk %s", (int)latency,
+ diskname);
+ }
+ }
+ out:
+ free(s_mbox);
+ close_device(st);
+ if (rc == 0) {
+ servant_inform_parent = 0;
+ }
+ return rc;
+}
+
+/* Append a servant entry for devname (copied) with the given pid to the
+ * servants list and bump servant_count; exits if memory runs out. */
+void recruit_servant(const char *devname, pid_t pid)
+{
+ struct servants_list_item *s = servants_leader;
+ struct servants_list_item *newbie = calloc(1, sizeof(*newbie));
+
+ if (newbie)
+ newbie->devname = strdup(devname);
+ if (!newbie || !newbie->devname) { /* either allocation failed */
+ fprintf(stderr, "malloc failed in recruit_servant.");
+ exit(1);
+ }
+ newbie->pid = pid;
+ if (!s) {
+ servants_leader = newbie;
+ } else {
+ while (s->next)
+ s = s->next;
+ s->next = newbie;
+ }
+
+ servant_count++;
+}
+
+/* Return the servant whose device name equals devname (ignoring case), or NULL. */
+struct servants_list_item *lookup_servant_by_dev(const char *devname)
+{
+ struct servants_list_item *s;
+ for (s = servants_leader; s; s = s->next) {
+ if (strcasecmp(s->devname, devname) == 0) /* 0 means "equal" */
+ break;
+ }
+ return s;
+}
+
+/* Walk the servants list and return the entry recorded with the given
+ * pid, or NULL when no entry carries it. */
+struct servants_list_item *lookup_servant_by_pid(pid_t pid)
+{
+ struct servants_list_item *cur = servants_leader;
+
+ while (cur && cur->pid != pid)
+ cur = cur->next;
+ return cur;
+}
+
+/* Return 1 once no servant with a recorded pid still exists, else 0. */
+int check_all_dead(void)
+{
+ struct servants_list_item *s;
+ union sigval svalue = { 0 }; /* never pass an indeterminate value */
+
+ for (s = servants_leader; s; s = s->next) {
+ if (s->pid != 0) {
+ /* Signal 0 delivers nothing; it only probes for existence. */
+ if (sigqueue(s->pid, 0, svalue) == -1 && errno == ESRCH)
+ continue;
+ return 0;
+ }
+ }
+ return 1;
+}
+
+
+/* Fork a servant for s unless the process recorded in s->pid still exists. */
+void servant_start(struct servants_list_item *s)
+{
+ int r = 0;
+ union sigval svalue = { 0 }; /* never pass an indeterminate value */
+
+ if (s->pid != 0) {
+ r = sigqueue(s->pid, 0, svalue); /* signal 0: existence probe only */
+ if ((r != -1 || errno != ESRCH))
+ return;
+ }
+ s->restarts++;
+ if (strcmp("pcmk",s->devname) == 0) {
+ DBGLOG(LOG_INFO, "Starting Pacemaker servant");
+ s->pid = assign_servant(s->devname, servant_pcmk, NULL);
+ } else {
+ DBGLOG(LOG_INFO, "Starting servant for device %s",
+ s->devname);
+ s->pid = assign_servant(s->devname, servant, NULL);
+ }
+
+ clock_gettime(CLOCK_MONOTONIC, &s->t_started);
+}
+
+void servants_start(void)
+{
+ struct servants_list_item *s;
+
+ for (s = servants_leader; s; s = s->next) {
+ s->restarts = 0;
+ servant_start(s);
+ }
+}
+
+/* Send SIGKILL to every servant that has a recorded pid. */
+void servants_kill(void)
+{
+ struct servants_list_item *s;
+ union sigval svalue = { 0 }; /* never pass an indeterminate value */
+ for (s = servants_leader; s; s = s->next) {
+ if (s->pid != 0)
+ sigqueue(s->pid, SIGKILL, svalue);
+ }
+}
+
+int check_timeout_inconsistent(void)
+{
+ struct sbd_context *st;
+ struct sector_header_s *hdr_cur = 0, *hdr_last = 0;
+ struct servants_list_item* s;
+ int inconsistent = 0;
+
+ for (s = servants_leader; s; s = s->next) {
+ st = open_device(s->devname);
+ if (!st)
+ continue;
+ hdr_cur = header_get(st);
+ close_device(st);
+ if (!hdr_cur)
+ continue;
+ if (hdr_last) {
+ if (hdr_last->timeout_watchdog != hdr_cur->timeout_watchdog
+ || hdr_last->timeout_allocate != hdr_cur->timeout_allocate
+ || hdr_last->timeout_loop != hdr_cur->timeout_loop
+ || hdr_last->timeout_msgwait != hdr_cur->timeout_msgwait)
+ inconsistent = 1;
+ free(hdr_last);
+ }
+ hdr_last = hdr_cur;
+ }
+
+ if (hdr_last) {
+ timeout_watchdog = hdr_last->timeout_watchdog;
+ timeout_allocate = hdr_last->timeout_allocate;
+ timeout_loop = hdr_last->timeout_loop;
+ timeout_msgwait = hdr_last->timeout_msgwait;
+ } else {
+ cl_log(LOG_ERR, "No devices were available at start-up.");
+ exit(1);
+ }
+
+ free(hdr_last);
+ return inconsistent;
+}
+
+inline void cleanup_servant_by_pid(pid_t pid)
+{
+ struct servants_list_item* s;
+
+ s = lookup_servant_by_pid(pid);
+ if (s) {
+ cl_log(LOG_WARNING, "Servant for %s (pid: %i) has terminated",
+ s->devname, s->pid);
+ s->pid = 0;
+ } else {
+ /* This most likely is a stray signal from somewhere, or
+ * a SIGCHLD for a process that has previously
+ * explicitly disconnected. */
+ DBGLOG(LOG_INFO, "cleanup_servant: Nothing known about pid %i",
+ pid);
+ }
+}
+
+/* Initialise the watchdog (if enabled) and signal SIG_LIVENESS to the
+ * parent. Returns 0, or -1 if watchdog_init() fails. */
+int inquisitor_decouple(void)
+{
+ pid_t ppid = getppid();
+ union sigval signal_value = { 0 }; /* never pass an indeterminate value */
+
+ /* During start-up, we only arm the watchdog once we've got
+ * quorum at least once. */
+ if (watchdog_use && watchdog_init() < 0) {
+ return -1;
+ }
+
+ if (ppid > 1) {
+ sigqueue(ppid, SIG_LIVENESS, signal_value);
+ }
+ return 0;
+}
+
+void inquisitor_child(void)
+{
+ int sig, pid;
+ sigset_t procmask;
+ siginfo_t sinfo;
+ int status;
+ struct timespec timeout;
+ int exiting = 0;
+ int decoupled = 0;
+ int pcmk_healthy = 0;
+ time_t latency;
+ struct timespec t_last_tickle, t_now;
+ struct servants_list_item* s;
+
+ if (debug_mode) {
+ cl_log(LOG_ERR, "DEBUG MODE IS ACTIVE - DO NOT RUN IN PRODUCTION!");
+ }
+
+ set_proc_title("sbd: inquisitor");
+
+ sigemptyset(&procmask);
+ sigaddset(&procmask, SIGCHLD);
+ sigaddset(&procmask, SIG_LIVENESS);
+ sigaddset(&procmask, SIG_EXITREQ);
+ sigaddset(&procmask, SIG_TEST);
+ sigaddset(&procmask, SIG_IO_FAIL);
+ sigaddset(&procmask, SIG_PCMK_UNHEALTHY);
+ sigaddset(&procmask, SIG_RESTART);
+ sigaddset(&procmask, SIGUSR1);
+ sigaddset(&procmask, SIGUSR2);
+ sigprocmask(SIG_BLOCK, &procmask, NULL);
+
+ /* We only want this to have an effect during watch right now;
+ * pinging and fencing would be too confused */
+ if (check_pcmk) {
+ recruit_servant("pcmk", 0);
+ servant_count--;
+ }
+
+ servants_start();
+
+ timeout.tv_sec = timeout_loop;
+ timeout.tv_nsec = 0;
+ clock_gettime(CLOCK_MONOTONIC, &t_last_tickle);
+
+ while (1) {
+ int good_servants = 0;
+
+ sig = sigtimedwait(&procmask, &sinfo, &timeout);
+
+ clock_gettime(CLOCK_MONOTONIC, &t_now);
+
+ if (sig == SIG_EXITREQ) {
+ servants_kill();
+ watchdog_close();
+ exiting = 1;
+ } else if (sig == SIGCHLD) {
+ while ((pid = waitpid(-1, &status, WNOHANG))) {
+ if (pid == -1 && errno == ECHILD) {
+ break;
+ } else {
+ cleanup_servant_by_pid(pid);
+ }
+ }
+ } else if (sig == SIG_PCMK_UNHEALTHY) {
+ s = lookup_servant_by_pid(sinfo.si_pid);
+ if (s && strcmp(s->devname, "pcmk") == 0) {
+ if (pcmk_healthy != 0) {
+ cl_log(LOG_WARNING, "Pacemaker health check: UNHEALTHY");
+ }
+ pcmk_healthy = 0;
+ clock_gettime(CLOCK_MONOTONIC, &s->t_last);
+ } else {
+ cl_log(LOG_WARNING, "Ignoring SIG_PCMK_UNHEALTHY from unknown source");
+ }
+ } else if (sig == SIG_IO_FAIL) {
+ s = lookup_servant_by_pid(sinfo.si_pid);
+ if (s) {
+ DBGLOG(LOG_INFO, "Servant for %s requests to be disowned",
+ s->devname);
+ cleanup_servant_by_pid(sinfo.si_pid);
+ }
+ } else if (sig == SIG_LIVENESS) {
+ s = lookup_servant_by_pid(sinfo.si_pid);
+ if (s) {
+ if (strcmp(s->devname, "pcmk") == 0) {
+ if (pcmk_healthy != 1) {
+ cl_log(LOG_INFO, "Pacemaker health check: OK");
+ }
+ pcmk_healthy = 1;
+ };
+ clock_gettime(CLOCK_MONOTONIC, &s->t_last);
+
+ }
+ } else if (sig == SIG_TEST) {
+ } else if (sig == SIGUSR1) {
+ if (exiting)
+ continue;
+ servants_start();
+ }
+
+ if (exiting) {
+ if (check_all_dead())
+ exit(0);
+ else
+ continue;
+ }
+
+ good_servants = 0;
+ for (s = servants_leader; s; s = s->next) {
+ int age = t_now.tv_sec - s->t_last.tv_sec;
+
+ if (!s->t_last.tv_sec)
+ continue;
+
+ if (age < (int)(timeout_io+timeout_loop)) {
+ if (strcmp(s->devname, "pcmk") != 0) {
+ good_servants++;
+ }
+ s->outdated = 0;
+ } else if (!s->outdated) {
+ if (strcmp(s->devname, "pcmk") == 0) {
+ /* If the state is outdated, we
+ * override the last reported
+ * state */
+ pcmk_healthy = 0;
+ cl_log(LOG_WARNING, "Pacemaker state outdated (age: %d)",
+ age);
+ } else if (!s->restart_blocked) {
+ cl_log(LOG_WARNING, "Servant for %s outdated (age: %d)",
+ s->devname, age);
+ }
+ s->outdated = 1;
+ }
+ }
+
+ if (quorum_read(good_servants) || pcmk_healthy) {
+ if (!decoupled) {
+ if (inquisitor_decouple() < 0) {
+ servants_kill();
+ exiting = 1;
+ continue;
+ } else {
+ decoupled = 1;
+ }
+ }
+
+ watchdog_tickle();
+ clock_gettime(CLOCK_MONOTONIC, &t_last_tickle);
+ }
+
+ /* Note that this can actually be negative, since we set
+ * last_tickle after we set now. */
+ latency = t_now.tv_sec - t_last_tickle.tv_sec;
+ if (timeout_watchdog && (latency > (int)timeout_watchdog)) {
+ if (!decoupled) {
+ /* We're still being watched by our
+ * parent. We don't fence, but exit. */
+ cl_log(LOG_ERR, "SBD: Not enough votes to proceed. Aborting start-up.");
+ servants_kill();
+ exiting = 1;
+ continue;
+ }
+ if (debug_mode < 2) {
+ /* At level 2, we do nothing, but expect
+ * things to eventually return to
+ * normal. */
+ do_reset();
+ } else {
+ cl_log(LOG_ERR, "SBD: DEBUG MODE: Would have fenced due to timeout!");
+ }
+ }
+ if (timeout_watchdog_warn && (latency > (int)timeout_watchdog_warn)) {
+ cl_log(LOG_WARNING,
+ "Latency: No liveness for %d s exceeds threshold of %d s (healthy servants: %d)",
+ (int)latency, (int)timeout_watchdog_warn, good_servants);
+ }
+
+ for (s = servants_leader; s; s = s->next) {
+ int age = t_now.tv_sec - s->t_started.tv_sec;
+
+ if (age > servant_restart_interval) {
+ s->restarts = 0;
+ s->restart_blocked = 0;
+ }
+
+ if (servant_restart_count
+ && (s->restarts >= servant_restart_count)
+ && !s->restart_blocked) {
+ if (servant_restart_count > 1) {
+ cl_log(LOG_WARNING, "Max retry count (%d) reached: not restarting servant for %s",
+ (int)servant_restart_count, s->devname);
+ }
+ s->restart_blocked = 1;
+ }
+
+ if (!s->restart_blocked) {
+ servant_start(s);
+ }
+ }
+ }
+ /* not reached */
+ exit(0);
+}
+
+int inquisitor(void)
+{
+ int sig, pid, inquisitor_pid;
+ int status;
+ sigset_t procmask;
+ siginfo_t sinfo;
+
+ /* Where's the best place for sysrq init ?*/
+ sysrq_init();
+
+ sigemptyset(&procmask);
+ sigaddset(&procmask, SIGCHLD);
+ sigaddset(&procmask, SIG_LIVENESS);
+ sigprocmask(SIG_BLOCK, &procmask, NULL);
+
+ if (check_timeout_inconsistent() == 1) {
+ fprintf(stderr, "Timeout settings are different across SBD devices!\n");
+ fprintf(stderr, "You have to correct them and re-start SBD again.\n");
+ return -1;
+ }
+
+ inquisitor_pid = make_daemon();
+ if (inquisitor_pid == 0) {
+ inquisitor_child();
+ }
+
+ /* We're the parent. Wait for a happy signal from our child
+ * before we proceed - we either get "SIG_LIVENESS" when the
+ * inquisitor has completed the first successful round, or
+ * ECHLD when it exits with an error. */
+
+ while (1) {
+ sig = sigwaitinfo(&procmask, &sinfo);
+ if (sig == SIGCHLD) {
+ while ((pid = waitpid(-1, &status, WNOHANG))) {
+ if (pid == -1 && errno == ECHILD) {
+ break;
+ }
+ /* We got here because the inquisitor
+ * did not succeed. */
+ return -1;
+ }
+ } else if (sig == SIG_LIVENESS) {
+ /* Inquisitor started up properly. */
+ return 0;
+ } else {
+ fprintf(stderr, "Nobody expected the spanish inquisition!\n");
+ continue;
+ }
+ }
+ /* not reached */
+ return -1;
+}
+
+int messenger(const char *name, const char *msg)
+{
+ int sig = 0;
+ pid_t pid = 0;
+ int status = 0;
+ int servants_finished = 0;
+ int successful_delivery = 0;
+ sigset_t procmask;
+ siginfo_t sinfo;
+ struct servants_list_item *s;
+ struct slot_msg_arg_t slot_msg_arg = {name, msg};
+
+ sigemptyset(&procmask);
+ sigaddset(&procmask, SIGCHLD);
+ sigprocmask(SIG_BLOCK, &procmask, NULL);
+
+ for (s = servants_leader; s; s = s->next) {
+ s->pid = assign_servant(s->devname, &slot_msg_wrapper, &slot_msg_arg);
+ }
+
+ while (!(quorum_write(successful_delivery) ||
+ (servants_finished == servant_count))) {
+ sig = sigwaitinfo(&procmask, &sinfo);
+ if (sig == SIGCHLD) {
+ while ((pid = waitpid(-1, &status, WNOHANG))) {
+ if (pid == -1 && errno == ECHILD) {
+ break;
+ } else {
+ servants_finished++;
+ if (WIFEXITED(status)
+ && WEXITSTATUS(status) == 0) {
+ DBGLOG(LOG_INFO, "Process %d succeeded.",
+ (int)pid);
+ successful_delivery++;
+ } else {
+ cl_log(LOG_WARNING, "Process %d failed to deliver!",
+ (int)pid);
+ }
+ }
+ }
+ }
+ }
+ if (quorum_write(successful_delivery)) {
+ cl_log(LOG_INFO, "Message successfully delivered.");
+ return 0;
+ } else {
+ cl_log(LOG_ERR, "Message is not delivered via more then a half of devices");
+ return -1;
+ }
+}
+
+int dump_headers(void)
+{
+ int rc = 0;
+ struct servants_list_item *s = servants_leader;
+ struct sbd_context *st;
+
+ for (s = servants_leader; s; s = s->next) {
+ fprintf(stdout, "==Dumping header on disk %s\n", s->devname);
+ st = open_device(s->devname);
+ if (!st) {
+ fprintf(stdout, "== disk %s unreadable!\n", s->devname);
+ continue;
+ }
+
+ rc = header_dump(st);
+ close_device(st);
+
+ if (rc == -1) {
+ fprintf(stdout, "==Header on disk %s NOT dumped\n", s->devname);
+ } else {
+ fprintf(stdout, "==Header on disk %s is dumped\n", s->devname);
+ }
+ }
+ return rc;
+}
+
+/* Parse the options, then run the sub-command named by the first operand. */
+int main(int argc, char **argv, char **envp)
+{
+ int exit_status = 0;
+ int c;
+
+ if ((cmdname = strrchr(argv[0], '/')) == NULL) {
+ cmdname = argv[0];
+ } else {
+ ++cmdname;
+ }
+
+ cl_log_set_entity(cmdname);
+ cl_log_enable_stderr(0);
+ cl_log_set_facility(LOG_DAEMON);
+
+ sbd_get_uname();
+
+ while ((c = getopt(argc, argv, "C:DPRTWZhvw:d:n:1:2:3:4:5:t:I:F:")) != -1) {
+ switch (c) {
+ case 'D':
+ break;
+ case 'Z':
+ debug_mode++;
+ cl_log(LOG_INFO, "Debug mode now at level %d", (int)debug_mode);
+ break;
+ case 'R':
+ skip_rt = 1;
+ cl_log(LOG_INFO, "Realtime mode deactivated.");
+ break;
+ case 'v':
+ debug = 1;
+ cl_log(LOG_INFO, "Verbose mode enabled.");
+ break;
+ case 'T':
+ watchdog_set_timeout = 0;
+ cl_log(LOG_INFO, "Setting watchdog timeout disabled; using defaults.");
+ break;
+ case 'W':
+ watchdog_use = 1;
+ cl_log(LOG_INFO, "Watchdog enabled.");
+ break;
+ case 'w':
+ watchdogdev = optarg;
+ break;
+ case 'd':
+ recruit_servant(optarg, 0);
+ break;
+ case 'P':
+ check_pcmk = 1;
+ break;
+ case 'n':
+ local_uname = optarg;
+ cl_log(LOG_INFO, "Overriding local hostname to %s", local_uname);
+ break;
+ case 'C':
+ timeout_watchdog_crashdump = atoi(optarg);
+ cl_log(LOG_INFO, "Setting crashdump watchdog timeout to %d",
+ (int)timeout_watchdog_crashdump);
+ break;
+ case '1':
+ timeout_watchdog = atoi(optarg);
+ break;
+ case '2':
+ timeout_allocate = atoi(optarg);
+ break;
+ case '3':
+ timeout_loop = atoi(optarg);
+ break;
+ case '4':
+ timeout_msgwait = atoi(optarg);
+ break;
+ case '5':
+ timeout_watchdog_warn = atoi(optarg);
+ cl_log(LOG_INFO, "Setting latency warning to %d",
+ (int)timeout_watchdog_warn);
+ break;
+ case 't':
+ servant_restart_interval = atoi(optarg);
+ cl_log(LOG_INFO, "Setting servant restart interval to %d",
+ (int)servant_restart_interval);
+ break;
+ case 'I':
+ timeout_io = atoi(optarg);
+ cl_log(LOG_INFO, "Setting IO timeout to %d",
+ (int)timeout_io);
+ break;
+ case 'F':
+ servant_restart_count = atoi(optarg);
+ cl_log(LOG_INFO, "Servant restart count set to %d",
+ (int)servant_restart_count);
+ break;
+ case 'h':
+ usage();
+ return (0);
+ default:
+ exit_status = -1;
+ goto out;
+ }
+ }
+
+ if (servant_count < 1 || servant_count > 3) {
+ fprintf(stderr, "You must specify 1 to 3 devices via the -d option.\n");
+ exit_status = -1;
+ goto out;
+ }
+
+ /* There must at least be one command following the options: */
+ if ((argc - optind) < 1) {
+ fprintf(stderr, "Not enough arguments.\n");
+ exit_status = -1;
+ goto out;
+ }
+
+ if (init_set_proc_title(argc, argv, envp) < 0) {
+ fprintf(stderr, "Allocation of proc title failed.");
+ exit(1);
+ }
+
+ maximize_priority();
+ /* Sub-commands that take operands yield -1 (usage) when an operand is missing. */
+ if (strcmp(argv[optind], "create") == 0) {
+ exit_status = init_devices();
+ } else if (strcmp(argv[optind], "dump") == 0) {
+ exit_status = dump_headers();
+ } else if (strcmp(argv[optind], "allocate") == 0) {
+ exit_status = (argc - optind < 2) ? -1 : allocate_slots(argv[optind + 1]);
+ } else if (strcmp(argv[optind], "list") == 0) {
+ exit_status = list_slots();
+ } else if (strcmp(argv[optind], "message") == 0) {
+ exit_status = (argc - optind < 3) ? -1 : messenger(argv[optind + 1], argv[optind + 2]);
+ } else if (strcmp(argv[optind], "ping") == 0) {
+ exit_status = (argc - optind < 2) ? -1 : ping_via_slots(argv[optind + 1]);
+ } else if (strcmp(argv[optind], "watch") == 0) {
+ exit_status = inquisitor();
+ } else {
+ exit_status = -1;
+ }
+
+out:
+ if (exit_status < 0) {
+ usage();
+ return (1);
+ }
+ return (0);
+}
diff -r 9888c2e4353b -r 31225c028d4f src/sbd-pacemaker.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/sbd-pacemaker.c Tue May 29 08:47:53 2012 +0200
@@ -0,0 +1,500 @@
+
+/*
+ * Copyright (C) 2012 Lars Marowsky-Bree <lmb@suse.com>
+ *
+ * Based on crm_mon.c, which was:
+ * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* TODO list:
+ *
+ * - Trying to shutdown a node if no devices are up will fail, since SBD
+ * currently uses a message via the disk to achieve this.
+ *
+ * - Shutting down cluster nodes while the majority of devices is down
+ * will eventually take the cluster below the quorum threshold, at which
+ * time the remaining cluster nodes will all immediately suicide.
+ *
+ * - With the CIB refreshed every timeout_loop seconds, do we still need
+ * to watch for CIB update notifications or can that be removed?
+ *
+ */
+
+#include "sbd.h"
+
+#include <sys/param.h>
+
+#include <crm/crm.h>
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <stdlib.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <sys/utsname.h>
+
+#include <crm_config.h>
+#include <crm/msg_xml.h>
+#include <crm/common/util.h>
+#include <crm/common/xml.h>
+#include <crm/common/ipc.h>
+#include <crm/common/mainloop.h>
+#ifdef CHECK_AIS
+#include <crm/cluster/stack.h>
+#include <crm/common/cluster.h>
+#endif
+#include <crm/cib.h>
+#include <crm/pengine/status.h>
+
+void clean_up(int rc);
+void crm_diff_update(const char *event, xmlNode * msg);
+gboolean mon_refresh_state(gpointer user_data);
+int cib_connect(gboolean full);
+void set_pcmk_health(int healthy);
+void notify_parent(void);
+
+int reconnect_msec = 5000;
+GMainLoop *mainloop = NULL;
+guint timer_id_reconnect = 0;
+guint timer_id_notify = 0;
+
+int pcmk_healthy = 0;
+
+#ifdef CHECK_AIS
+guint timer_id_ais = 0;
+enum cluster_type_e cluster_stack = pcmk_cluster_unknown;
+int local_id = 0;
+struct timespec t_last_quorum;
+#endif
+
+cib_t *cib = NULL;
+xmlNode *current_cib = NULL;
+
+long last_refresh = 0;
+crm_trigger_t *refresh_trigger = NULL;
+
+static gboolean
+mon_timer_popped(gpointer data)
+{
+ int rc = cib_ok;
+
+ if (timer_id_reconnect > 0) {
+ g_source_remove(timer_id_reconnect);
+ }
+
+ rc = cib_connect(TRUE);
+
+ if (rc != cib_ok) {
+ timer_id_reconnect = g_timeout_add(reconnect_msec, mon_timer_popped, NULL);
+ set_pcmk_health(0);
+ }
+ return FALSE;
+}
+
+static void
+mon_cib_connection_destroy(gpointer user_data)
+{
+ if (cib) {
+ set_pcmk_health(0);
+ /* Reconnecting */
+ cib->cmds->signoff(cib);
+ timer_id_reconnect = g_timeout_add(reconnect_msec, mon_timer_popped, NULL);
+ }
+ return;
+}
+
+static gboolean
+mon_timer_notify(gpointer data)
+{
+ if (timer_id_notify > 0) {
+ g_source_remove(timer_id_notify);
+ }
+
+ /* TODO - do we really want to do this every loop interval? Lets
+ * check how much CPU that takes ... */
+ if (1) {
+ free_xml(current_cib);
+ current_cib = get_cib_copy(cib);
+ mon_refresh_state(NULL);
+ } else {
+ notify_parent();
+ }
+
+ timer_id_notify = g_timeout_add(timeout_loop * 1000, mon_timer_notify, NULL);
+ return FALSE;
+}
+
+/*
+ * Mainloop signal handler.
+ */
+static void
+mon_shutdown(int nsig)
+{
+ clean_up(0);
+}
+
+/* Sign on to the CIB unless already connected, then fetch a fresh copy and
+ * refresh state; "full" also registers the disconnect/diff callbacks. */
+int
+cib_connect(gboolean full)
+{
+ int rc = cib_ok;
+
+ CRM_CHECK(cib != NULL, return cib_missing);
+
+ if (cib->state != cib_connected_query && cib->state != cib_connected_command) {
+ rc = cib->cmds->signon(cib, crm_system_name, cib_query);
+ if (rc != cib_ok) {
+ return rc;
+ }
+ free_xml(current_cib); /* release the copy left from an earlier connection */
+ current_cib = get_cib_copy(cib);
+ mon_refresh_state(NULL);
+
+ if (full) {
+ if (rc == cib_ok) {
+ rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy);
+ if (rc == cib_NOTSUPPORTED) {
+ /* Notification setup failed, won't be able to reconnect after failure */
+ rc = cib_ok;
+ }
+ }
+
+ if (rc == cib_ok) {
+ cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update);
+ rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update);
+ }
+
+ if (rc != cib_ok) {
+ /* Notification setup failed, could not monitor CIB actions */
+ clean_up(-rc);
+ }
+ }
+ }
+ return rc;
+}
+
+#ifdef CHECK_AIS
+static gboolean
+mon_timer_ais(gpointer data)
+{
+ if (timer_id_ais > 0) {
+ g_source_remove(timer_id_ais);
+ }
+
+ send_ais_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais);
+
+ /* The timer is set in the response processing */
+ return FALSE;
+}
+
+static void
+ais_membership_destroy(gpointer user_data)
+{
+ cl_log(LOG_ERR, "AIS connection terminated - corosync down?");
+ ais_fd_sync = -1;
+ /* TODO: Is recovery even worth it here? After all, this means
+ * that corosync died ... */
+ exit(1);
+}
+
+static gboolean
+ais_membership_dispatch(AIS_Message * wrapper, char *data, int sender)
+{
+ switch (wrapper->header.id) {
+ case crm_class_quorum:
+ break;
+ default:
+ return TRUE;
+ break;
+ }
+
+ DBGLOG(LOG_INFO, "AIS quorum state: %d", (int)crm_have_quorum);
+ clock_gettime(CLOCK_MONOTONIC, &t_last_quorum);
+
+ timer_id_ais = g_timeout_add(timeout_loop * 1000, mon_timer_ais, NULL);
+
+ return TRUE;
+}
+#endif
+
+int
+servant_pcmk(const char *diskname, const void* argp)
+{
+ int exit_code = 0;
+
+ cl_log(LOG_INFO, "Monitoring Pacemaker health");
+ set_proc_title("sbd: watcher: Pacemaker");
+ reconnect_msec = 2000;
+
+ /* We don't want any noisy crm messages */
+ set_crm_log_level(LOG_ERR);
+
+#ifdef CHECK_AIS
+ cluster_stack = get_cluster_type();
+
+ if (cluster_stack != pcmk_cluster_classic_ais) {
+ cl_log(LOG_ERR, "SBD currently only supports legacy AIS for quorum state poll");
+ }
+
+ while (!init_ais_connection_once
+ (ais_membership_dispatch, ais_membership_destroy, NULL, NULL, &local_id)) {
+ cl_log(LOG_INFO, "Waiting to sign in with AIS ...");
+ sleep(reconnect_msec / 1000);
+ }
+#endif
+
+ if (current_cib == NULL) {
+ cib = cib_new();
+
+ do {
+ exit_code = cib_connect(TRUE);
+
+ if (exit_code != cib_ok) {
+ sleep(reconnect_msec / 1000);
+ }
+ } while (exit_code == cib_connection);
+
+ if (exit_code != cib_ok) {
+ clean_up(-exit_code);
+ }
+ }
+
+ mainloop = g_main_new(FALSE);
+
+ mainloop_add_signal(SIGTERM, mon_shutdown);
+ mainloop_add_signal(SIGINT, mon_shutdown);
+ refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_state, NULL);
+ timer_id_notify = g_timeout_add(timeout_loop * 1000, mon_timer_notify, NULL);
+#ifdef CHECK_AIS
+ timer_id_ais = g_timeout_add(timeout_loop * 1000, mon_timer_ais, NULL);
+#endif
+
+ g_main_run(mainloop);
+ g_main_destroy(mainloop);
+
+ clean_up(0);
+ return 0; /* never reached */
+}
+
+#define LOGONCE(state, lvl, fmt, args...) do { \
+ if (last_state != state) { \
+ cl_log(lvl, fmt, ##args); \
+ last_state = state; \
+ } \
+ } while(0)
+
+/* Work out Pacemaker health for the local node and report it via set_pcmk_health(). */
+static int
+compute_status(pe_working_set_t * data_set)
+{
+ static int updates = 0;
+ static int last_state = 0;
+ int healthy = 0;
+ node_t *dc = NULL;
+ struct timespec t_now;
+
+ updates++;
+ dc = data_set->dc_node;
+ clock_gettime(CLOCK_MONOTONIC, &t_now);
+
+ if (dc == NULL) {
+ /* Means we don't know if we have quorum. Hrm. Probably needs to
+ * allow for this state for a period of time and then decide
+ * that we don't have quorum - TODO - should we skip
+ * notifying the parent? */
+ LOGONCE(1, LOG_INFO, "We don't have a DC right now.");
+ goto out;
+ } else {
+ const char *cib_quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM);
+
+ if (crm_is_true(cib_quorum)) {
+ DBGLOG(LOG_INFO, "CIB: We have quorum!");
+ } else {
+ LOGONCE(3, LOG_WARNING, "CIB: We do NOT have quorum!");
+ goto out;
+ }
+ }
+
+#ifdef CHECK_AIS
+ int quorum_age = t_now.tv_sec - t_last_quorum.tv_sec;
+
+ if (quorum_age > (int)(timeout_io+timeout_loop)) {
+ if (t_last_quorum.tv_sec != 0)
+ LOGONCE(2, LOG_WARNING, "AIS: Quorum outdated!");
+ goto out;
+ }
+
+ if (crm_have_quorum) {
+ DBGLOG(LOG_INFO, "AIS: We have quorum!");
+ } else {
+ LOGONCE(8, LOG_WARNING, "AIS: We do NOT have quorum!");
+ goto out;
+ }
+#endif
+
+ node_t *node = pe_find_node(data_set->nodes, local_uname);
+
+ if (node == NULL) {
+ /* No entry for local_uname in the node list: must not dereference. */
+ LOGONCE(7, LOG_WARNING, "Node state: UNKNOWN");
+ } else if (node->details->unclean) {
+ LOGONCE(4, LOG_WARNING, "Node state: UNCLEAN");
+ } else if (node->details->pending) {
+ LOGONCE(5, LOG_WARNING, "Node state: pending");
+ /* TODO ? */
+ } else if (node->details->online) {
+ LOGONCE(6, LOG_INFO, "Node state: online");
+ healthy = 1;
+ } else {
+ LOGONCE(7, LOG_WARNING, "Node state: UNKNOWN");
+ }
+
+out:
+ set_pcmk_health(healthy);
+ return 0;
+}
+
+void
+set_pcmk_health(int healthy)
+{
+ pcmk_healthy = healthy;
+ notify_parent();
+}
+
+void
+notify_parent(void)
+{
+ pid_t ppid;
+ union sigval signal_value;
+
+ memset(&signal_value, 0, sizeof(signal_value));
+ ppid = getppid();
+
+ if (ppid == 1) {
+ /* Our parent died unexpectedly. Triggering
+ * self-fence. */
+ cl_log(LOG_WARNING, "Our parent is dead.");
+ do_reset();
+ }
+
+ if (pcmk_healthy) {
+ DBGLOG(LOG_INFO, "Notifying parent: healthy");
+ sigqueue(ppid, SIG_LIVENESS, signal_value);
+ } else {
+ DBGLOG(LOG_WARNING, "Notifying parent: UNHEALTHY");
+ sigqueue(ppid, SIG_PCMK_UNHEALTHY, signal_value);
+ }
+}
+
+void
+crm_diff_update(const char *event, xmlNode * msg)
+{
+ int rc = -1;
+ long now = time(NULL);
+ const char *op = NULL;
+ unsigned int log_level = LOG_INFO;
+
+ xmlNode *diff = NULL;
+ xmlNode *cib_last = NULL;
+
+ if (msg == NULL) {
+ crm_err("NULL update");
+ return;
+ }
+
+ crm_element_value_int(msg, F_CIB_RC, &rc);
+ op = crm_element_value(msg, F_CIB_OPERATION);
+ diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);
+
+ if (rc < cib_ok) {
+ log_level = LOG_WARNING;
+ cl_log(log_level, "[%s] %s ABORTED: %s", event, op, cib_error2string(rc));
+ return;
+ }
+
+ if (current_cib != NULL) {
+ cib_last = current_cib;
+ current_cib = NULL;
+ rc = cib_process_diff(op, cib_force_diff, NULL, NULL, diff, cib_last, &current_cib, NULL);
+
+ if (rc != cib_ok) {
+ crm_debug("Update didn't apply, requesting full copy: %s", cib_error2string(rc));
+ free_xml(current_cib);
+ current_cib = NULL;
+ }
+ }
+
+ if (current_cib == NULL) {
+ current_cib = get_cib_copy(cib);
+ }
+
+ if ((now - last_refresh) > (reconnect_msec / 1000)) {
+ /* Force a refresh */
+ mon_refresh_state(NULL);
+ } else {
+ mainloop_set_trigger(refresh_trigger);
+ }
+ free_xml(cib_last);
+}
+
+gboolean
+mon_refresh_state(gpointer user_data)
+{
+ xmlNode *cib_copy = copy_xml(current_cib);
+ pe_working_set_t data_set;
+
+ last_refresh = time(NULL);
+
+ if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) {
+ if (cib) {
+ cib->cmds->signoff(cib);
+ }
+ /* TODO: Not good path, upgrade failed */
+ clean_up(1);
+ return FALSE;
+ }
+
+ set_working_set_defaults(&data_set);
+ data_set.input = cib_copy;
+ cluster_status(&data_set);
+
+ compute_status(&data_set);
+
+ cleanup_calculations(&data_set);
+ return TRUE;
+}
+
+void
+clean_up(int rc)
+{
+ if (cib != NULL) {
+ cib->cmds->signoff(cib);
+ cib_delete(cib);
+ cib = NULL;
+ }
+
+ if (rc >= 0) {
+ exit(rc);
+ }
+ return;
+}
+
diff -r 9888c2e4353b -r 31225c028d4f src/sbd.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/sbd.h Tue May 29 08:47:53 2012 +0200
@@ -0,0 +1,207 @@
+/*
+ * Copyright (C) 2008 Lars Marowsky-Bree <lmb@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <arpa/inet.h>
+#include <asm/unistd.h>
+#include <clplumbing/cl_log.h>
+#include <clplumbing/cl_reboot.h>
+#include <clplumbing/coredumps.h>
+#include <clplumbing/realtime.h>
+#include <clplumbing/setproctitle.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libaio.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/watchdog.h>
+#include <malloc.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/ptrace.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <sys/wait.h>
+#include <syslog.h>
+#include <time.h>
+#include <unistd.h>
+
+/* signals reserved for multi-disk sbd; all are real-time signals numbered relative to SIGRTMIN */
+#define SIG_LIVENESS (SIGRTMIN + 1) /* report liveness of the disk */
+#define SIG_EXITREQ (SIGRTMIN + 2) /* exit request to inquisitor */
+#define SIG_TEST (SIGRTMIN + 3) /* trigger self test */
+#define SIG_RESTART (SIGRTMIN + 4) /* trigger restart of all failed disks */
+#define SIG_IO_FAIL (SIGRTMIN + 5) /* the IO child requests to be considered failed */
+#define SIG_PCMK_UNHEALTHY (SIGRTMIN + 6) /* NOTE(review): presumably sent when pacemaker is considered unhealthy; confirm sender and receiver */
+/* FIXME: should add dynamic check of SIG_XX >= SIGRTMAX */
+
+/* Sector data types. sector_header_s is the record handled by header_read(), header_write() and valid_header(). */
+struct sector_header_s {
+ char magic[8];
+ unsigned char version;
+ unsigned char slots; /* presumably the number of node slots on the device; TODO confirm */
+ /* Caveat: stored in network byte-order */
+ uint32_t sector_size;
+ uint32_t timeout_watchdog; /* units of the timeout fields are not stated in this header */
+ uint32_t timeout_allocate;
+ uint32_t timeout_loop;
+ uint32_t timeout_msgwait;
+};
+/* Mailbox record handled by mbox_read(), mbox_write() and mbox_write_verify(). */
+struct sector_mbox_s {
+ signed char cmd; /* same type as the return value of cmd2char(); presumably one of the SBD_MSG_* codes */
+ char from[64]; /* presumably the name of the sender; TODO confirm whether NUL termination is guaranteed */
+};
+/* Node slot record handled by slot_read() and slot_write(). */
+struct sector_node_s {
+ /* slots will be created with in_use == 0 */
+ char in_use;
+ char name[64]; /* presumably the node name looked up by slot_lookup(); TODO confirm */
+};
+/* One entry of a singly linked list of servants, chained through next; entries can be looked up by devname or by pid (see lookup_servant_by_dev() and lookup_servant_by_pid()). */
+struct servants_list_item {
+ const char* devname;
+ pid_t pid;
+ int restarts; /* NOTE(review): presumably a restart counter; confirm when it is reset */
+ int restart_blocked;
+ int outdated;
+ struct timespec t_last, t_started;
+ struct servants_list_item *next;
+};
+/* Per-device handle: returned by open_device(), accepted by close_device() and passed as st to the sector, slot, mbox and header functions. */
+struct sbd_context {
+ int devfd; /* presumably the open file descriptor of the device; TODO confirm */
+ io_context_t ioctx; /* libaio context */
+ struct iocb io; /* libaio control block */
+};
+/* Command codes; all values fit the signed char used by sector_mbox_s.cmd and cmd2char(). NOTE(review): presumably these are the values stored in a mailbox; confirm. */
+#define SBD_MSG_EMPTY 0x00
+#define SBD_MSG_TEST 0x01
+#define SBD_MSG_RESET 0x02
+#define SBD_MSG_OFF 0x03
+#define SBD_MSG_EXIT 0x04
+#define SBD_MSG_CRASHDUMP 0x05
+/* Index-to-sector mapping: slot n is sector 1+2n and mailbox n is sector 2+2n. The argument is parenthesized so that an expression such as SLOT_TO_SECTOR(i+1) expands correctly. */
+#define SLOT_TO_SECTOR(slot) (1+(slot)*2)
+#define MBOX_TO_SECTOR(mbox) (2+(mbox)*2)
+/* Prototypes for watchdog handling, device/sector/slot/mailbox/header access and process helpers; the definitions are not part of this header. */
+void usage(void);
+int watchdog_init_interval(void);
+int watchdog_tickle(void);
+int watchdog_init(void);
+void sysrq_init(void);
+void watchdog_close(void);
+struct sbd_context *open_device(const char* devname); /* the result is what the functions taking st expect */
+void close_device(struct sbd_context *st);
+signed char cmd2char(const char *cmd); /* presumably command name to SBD_MSG_* code; TODO confirm the error value */
+void * sector_alloc(void); /* NOTE(review): ownership of the returned buffer is not visible here */
+const char* char2cmd(const char cmd); /* presumably the inverse of cmd2char() */
+int sector_write(struct sbd_context *st, int sector, const void *data);
+int sector_read(struct sbd_context *st, int sector, void *data);
+int slot_read(struct sbd_context *st, int slot, struct sector_node_s *s_node);
+int slot_write(struct sbd_context *st, int slot, const struct sector_node_s *s_node);
+int mbox_write(struct sbd_context *st, int mbox, const struct sector_mbox_s *s_mbox);
+int mbox_read(struct sbd_context *st, int mbox, struct sector_mbox_s *s_mbox);
+int mbox_write_verify(struct sbd_context *st, int mbox, const struct sector_mbox_s *s_mbox);
+/* After a call to header_write(), certain data fields will have been
+ * converted to on-disk byte-order; the header should not be accessed
+ * afterwards anymore! */
+int header_write(struct sbd_context *st, struct sector_header_s *s_header);
+int header_read(struct sbd_context *st, struct sector_header_s *s_header);
+int valid_header(const struct sector_header_s *s_header);
+struct sector_header_s * header_get(struct sbd_context *st); /* NOTE(review): who frees the returned header is not visible here */
+int init_device(struct sbd_context *st);
+int slot_lookup(struct sbd_context *st, const struct sector_header_s *s_header, const char *name);
+int slot_unused(struct sbd_context *st, const struct sector_header_s *s_header);
+int slot_allocate(struct sbd_context *st, const char *name);
+int slot_list(struct sbd_context *st);
+int slot_ping(struct sbd_context *st, const char *name);
+int slot_msg(struct sbd_context *st, const char *name, const char *cmd);
+int header_dump(struct sbd_context *st);
+void sysrq_trigger(char t);
+void do_crashdump(void);
+void do_reset(void);
+void do_off(void);
+pid_t make_daemon(void);
+void maximize_priority(void);
+void sbd_get_uname(void); /* presumably fills in local_uname; TODO confirm */
+
+/* Tunable defaults: declared here only, the definitions are not part of this header */
+extern unsigned long timeout_watchdog; /* note: wider type than the uint32_t field of the same name in sector_header_s */
+extern unsigned long timeout_watchdog_warn;
+extern unsigned long timeout_watchdog_crashdump;
+extern int timeout_allocate;
+extern int timeout_loop;
+extern int timeout_msgwait;
+extern int timeout_io;
+extern int watchdog_use;
+extern int watchdog_set_timeout;
+extern int skip_rt;
+extern int debug; /* DBGLOG() logs only while this is greater than 1 */
+extern int debug_mode;
+extern const char *watchdogdev;
+extern char* local_uname;
+
+/* Global, non-tunable variables: */
+extern int sector_size;
+extern int watchdogfd;
+extern const char* cmdname;
+/* Per-device callback type: called with a device name and an opaque argument; slot_msg_wrapper() and slot_ping_wrapper() below have this signature. */
+typedef int (*functionp_t)(const char* devname, const void* argp);
+
+int assign_servant(const char* devname, functionp_t functionp, const void* argp);
+int init_devices(void);
+struct slot_msg_arg_t { /* presumably the argp payload for slot_msg_wrapper(); TODO confirm */
+ const char* name;
+ const char* msg;
+};
+int slot_msg_wrapper(const char* devname, const void* argp);
+int slot_ping_wrapper(const char* devname, const void* argp);
+int allocate_slots(const char *name);
+int list_slots(void);
+int ping_via_slots(const char *name);
+int dump_headers(void);
+/* Prototypes for servant and inquisitor handling; servant() has the functionp_t signature, and the servant bookkeeping functions work on struct servants_list_item. */
+int check_all_dead(void);
+void servant_exit(void);
+int servant(const char *diskname, const void* argp);
+void recruit_servant(const char *devname, pid_t pid);
+struct servants_list_item *lookup_servant_by_dev(const char *devname);
+struct servants_list_item *lookup_servant_by_pid(pid_t pid);
+void servants_kill(void);
+void servants_start(void);
+void servant_start(struct servants_list_item *s);
+void inquisitor_child(void);
+int inquisitor(void);
+int inquisitor_decouple(void);
+int messenger(const char *name, const char *msg);
+int check_timeout_inconsistent(void);
+void cleanup_servant_by_pid(pid_t pid);
+int quorum_write(int good_servants);
+int quorum_read(int good_servants);
+/* Presumably the pacemaker-facing entry points (TODO confirm); servant_pcmk() has the functionp_t signature. */
+int pcmk_have_quorum(void);
+int servant_pcmk(const char *diskname, const void* argp);
+/* Forward to cl_log() only while the global debug is greater than 1; wrapped in do/while(0) so a call acts as one statement. Uses the GNU named variadic parameter form (args...) with ## to drop the comma when no extra arguments are given. */
+#define DBGLOG(lvl, fmt, args...) do { \
+ if (debug > 1) cl_log(lvl, fmt, ##args); \
+ } while(0)
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/