Hi,=20
This patch against heartbeat 0.4.6c adds per-link status
instead of only per-node status. This is the first step toward the
link/status dependency scheme we are planning. Handling of dead links
will be done by external scripts, which we will send to this list soon.
It also adds the "nice_failback" option in ha.cf, which avoids taking
services back when a starting node notices that the cluster is already
active. Any comments are welcome.
---
diff -Nur heartbeat-0.4.6c.orig/heartbeat/config.c heartbeat-0.4.6c/heartbe=
at/config.c
--- heartbeat-0.4.6c.orig/heartbeat/config.c=09Sat Dec 25 06:46:01 1999
+++ heartbeat-0.4.6c/heartbeat/config.c=09Mon Mar 27 15:00:46 2000
@@ -43,9 +43,10 @@
extern volatile struct process_info *=09curproc;
extern char *=09=09=09=09watchdogdev;
extern int=09=09=09=09nummedia;
+extern int=09=09=09=09nice_failback;
=20
int=09islegaldirective(const char *directive);
-int=09parse_config(const char * cfgfile);
+int=09parse_config(const char * cfgfile, char *nodename);
int=09parse_ha_resources(const char * cfgfile);
void=09dump_config(void);
int=09add_option(const char *=09option, const char * value);
@@ -87,7 +88,7 @@
=09uname(&u);
=09curnode =3D NULL;
=20
-=09if (!parse_config(cfgfile)) {
+=09if (!parse_config(cfgfile, u.nodename)) {
=09=09ha_log(LOG_ERR, "Heartbeat not started: configuration error.");
=09=09return(HA_FAIL);
=09}
@@ -163,6 +164,7 @@
#define=09KEY_FACILITY=09"logfacility"
#define=09KEY_LOGFILE=09"logfile"
#define=09KEY_DBGFILE=09"debugfile"
+#define KEY_FAILBACK "nice_failback"=20
=20
int add_node(const char *);
int set_hopfudge(const char *);
@@ -174,6 +176,7 @@
int set_facility(const char *);
int set_logfile(const char *);
int set_dbgfile(const char *);
+int set_nice_failback(const char *);
=20
extern const struct hb_media_fns=09ip_media_fns;
extern const struct hb_media_fns=09serial_media_fns;
@@ -203,6 +206,7 @@
,=09{KEY_FACILITY, set_facility}
,=09{KEY_LOGFILE, set_logfile}
,=09{KEY_DBGFILE, set_dbgfile}
+,=09{KEY_FAILBACK, set_nice_failback}
};
=20
=20
@@ -210,7 +214,7 @@
*=09Parse the configuration file and stash away the data
*/
int
-parse_config(const char * cfgfile)
+parse_config(const char * cfgfile, char *nodename)
{
=09FILE=09*=09f;
=09char=09=09buf[MAXLINE];
@@ -220,6 +224,10 @@
=09char=09=09option[MAXLINE];
=09int=09=09optionlength;
=09int=09=09errcount =3D 0;
+=09int=09=09j;
+=09int =09i;
+=09clock_t cticks;
+=09
=09struct stat=09sbuf;
=20
=09if ((f =3D fopen(cfgfile, "r")) =3D=3D NULL) {
@@ -233,8 +241,7 @@
=09/* It's ugly, but effective */
=20
=09while (fgets(buf, MAXLINE, f) !=3D NULL) {
-=09=09char *=09bp =3D buf;
-=09=09int=09j;
+=09=09char *bp =3D buf;=20
=20
=09=09/* Skip over white space */
=09=09bp +=3D strspn(bp, WHITESPACE);
@@ -291,6 +298,24 @@
=09=09=09bp +=3D strspn(bp, DELIMS);
=09=09}
=09}
+
+=09cticks =3D times(NULL);
+
+=09/*
+=09 * Give every node one link per configured medium, then end the
+=09 * list with a NULL name directly after the last used slot.
+=09 * NOTE: with nummedia =3D=3D MAXMEDIA there is no room for it.
+=09 */
+=09for (i=3D0; i < config->nodecount; ++i) {
+=09=09struct link *lnk =3D config->nodes[i].links;
+=09=09for (j=3D0; j < nummedia && j < MAXMEDIA; ++j, ++lnk) {
+=09=09=09lnk->name =3D sysmedia[j]->name;
+=09=09=09lnk->lastupdate =3D cticks;
+=09=09}
+=09=09if (j < MAXMEDIA) lnk->name =3D NULL;
+=09}
+
+=09
=09fclose(f);
=09return(errcount ? HA_FAIL : HA_OK);
}
@@ -806,6 +831,19 @@
set_logfile(const char * value)
{
=09strncpy(config->logfile, value, PATH_MAX);
+=09return(HA_OK);
+}
+
+/* nice_failback directive: only "on" (any case) switches it on */
+int
+set_nice_failback(const char * value)
+{
+=09/*
+=09 * strcasecmp() yields 0 on a match, so the comparison below is
+=09 * 1 for "on"/"ON"/... and 0 for every other value.
+=09 */
+=09nice_failback =3D (strcasecmp(value, "on") =3D=3D 0);
+
=09return(HA_OK);
}
=20
diff -Nur heartbeat-0.4.6c.orig/heartbeat/ha_msg.c heartbeat-0.4.6c/heartbe=
at/ha_msg.c
--- heartbeat-0.4.6c.orig/heartbeat/ha_msg.c=09Tue Nov 23 06:52:40 1999
+++ heartbeat-0.4.6c/heartbeat/ha_msg.c=09Mon Mar 27 14:28:57 2000
@@ -216,6 +216,73 @@
=20
=20
/* Return the next message found in the stream */
+/* This variant also picks up the optional IFACE interface-name prefix. */
+struct ha_msg *
+if_msgfromstream(FILE * f, char *iface)
+{
+=09char=09=09buf[MAXLINE];
+=09char *=09=09getsret;
+=09struct ha_msg*=09ret;
+=09/* iface may be NULL; otherwise it needs room for MAXIFACELEN bytes */
+=09clearerr(f);
+=09/* A failed read returns NULL; only EINTR failures go unlogged */
+=09if(!(getsret=3Dfgets(buf, MAXLINE, f))) {=20
+=09=09if (!ferror(f) || errno !=3D EINTR)=20
+=09=09=09ha_error("if_msgfromstream: cannot get message");
+=09=09return(NULL);
+=09}
+
+=09/* Try to find the interface on the message. */
+
+=09if(!strcmp(buf, IFACE)) {
+=09=09/* Found interface name header, get interface name. */
+=09=09if(!(getsret=3Dfgets(buf, MAXLINE, f))) {=20
+=09=09=09if (!ferror(f) || errno !=3D EINTR)
+=09=09=09=09ha_error("if_msgfromstream: cannot get message");
+=09=09=09return(NULL);
+=09=09}
+=09=09if(iface) {=20
+=09=09=09int len =3D strlen(buf);
+=09=09=09if(len < MAXIFACELEN) {=09/* longer names are ignored */
+=09=09=09=09strncpy(iface, buf, len);
+=09=09=09=09iface[len -1] =3D EOS;=09/* cut last char, normally '\n' */
+=09=09=09}
+=09=09}
+=09}
+=09/* buf now holds MSG_START, the interface name, or stray input */
+=09if(strcmp(buf, MSG_START)) { =09
+=09=09/* Skip until we find a MSG_START (hopefully we skip nothing) */
+=09=09while ((getsret=3Dfgets(buf, MAXLINE, f)) !=3D NULL
+=09=09&&=09strcmp(buf, MSG_START) !=3D 0) {
+=09=09=09/* Nothing */
+=09=09}
+=09}
+
+=09if (getsret =3D=3D NULL || (ret =3D ha_msg_new(0)) =3D=3D NULL) {
+=09=09/* Getting an error with EINTR is pretty normal */
+=09=09if (!ferror(f) || errno !=3D EINTR) {
+=09=09=09ha_error("if_msgfromstream: cannot get message");
+=09=09}
+=09=09return(NULL);
+=09}
+
+=09/* Add Name=3Dvalue pairs until we reach MSG_END or EOF */
+=09while ((getsret=3Dfgets(buf, MAXLINE, f)) !=3D NULL
+=09&&=09strcmp(buf, MSG_END) !=3D 0) {
+
+=09=09/* Add the "name=3Dvalue" string on this line to the message */
+=09=09if (ha_msg_add_nv(ret, buf) !=3D HA_OK) {
+=09=09=09ha_error("NV failure (if_msgfromsteam):");
+=09=09=09ha_error(buf);
+=09=09=09ha_msg_del(ret);
+=09=09=09return(NULL);
+=09=09}
+=09}
+=09return(ret);
+}
+
+/* Plain reader, kept alongside IFACE-aware if_msgfromstream() above */
+/* Return the next message found in the stream */
struct ha_msg *
msgfromstream(FILE * f)
{
@@ -315,7 +382,44 @@
=09return(ret);
}
=20
+/* Converts a message into a string prefixed by IFACE and iface name */
+char *
+msg2if_string(const struct ha_msg *m, const char *iface)=20
+{
=20
+=09int=09j;
+=09char *=09buf;
+=09char *=09bp;
+=09/* Returns an ha_malloc()ed string, or NULL on error */
+=09if (m->nfields <=3D 0) {
+=09=09ha_error("msg2if_string: Message with zero fields");
+=09=09return(NULL);
+=09}
+=09/* NOTE(review): "* sizeof(char *)" looks like over-allocation; confirm */
+=09buf =3D ha_malloc(m->stringlen + ((strlen(iface) + sizeof(IFACE)) * siz=
eof(char *)));
+
+=09if (buf =3D=3D NULL) {
+=09=09ha_error("msg2if_string: no memory for string");
+=09}else{
+=09=09bp =3D buf;=09/* strcat() below finds the real end from bp */
+=09=09strcpy(buf, IFACE);
+=09=09strcat(buf, iface);
+=09=09strcat(buf, "\n");
+=09=09strcat(buf, MSG_START);
+=09=09for (j=3D0; j < m->nfields; ++j) {=09/* "name=3Dvalue\n" each */
+=09=09=09strcat(bp, m->names[j]);
+=09=09=09bp +=3D m->nlens[j];
+=09=09=09strcat(bp, "=3D");
+=09=09=09bp++;
+=09=09=09strcat(bp, m->values[j]);
+=09=09=09bp +=3D m->vlens[j];
+=09=09=09strcat(bp, "\n");
+=09=09=09bp++;
+=09=09}
+=09=09strcat(bp, MSG_END);
+=09}
+=09return(buf);
+}
/* Converts a message into a string (for sending out UDP interface) */
char *
msg2string(const struct ha_msg *m)
diff -Nur heartbeat-0.4.6c.orig/heartbeat/ha_msg.h heartbeat-0.4.6c/heartbe=
at/ha_msg.h
--- heartbeat-0.4.6c.orig/heartbeat/ha_msg.h=09Tue Nov 23 06:52:40 1999
+++ heartbeat-0.4.6c/heartbeat/ha_msg.h=09Mon Mar 27 14:29:45 2000
@@ -16,7 +16,7 @@
=09char **=09values;
=09int *=09vlens;
};
-
+#define IFACE=09=09"!^!\n"=20
#define=09MSG_START=09">>>\n"
#define=09MSG_END=09=09"<<<\n"
#define=09EQUAL=09=09"=3D"
@@ -71,6 +71,8 @@
/* Reads a stream -- converts it into a message */
struct ha_msg *=09msgfromstream(FILE * f);
=20
+struct ha_msg * if_msgfromstream(FILE * f, char *iface);
+
/* Writes a message into a stream */
int=09=09msg2stream(struct ha_msg* m, FILE * f);
=20
@@ -79,6 +81,9 @@
=20
/* Converts a message into a string for sending out UDP interface */
char *=09=09msg2string(const struct ha_msg *m);
+
+/* Converts a message into a string and adds the iface name on start */
+char *=09=09msg2if_string(const struct ha_msg *m, const char * iface);
=20
/* Reads from control fifo, and creates a new message from it */
/* This adds the default sequence#, load avg, etc. to the message */
diff -Nur heartbeat-0.4.6c.orig/heartbeat/heartbeat.c heartbeat-0.4.6c/hear=
tbeat/heartbeat.c
--- heartbeat-0.4.6c.orig/heartbeat/heartbeat.c=09Sat Dec 25 06:46:02 1999
+++ heartbeat-0.4.6c/heartbeat/heartbeat.c=09Mon Mar 27 18:11:26 2000
@@ -132,6 +132,18 @@
*=09=09is less likely to fail. But if it does, it might mean
*=09=09seven years of bad luck :-)
*
+ *
+ * Changes:=20
+ *
+ * 03/17/2000 Luis Claudio R. Gon=E7alves <lclaudio@conectiva.com.br>
+ * Created the role switching idea... if the cluster is already active
+ * when the primary starts or performs a failback, it stays quiet,
+ * acting as a standby host. Thus we have controlled failback.
+ * =20
+ * 03/26/2000 Marcelo Tosatti <marcelo@conectiva.com.br>=20
+ * Per-link and global (node) status instead of simply node status. First
+ * step of the link/service dependency scheme.
+ * =20
*/
=20
#include <stdio.h>
@@ -160,6 +172,9 @@
=20
#define OPTARGS=09=09"dkrRsv"
=20
+#define=09KEEPIT=090
+#define=09DROPIT=091
+#define=09DUPLICATE 2
=20
int=09=09verbose =3D 0;
=20
@@ -169,6 +184,9 @@
int=09=09debug =3D 0;
int=09=09RestartRequested =3D 0;
int=09=09WeAreRestarting =3D 0;
+int=09=09cluster_already_active =3D 0;=20
+int=09=09we_are_primary =3D 0; =20
+int nice_failback =3D 0;=20
int=09=09killrunninghb =3D 0;
int=09=09rpt_hb_status =3D 0;
int=09=09childpid =3D -1;
@@ -231,6 +249,7 @@
void=09check_node_timeouts(void);
void=09request_msg_rexmit(struct node_info *, unsigned long lowseq, unsign=
ed long hiseq);
void=09check_rexmit_reqs(void);
+void=09mark_link_dead(struct node_info* hip, struct link *lnk);
void=09mark_node_dead(struct node_info* hip);
void=09notify_world(struct ha_msg * msg, const char * ostatus);
pid_t=09get_running_hb_pid(void);
@@ -238,7 +257,8 @@
void=09heartbeat_monitor(struct ha_msg * msg);
void=09send_to_all_media(char * smsg, int len);
void=09init_monitor(void);
-int=09should_drop_message(struct node_info* node, const struct ha_msg* msg=
);
+int=09should_drop_message(struct node_info* node, const struct ha_msg* msg=
,
+=09=09=09=09=09=09const char *iface);
void=09add2_xmit_hist (struct msg_xmit_hist * hist, struct ha_msg* msg
,=09=09unsigned long seq);
void=09init_xmit_hist (struct msg_xmit_hist * hist);
@@ -302,6 +322,20 @@
}
=20
=20
+/* Return hip's link whose name equals iface, or NULL if there is none */
+struct link *
+lookup_iface(struct node_info * hip, const char *iface)
+{
+=09int=09j;
+=09if (hip =3D=3D NULL || iface =3D=3D NULL) return NULL;
+=09/* A NULL name ends the list; also never scan past the array end */
+=09for (j =3D 0; j < MAXMEDIA && hip->links[j].name !=3D NULL; ++j) {
+=09=09if (strcmp(hip->links[j].name, iface) =3D=3D 0) {
+=09=09=09return &hip->links[j];
+=09=09}
+=09}
+=09return NULL;
+}
=20
/* Look up the node in the configuration, returning the node info structur=
e */
struct node_info *
@@ -584,11 +618,12 @@
=09for (;;) {
=09=09struct=09ha_msg*=09m =3D mp->vf->read(mp);
=09=09char *=09=09sm;
+
=09=09if (m =3D=3D NULL) {
=09=09=09continue;
=09=09}
+=09=09sm =3D msg2if_string(m, mp->name);
=20
-=09=09sm =3D msg2string(m);
=09=09if (sm !=3D NULL) {
=09=09=09msglen =3D strlen(sm);
=09=09=09if (DEBUGPKT) {
@@ -626,7 +661,8 @@
=20
=09siginterrupt(SIGALRM, 1);
=09for (;;) {
-=09=09struct ha_msg *=09msgp =3D msgfromstream(ourfp);
+=09=09struct ha_msg *=09msgp;
+=09=09msgp =3D if_msgfromstream(ourfp, NULL);
=09=09if (msgp =3D=3D NULL) {
=09=09=09continue;
=09=09}
@@ -760,8 +796,12 @@
=09FILE *=09=09=09f =3D fdopen(status_pipe[P_READFD], "r");
=09struct ha_msg *=09=09msg =3D NULL;
=09int=09=09=09resources_requested_yet =3D 0;
+=09
=09time_t=09=09=09lastnow =3D 0L;
-
+=09char=09iface[MAXIFACELEN];
+=09struct link=09*lnk;
+=09int starting =3D 1;
+=09int starting_flag =3D 1;
=20
=09init_status_alarm();
=09init_watchdog();
@@ -776,9 +816,20 @@
=09=09const char *=09type;
=20
=09=09if (send_status_now) {
+=09=09=09/* If heartbeat is starting right now on this node
+=09=09=09* give time for someone else in the cluster send a
+=09=09=09* message... the cluster is already active?
+=09=09=09*/
+=09=09=09if (starting && nice_failback) {
+=09=09=09=09ha_log(LOG_DEBUG,"%s", "Waiting for someone else..." );
+=09=09=09=09sleep (2*config->heartbeat_interval);
+=09=09=09=09starting =3D 0;
+=09=09=09}
+
=09=09=09send_status_now =3D 0;
=09=09=09send_local_status();
=09=09}
+
=09=09if (dump_stats_now) {
=09=09=09dump_stats_now =3D 0;
=09=09=09dump_all_proc_stats();
@@ -797,7 +848,9 @@
=09=09}
=09=09lastnow =3D now;
=20
-=09=09msg =3D msgfromstream(f);
+=09=09bzero(iface, MAXIFACELEN);
+
+=09=09msg =3D if_msgfromstream(f, iface);
=20
=09=09/* This may be caused by SIGALRM */
=09=09if (msg =3D=3D NULL) {
@@ -847,6 +900,18 @@
#endif
=09=09}
=20
+=09=09/* If the message came from another node, the cluster is=20
+=09=09 * already active. It's only valid for the starting process
+=09=09 */
+
+=09=09if (!WeAreRestarting && !resources_requested_yet
+=09=09&& (thisnode !=3D curnode && (now-starttime) < RQSTDELAY)
+=09=09&& (starting_flag && nice_failback)) {
+=09=09=09ha_log(LOG_DEBUG, "%s", "The cluster is already active");
+=09=09=09cluster_already_active =3D 1;
+=09=09=09starting_flag =3D 0;
+=09=09}
+=09
=09=09/* Throw away some incoming packets if testing is enabled */
=09=09if (TESTRCV) {
=09=09=09if (thisnode !=3D curnode && TestRand(rcv_loss_prob)) {
@@ -863,17 +928,61 @@
=09=09 * in from somewhere else, then cluster comm is working...
=09=09 *
=09=09 */
-=09
+
=09=09if (!WeAreRestarting && !resources_requested_yet
=09=09&&=09(thisnode !=3D curnode || (now-starttime) > RQSTDELAY)) {
=09=09=09=09resources_requested_yet=3D1;
=09=09=09=09req_our_resources();
=09=09}
=20
+=09=09lnk =3D lookup_iface(thisnode, iface);
+
=09=09/* Is this message a duplicate, or destined for someone else? */
-=09=09if (should_drop_message(thisnode, msg)) {
+
+=09=09switch (should_drop_message(thisnode, msg, iface)) {
+=09=09const char *=09status;
+=09=09const char *=09cseq;
+=09=09long=09=09seqno;
+=09=09case DUPLICATE:
+
+=09=09=09if(!lnk) continue;
+
+=09=09=09sscanf(ts, "%lx", &msgtime);
+=09=09=09status =3D ha_msg_value(msg, F_STATUS);
+=09=09=09if (status =3D=3D NULL) {
+=09=09=09=09ha_log(LOG_ERR, "master_status_process (duplicate): "
+=09=09=09=09"status update without "
+=09=09=09=09F_STATUS " field");
+=09=09=09=09continue;
+=09=09=09}
+=09=09=09if ((cseq =3D ha_msg_value(msg, F_SEQ)) !=3D NULL) {
+=09=09=09=09if (sscanf(cseq, "%lx", &seqno) !=3D 1
+=09=09=09=09||=09seqno <=3D 0) {
+=09=09=09=09=09continue;
+=09=09=09=09}
+=09=09=09}
+
+=09=09=09/* Do we already have a newer status? */
+=09=09=09if (msgtime < lnk->rmt_lastupdate) {=20
+=09=09=09=09continue;
+=09=09=09}
+
+=09=09=09lnk->rmt_lastupdate =3D msgtime;
+
+=09=09=09thisnode->local_lastupdate =3D lnk->lastupdate =3D times(NULL);
+
+=09=09=09if (strcasecmp(lnk->status, status) !=3D 0) {
+=09=09=09=09ha_log(LOG_INFO, "node %s -- link %s: status %s"
+=09=09=09=09, thisnode->nodename, lnk->name, status);
+=09=09=09=09strcpy(lnk->status, status);
+=09=09=09}
+=09=09continue;
+=09=09case DROPIT:
+=09=09=09/* Ignore it */
=09=09=09continue;
-=09=09}
+=09=09}=20
+
+=09=09thisnode->track.last_iface =3D iface;
=20
=09=09/* Is this a status update message? */
=09=09if (strcasecmp(type, T_STATUS) =3D=3D 0) {
@@ -897,8 +1006,7 @@
=09=09=09=09}
=09=09=09}
=20
-=09=09=09/* Do we already have a newer status? */
-=09=09=09if (msgtime < thisnode->rmt_lastupdate
+=09=09=09if (msgtime < thisnode->rmt_lastupdate=20
=09=09=09&&=09seqno < thisnode->status_seqno) {
=09=09=09=09continue;
=09=09=09}
@@ -906,10 +1014,16 @@
=09=09=09heartbeat_monitor(msg);
=20
=09=09=09thisnode->rmt_lastupdate =3D msgtime;
-=09=09=09thisnode->local_lastupdate =3D times(NULL);
+
+=09=09=09if(lnk) {=20
+=09=09=09=09thisnode->local_lastupdate =3D lnk->lastupdate =3D times(NULL)=
;
+=09=09=09}else{
+=09=09=09=09thisnode->local_lastupdate =3D times(NULL);
+=09=09=09}=09
+
=09=09=09thisnode->status_seqno =3D seqno;
=20
-=09=09=09/* Is the status the same? */
+=09=09=09/* Is the node status the same? */
=09=09=09if (strcasecmp(thisnode->status, status) !=3D 0) {
=09=09=09=09ha_log(LOG_INFO
=09=09=09=09,=09"node %s: status %s"
@@ -919,6 +1033,15 @@
=09=09=09=09strcpy(thisnode->status, status);
=09=09=09}
=20
+=09=09=09/* Is the link status the same? */
+=09=09=09if(lnk) {
+=09=09=09=09if (strcasecmp(lnk->status, status) !=3D 0) {
+=09=09=09=09=09ha_log(LOG_INFO, "node %s -- link %s: status %s"=20
+=09=09=09=09=09=09, thisnode->nodename, lnk->name, status);
+=09=09=09=09strcpy(lnk->status, status);
+=09=09=09=09}
+=09=09=09}
+
=09=09=09/* Did we get a status update on ourselves? */
=09=09=09if (thisnode =3D=3D curnode) {
=09=09=09=09tickle_watchdog();
@@ -1260,13 +1383,13 @@
{
=09clock_t=09now =3D times(NULL);
=09struct node_info *=09hip;
-=09clock_t=09dead_ticks =3D (CLK_TCK * config->deadtime_interval);
-=09clock_t=09TooOld =3D now - dead_ticks;
+=09clock_t dead_ticks =3D (CLK_TCK * config->deadtime_interval);
+=09clock_t TooOld =3D now - dead_ticks;
=09int=09j;
=20
=09/* We need to be careful to handle clock_t wrapround carefully */
=09if (now < dead_ticks) {
-=09=09return;=09/* Ignore timeouts during wraparound */
+=09=09return; /* Ignore timeouts during wraparound */
=09=09=09/* This doubles our timeout at this time */
=09=09=09/* Sorry. */
=09}
@@ -1286,7 +1409,27 @@
=09=09mark_node_dead(hip);
=09}
=20
+=09/* Check all links status of all nodes */
=20
+=09for (j=3D0; j < config->nodecount; ++j) {
+=09=09struct link *lnk;
+=09=09int i =3D 0;
+=09=09hip =3D &config->nodes[j];
+=09=09if(hip =3D=3D curnode) continue;
+
+=09=09while((lnk =3D &hip->links[i]) && lnk->name) {
+=09=09=09if (lnk->lastupdate > now) {
+=09=09=09=09lnk->lastupdate =3D 0L;
+=09=09=09}
+=09=09=09if (lnk->lastupdate >=3D TooOld
+=09=09=09|| strcmp(lnk->status, DEADSTATUS) =3D=3D 0 ) {
+=09=09=09=09i++;
+=09=09=09=09continue;
+=09=09=09}
+=09=09=09mark_link_dead(hip, lnk);
+=09=09=09i++;
+=09=09}
+=09}
}
=20
/* Set our local status to the given value, and send it out*/
@@ -1364,6 +1507,15 @@
=09return(rc);
}
=20
+/* Mark the given link dead: log it and record DEADSTATUS as status */
+void
+mark_link_dead(struct node_info *hip, struct link *lnk)
+{
+=09/* FIXME: Do something useful */
+=09ha_log(LOG_ERR, "Link %s:%s DEAD.", hip->nodename, lnk->name);
+=09strcpy(lnk->status, DEADSTATUS);
+}
+=09=09=09=09=09
/* Mark the given node dead */
void
mark_node_dead(struct node_info *hip)
@@ -1397,7 +1549,14 @@
=09=09/* Uh, oh... we're dead! */
=09=09ha_log(LOG_ERR, "No local heartbeat. Forcing shutdown.");
=09=09kill(procinfo->info[0].pid, SIGTERM);
+=09} else {
+=09=09if (we_are_primary && nice_failback) {
+=09=09=09ha_log(LOG_DEBUG,"%s",=09"[lcrg] We are primary again!");
+=09=09=09we_are_primary =3D 0;
+=09=09=09req_our_resources();
+=09=09}
=09}
+=09
=09ha_msg_del(hmsg);
}
=20
@@ -1554,12 +1713,25 @@
=09=09if (buf[strlen(buf)-1] =3D=3D '\n') {
=09=09=09buf[strlen(buf)-1] =3D EOS;
=09=09}
-=09=09sprintf(getcmd, HALIB "/req_resource %s &", buf);
-=09=09if ((rc=3Dsystem(getcmd)) !=3D 0) {
-=09=09=09ha_perror("%s returned %d", getcmd, rc);
-=09=09=09finalrc=3DHA_FAIL;
+=09=09
+=09=09/* If the cluster is already active, act as standby. */
+=09=09if (cluster_already_active && nice_failback) {
+=09=09=09ha_log(LOG_DEBUG,
+=09=09=09"Acting as standby for resource %s",buf);
+=09=09}else {
+=09=09=09sprintf(getcmd, HALIB "/req_resource %s &", buf);
+=09=09=09if ((rc=3Dsystem(getcmd)) !=3D 0) {
+=09=09=09=09ha_perror("%s returned %d", getcmd, rc);
+=09=09=09=09finalrc=3DHA_FAIL;
+=09=09=09}
=09=09}
=09}
+
+=09if (rsc_count && nice_failback) {
+=09=09cluster_already_active =3D 0;
+=09=09we_are_primary =3D 1;
+=09}
+
=09rc=3Dpclose(rkeys);
=09if (rc < 0 && errno !=3D ECHILD) {
=09=09ha_perror("pclose(%s) returned %d", cmd, rc);
@@ -2048,14 +2220,13 @@
*=09do for now...
*/
#define=09SEQGAP=09100=09/* A heuristic number */
-#define=09KEEPIT=090
-#define=09DROPIT=091
=20
/*
*=09Should we ignore this packet, or pay attention to it?
*/
int
-should_drop_message(struct node_info * thisnode, const struct ha_msg *msg)
+should_drop_message(struct node_info * thisnode, const struct ha_msg *msg,
+=09=09=09=09=09const char *iface)
{
=09struct seqtrack *=09t =3D &thisnode->track;
=09const char *=09=09cseq =3D ha_msg_value(msg, F_SEQ);
@@ -2086,8 +2257,12 @@
=09/* Is this packet in sequence? */
=09if (t->last_seq =3D=3D NOSEQUENCE || seq =3D=3D (t->last_seq+1)) {
=09=09t->last_seq =3D seq;
+=09=09t->last_iface =3D iface;
=09=09return(IsToUs ? KEEPIT : DROPIT);
=09}else if (seq =3D=3D t->last_seq) {
+=09=09if(iface && t->last_iface && strcmp(iface, t->last_iface) =3D=3D 0) =
{=20
+=09=09=09return (DUPLICATE);
+=09=09}
=09=09/* Same as last-seen packet -- very common case */
=09=09if (DEBUGPKT) {
=09=09=09ha_log(LOG_DEBUG,
@@ -2114,6 +2289,7 @@
=09=09=09/* This keeps the loop below from going a long time */
=09=09=09t->nmissing =3D 0;
=09=09=09t->last_seq =3D seq;
+=09=09=09t->last_iface =3D iface;
=09=09=09ha_log(LOG_ERR, "lost a lot of packets!");
=09=09=09return(IsToUs ? KEEPIT : DROPIT);
=09=09}else{
@@ -2147,6 +2323,7 @@
=09=09=09}
=09=09}
=09=09t->last_seq =3D seq;
+=09=09t->last_iface =3D iface;
=09=09return(IsToUs ? KEEPIT : DROPIT);
=09}
=09/*
@@ -2209,6 +2386,7 @@
=09=09=09t->nmissing =3D 0;
=09=09=09t->last_seq =3D seq;
=09=09=09t->last_rexmit_req =3D 0L;
+=09=09=09t->last_iface =3D iface;
=09=09=09return(IsToUs ? KEEPIT : DROPIT);
=09=09}
=09}
diff -Nur heartbeat-0.4.6c.orig/heartbeat/heartbeat.h heartbeat-0.4.6c/hear=
tbeat/heartbeat.h
--- heartbeat-0.4.6c.orig/heartbeat/heartbeat.h=09Sat Dec 25 06:46:02 1999
+++ heartbeat-0.4.6c/heartbeat/heartbeat.h=09Mon Mar 27 14:40:04 2000
@@ -41,6 +41,7 @@
#define=09MAXFIELDS=0915=09=09/* Max # of fields in a msg */
#define HOSTLENG=09100=09=09/* Maximum size of "uname -a" return */
#define STATUSLENG=0932=09=09/* Maximum size of status field */
+#define MAXIFACELEN=0930 =09=09/* Maximum interface length */
#define=09MAXSERIAL=094
#define=09MAXMEDIA=0912
#define=09MAXNODE=09=09100
@@ -142,10 +143,20 @@
=09int=09=09nmissing;
=09unsigned long=09last_seq;
=09unsigned long=09seqmissing[MAXMISSING];
+=09const char *=09last_iface;
};
+/* State of one communication medium ("link") as tracked for one node */
+struct link {=20
+=09clock_t lastupdate; /* times() value of last update on this link */
+=09const char *name;=09/* Medium name; NULL ends a node's links[] list */
+=09char=09status[STATUSLENG];=09/* Last status seen on this link */
+=09time_t=09rmt_lastupdate;=09=09/* node's idea of last update time over t=
his link */
+};
+
struct node_info {
=09char=09nodename[HOSTLENG];=09/* Host name from config file */
=09char=09status[STATUSLENG];=09/* Status from heartbeat */
+=09struct link links[MAXMEDIA];=20
=09time_t=09rmt_lastupdate;=09=09/* node's idea of last update time */
=09unsigned long=09status_seqno;=09/* Seqno of last status update */
=09clock_t=09local_lastupdate;=09/* Date of last update in clock_t time */
@@ -283,6 +294,9 @@
extern unsigned char * =09calc_cksum(const char * authmethod, const char *=
key, const char * value);
struct auth_type *=09findauth(const char * type);
struct node_info *=09lookup_node(const char *);
+struct link * lookup_iface(struct node_info * hip, const char *iface);
+struct link *=09iface_lookup_node(const char *);
+
void*=09=09ha_malloc(size_t size);
void*=09=09ha_calloc(size_t nmemb, size_t size);
void=09=09ha_free(void *ptr);
diff -Nur heartbeat-0.4.6c.orig/heartbeat/lib/req_resource heartbeat-0.4.6c=
/heartbeat/lib/req_resource
--- heartbeat-0.4.6c.orig/heartbeat/lib/req_resource=09Wed Nov 10 18:31:05 =
1999
+++ heartbeat-0.4.6c/heartbeat/lib/req_resource=09Mon Mar 27 15:48:04 2000
@@ -14,7 +14,9 @@
HA_DIR=3D/etc/ha.d; export HA_DIR
. $HA_DIR/shellfuncs
=20
-TIMEOUT=3D30
+# To comply with the new failback stuff the timeout was reduced to 10s.
+# The primary failover always waits $TIMEOUT...
+TIMEOUT=3D10
=20
RESOURCE=3D$1
=20
This patch against heartbeat 0.4.6c adds per-link status
instead of only per-node status. This is the first step toward the
link/status dependency scheme we are planning. Handling of dead links
will be done by external scripts, which we will send to this list soon.
It also adds the "nice_failback" option in ha.cf, which avoids taking
services back when a starting node notices that the cluster is already
active. Any comments are welcome.
---
diff -Nur heartbeat-0.4.6c.orig/heartbeat/config.c heartbeat-0.4.6c/heartbe=
at/config.c
--- heartbeat-0.4.6c.orig/heartbeat/config.c=09Sat Dec 25 06:46:01 1999
+++ heartbeat-0.4.6c/heartbeat/config.c=09Mon Mar 27 15:00:46 2000
@@ -43,9 +43,10 @@
extern volatile struct process_info *=09curproc;
extern char *=09=09=09=09watchdogdev;
extern int=09=09=09=09nummedia;
+extern int=09=09=09=09nice_failback;
=20
int=09islegaldirective(const char *directive);
-int=09parse_config(const char * cfgfile);
+int=09parse_config(const char * cfgfile, char *nodename);
int=09parse_ha_resources(const char * cfgfile);
void=09dump_config(void);
int=09add_option(const char *=09option, const char * value);
@@ -87,7 +88,7 @@
=09uname(&u);
=09curnode =3D NULL;
=20
-=09if (!parse_config(cfgfile)) {
+=09if (!parse_config(cfgfile, u.nodename)) {
=09=09ha_log(LOG_ERR, "Heartbeat not started: configuration error.");
=09=09return(HA_FAIL);
=09}
@@ -163,6 +164,7 @@
#define=09KEY_FACILITY=09"logfacility"
#define=09KEY_LOGFILE=09"logfile"
#define=09KEY_DBGFILE=09"debugfile"
+#define KEY_FAILBACK "nice_failback"=20
=20
int add_node(const char *);
int set_hopfudge(const char *);
@@ -174,6 +176,7 @@
int set_facility(const char *);
int set_logfile(const char *);
int set_dbgfile(const char *);
+int set_nice_failback(const char *);
=20
extern const struct hb_media_fns=09ip_media_fns;
extern const struct hb_media_fns=09serial_media_fns;
@@ -203,6 +206,7 @@
,=09{KEY_FACILITY, set_facility}
,=09{KEY_LOGFILE, set_logfile}
,=09{KEY_DBGFILE, set_dbgfile}
+,=09{KEY_FAILBACK, set_nice_failback}
};
=20
=20
@@ -210,7 +214,7 @@
*=09Parse the configuration file and stash away the data
*/
int
-parse_config(const char * cfgfile)
+parse_config(const char * cfgfile, char *nodename)
{
=09FILE=09*=09f;
=09char=09=09buf[MAXLINE];
@@ -220,6 +224,10 @@
=09char=09=09option[MAXLINE];
=09int=09=09optionlength;
=09int=09=09errcount =3D 0;
+=09int=09=09j;
+=09int =09i;
+=09clock_t cticks;
+=09
=09struct stat=09sbuf;
=20
=09if ((f =3D fopen(cfgfile, "r")) =3D=3D NULL) {
@@ -233,8 +241,7 @@
=09/* It's ugly, but effective */
=20
=09while (fgets(buf, MAXLINE, f) !=3D NULL) {
-=09=09char *=09bp =3D buf;
-=09=09int=09j;
+=09=09char *bp =3D buf;=20
=20
=09=09/* Skip over white space */
=09=09bp +=3D strspn(bp, WHITESPACE);
@@ -291,6 +298,24 @@
=09=09=09bp +=3D strspn(bp, DELIMS);
=09=09}
=09}
+
+=09cticks =3D times(NULL);
+
+=09/*
+=09 * Give every node one link per configured medium, then end the
+=09 * list with a NULL name directly after the last used slot.
+=09 * NOTE: with nummedia =3D=3D MAXMEDIA there is no room for it.
+=09 */
+=09for (i=3D0; i < config->nodecount; ++i) {
+=09=09struct link *lnk =3D config->nodes[i].links;
+=09=09for (j=3D0; j < nummedia && j < MAXMEDIA; ++j, ++lnk) {
+=09=09=09lnk->name =3D sysmedia[j]->name;
+=09=09=09lnk->lastupdate =3D cticks;
+=09=09}
+=09=09if (j < MAXMEDIA) lnk->name =3D NULL;
+=09}
+
+=09
=09fclose(f);
=09return(errcount ? HA_FAIL : HA_OK);
}
@@ -806,6 +831,19 @@
set_logfile(const char * value)
{
=09strncpy(config->logfile, value, PATH_MAX);
+=09return(HA_OK);
+}
+
+/* nice_failback directive: only "on" (any case) switches it on */
+int
+set_nice_failback(const char * value)
+{
+=09/*
+=09 * strcasecmp() yields 0 on a match, so the comparison below is
+=09 * 1 for "on"/"ON"/... and 0 for every other value.
+=09 */
+=09nice_failback =3D (strcasecmp(value, "on") =3D=3D 0);
+
=09return(HA_OK);
}
=20
diff -Nur heartbeat-0.4.6c.orig/heartbeat/ha_msg.c heartbeat-0.4.6c/heartbe=
at/ha_msg.c
--- heartbeat-0.4.6c.orig/heartbeat/ha_msg.c=09Tue Nov 23 06:52:40 1999
+++ heartbeat-0.4.6c/heartbeat/ha_msg.c=09Mon Mar 27 14:28:57 2000
@@ -216,6 +216,73 @@
=20
=20
/* Return the next message found in the stream */
+/* This variant also picks up the optional IFACE interface-name prefix. */
+struct ha_msg *
+if_msgfromstream(FILE * f, char *iface)
+{
+=09char=09=09buf[MAXLINE];
+=09char *=09=09getsret;
+=09struct ha_msg*=09ret;
+=09/* iface may be NULL; otherwise it needs room for MAXIFACELEN bytes */
+=09clearerr(f);
+=09/* A failed read returns NULL; only EINTR failures go unlogged */
+=09if(!(getsret=3Dfgets(buf, MAXLINE, f))) {=20
+=09=09if (!ferror(f) || errno !=3D EINTR)=20
+=09=09=09ha_error("if_msgfromstream: cannot get message");
+=09=09return(NULL);
+=09}
+
+=09/* Try to find the interface on the message. */
+
+=09if(!strcmp(buf, IFACE)) {
+=09=09/* Found interface name header, get interface name. */
+=09=09if(!(getsret=3Dfgets(buf, MAXLINE, f))) {=20
+=09=09=09if (!ferror(f) || errno !=3D EINTR)
+=09=09=09=09ha_error("if_msgfromstream: cannot get message");
+=09=09=09return(NULL);
+=09=09}
+=09=09if(iface) {=20
+=09=09=09int len =3D strlen(buf);
+=09=09=09if(len < MAXIFACELEN) {=09/* longer names are ignored */
+=09=09=09=09strncpy(iface, buf, len);
+=09=09=09=09iface[len -1] =3D EOS;=09/* cut last char, normally '\n' */
+=09=09=09}
+=09=09}
+=09}
+=09/* buf now holds MSG_START, the interface name, or stray input */
+=09if(strcmp(buf, MSG_START)) { =09
+=09=09/* Skip until we find a MSG_START (hopefully we skip nothing) */
+=09=09while ((getsret=3Dfgets(buf, MAXLINE, f)) !=3D NULL
+=09=09&&=09strcmp(buf, MSG_START) !=3D 0) {
+=09=09=09/* Nothing */
+=09=09}
+=09}
+
+=09if (getsret =3D=3D NULL || (ret =3D ha_msg_new(0)) =3D=3D NULL) {
+=09=09/* Getting an error with EINTR is pretty normal */
+=09=09if (!ferror(f) || errno !=3D EINTR) {
+=09=09=09ha_error("if_msgfromstream: cannot get message");
+=09=09}
+=09=09return(NULL);
+=09}
+
+=09/* Add Name=3Dvalue pairs until we reach MSG_END or EOF */
+=09while ((getsret=3Dfgets(buf, MAXLINE, f)) !=3D NULL
+=09&&=09strcmp(buf, MSG_END) !=3D 0) {
+
+=09=09/* Add the "name=3Dvalue" string on this line to the message */
+=09=09if (ha_msg_add_nv(ret, buf) !=3D HA_OK) {
+=09=09=09ha_error("NV failure (if_msgfromsteam):");
+=09=09=09ha_error(buf);
+=09=09=09ha_msg_del(ret);
+=09=09=09return(NULL);
+=09=09}
+=09}
+=09return(ret);
+}
+
+/* Plain reader, kept alongside IFACE-aware if_msgfromstream() above */
+/* Return the next message found in the stream */
struct ha_msg *
msgfromstream(FILE * f)
{
@@ -315,7 +382,44 @@
=09return(ret);
}
=20
+/* Converts a message into a string prefixed by IFACE and iface name */
+char *
+msg2if_string(const struct ha_msg *m, const char *iface)=20
+{
=20
+=09int=09j;
+=09char *=09buf;
+=09char *=09bp;
+=09/* Returns an ha_malloc()ed string, or NULL on error */
+=09if (m->nfields <=3D 0) {
+=09=09ha_error("msg2if_string: Message with zero fields");
+=09=09return(NULL);
+=09}
+=09/* NOTE(review): "* sizeof(char *)" looks like over-allocation; confirm */
+=09buf =3D ha_malloc(m->stringlen + ((strlen(iface) + sizeof(IFACE)) * siz=
eof(char *)));
+
+=09if (buf =3D=3D NULL) {
+=09=09ha_error("msg2if_string: no memory for string");
+=09}else{
+=09=09bp =3D buf;=09/* strcat() below finds the real end from bp */
+=09=09strcpy(buf, IFACE);
+=09=09strcat(buf, iface);
+=09=09strcat(buf, "\n");
+=09=09strcat(buf, MSG_START);
+=09=09for (j=3D0; j < m->nfields; ++j) {=09/* "name=3Dvalue\n" each */
+=09=09=09strcat(bp, m->names[j]);
+=09=09=09bp +=3D m->nlens[j];
+=09=09=09strcat(bp, "=3D");
+=09=09=09bp++;
+=09=09=09strcat(bp, m->values[j]);
+=09=09=09bp +=3D m->vlens[j];
+=09=09=09strcat(bp, "\n");
+=09=09=09bp++;
+=09=09}
+=09=09strcat(bp, MSG_END);
+=09}
+=09return(buf);
+}
/* Converts a message into a string (for sending out UDP interface) */
char *
msg2string(const struct ha_msg *m)
diff -Nur heartbeat-0.4.6c.orig/heartbeat/ha_msg.h heartbeat-0.4.6c/heartbe=
at/ha_msg.h
--- heartbeat-0.4.6c.orig/heartbeat/ha_msg.h=09Tue Nov 23 06:52:40 1999
+++ heartbeat-0.4.6c/heartbeat/ha_msg.h=09Mon Mar 27 14:29:45 2000
@@ -16,7 +16,7 @@
=09char **=09values;
=09int *=09vlens;
};
-
+#define IFACE=09=09"!^!\n"=20
#define=09MSG_START=09">>>\n"
#define=09MSG_END=09=09"<<<\n"
#define=09EQUAL=09=09"=3D"
@@ -71,6 +71,8 @@
/* Reads a stream -- converts it into a message */
struct ha_msg *=09msgfromstream(FILE * f);
=20
+struct ha_msg * if_msgfromstream(FILE * f, char *iface);
+
/* Writes a message into a stream */
int=09=09msg2stream(struct ha_msg* m, FILE * f);
=20
@@ -79,6 +81,9 @@
=20
/* Converts a message into a string for sending out UDP interface */
char *=09=09msg2string(const struct ha_msg *m);
+
+/* Converts a message into a string and adds the iface name on start */
+char *=09=09msg2if_string(const struct ha_msg *m, const char * iface);
=20
/* Reads from control fifo, and creates a new message from it */
/* This adds the default sequence#, load avg, etc. to the message */
diff -Nur heartbeat-0.4.6c.orig/heartbeat/heartbeat.c heartbeat-0.4.6c/hear=
tbeat/heartbeat.c
--- heartbeat-0.4.6c.orig/heartbeat/heartbeat.c=09Sat Dec 25 06:46:02 1999
+++ heartbeat-0.4.6c/heartbeat/heartbeat.c=09Mon Mar 27 18:11:26 2000
@@ -132,6 +132,18 @@
*=09=09is less likely to fail. But if it does, it might mean
*=09=09seven years of bad luck :-)
*
+ *
+ * Changes:=20
+ *
+ * 03/17/2000 Luis Claudio R. Gon=E7alves <lclaudio@conectiva.com.br>
+ * Created the role switching idea... if the cluster is already active,
+ * when the primary starts or performs a failback it stays quiet,
+ * acting as a standby host. Thus we have controlled failback.
+ * =20
+ * 03/26/2000 Marcelo Tosatti <marcelo@conectiva.com.br>=20
+ * Per-link and global (node) status instead of simply node status. First
+ * step of the link/service dependency scheme.
+ * =20
*/
=20
#include <stdio.h>
@@ -160,6 +172,9 @@
=20
#define OPTARGS=09=09"dkrRsv"
=20
+#define=09KEEPIT=090
+#define=09DROPIT=091
+#define=09DUPLICATE 2
=20
int=09=09verbose =3D 0;
=20
@@ -169,6 +184,9 @@
int=09=09debug =3D 0;
int=09=09RestartRequested =3D 0;
int=09=09WeAreRestarting =3D 0;
+int=09=09cluster_already_active =3D 0;=20
+int=09=09we_are_primary =3D 0; =20
+int nice_failback =3D 0;=20
int=09=09killrunninghb =3D 0;
int=09=09rpt_hb_status =3D 0;
int=09=09childpid =3D -1;
@@ -231,6 +249,7 @@
void=09check_node_timeouts(void);
void=09request_msg_rexmit(struct node_info *, unsigned long lowseq, unsign=
ed long hiseq);
void=09check_rexmit_reqs(void);
+void=09mark_link_dead(struct node_info* hip, struct link *lnk);
void=09mark_node_dead(struct node_info* hip);
void=09notify_world(struct ha_msg * msg, const char * ostatus);
pid_t=09get_running_hb_pid(void);
@@ -238,7 +257,8 @@
void=09heartbeat_monitor(struct ha_msg * msg);
void=09send_to_all_media(char * smsg, int len);
void=09init_monitor(void);
-int=09should_drop_message(struct node_info* node, const struct ha_msg* msg=
);
+int=09should_drop_message(struct node_info* node, const struct ha_msg* msg=
,
+=09=09=09=09=09=09const char *iface);
void=09add2_xmit_hist (struct msg_xmit_hist * hist, struct ha_msg* msg
,=09=09unsigned long seq);
void=09init_xmit_hist (struct msg_xmit_hist * hist);
@@ -302,6 +322,20 @@
}
=20
=20
+/* Find the link of node hip named iface; NULL if the node has none */
+struct link *
+lookup_iface(struct node_info * hip, const char *iface)
+{
+=09int j;
+
+=09/* links[] ends at first NULL name; never scan past MAXMEDIA */
+=09for (j =3D 0; j < MAXMEDIA && hip->links[j].name !=3D NULL; ++j) {
+=09=09if (strcmp(hip->links[j].name, iface) =3D=3D 0) {
+=09=09=09return &hip->links[j];
+=09=09}
+=09}
+=09return NULL;
+}
=20
/* Look up the node in the configuration, returning the node info structur=
e */
struct node_info *
@@ -584,11 +618,12 @@
=09for (;;) {
=09=09struct=09ha_msg*=09m =3D mp->vf->read(mp);
=09=09char *=09=09sm;
+
=09=09if (m =3D=3D NULL) {
=09=09=09continue;
=09=09}
+=09=09sm =3D msg2if_string(m, mp->name);
=20
-=09=09sm =3D msg2string(m);
=09=09if (sm !=3D NULL) {
=09=09=09msglen =3D strlen(sm);
=09=09=09if (DEBUGPKT) {
@@ -626,7 +661,8 @@
=20
=09siginterrupt(SIGALRM, 1);
=09for (;;) {
-=09=09struct ha_msg *=09msgp =3D msgfromstream(ourfp);
+=09=09struct ha_msg *=09msgp;
+=09=09msgp =3D if_msgfromstream(ourfp, NULL);
=09=09if (msgp =3D=3D NULL) {
=09=09=09continue;
=09=09}
@@ -760,8 +796,12 @@
=09FILE *=09=09=09f =3D fdopen(status_pipe[P_READFD], "r");
=09struct ha_msg *=09=09msg =3D NULL;
=09int=09=09=09resources_requested_yet =3D 0;
+=09
=09time_t=09=09=09lastnow =3D 0L;
-
+=09char=09iface[MAXIFACELEN];
+=09struct link=09*lnk;
+=09int starting =3D 1;
+=09int starting_flag =3D 1;
=20
=09init_status_alarm();
=09init_watchdog();
@@ -776,9 +816,20 @@
=09=09const char *=09type;
=20
=09=09if (send_status_now) {
+=09=09=09/* If heartbeat is starting right now on this node,
+=09=09=09* give the rest of the cluster time to send a message,
+=09=09=09* so we can tell whether the cluster is already active.
+=09=09=09*/
+=09=09=09if (starting && nice_failback) {
+=09=09=09=09ha_log(LOG_DEBUG,"%s", "Waiting for someone else..." );
+=09=09=09=09sleep (2*config->heartbeat_interval);
+=09=09=09=09starting =3D 0;
+=09=09=09}
+
=09=09=09send_status_now =3D 0;
=09=09=09send_local_status();
=09=09}
+
=09=09if (dump_stats_now) {
=09=09=09dump_stats_now =3D 0;
=09=09=09dump_all_proc_stats();
@@ -797,7 +848,9 @@
=09=09}
=09=09lastnow =3D now;
=20
-=09=09msg =3D msgfromstream(f);
+=09=09bzero(iface, MAXIFACELEN);
+
+=09=09msg =3D if_msgfromstream(f, iface);
=20
=09=09/* This may be caused by SIGALRM */
=09=09if (msg =3D=3D NULL) {
@@ -847,6 +900,18 @@
#endif
=09=09}
=20
+=09=09/* If the message came from another node, the cluster is=20
+=09=09 * already active. This check only applies while starting up.
+=09=09 */
+
+=09=09if (!WeAreRestarting && !resources_requested_yet
+=09=09&& (thisnode !=3D curnode && (now-starttime) < RQSTDELAY)
+=09=09&& (starting_flag && nice_failback)) {
+=09=09=09ha_log(LOG_DEBUG, "%s", "The cluster is already active");
+=09=09=09cluster_already_active =3D 1;
+=09=09=09starting_flag =3D 0;
+=09=09}
+=09
=09=09/* Throw away some incoming packets if testing is enabled */
=09=09if (TESTRCV) {
=09=09=09if (thisnode !=3D curnode && TestRand(rcv_loss_prob)) {
@@ -863,17 +928,61 @@
=09=09 * in from somewhere else, then cluster comm is working...
=09=09 *
=09=09 */
-=09
+
=09=09if (!WeAreRestarting && !resources_requested_yet
=09=09&&=09(thisnode !=3D curnode || (now-starttime) > RQSTDELAY)) {
=09=09=09=09resources_requested_yet=3D1;
=09=09=09=09req_our_resources();
=09=09}
=20
+=09=09lnk =3D lookup_iface(thisnode, iface);
+
=09=09/* Is this message a duplicate, or destined for someone else? */
-=09=09if (should_drop_message(thisnode, msg)) {
+
+=09=09switch (should_drop_message(thisnode, msg, iface)) {
+=09=09const char *=09status;
+=09=09const char *=09cseq;
+=09=09long=09=09seqno;
+=09=09case DUPLICATE:
+
+=09=09=09if(!lnk) continue;
+
+=09=09=09sscanf(ts, "%lx", &msgtime);
+=09=09=09status =3D ha_msg_value(msg, F_STATUS);
+=09=09=09if (status =3D=3D NULL) {
+=09=09=09=09ha_log(LOG_ERR, "master_status_process (duplicate): "
+=09=09=09=09"status update without "
+=09=09=09=09F_STATUS " field");
+=09=09=09=09continue;
+=09=09=09}
+=09=09=09if ((cseq =3D ha_msg_value(msg, F_SEQ)) !=3D NULL) {
+=09=09=09=09if (sscanf(cseq, "%lx", &seqno) !=3D 1
+=09=09=09=09||=09seqno <=3D 0) {
+=09=09=09=09=09continue;
+=09=09=09=09}
+=09=09=09}
+
+=09=09=09/* Do we already have a newer status? */
+=09=09=09if (msgtime < lnk->rmt_lastupdate) {=20
+=09=09=09=09continue;
+=09=09=09}
+
+=09=09=09lnk->rmt_lastupdate =3D msgtime;
+
+=09=09=09thisnode->local_lastupdate =3D lnk->lastupdate =3D times(NULL);
+
+=09=09=09if (strcasecmp(lnk->status, status) !=3D 0) {
+=09=09=09=09ha_log(LOG_INFO, "node %s -- link %s: status %s"
+=09=09=09=09, thisnode->nodename, lnk->name, status);
+=09=09=09=09strcpy(lnk->status, status);
+=09=09=09}
+=09=09continue;
+=09=09case DROPIT:
+=09=09=09/* Ignore it */
=09=09=09continue;
-=09=09}
+=09=09}=20
+
+=09=09thisnode->track.last_iface =3D iface;
=20
=09=09/* Is this a status update message? */
=09=09if (strcasecmp(type, T_STATUS) =3D=3D 0) {
@@ -897,8 +1006,7 @@
=09=09=09=09}
=09=09=09}
=20
-=09=09=09/* Do we already have a newer status? */
-=09=09=09if (msgtime < thisnode->rmt_lastupdate
+=09=09=09if (msgtime < thisnode->rmt_lastupdate=20
=09=09=09&&=09seqno < thisnode->status_seqno) {
=09=09=09=09continue;
=09=09=09}
@@ -906,10 +1014,16 @@
=09=09=09heartbeat_monitor(msg);
=20
=09=09=09thisnode->rmt_lastupdate =3D msgtime;
-=09=09=09thisnode->local_lastupdate =3D times(NULL);
+
+=09=09=09if(lnk) {=20
+=09=09=09=09thisnode->local_lastupdate =3D lnk->lastupdate =3D times(NULL)=
;
+=09=09=09}else{
+=09=09=09=09thisnode->local_lastupdate =3D times(NULL);
+=09=09=09}=09
+
=09=09=09thisnode->status_seqno =3D seqno;
=20
-=09=09=09/* Is the status the same? */
+=09=09=09/* Is the node status the same? */
=09=09=09if (strcasecmp(thisnode->status, status) !=3D 0) {
=09=09=09=09ha_log(LOG_INFO
=09=09=09=09,=09"node %s: status %s"
@@ -919,6 +1033,15 @@
=09=09=09=09strcpy(thisnode->status, status);
=09=09=09}
=20
+=09=09=09/* Is the link status the same? */
+=09=09=09if(lnk) {
+=09=09=09=09if (strcasecmp(lnk->status, status) !=3D 0) {
+=09=09=09=09=09ha_log(LOG_INFO, "node %s -- link %s: status %s"=20
+=09=09=09=09=09=09, thisnode->nodename, lnk->name, status);
+=09=09=09=09strcpy(lnk->status, status);
+=09=09=09=09}
+=09=09=09}
+
=09=09=09/* Did we get a status update on ourselves? */
=09=09=09if (thisnode =3D=3D curnode) {
=09=09=09=09tickle_watchdog();
@@ -1260,13 +1383,13 @@
{
=09clock_t=09now =3D times(NULL);
=09struct node_info *=09hip;
-=09clock_t=09dead_ticks =3D (CLK_TCK * config->deadtime_interval);
-=09clock_t=09TooOld =3D now - dead_ticks;
+=09clock_t dead_ticks =3D (CLK_TCK * config->deadtime_interval);
+=09clock_t TooOld =3D now - dead_ticks;
=09int=09j;
=20
=09/* We need to be careful to handle clock_t wrapround carefully */
=09if (now < dead_ticks) {
-=09=09return;=09/* Ignore timeouts during wraparound */
+=09=09return; /* Ignore timeouts during wraparound */
=09=09=09/* This doubles our timeout at this time */
=09=09=09/* Sorry. */
=09}
@@ -1286,7 +1409,27 @@
=09=09mark_node_dead(hip);
=09}
=20
+=09/* Check all links status of all nodes */
=20
+=09for (j=3D0; j < config->nodecount; ++j) {
+=09=09struct link *lnk;
+=09=09int i =3D 0;
+=09=09hip =3D &config->nodes[j];
+=09=09if(hip =3D=3D curnode) continue;
+
+=09=09while((lnk =3D &hip->links[i]) && lnk->name) {
+=09=09=09if (lnk->lastupdate > now) {
+=09=09=09=09lnk->lastupdate =3D 0L;
+=09=09=09}
+=09=09=09if (lnk->lastupdate >=3D TooOld
+=09=09=09|| strcmp(lnk->status, DEADSTATUS) =3D=3D 0 ) {
+=09=09=09=09i++;
+=09=09=09=09continue;
+=09=09=09}
+=09=09=09mark_link_dead(hip, lnk);
+=09=09=09i++;
+=09=09}
+=09}
}
=20
/* Set our local status to the given value, and send it out*/
@@ -1364,6 +1507,15 @@
=09return(rc);
}
=20
+/* Mark the given link of node hip dead: log it and record DEADSTATUS */
+void
+mark_link_dead(struct node_info *hip, struct link *lnk)
+{
+=09/* FIXME: only logs and flags the link; no recovery action yet */
+=09ha_log(LOG_ERR, "Link %s:%s DEAD.", hip->nodename, lnk->name);
+=09strcpy(lnk->status, DEADSTATUS);=09/* what the timeout scan tests */
+}
+=09=09=09=09=09
/* Mark the given node dead */
void
mark_node_dead(struct node_info *hip)
@@ -1397,7 +1549,14 @@
=09=09/* Uh, oh... we're dead! */
=09=09ha_log(LOG_ERR, "No local heartbeat. Forcing shutdown.");
=09=09kill(procinfo->info[0].pid, SIGTERM);
+=09} else {
+=09=09if (we_are_primary && nice_failback) {
+=09=09=09ha_log(LOG_DEBUG,"%s",=09"[lcrg] We are primary again!");
+=09=09=09we_are_primary =3D 0;
+=09=09=09req_our_resources();
+=09=09}
=09}
+=09
=09ha_msg_del(hmsg);
}
=20
@@ -1554,12 +1713,25 @@
=09=09if (buf[strlen(buf)-1] =3D=3D '\n') {
=09=09=09buf[strlen(buf)-1] =3D EOS;
=09=09}
-=09=09sprintf(getcmd, HALIB "/req_resource %s &", buf);
-=09=09if ((rc=3Dsystem(getcmd)) !=3D 0) {
-=09=09=09ha_perror("%s returned %d", getcmd, rc);
-=09=09=09finalrc=3DHA_FAIL;
+=09=09
+=09=09/* If the cluster is already active, act as standby. */
+=09=09if (cluster_already_active && nice_failback) {
+=09=09=09ha_log(LOG_DEBUG,
+=09=09=09"Acting as standby for resource %s",buf);
+=09=09}else {
+=09=09=09sprintf(getcmd, HALIB "/req_resource %s &", buf);
+=09=09=09if ((rc=3Dsystem(getcmd)) !=3D 0) {
+=09=09=09=09ha_perror("%s returned %d", getcmd, rc);
+=09=09=09=09finalrc=3DHA_FAIL;
+=09=09=09}
=09=09}
=09}
+
+=09if (rsc_count && nice_failback) {
+=09=09cluster_already_active =3D 0;
+=09=09we_are_primary =3D 1;
+=09}
+
=09rc=3Dpclose(rkeys);
=09if (rc < 0 && errno !=3D ECHILD) {
=09=09ha_perror("pclose(%s) returned %d", cmd, rc);
@@ -2048,14 +2220,13 @@
*=09do for now...
*/
#define=09SEQGAP=09100=09/* A heuristic number */
-#define=09KEEPIT=090
-#define=09DROPIT=091
=20
/*
*=09Should we ignore this packet, or pay attention to it?
*/
int
-should_drop_message(struct node_info * thisnode, const struct ha_msg *msg)
+should_drop_message(struct node_info * thisnode, const struct ha_msg *msg,
+=09=09=09=09=09const char *iface)
{
=09struct seqtrack *=09t =3D &thisnode->track;
=09const char *=09=09cseq =3D ha_msg_value(msg, F_SEQ);
@@ -2086,8 +2257,12 @@
=09/* Is this packet in sequence? */
=09if (t->last_seq =3D=3D NOSEQUENCE || seq =3D=3D (t->last_seq+1)) {
=09=09t->last_seq =3D seq;
+=09=09t->last_iface =3D iface;
=09=09return(IsToUs ? KEEPIT : DROPIT);
=09}else if (seq =3D=3D t->last_seq) {
+=09=09if(iface && t->last_iface && strcmp(iface, t->last_iface) =3D=3D 0) =
{=20
+=09=09=09return (DUPLICATE);
+=09=09}
=09=09/* Same as last-seen packet -- very common case */
=09=09if (DEBUGPKT) {
=09=09=09ha_log(LOG_DEBUG,
@@ -2114,6 +2289,7 @@
=09=09=09/* This keeps the loop below from going a long time */
=09=09=09t->nmissing =3D 0;
=09=09=09t->last_seq =3D seq;
+=09=09=09t->last_iface =3D iface;
=09=09=09ha_log(LOG_ERR, "lost a lot of packets!");
=09=09=09return(IsToUs ? KEEPIT : DROPIT);
=09=09}else{
@@ -2147,6 +2323,7 @@
=09=09=09}
=09=09}
=09=09t->last_seq =3D seq;
+=09=09t->last_iface =3D iface;
=09=09return(IsToUs ? KEEPIT : DROPIT);
=09}
=09/*
@@ -2209,6 +2386,7 @@
=09=09=09t->nmissing =3D 0;
=09=09=09t->last_seq =3D seq;
=09=09=09t->last_rexmit_req =3D 0L;
+=09=09=09t->last_iface =3D iface;
=09=09=09return(IsToUs ? KEEPIT : DROPIT);
=09=09}
=09}
diff -Nur heartbeat-0.4.6c.orig/heartbeat/heartbeat.h heartbeat-0.4.6c/hear=
tbeat/heartbeat.h
--- heartbeat-0.4.6c.orig/heartbeat/heartbeat.h=09Sat Dec 25 06:46:02 1999
+++ heartbeat-0.4.6c/heartbeat/heartbeat.h=09Mon Mar 27 14:40:04 2000
@@ -41,6 +41,7 @@
#define=09MAXFIELDS=0915=09=09/* Max # of fields in a msg */
#define HOSTLENG=09100=09=09/* Maximum size of "uname -a" return */
#define STATUSLENG=0932=09=09/* Maximum size of status field */
+#define MAXIFACELEN=0930 =09=09/* Maximum interface length */
#define=09MAXSERIAL=094
#define=09MAXMEDIA=0912
#define=09MAXNODE=09=09100
@@ -142,10 +143,20 @@
=09int=09=09nmissing;
=09unsigned long=09last_seq;
=09unsigned long=09seqmissing[MAXMISSING];
+=09const char *=09last_iface;
};
+/* Per-link heartbeat state; node_info holds MAXMEDIA of these */
+struct link {=20
+=09clock_t lastupdate; /* times() value at last update on this link */
+=09const char *name;=20/* Interface name; NULL ends the links[] list */
+=09char=09status[STATUSLENG];=09/* Status from heartbeat */
+=09time_t=09rmt_lastupdate;=09=09/* node's idea of last update time over t=
his link */
+};
+
struct node_info {
=09char=09nodename[HOSTLENG];=09/* Host name from config file */
=09char=09status[STATUSLENG];=09/* Status from heartbeat */
+=09struct link links[MAXMEDIA];=20
=09time_t=09rmt_lastupdate;=09=09/* node's idea of last update time */
=09unsigned long=09status_seqno;=09/* Seqno of last status update */
=09clock_t=09local_lastupdate;=09/* Date of last update in clock_t time */
@@ -283,6 +294,9 @@
extern unsigned char * =09calc_cksum(const char * authmethod, const char *=
key, const char * value);
struct auth_type *=09findauth(const char * type);
struct node_info *=09lookup_node(const char *);
+struct link * lookup_iface(struct node_info * hip, const char *iface);
+struct link *=09iface_lookup_node(const char *);
+
void*=09=09ha_malloc(size_t size);
void*=09=09ha_calloc(size_t nmemb, size_t size);
void=09=09ha_free(void *ptr);
diff -Nur heartbeat-0.4.6c.orig/heartbeat/lib/req_resource heartbeat-0.4.6c=
/heartbeat/lib/req_resource
--- heartbeat-0.4.6c.orig/heartbeat/lib/req_resource=09Wed Nov 10 18:31:05 =
1999
+++ heartbeat-0.4.6c/heartbeat/lib/req_resource=09Mon Mar 27 15:48:04 2000
@@ -14,7 +14,9 @@
HA_DIR=3D/etc/ha.d; export HA_DIR
. $HA_DIR/shellfuncs
=20
-TIMEOUT=3D30
+# To comply with the new failback stuff the timeout was reduced to 10s.
+# The primary failover always waits $TIMEOUT...
+TIMEOUT=3D10
=20
RESOURCE=3D$1
=20