Mailing List Archive

heartbeat per-link status and nice failback enhancements
Hi,

This patch against heartbeat 0.4.6c adds per-link status
instead of per-node status. This is the first step toward the link/status
dependency scheme we are planning to implement. The handling of a dead link
will be done by external scripts, which we will send to this list soon.
It also adds the "nice_failback" option in ha.cf, which avoids the
"takeback" of services when a starting node notices that the cluster is
already active.
Any comments are welcome.

---

diff -Nur heartbeat-0.4.6c.orig/heartbeat/config.c heartbeat-0.4.6c/heartbe=
at/config.c
--- heartbeat-0.4.6c.orig/heartbeat/config.c=09Sat Dec 25 06:46:01 1999
+++ heartbeat-0.4.6c/heartbeat/config.c=09Mon Mar 27 15:00:46 2000
@@ -43,9 +43,10 @@
extern volatile struct process_info *=09curproc;
extern char *=09=09=09=09watchdogdev;
extern int=09=09=09=09nummedia;
+extern int=09=09=09=09nice_failback;
=20
int=09islegaldirective(const char *directive);
-int=09parse_config(const char * cfgfile);
+int=09parse_config(const char * cfgfile, char *nodename);
int=09parse_ha_resources(const char * cfgfile);
void=09dump_config(void);
int=09add_option(const char *=09option, const char * value);
@@ -87,7 +88,7 @@
=09uname(&u);
=09curnode =3D NULL;
=20
-=09if (!parse_config(cfgfile)) {
+=09if (!parse_config(cfgfile, u.nodename)) {
=09=09ha_log(LOG_ERR, "Heartbeat not started: configuration error.");
=09=09return(HA_FAIL);
=09}
@@ -163,6 +164,7 @@
#define=09KEY_FACILITY=09"logfacility"
#define=09KEY_LOGFILE=09"logfile"
#define=09KEY_DBGFILE=09"debugfile"
+#define KEY_FAILBACK "nice_failback"=20
=20
int add_node(const char *);
int set_hopfudge(const char *);
@@ -174,6 +176,7 @@
int set_facility(const char *);
int set_logfile(const char *);
int set_dbgfile(const char *);
+int set_nice_failback(const char *);
=20
extern const struct hb_media_fns=09ip_media_fns;
extern const struct hb_media_fns=09serial_media_fns;
@@ -203,6 +206,7 @@
,=09{KEY_FACILITY, set_facility}
,=09{KEY_LOGFILE, set_logfile}
,=09{KEY_DBGFILE, set_dbgfile}
+,=09{KEY_FAILBACK, set_nice_failback}
};
=20
=20
@@ -210,7 +214,7 @@
*=09Parse the configuration file and stash away the data
*/
int
-parse_config(const char * cfgfile)
+parse_config(const char * cfgfile, char *nodename)
{
=09FILE=09*=09f;
=09char=09=09buf[MAXLINE];
@@ -220,6 +224,10 @@
=09char=09=09option[MAXLINE];
=09int=09=09optionlength;
=09int=09=09errcount =3D 0;
+=09int=09=09j;
+=09int =09i;
+=09clock_t cticks;
+=09
=09struct stat=09sbuf;
=20
=09if ((f =3D fopen(cfgfile, "r")) =3D=3D NULL) {
@@ -233,8 +241,7 @@
=09/* It's ugly, but effective */
=20
=09while (fgets(buf, MAXLINE, f) !=3D NULL) {
-=09=09char *=09bp =3D buf;
-=09=09int=09j;
+=09=09char *bp =3D buf;=20
=20
=09=09/* Skip over white space */
=09=09bp +=3D strspn(bp, WHITESPACE);
@@ -291,6 +298,24 @@
=09=09=09bp +=3D strspn(bp, DELIMS);
=09=09}
=09}
+
+=09cticks =3D times(NULL);
+
+=09for (j=3D0; j < nummedia; j++) {
+=09=09for (i=3D0; i < config->nodecount; ++i) {
+=09=09=09=09struct link *lnk =3D &config->nodes[i].links[j];
+=09=09=09=09lnk->name =3D sysmedia[j]->name;
+=09=09=09=09lnk->lastupdate =3D cticks;
+=09=09 }
+=09 }
+
+=09j++;
+
+=09for (i=3D0; i < config->nodecount; ++i) {
+=09=09struct link *lnk =3D &config->nodes[i].links[j];
+=09=09lnk->name =3D NULL;
+=09}=09
+=09
=09fclose(f);
=09return(errcount ? HA_FAIL : HA_OK);
}
@@ -806,6 +831,19 @@
set_logfile(const char * value)
{
=09strncpy(config->logfile, value, PATH_MAX);
+=09return(HA_OK);
+}
+
+/* sets nice_failback behavior on/off */
+int
+set_nice_failback(const char * value)
+{
+=09if(!strcasecmp(value, "on")) {
+=09=09nice_failback =3D 1;
+=09} else {
+=09=09nice_failback =3D 0;
+=09}
+
=09return(HA_OK);
}
=20
diff -Nur heartbeat-0.4.6c.orig/heartbeat/ha_msg.c heartbeat-0.4.6c/heartbe=
at/ha_msg.c
--- heartbeat-0.4.6c.orig/heartbeat/ha_msg.c=09Tue Nov 23 06:52:40 1999
+++ heartbeat-0.4.6c/heartbeat/ha_msg.c=09Mon Mar 27 14:28:57 2000
@@ -216,6 +216,73 @@
=20
=20
/* Return the next message found in the stream */
+
+struct ha_msg *
+if_msgfromstream(FILE * f, char *iface)
+{
+=09char=09=09buf[MAXLINE];
+=09char *=09=09getsret;
+=09struct ha_msg*=09ret;
+
+=09clearerr(f);
+
+=09if(!(getsret=3Dfgets(buf, MAXLINE, f))) {=20
+=09=09if (!ferror(f) || errno !=3D EINTR)=20
+=09=09=09ha_error("if_msgfromstream: cannot get message");
+=09=09return(NULL);
+=09}
+
+=09/* Try to find the interface on the message. */
+
+=09if(!strcmp(buf, IFACE)) {
+=09=09/* Found interface name header, get interface name. */
+=09=09if(!(getsret=3Dfgets(buf, MAXLINE, f))) {=20
+=09=09=09if (!ferror(f) || errno !=3D EINTR)
+=09=09=09=09ha_error("if_msgfromstream: cannot get message");
+=09=09=09return(NULL);
+=09=09}
+=09=09if(iface) {=20
+=09=09=09int len =3D strlen(buf);
+=09=09=09if(len < MAXIFACELEN) {
+=09=09=09=09strncpy(iface, buf, len);
+=09=09=09=09iface[len -1] =3D EOS;
+=09=09=09}
+=09=09}
+=09}
+
+=09if(strcmp(buf, MSG_START)) { =09
+=09=09/* Skip until we find a MSG_START (hopefully we skip nothing) */
+=09=09while ((getsret=3Dfgets(buf, MAXLINE, f)) !=3D NULL
+=09=09&&=09strcmp(buf, MSG_START) !=3D 0) {
+=09=09=09/* Nothing */
+=09=09}
+=09}
+
+=09if (getsret =3D=3D NULL || (ret =3D ha_msg_new(0)) =3D=3D NULL) {
+=09=09/* Getting an error with EINTR is pretty normal */
+=09=09if (!ferror(f) || errno !=3D EINTR) {
+=09=09=09ha_error("if_msgfromstream: cannot get message");
+=09=09}
+=09=09return(NULL);
+=09}
+
+=09/* Add Name=3Dvalue pairs until we reach MSG_END or EOF */
+=09while ((getsret=3Dfgets(buf, MAXLINE, f)) !=3D NULL
+=09&&=09strcmp(buf, MSG_END) !=3D 0) {
+
+=09=09/* Add the "name=3Dvalue" string on this line to the message */
+=09=09if (ha_msg_add_nv(ret, buf) !=3D HA_OK) {
+=09=09=09ha_error("NV failure (if_msgfromsteam):");
+=09=09=09ha_error(buf);
+=09=09=09ha_msg_del(ret);
+=09=09=09return(NULL);
+=09=09}
+=09}
+=09return(ret);
+}
+
+/* Writes a message into a stream - used for serial lines */
+/* Return the next message found in the stream */
struct ha_msg *
msgfromstream(FILE * f)
{
@@ -315,7 +382,44 @@
=09return(ret);
}
=20
+/* Converts a message into a string and adds the iface name on the message=
*/
+char *
+msg2if_string(const struct ha_msg *m, const char *iface)=20
+{
=20
+=09int=09j;
+=09char *=09buf;
+=09char *=09bp;
+
+=09if (m->nfields <=3D 0) {
+=09=09ha_error("msg2if_string: Message with zero fields");
+=09=09return(NULL);
+=09}
+
+=09buf =3D ha_malloc(m->stringlen + ((strlen(iface) + sizeof(IFACE)) * siz=
eof(char *)));
+
+=09if (buf =3D=3D NULL) {
+=09=09ha_error("msg2if_string: no memory for string");
+=09}else{
+=09=09bp =3D buf;
+=09=09strcpy(buf, IFACE);
+=09=09strcat(buf, iface);
+=09=09strcat(buf, "\n");
+=09=09strcat(buf, MSG_START);
+=09=09for (j=3D0; j < m->nfields; ++j) {
+=09=09=09strcat(bp, m->names[j]);
+=09=09=09bp +=3D m->nlens[j];
+=09=09=09strcat(bp, "=3D");
+=09=09=09bp++;
+=09=09=09strcat(bp, m->values[j]);
+=09=09=09bp +=3D m->vlens[j];
+=09=09=09strcat(bp, "\n");
+=09=09=09bp++;
+=09=09}
+=09=09strcat(bp, MSG_END);
+=09}
+=09return(buf);
+}
/* Converts a message into a string (for sending out UDP interface) */
char *
msg2string(const struct ha_msg *m)
diff -Nur heartbeat-0.4.6c.orig/heartbeat/ha_msg.h heartbeat-0.4.6c/heartbe=
at/ha_msg.h
--- heartbeat-0.4.6c.orig/heartbeat/ha_msg.h=09Tue Nov 23 06:52:40 1999
+++ heartbeat-0.4.6c/heartbeat/ha_msg.h=09Mon Mar 27 14:29:45 2000
@@ -16,7 +16,7 @@
=09char **=09values;
=09int *=09vlens;
};
-
+#define IFACE=09=09"!^!\n"=20
#define=09MSG_START=09">>>\n"
#define=09MSG_END=09=09"<<<\n"
#define=09EQUAL=09=09"=3D"
@@ -71,6 +71,8 @@
/* Reads a stream -- converts it into a message */
struct ha_msg *=09msgfromstream(FILE * f);
=20
+struct ha_msg * if_msgfromstream(FILE * f, char *iface);
+
/* Writes a message into a stream */
int=09=09msg2stream(struct ha_msg* m, FILE * f);
=20
@@ -79,6 +81,9 @@
=20
/* Converts a message into a string for sending out UDP interface */
char *=09=09msg2string(const struct ha_msg *m);
+
+/* Converts a message into a string and adds the iface name on start */
+char *=09=09msg2if_string(const struct ha_msg *m, const char * iface);
=20
/* Reads from control fifo, and creates a new message from it */
/* This adds the default sequence#, load avg, etc. to the message */
diff -Nur heartbeat-0.4.6c.orig/heartbeat/heartbeat.c heartbeat-0.4.6c/hear=
tbeat/heartbeat.c
--- heartbeat-0.4.6c.orig/heartbeat/heartbeat.c=09Sat Dec 25 06:46:02 1999
+++ heartbeat-0.4.6c/heartbeat/heartbeat.c=09Mon Mar 27 18:11:26 2000
@@ -132,6 +132,18 @@
*=09=09is less likely to fail. But if it does, it might mean
*=09=09seven years of bad luck :-)
*
+ *
+ * Changes:=20
+ *
+ * 03/17/2000 Luis Claudio R. Gon=E7alves <lclaudio@conectiva.com.br>
+ * Created the role switching idea... if the cluster is already active,
+ * when the primary starts or performs a failback it stills quiet,
+ * acting as a standby host. Thus we have controlled failback
+ * =20
+ * 03/26/2000 Marcelo Tosatti <marcelo@conectiva.com.br>=20
+ * Per-link and global (node) status instead simply node status. First
+ * step of the link/service dependancy scheme.
+ * =20
*/
=20
#include <stdio.h>
@@ -160,6 +172,9 @@
=20
#define OPTARGS=09=09"dkrRsv"
=20
+#define=09KEEPIT=090
+#define=09DROPIT=091
+#define=09DUPLICATE 2
=20
int=09=09verbose =3D 0;
=20
@@ -169,6 +184,9 @@
int=09=09debug =3D 0;
int=09=09RestartRequested =3D 0;
int=09=09WeAreRestarting =3D 0;
+int=09=09cluster_already_active =3D 0;=20
+int=09=09we_are_primary =3D 0; =20
+int nice_failback =3D 0;=20
int=09=09killrunninghb =3D 0;
int=09=09rpt_hb_status =3D 0;
int=09=09childpid =3D -1;
@@ -231,6 +249,7 @@
void=09check_node_timeouts(void);
void=09request_msg_rexmit(struct node_info *, unsigned long lowseq, unsign=
ed long hiseq);
void=09check_rexmit_reqs(void);
+void=09mark_link_dead(struct node_info* hip, struct link *lnk);
void=09mark_node_dead(struct node_info* hip);
void=09notify_world(struct ha_msg * msg, const char * ostatus);
pid_t=09get_running_hb_pid(void);
@@ -238,7 +257,8 @@
void=09heartbeat_monitor(struct ha_msg * msg);
void=09send_to_all_media(char * smsg, int len);
void=09init_monitor(void);
-int=09should_drop_message(struct node_info* node, const struct ha_msg* msg=
);
+int=09should_drop_message(struct node_info* node, const struct ha_msg* msg=
,
+=09=09=09=09=09=09const char *iface);
void=09add2_xmit_hist (struct msg_xmit_hist * hist, struct ha_msg* msg
,=09=09unsigned long seq);
void=09init_xmit_hist (struct msg_xmit_hist * hist);
@@ -302,6 +322,20 @@
}
=20
=20
+/* Look up the interface in the node struct, returning the link info struc=
ture */
+struct link *
+lookup_iface(struct node_info * hip, const char *iface)=20
+{
+=09struct link *lnk;=09
+=09int j =3D 0;
+=09while((lnk =3D &hip->links[j]) && lnk->name) {
+=09=09if(strcmp(lnk->name, iface) =3D=3D 0) {
+=09=09=09return lnk;
+=09=09}
+=09=09j++;
+=09}
+=09return NULL;
+}
=20
/* Look up the node in the configuration, returning the node info structur=
e */
struct node_info *
@@ -584,11 +618,12 @@
=09for (;;) {
=09=09struct=09ha_msg*=09m =3D mp->vf->read(mp);
=09=09char *=09=09sm;
+
=09=09if (m =3D=3D NULL) {
=09=09=09continue;
=09=09}
+=09=09sm =3D msg2if_string(m, mp->name);
=20
-=09=09sm =3D msg2string(m);
=09=09if (sm !=3D NULL) {
=09=09=09msglen =3D strlen(sm);
=09=09=09if (DEBUGPKT) {
@@ -626,7 +661,8 @@
=20
=09siginterrupt(SIGALRM, 1);
=09for (;;) {
-=09=09struct ha_msg *=09msgp =3D msgfromstream(ourfp);
+=09=09struct ha_msg *=09msgp;
+=09=09msgp =3D if_msgfromstream(ourfp, NULL);
=09=09if (msgp =3D=3D NULL) {
=09=09=09continue;
=09=09}
@@ -760,8 +796,12 @@
=09FILE *=09=09=09f =3D fdopen(status_pipe[P_READFD], "r");
=09struct ha_msg *=09=09msg =3D NULL;
=09int=09=09=09resources_requested_yet =3D 0;
+=09
=09time_t=09=09=09lastnow =3D 0L;
-
+=09char=09iface[MAXIFACELEN];
+=09struct link=09*lnk;
+=09int starting =3D 1;
+=09int starting_flag =3D 1;
=20
=09init_status_alarm();
=09init_watchdog();
@@ -776,9 +816,20 @@
=09=09const char *=09type;
=20
=09=09if (send_status_now) {
+=09=09=09/* If heartbeat is starting right now on this node
+=09=09=09* give time for someone else in the cluster send a
+=09=09=09* message... the cluster is already active?
+=09=09=09*/
+=09=09=09if (starting && nice_failback) {
+=09=09=09=09ha_log(LOG_DEBUG,"%s", "Waiting for someone else..." );
+=09=09=09=09sleep (2*config->heartbeat_interval);
+=09=09=09=09starting =3D 0;
+=09=09=09}
+
=09=09=09send_status_now =3D 0;
=09=09=09send_local_status();
=09=09}
+
=09=09if (dump_stats_now) {
=09=09=09dump_stats_now =3D 0;
=09=09=09dump_all_proc_stats();
@@ -797,7 +848,9 @@
=09=09}
=09=09lastnow =3D now;
=20
-=09=09msg =3D msgfromstream(f);
+=09=09bzero(iface, MAXIFACELEN);
+
+=09=09msg =3D if_msgfromstream(f, iface);
=20
=09=09/* This may be caused by SIGALRM */
=09=09if (msg =3D=3D NULL) {
@@ -847,6 +900,18 @@
#endif
=09=09}
=20
+=09=09/* If the message came from another node, the cluster is=20
+=09=09 * already active. It's only valid for the starting process
+=09=09 */
+
+=09=09if (!WeAreRestarting && !resources_requested_yet
+=09=09&& (thisnode !=3D curnode && (now-starttime) < RQSTDELAY)
+=09=09&& (starting_flag && nice_failback)) {
+=09=09=09ha_log(LOG_DEBUG, "%s", "The cluster is already active");
+=09=09=09cluster_already_active =3D 1;
+=09=09=09starting_flag =3D 0;
+=09=09}
+=09
=09=09/* Throw away some incoming packets if testing is enabled */
=09=09if (TESTRCV) {
=09=09=09if (thisnode !=3D curnode && TestRand(rcv_loss_prob)) {
@@ -863,17 +928,61 @@
=09=09 * in from somewhere else, then cluster comm is working...
=09=09 *
=09=09 */
-=09
+
=09=09if (!WeAreRestarting && !resources_requested_yet
=09=09&&=09(thisnode !=3D curnode || (now-starttime) > RQSTDELAY)) {
=09=09=09=09resources_requested_yet=3D1;
=09=09=09=09req_our_resources();
=09=09}
=20
+=09=09lnk =3D lookup_iface(thisnode, iface);
+
=09=09/* Is this message a duplicate, or destined for someone else? */
-=09=09if (should_drop_message(thisnode, msg)) {
+
+=09=09switch (should_drop_message(thisnode, msg, iface)) {
+=09=09const char *=09status;
+=09=09const char *=09cseq;
+=09=09long=09=09seqno;
+=09=09case DUPLICATE:
+
+=09=09=09if(!lnk) continue;
+
+=09=09=09sscanf(ts, "%lx", &msgtime);
+=09=09=09status =3D ha_msg_value(msg, F_STATUS);
+=09=09=09if (status =3D=3D NULL) {
+=09=09=09=09ha_log(LOG_ERR, "master_status_process (duplicate): "
+=09=09=09=09"status update without "
+=09=09=09=09F_STATUS " field");
+=09=09=09=09continue;
+=09=09=09}
+=09=09=09if ((cseq =3D ha_msg_value(msg, F_SEQ)) !=3D NULL) {
+=09=09=09=09if (sscanf(cseq, "%lx", &seqno) !=3D 1
+=09=09=09=09||=09seqno <=3D 0) {
+=09=09=09=09=09continue;
+=09=09=09=09}
+=09=09=09}
+
+=09=09=09/* Do we already have a newer status? */
+=09=09=09if (msgtime < lnk->rmt_lastupdate) {=20
+=09=09=09=09continue;
+=09=09=09}
+
+=09=09=09lnk->rmt_lastupdate =3D msgtime;
+
+=09=09=09thisnode->local_lastupdate =3D lnk->lastupdate =3D times(NULL);
+
+=09=09=09if (strcasecmp(lnk->status, status) !=3D 0) {
+=09=09=09=09ha_log(LOG_INFO, "node %s -- link %s: status %s"
+=09=09=09=09, thisnode->nodename, lnk->name, status);
+=09=09=09=09strcpy(lnk->status, status);
+=09=09=09}
+=09=09continue;
+=09=09case DROPIT:
+=09=09=09/* Ignore it */
=09=09=09continue;
-=09=09}
+=09=09}=20
+
+=09=09thisnode->track.last_iface =3D iface;
=20
=09=09/* Is this a status update message? */
=09=09if (strcasecmp(type, T_STATUS) =3D=3D 0) {
@@ -897,8 +1006,7 @@
=09=09=09=09}
=09=09=09}
=20
-=09=09=09/* Do we already have a newer status? */
-=09=09=09if (msgtime < thisnode->rmt_lastupdate
+=09=09=09if (msgtime < thisnode->rmt_lastupdate=20
=09=09=09&&=09seqno < thisnode->status_seqno) {
=09=09=09=09continue;
=09=09=09}
@@ -906,10 +1014,16 @@
=09=09=09heartbeat_monitor(msg);
=20
=09=09=09thisnode->rmt_lastupdate =3D msgtime;
-=09=09=09thisnode->local_lastupdate =3D times(NULL);
+
+=09=09=09if(lnk) {=20
+=09=09=09=09thisnode->local_lastupdate =3D lnk->lastupdate =3D times(NULL)=
;
+=09=09=09}else{
+=09=09=09=09thisnode->local_lastupdate =3D times(NULL);
+=09=09=09}=09
+
=09=09=09thisnode->status_seqno =3D seqno;
=20
-=09=09=09/* Is the status the same? */
+=09=09=09/* Is the node status the same? */
=09=09=09if (strcasecmp(thisnode->status, status) !=3D 0) {
=09=09=09=09ha_log(LOG_INFO
=09=09=09=09,=09"node %s: status %s"
@@ -919,6 +1033,15 @@
=09=09=09=09strcpy(thisnode->status, status);
=09=09=09}
=20
+=09=09=09/* Is the link status the same? */
+=09=09=09if(lnk) {
+=09=09=09=09if (strcasecmp(lnk->status, status) !=3D 0) {
+=09=09=09=09=09ha_log(LOG_INFO, "node %s -- link %s: status %s"=20
+=09=09=09=09=09=09, thisnode->nodename, lnk->name, status);
+=09=09=09=09strcpy(lnk->status, status);
+=09=09=09=09}
+=09=09=09}
+
=09=09=09/* Did we get a status update on ourselves? */
=09=09=09if (thisnode =3D=3D curnode) {
=09=09=09=09tickle_watchdog();
@@ -1260,13 +1383,13 @@
{
=09clock_t=09now =3D times(NULL);
=09struct node_info *=09hip;
-=09clock_t=09dead_ticks =3D (CLK_TCK * config->deadtime_interval);
-=09clock_t=09TooOld =3D now - dead_ticks;
+=09clock_t dead_ticks =3D (CLK_TCK * config->deadtime_interval);
+=09clock_t TooOld =3D now - dead_ticks;
=09int=09j;
=20
=09/* We need to be careful to handle clock_t wrapround carefully */
=09if (now < dead_ticks) {
-=09=09return;=09/* Ignore timeouts during wraparound */
+=09=09return; /* Ignore timeouts during wraparound */
=09=09=09/* This doubles our timeout at this time */
=09=09=09/* Sorry. */
=09}
@@ -1286,7 +1409,27 @@
=09=09mark_node_dead(hip);
=09}
=20
+=09/* Check all links status of all nodes */
=20
+=09for (j=3D0; j < config->nodecount; ++j) {
+=09=09struct link *lnk;
+=09=09int i =3D 0;
+=09=09hip =3D &config->nodes[j];
+=09=09if(hip =3D=3D curnode) continue;
+
+=09=09while((lnk =3D &hip->links[i]) && lnk->name) {
+=09=09=09if (lnk->lastupdate > now) {
+=09=09=09=09lnk->lastupdate =3D 0L;
+=09=09=09}
+=09=09=09if (lnk->lastupdate >=3D TooOld
+=09=09=09|| strcmp(lnk->status, DEADSTATUS) =3D=3D 0 ) {
+=09=09=09=09i++;
+=09=09=09=09continue;
+=09=09=09}
+=09=09=09mark_link_dead(hip, lnk);
+=09=09=09i++;
+=09=09}
+=09}
}
=20
/* Set our local status to the given value, and send it out*/
@@ -1364,6 +1507,15 @@
=09return(rc);
}
=20
+/* Mark the given link dead */
+void
+mark_link_dead(struct node_info *hip, struct link *lnk)
+{
+=09/* FIXME: Do something usefull */
+=09ha_log(LOG_ERR, "Link %s:%s DEAD.", hip->nodename, lnk->name);
+ strcpy(lnk->status, "dead");
+}
+=09=09=09=09=09
/* Mark the given node dead */
void
mark_node_dead(struct node_info *hip)
@@ -1397,7 +1549,14 @@
=09=09/* Uh, oh... we're dead! */
=09=09ha_log(LOG_ERR, "No local heartbeat. Forcing shutdown.");
=09=09kill(procinfo->info[0].pid, SIGTERM);
+=09} else {
+=09=09if (we_are_primary && nice_failback) {
+=09=09=09ha_log(LOG_DEBUG,"%s",=09"[lcrg] We are primary again!");
+=09=09=09we_are_primary =3D 0;
+=09=09=09req_our_resources();
+=09=09}
=09}
+=09
=09ha_msg_del(hmsg);
}
=20
@@ -1554,12 +1713,25 @@
=09=09if (buf[strlen(buf)-1] =3D=3D '\n') {
=09=09=09buf[strlen(buf)-1] =3D EOS;
=09=09}
-=09=09sprintf(getcmd, HALIB "/req_resource %s &", buf);
-=09=09if ((rc=3Dsystem(getcmd)) !=3D 0) {
-=09=09=09ha_perror("%s returned %d", getcmd, rc);
-=09=09=09finalrc=3DHA_FAIL;
+=09=09
+=09=09/* If the cluster is already active, act as standby. */
+=09=09if (cluster_already_active && nice_failback) {
+=09=09=09ha_log(LOG_DEBUG,
+=09=09=09"Acting as standby for resource %s",buf);
+=09=09}else {
+=09=09=09sprintf(getcmd, HALIB "/req_resource %s &", buf);
+=09=09=09if ((rc=3Dsystem(getcmd)) !=3D 0) {
+=09=09=09=09ha_perror("%s returned %d", getcmd, rc);
+=09=09=09=09finalrc=3DHA_FAIL;
+=09=09=09}
=09=09}
=09}
+
+=09if (rsc_count && nice_failback) {
+=09=09cluster_already_active =3D 0;
+=09=09we_are_primary =3D 1;
+=09}
+
=09rc=3Dpclose(rkeys);
=09if (rc < 0 && errno !=3D ECHILD) {
=09=09ha_perror("pclose(%s) returned %d", cmd, rc);
@@ -2048,14 +2220,13 @@
*=09do for now...
*/
#define=09SEQGAP=09100=09/* A heuristic number */
-#define=09KEEPIT=090
-#define=09DROPIT=091
=20
/*
*=09Should we ignore this packet, or pay attention to it?
*/
int
-should_drop_message(struct node_info * thisnode, const struct ha_msg *msg)
+should_drop_message(struct node_info * thisnode, const struct ha_msg *msg,
+=09=09=09=09=09const char *iface)
{
=09struct seqtrack *=09t =3D &thisnode->track;
=09const char *=09=09cseq =3D ha_msg_value(msg, F_SEQ);
@@ -2086,8 +2257,12 @@
=09/* Is this packet in sequence? */
=09if (t->last_seq =3D=3D NOSEQUENCE || seq =3D=3D (t->last_seq+1)) {
=09=09t->last_seq =3D seq;
+=09=09t->last_iface =3D iface;
=09=09return(IsToUs ? KEEPIT : DROPIT);
=09}else if (seq =3D=3D t->last_seq) {
+=09=09if(iface && t->last_iface && strcmp(iface, t->last_iface) =3D=3D 0) =
{=20
+=09=09=09return (DUPLICATE);
+=09=09}
=09=09/* Same as last-seen packet -- very common case */
=09=09if (DEBUGPKT) {
=09=09=09ha_log(LOG_DEBUG,
@@ -2114,6 +2289,7 @@
=09=09=09/* This keeps the loop below from going a long time */
=09=09=09t->nmissing =3D 0;
=09=09=09t->last_seq =3D seq;
+=09=09=09t->last_iface =3D iface;
=09=09=09ha_log(LOG_ERR, "lost a lot of packets!");
=09=09=09return(IsToUs ? KEEPIT : DROPIT);
=09=09}else{
@@ -2147,6 +2323,7 @@
=09=09=09}
=09=09}
=09=09t->last_seq =3D seq;
+=09=09t->last_iface =3D iface;
=09=09return(IsToUs ? KEEPIT : DROPIT);
=09}
=09/*
@@ -2209,6 +2386,7 @@
=09=09=09t->nmissing =3D 0;
=09=09=09t->last_seq =3D seq;
=09=09=09t->last_rexmit_req =3D 0L;
+=09=09=09t->last_iface =3D iface;
=09=09=09return(IsToUs ? KEEPIT : DROPIT);
=09=09}
=09}
diff -Nur heartbeat-0.4.6c.orig/heartbeat/heartbeat.h heartbeat-0.4.6c/hear=
tbeat/heartbeat.h
--- heartbeat-0.4.6c.orig/heartbeat/heartbeat.h=09Sat Dec 25 06:46:02 1999
+++ heartbeat-0.4.6c/heartbeat/heartbeat.h=09Mon Mar 27 14:40:04 2000
@@ -41,6 +41,7 @@
#define=09MAXFIELDS=0915=09=09/* Max # of fields in a msg */
#define HOSTLENG=09100=09=09/* Maximum size of "uname -a" return */
#define STATUSLENG=0932=09=09/* Maximum size of status field */
+#define MAXIFACELEN=0930 =09=09/* Maximum interface length */
#define=09MAXSERIAL=094
#define=09MAXMEDIA=0912
#define=09MAXNODE=09=09100
@@ -142,10 +143,20 @@
=09int=09=09nmissing;
=09unsigned long=09last_seq;
=09unsigned long=09seqmissing[MAXMISSING];
+=09const char *=09last_iface;
};
+
+struct link {=20
+=09clock_t lastupdate; /* Date of last update in this interface */
+=09const char *name;=20
+=09char=09status[STATUSLENG];=09/* Status from heartbeat */
+=09time_t=09rmt_lastupdate;=09=09/* node's idea of last update time over t=
his link */
+};
+
struct node_info {
=09char=09nodename[HOSTLENG];=09/* Host name from config file */
=09char=09status[STATUSLENG];=09/* Status from heartbeat */
+=09struct link links[MAXMEDIA];=20
=09time_t=09rmt_lastupdate;=09=09/* node's idea of last update time */
=09unsigned long=09status_seqno;=09/* Seqno of last status update */
=09clock_t=09local_lastupdate;=09/* Date of last update in clock_t time */
@@ -283,6 +294,9 @@
extern unsigned char * =09calc_cksum(const char * authmethod, const char *=
key, const char * value);
struct auth_type *=09findauth(const char * type);
struct node_info *=09lookup_node(const char *);
+struct link * lookup_iface(struct node_info * hip, const char *iface);
+struct link *=09iface_lookup_node(const char *);
+
void*=09=09ha_malloc(size_t size);
void*=09=09ha_calloc(size_t nmemb, size_t size);
void=09=09ha_free(void *ptr);
diff -Nur heartbeat-0.4.6c.orig/heartbeat/lib/req_resource heartbeat-0.4.6c=
/heartbeat/lib/req_resource
--- heartbeat-0.4.6c.orig/heartbeat/lib/req_resource=09Wed Nov 10 18:31:05 =
1999
+++ heartbeat-0.4.6c/heartbeat/lib/req_resource=09Mon Mar 27 15:48:04 2000
@@ -14,7 +14,9 @@
HA_DIR=3D/etc/ha.d; export HA_DIR
. $HA_DIR/shellfuncs
=20
-TIMEOUT=3D30
+# To comply with the new failback stuff the timeout was reduced to 10s.
+# The primary failover always waits $TIMEOUT...
+TIMEOUT=3D10
=20
RESOURCE=3D$1
=20
heartbeat per-link status and nice failback enhancements [ In reply to ]
Marcelo Tosatti wrote:
>
> Hi,
>
> This patch against heartbeat 0.4.6c adds per-link status
> instead per-node status. This is the first step on the link/status
> dependancy scheme we are planning to do. The handling of the dead link
> will be done by external scripts which we will send to this list soon.
> It also adds the "nice_failback" option in ha.cf which avoids the
> "takeback" of services when it notices that the cluster is alive.
> Any comments are welcome.

In function if_msgfromstream(), you don't loop looking for an interface,
but just give up if you don't find one. Is that the right thing to do
there?


> /* Return the next message found in the stream */
> +
> +struct ha_msg *
> +if_msgfromstream(FILE * f, char *iface)
> +{
> + char buf[MAXLINE];
> + char * getsret;
> + struct ha_msg* ret;
> +
> + clearerr(f);
> +
> + if(!(getsret=fgets(buf, MAXLINE, f))) {
> + if (!ferror(f) || errno != EINTR)
> + ha_error("if_msgfromstream: cannot get message");
> + return(NULL);
> + }
> +
> + /* Try to find the interface on the message. */
> +
> + if(!strcmp(buf, IFACE)) {
> + /* Found interface name header, get interface name. */
> + if(!(getsret=fgets(buf, MAXLINE, f))) {
> + if (!ferror(f) || errno != EINTR)
> + ha_error("if_msgfromstream: cannot get message");
> + return(NULL);
> + }
> + if(iface) {
> + int len = strlen(buf);
> + if(len < MAXIFACELEN) {
> + strncpy(iface, buf, len);
> + iface[len -1] = EOS;
> + }
> + }
> + }
> +


Jumping to msg2if_string...


> +/* Converts a message into a string and adds the iface name on the message */
> +char *
> +msg2if_string(const struct ha_msg *m, const char *iface)
> +{
>
> + int j;
> + char * buf;
> + char * bp;
> +
> + if (m->nfields <= 0) {
> + ha_error("msg2if_string: Message with zero fields");
> + return(NULL);
> + }
> +
> + buf = ha_malloc(m->stringlen + ((strlen(iface) + sizeof(IFACE)) * sizeof(char *)));

You're not appending a (char*) to the string, so you shouldn't use
sizeof(char*). Perhaps you meant sizeof(char) or 2*sizeof(char)? I
forget if the stringlen field includes the terminating NULL or not.


Jumping ahead again:

Trivia: The word "useful" is misspelled :-)

> +/* Mark the given link dead */
> +void
> +mark_link_dead(struct node_info *hip, struct link *lnk)
> +{
> + /* FIXME: Do something usefull */
> + ha_log(LOG_ERR, "Link %s:%s DEAD.", hip->nodename, lnk->name);
> + strcpy(lnk->status, "dead");
> +}


Continuing on...

You made changes to the retransmission protocol. Please make sure you
have tested this thoroughly. If you don't know how to set up the test
environment to simulate packet losses, email me and I'll tell you.

All in all, this looks like a useful set of changes.

Thanks!

-- Alan Robertson
alanr@suse.com
heartbeat per-link status and nice failback enhancements [ In reply to ]
On Mon, 27 Mar 2000, Alan Robertson wrote:

> You're not appending a (char*) to the string, so you shouldn't use
> sizeof(char*). Perhaps you meant sizeof(char) or 2*sizeof(char)? I
> forget if the stringlen field includes the terminating NULL or not.

I think that strlen doesn't count the terminating NUL byte, so you have to
add it yourself if you're calculating the memory consumption.

kind regards,
Michael Moerz Systemengineer
CUBIT IT-Solutions
heartbeat per-link status and nice failback enhancements [ In reply to ]
Michael Moerz wrote:
>
> On Mon, 27 Mar 2000, Alan Robertson wrote:
>
> > You're not appending a (char*) to the string, so you shouldn't use
> > sizeof(char*). Perhaps you meant sizeof(char) or 2*sizeof(char)? I
> > forget if the stringlen field includes the terminating NULL or not.
>
> I think that strlen doesn't count the terminating NUL byte, so you have to
> add it yourself if you're calculating the memory consumption.
>
> kind regards,
> Michael Moerz Systemengineer
> CUBIT IT-Solutions

The line in question was:

buf = ha_malloc(m->stringlen +
((strlen(iface) + sizeof(IFACE)) * sizeof(char *)));

But the field "m->stringlen" already includes space for a NUL byte. I
went back and reread the code. So, Marcelo didn't need to leave room
for an additional one. This field is initialized in function
ha_msg_new() in this way:

ret->stringlen = sizeof(MSG_START)+sizeof(MSG_END)-1;

So, it leaves room for two strings minus the room for one of the two
NUL bytes associated with the two constants. Unlike strlen, sizeof *does*
count the NUL byte. Perhaps I should have named the field stringspace
instead of stringlen. To further attest to my intentions, when I copy a
message buffer to a string in function msg2string(), I do this:

buf = ha_malloc(m->stringlen);

Fortunately, stringlen is a private field only used a handful of times
in only one file, so it's pretty easy to see how it's used.

It's GREAT to have someone else carefully looking over these patches!

Thanks Michael!

-- Alan Robertson
alanr@suse.com
heartbeat per-link status and nice failback enhancements [ In reply to ]
> In function if_msgfromstream(), you don't loop looking for an interface,
> but just give up if you don't find one. Is that the right thing to do
> there?
msg2if_string always adds the interface name at the beginning of the
message.

> You're not appending a (char*) to the string, so you shouldn't use
> sizeof(char*). Perhaps you meant sizeof(char) or 2*sizeof(char)? I
> forget if the stringlen field includes the terminating NULL or not.
sizeof(char). I'll fix this before committing it to CVS.

> You made changes to the retransmission protocol. Please make sure you
> have tested this thoroughly.
> If you don't know how to set up the test
> environment to simulate packet losses, email me and I'll tell you.

We are doing tests/developing with the packet loss simulation code turned
on.

> All in all, this looks like a useful set of changes.

Thanks for the comments!
heartbeat per-link status and nice failback enhancements [ In reply to ]
Marcelo Tosatti wrote:
>
> > In function if_msgfromstream(), you don't loop looking for an interface,
> > but just give up if you don't find one. Is that the right thing to do
> > there?
> msg2if_string always add the interface name on the beginning of the
> message.

Yes, but things happen... And what you have isn't that robust if
something happens...

> > You're not appending a (char*) to the string, so you shouldn't use
> > sizeof(char*). Perhaps you meant sizeof(char) or 2*sizeof(char)? I
> > forget if the stringlen field includes the terminating NULL or not.
> sizeof(char). I'll fix this before commiting it to the CVS.
>
> > You made changes to the retransmission protocol. Please make sure you
> > have tested this thoroughly.
> > If you don't know how to set up the test
> > environment to simulate packet losses, email me and I'll tell you.
>
> We are doing tests/developing with the packet loss simulation code turned
> on.

I ran it at about a 50% error rate, a 1-second heartbeat rate and a 15
second timeout, for a day or so. That meant that from time to time the
two nodes would lose communication, then regain it. It gets a pretty
good workout from that :-)

> > All in all, this looks like a useful set of changes.
>
> Thanks for the comments!

Thanks for your work!


-- Alan Robertson
alanr@suse.com
heartbeat per-link status and nice failback enhancements [ In reply to ]
On Tue, 28 Mar 2000, Alan Robertson wrote:

> Marcelo Tosatti wrote:
> >
> > > In function if_msgfromstream(), you don't loop looking for an interface,
> > > but just give up if you don't find one. Is that the right thing to do
> > > there?
> > msg2if_string always add the interface name on the beginning of the
> > message.
>
> Yes, but things happen... And what you have isn't that robust if
> something happens...
Oh yes, you are right.
I thought you were suggesting that the IFACE field should be a "normal" field.