Hi,
I wrote a wrapper using hbclient api for an application that manages the redundancy of our system. The application uses the wrapper to send/receive messages (string) between the primary and secondary.
During our reset and switchover testing, the send path occasionally dumps core with a double free reported by libc, and I do not know whether it is caused by my wrapper around the hbclient API.
/lib/libc.so.6[0xf7d71629]
/lib/libc.so.6(cfree+0x59)[0xf7d719e9]
/usr/lib/libplumb.so.2[0xf7e88dcf]
/usr/lib/libplumb.so.2[0xf7e9a03e]
/usr/lib/libplumb.so.2[0xf7e9a1a4]
/usr/lib/libplumb.so.2[0xf7e9922f]
/usr/lib/libplumb.so.2(msg2ipcchan+0xb8)[0xf7e891ea]
/usr/lib/libhbclient.so.1[0xf7e6a736]
/usr/lib/libha_lib.so(hb_send+0x204)[0xf7e61e15] ---> my wrapper
I use send_ordered_nodemsg() to send and readmsg() to read (based on api_test.c). However, in the sample code of ipfail and drbd, I saw an IPChannel being set up and msg2ipcchan() being used. Which approach is more appropriate?
I'd also like to know whether I should add more code to handle node status changes, because the crashes always occur when the other node resets.
Snippet of my codes:
1. Initialization:
if (mhm_hb->llc_ops->signon(mhm_hb, "ping")!= HA_OK) { // I pasted the common "ping",
// plan to change to different name
cl_log(LOG_ERR, "Cannot sign on with heartbeat");
...
2. Send:
/*
 * hb_send - send a string payload to the peer node through heartbeat.
 *
 * hb:   heartbeat client handle; NULL yields HA_FAIL.
 * dest: currently unused -- the message is always addressed to the
 *       file-level peer_name, exactly as before.
 * buf:  payload bytes. They are sent as a C string field, so the payload
 *       seen by the receiver ends at the first NUL byte within sz.
 * sz:   number of bytes to take from buf.
 *
 * Returns sz (converted to int) on success and HA_FAIL on any failure.
 */
int hb_send(ll_cluster_t *hb, char *dest, void *buf, size_t sz)
{
    HA_Message *msg = NULL;
    char *payload = NULL;
    int rc = HA_FAIL;

    (void)dest;

    if (hb == NULL || (buf == NULL && sz > 0)) {
        return HA_FAIL;
    }
    if (sz == (size_t)-1) {
        /* sz + 1 would wrap to 0 and under-allocate the scratch buffer. */
        return HA_FAIL;
    }

    msg = ha_msg_new(0);
    if (msg == NULL) {
        cl_log(LOG_ERR, "hb_send: cannot allocate message\n");
        return HA_FAIL;
    }

    if (ha_msg_add(msg, F_TYPE, T_MHM_MSG) != HA_OK) {
        cl_log(LOG_ERR, "hb_send: cannot add field TYPE\n");
        goto out;
    }
    if (ha_msg_add(msg, F_ORIG, node_name) != HA_OK) {
        cl_log(LOG_ERR, "hb_send: cannot add field ORIG\n");
        goto out;
    }

    /* Zero-filled copy of buf: guarantees a terminating NUL byte. */
    payload = calloc(1, sz + 1);
    if (payload == NULL) {
        goto out;
    }
    if (sz > 0) {
        memcpy(payload, buf, sz);
    }

    /*
     * ha_msg_add() stores its own copy of the value (see heartbeat's
     * ha_msg.c), so the scratch buffer stays ours and is released below
     * on every path. The original code never freed it.
     */
    if (ha_msg_add(msg, F_MHM_PAYLOAD, payload) != HA_OK) {
        cl_log(LOG_ERR, "hb_send: cannot add field PAYLOAD\n");
        goto out;
    }

    if (hb->llc_ops->send_ordered_nodemsg(hb, msg, peer_name) == HA_OK) {
        rc = (int)sz;
    }

out:
    free(payload);   /* free(NULL) is a no-op */
    ZAPMSG(msg);
    return rc;
}
3. Receive:
/*
 * hb_recv - wait for the next T_MHM_MSG message and copy its payload out.
 *
 * hb:  heartbeat client handle; NULL yields HA_FAIL.
 * buf: destination buffer of sz bytes; NULL yields HA_FAIL. It is zeroed
 *      first, so the result is NUL-terminated whenever the payload is
 *      shorter than sz. A payload of exactly sz bytes is still accepted
 *      (as before) but then fills buf with no terminator.
 * sz:  capacity of buf in bytes.
 *
 * Messages of any other type are logged and discarded.
 *
 * Returns the payload length on success, HA_FAIL when the message has no
 * payload field or the payload does not fit, and 0 when readmsg() returns
 * NULL.
 */
int hb_recv(ll_cluster_t *hb, void *buf, size_t sz)
{
    int msgcount = 0;
    HA_Message *reply;

    if (hb == NULL || buf == NULL) {
        return HA_FAIL;
    }
    memset(buf, 0, sz);

    /* readmsg(hb, 1): blocking receive. */
    while ((reply = hb->llc_ops->readmsg(hb, 1)) != NULL) {
        const char *type = ha_msg_value(reply, F_TYPE);
        const char *orig = ha_msg_value(reply, F_ORIG);
        const char *payload;
        size_t p_sz;

        ++msgcount;
        if (type == NULL) {
            type = "?";
        }
        if (orig == NULL) {
            orig = "?";
        }
        cl_log(LOG_DEBUG, "Got message %d of type [%s] from [%s]"
        ,      msgcount, type, orig);

        if (strcmp(type, T_MHM_MSG) != 0) {
            /*
             * Not addressed to this module. The message was handed to us
             * by readmsg(), so it is released here rather than leaked
             * (api_test.c likewise frees every message it reads).
             */
            ZAPMSG(reply);
            continue;
        }

        payload = ha_msg_value(reply, F_MHM_PAYLOAD);
        if (payload == NULL) {
            /* Field missing: strlen(NULL) would be undefined behavior. */
            cl_log(LOG_ERR, "hb_recv: message has no payload field");
            ZAPMSG(reply);
            return HA_FAIL;
        }

        p_sz = strlen(payload);
        cl_log(LOG_DEBUG, "payload %s sz %lu p_sz %lu\n", payload,
               (unsigned long)sz, (unsigned long)p_sz);

        if (p_sz > sz) {
            cl_log(LOG_ERR, "Receive buffer %lu too small for payload %lu",
                   (unsigned long)sz, (unsigned long)p_sz);
            ZAPMSG(reply);
            return HA_FAIL;
        }

        memcpy(buf, payload, p_sz);
        /*
         * Log with an explicit length: when p_sz == sz the buffer carries
         * no terminator, so "%s" / strlen(buf) would read past its end.
         */
        cl_log(LOG_DEBUG, "return buf %.*s ret_val %lu",
               (int)p_sz, (const char *)buf, (unsigned long)p_sz);
        ZAPMSG(reply);
        return (int)p_sz;
    }

    /* The loop only exits when readmsg() returned NULL. */
    cl_log(LOG_ERR, "read_hb_msg returned NULL");
    cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
    return 0;
}
Thanks,
Phong
I wrote a wrapper using hbclient api for an application that manages the redundancy of our system. The application uses the wrapper to send/receive messages (string) between the primary and secondary.
During our reset and switchover testing, the send path occasionally dumps core with a double free reported by libc, and I do not know whether it is caused by my wrapper around the hbclient API.
/lib/libc.so.6[0xf7d71629]
/lib/libc.so.6(cfree+0x59)[0xf7d719e9]
/usr/lib/libplumb.so.2[0xf7e88dcf]
/usr/lib/libplumb.so.2[0xf7e9a03e]
/usr/lib/libplumb.so.2[0xf7e9a1a4]
/usr/lib/libplumb.so.2[0xf7e9922f]
/usr/lib/libplumb.so.2(msg2ipcchan+0xb8)[0xf7e891ea]
/usr/lib/libhbclient.so.1[0xf7e6a736]
/usr/lib/libha_lib.so(hb_send+0x204)[0xf7e61e15] ---> my wrapper
I use send_ordered_nodemsg() to send and readmsg() to read (based on api_test.c). However, in the sample code of ipfail and drbd, I saw an IPChannel being set up and msg2ipcchan() being used. Which approach is more appropriate?
I'd also like to know whether I should add more code to handle node status changes, because the crashes always occur when the other node resets.
Snippet of my codes:
1. Initialization:
if (mhm_hb->llc_ops->signon(mhm_hb, "ping")!= HA_OK) { // I pasted the common "ping",
// plan to change to different name
cl_log(LOG_ERR, "Cannot sign on with heartbeat");
...
2. Send:
/*
 * hb_send - send a string payload to the peer node through heartbeat.
 *
 * hb:   heartbeat client handle; NULL yields HA_FAIL.
 * dest: currently unused -- the message is always addressed to the
 *       file-level peer_name, exactly as before.
 * buf:  payload bytes. They are sent as a C string field, so the payload
 *       seen by the receiver ends at the first NUL byte within sz.
 * sz:   number of bytes to take from buf.
 *
 * Returns sz (converted to int) on success and HA_FAIL on any failure.
 */
int hb_send(ll_cluster_t *hb, char *dest, void *buf, size_t sz)
{
    HA_Message *msg = NULL;
    char *payload = NULL;
    int rc = HA_FAIL;

    (void)dest;

    if (hb == NULL || (buf == NULL && sz > 0)) {
        return HA_FAIL;
    }
    if (sz == (size_t)-1) {
        /* sz + 1 would wrap to 0 and under-allocate the scratch buffer. */
        return HA_FAIL;
    }

    msg = ha_msg_new(0);
    if (msg == NULL) {
        cl_log(LOG_ERR, "hb_send: cannot allocate message\n");
        return HA_FAIL;
    }

    if (ha_msg_add(msg, F_TYPE, T_MHM_MSG) != HA_OK) {
        cl_log(LOG_ERR, "hb_send: cannot add field TYPE\n");
        goto out;
    }
    if (ha_msg_add(msg, F_ORIG, node_name) != HA_OK) {
        cl_log(LOG_ERR, "hb_send: cannot add field ORIG\n");
        goto out;
    }

    /* Zero-filled copy of buf: guarantees a terminating NUL byte. */
    payload = calloc(1, sz + 1);
    if (payload == NULL) {
        goto out;
    }
    if (sz > 0) {
        memcpy(payload, buf, sz);
    }

    /*
     * ha_msg_add() stores its own copy of the value (see heartbeat's
     * ha_msg.c), so the scratch buffer stays ours and is released below
     * on every path. The original code never freed it.
     */
    if (ha_msg_add(msg, F_MHM_PAYLOAD, payload) != HA_OK) {
        cl_log(LOG_ERR, "hb_send: cannot add field PAYLOAD\n");
        goto out;
    }

    if (hb->llc_ops->send_ordered_nodemsg(hb, msg, peer_name) == HA_OK) {
        rc = (int)sz;
    }

out:
    free(payload);   /* free(NULL) is a no-op */
    ZAPMSG(msg);
    return rc;
}
3. Receive:
/*
 * hb_recv - wait for the next T_MHM_MSG message and copy its payload out.
 *
 * hb:  heartbeat client handle; NULL yields HA_FAIL.
 * buf: destination buffer of sz bytes; NULL yields HA_FAIL. It is zeroed
 *      first, so the result is NUL-terminated whenever the payload is
 *      shorter than sz. A payload of exactly sz bytes is still accepted
 *      (as before) but then fills buf with no terminator.
 * sz:  capacity of buf in bytes.
 *
 * Messages of any other type are logged and discarded.
 *
 * Returns the payload length on success, HA_FAIL when the message has no
 * payload field or the payload does not fit, and 0 when readmsg() returns
 * NULL.
 */
int hb_recv(ll_cluster_t *hb, void *buf, size_t sz)
{
    int msgcount = 0;
    HA_Message *reply;

    if (hb == NULL || buf == NULL) {
        return HA_FAIL;
    }
    memset(buf, 0, sz);

    /* readmsg(hb, 1): blocking receive. */
    while ((reply = hb->llc_ops->readmsg(hb, 1)) != NULL) {
        const char *type = ha_msg_value(reply, F_TYPE);
        const char *orig = ha_msg_value(reply, F_ORIG);
        const char *payload;
        size_t p_sz;

        ++msgcount;
        if (type == NULL) {
            type = "?";
        }
        if (orig == NULL) {
            orig = "?";
        }
        cl_log(LOG_DEBUG, "Got message %d of type [%s] from [%s]"
        ,      msgcount, type, orig);

        if (strcmp(type, T_MHM_MSG) != 0) {
            /*
             * Not addressed to this module. The message was handed to us
             * by readmsg(), so it is released here rather than leaked
             * (api_test.c likewise frees every message it reads).
             */
            ZAPMSG(reply);
            continue;
        }

        payload = ha_msg_value(reply, F_MHM_PAYLOAD);
        if (payload == NULL) {
            /* Field missing: strlen(NULL) would be undefined behavior. */
            cl_log(LOG_ERR, "hb_recv: message has no payload field");
            ZAPMSG(reply);
            return HA_FAIL;
        }

        p_sz = strlen(payload);
        cl_log(LOG_DEBUG, "payload %s sz %lu p_sz %lu\n", payload,
               (unsigned long)sz, (unsigned long)p_sz);

        if (p_sz > sz) {
            cl_log(LOG_ERR, "Receive buffer %lu too small for payload %lu",
                   (unsigned long)sz, (unsigned long)p_sz);
            ZAPMSG(reply);
            return HA_FAIL;
        }

        memcpy(buf, payload, p_sz);
        /*
         * Log with an explicit length: when p_sz == sz the buffer carries
         * no terminator, so "%s" / strlen(buf) would read past its end.
         */
        cl_log(LOG_DEBUG, "return buf %.*s ret_val %lu",
               (int)p_sz, (const char *)buf, (unsigned long)p_sz);
        ZAPMSG(reply);
        return (int)p_sz;
    }

    /* The loop only exits when readmsg() returned NULL. */
    cl_log(LOG_ERR, "read_hb_msg returned NULL");
    cl_log(LOG_ERR, "REASON: %s", hb->llc_ops->errmsg(hb));
    return 0;
}
Thanks,
Phong