Mailing List Archive

Modified patch for RA
Hi Dejan,
here's another modified patch for the mysql agent of the commit
version 4c18035 (git@github.com:y-trudeau/resource-agents.git branch
mysql-repl). Following a comment of Keisuke, I put back the log level
for mysql_status in probe mode.

Regards,

Yves
Re: Modified patch for RA [ In reply to ]
Hi Yves,

On Fri, May 04, 2012 at 04:29:34PM -0400, Yves Trudeau wrote:
> Hi Dejan,
> here's another modified patch for the mysql agent of the commit
> version 4c18035 (git@github.com:y-trudeau/resource-agents.git branch
> mysql-repl). Following a comment of Keisuke, I put back the log level
> for mysql_status in probe mode.

Cool!

So, according to the discussion at github, you insist that the
replication IP needs to be specified in the node's static
attributes. I'm not very happy with it, because it is a
precedent, but I won't insist on it. Let's hope for the best.

There are a few concerns which went unanswered so far:

1a. In case this attribute is not set, would replication fail
properly (with an informative error message) or misbehave? It is
not explicitly checked for existence in the code.

1b. Mori-san suggested to make this attribute optional and in
case it doesn't exist just to use uname. That sounds like a good
idea to me.

2. Is it possible/plausible to have more than one mysql
instance? If so, then the attribute name should include the
instance name. Say ${INSTANCE_NAME}_mysql_replication_IP or
something to that effect. Also, it would make for a better
looking configuration. "IP" doesn't really say much.

3. This attribute is part of the configuration and supposed to
be set up by the user. Please document that in the meta-data.

Cheers,

Dejan

P.S. Any chance of finishing this by Friday?

> Regards,
>
> Yves

> --- ../../mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
> +++ mysql 2012-05-04 15:58:50.318419875 -0400
> @@ -79,6 +79,7 @@
> OCF_RESKEY_max_slave_lag_default="3600"
> OCF_RESKEY_evict_outdated_slaves_default="false"
> OCF_RESKEY_reader_attribute_default="readable"
> +OCF_RESKEY_replication_info_attribute_default="replication_info"
>
> : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
> MYSQL_BINDIR=`dirname ${OCF_RESKEY_binary}`
> @@ -109,7 +110,8 @@
> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
> : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
>
> -: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
> +: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
> +: ${OCF_RESKEY_replication_info_attribute=${OCF_RESKEY_replication_info_attribute_default}}
>
> #######################################################################
>
> @@ -328,7 +330,19 @@
> </longdesc>
> <shortdesc lang="en">Sets the node attribute that determines
> whether a node is usable for clients to read from.</shortdesc>
> -<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
> +<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
> +</parameter>
> +
> +<parameter name="replication_info_attribute" unique="1" required="0">
> +<longdesc lang="en">
> +An attribute that stores the current master IP, replication file and position.
> +This is queried by the agent in the post-promote notification
> +to reconnect the slaves to the new master.
> +
> +This parameter is only meaningful in master/slave set configurations.
> +</longdesc>
> +<shortdesc lang="en">Cluster attribute storing replication information</shortdesc>
> +<content type="string" default="${OCF_RESKEY_replication_info_attribute_default}" />
> </parameter>
> </parameters>
>
> @@ -355,10 +369,12 @@
> MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
> MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
> MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
> +MYSQL_TOO_MANY_CONN_ERR=1040
>
> CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
> HOSTNAME=`uname -n`
> CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
> +CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${OCF_RESKEY_replication_info_attribute} -s mysql_replication --query -q"
> INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
>
> #######################################################################
> @@ -468,7 +484,7 @@
>
> if [ $rc -eq 0 ]; then
> # Did we receive an error other than max_connections?
> - if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
> + if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
> # Whoa. Replication ran into an error. This slave has
> # diverged from its master. Make sure this resource
> # doesn't restart in place.
> @@ -484,7 +500,7 @@
> fi
>
> # If we got max_connections, let's remove the vip
> - if [ $last_errno -eq 1040 ]; then
> + if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
> set_reader_attr 0
> exit $OCF_SUCCESS
> fi
> @@ -496,7 +512,7 @@
> ocf_log warn "MySQL Slave IO threads currently not running."
>
> # Sanity check, are we at least on the right master
> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
> + new_master_IP=`$CRM_ATTR_REPL_INFO | cut -d'|' -f1`
>
> if [ "$master_host" != "$new_master_IP" ]; then
> # Not pointing to the right master, not good, removing the VIPs
> @@ -573,7 +589,7 @@
> local new_master_IP master_log_file master_log_pos
> local master_params
>
> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
> + new_master_IP=`$CRM_ATTR_REPL_INFO | cut -d'|' -f1`
>
> # Keep replication position
> get_slave_info
> @@ -585,8 +601,8 @@
> rm -f $tmpfile
> return
> else
> - master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f2`
> - master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f3`
> + master_log_file=`$CRM_ATTR_REPL_INFO | cut -d'|' -f2`
> + master_log_pos=`$CRM_ATTR_REPL_INFO | cut -d'|' -f3`
> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
> master_params=", MASTER_LOG_FILE='$master_log_file', \
> MASTER_LOG_POS=$master_log_pos"
> @@ -660,7 +676,7 @@
> while true; do
> $MYSQL $MYSQL_OPTIONS_REPL \
> -e 'SHOW PROCESSLIST\G' > $tmpfile
> - if grep 'Has read all relay log' $tmpfile >/dev/null; then
> + if grep -i 'Has read all relay log' $tmpfile >/dev/null; then
> ocf_log info "MySQL slave has finished processing relay log"
> break
> fi
> @@ -783,7 +799,7 @@
>
> mysql_status() {
> if [ ! -e $OCF_RESKEY_pid ]; then
> - ocf_log err "MySQL is not running"
> + ocf_log $1 "MySQL is not running"
> return $OCF_NOT_RUNNING;
> fi
>
> @@ -797,7 +813,7 @@
> if [ $? -eq 0 ]; then
> return $OCF_SUCCESS;
> else
> - ocf_log err "MySQL not running: removing old PID file"
> + ocf_log $1 "MySQL not running: removing old PID file"
> rm -f $OCF_RESKEY_pid
> return $OCF_NOT_RUNNING;
> fi
> @@ -811,8 +827,9 @@
> if ocf_is_probe; then
> status_loglevel="info"
> fi
> -
> +
> mysql_status $status_loglevel
> +
> rc=$?
>
> # TODO: check max connections error
> @@ -856,7 +873,7 @@
> set_reader_attr 0
> fi
>
> - mysql_status
> + mysql_status info
> if [ $? = $OCF_SUCCESS ]; then
> ocf_log info "MySQL already running"
> return $OCF_SUCCESS
> @@ -930,7 +947,7 @@
> # Let the CRM/LRM time us out if required.
> start_wait=1
> while [ $start_wait = 1 ]; do
> - mysql_status
> + mysql_status info
> rc=$?
> if [ $rc = $OCF_SUCCESS ]; then
> start_wait=0
> @@ -1019,7 +1036,7 @@
> count=0
> while [ $count -lt $shutdown_timeout ]
> do
> - mysql_status
> + mysql_status info
> rc=$?
> if [ $rc = $OCF_NOT_RUNNING ]; then
> break
> @@ -1029,7 +1046,7 @@
> ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
> done
>
> - mysql_status
> + mysql_status info
> if [ $? != $OCF_NOT_RUNNING ]; then
> ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
> /bin/kill -KILL $pid > /dev/null
> @@ -1044,7 +1061,7 @@
> mysql_promote() {
> local master_info
>
> - if ( ! mysql_status ); then
> + if ( ! mysql_status err ); then
> return $OCF_NOT_RUNNING
> fi
> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> @@ -1053,7 +1070,7 @@
> # Set Master Info in CIB, cluster level attribute
> update_data_master_status
> master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
> - ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication -v "$master_info"
> + ${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${OCF_RESKEY_replication_info_attribute} -s mysql_replication -v "$master_info"
> rm -f $tmpfile
>
> set_read_only off || return $OCF_ERR_GENERIC
> @@ -1070,7 +1087,7 @@
> }
>
> mysql_demote() {
> - if ! mysql_status; then
> + if ! mysql_status err; then
> return $OCF_NOT_RUNNING
> fi
>
> @@ -1177,6 +1194,7 @@
> # The log directory must be a directory owned by root, with permissions 0700,
> # and the log must be writable and not a symlink.
> ##########################################################################
> +DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
> if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
> DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
> if [ -d "${DEBUG_LOG_DIR}" ]; then
> @@ -1214,7 +1232,7 @@
> case "$1" in
> start) mysql_start;;
> stop) mysql_stop;;
> - status) mysql_status;;
> + status) mysql_status err;;
> monitor) mysql_monitor;;
> promote) mysql_promote;;
> demote) mysql_demote;;

> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/

_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
Re: Modified patch for RA [ In reply to ]
Hi Dejan,

Le 2012-05-08 11:15, Dejan Muhamedagic a écrit :
> Hi Yves,
>
> On Fri, May 04, 2012 at 04:29:34PM -0400, Yves Trudeau wrote:
>> Hi Dejan,
>> here's another modified patch for the mysql agent of the commit
>> version 4c18035 (git@github.com:y-trudeau/resource-agents.git branch
>> mysql-repl). Following a comment of Keisuke, I put back the log level
>> for mysql_status in probe mode.
>
> Cool!
>
> So, according to the discussion at github, you insist that the
> replication IP needs to be specified in the node's static
> attributes. I'm not very happy with it, because it is a
> precedent, but I won't insist on it. Let's hope for the best.

Like we discussed today, I'll add a fallback to 'uname -n' if the IP
attribute is not present for the node.

>
> There are a few concerns which went unanswered so far:
>
> 1a. In case this attribute is not set, would replication fail
> properly (with an informative error message) or misbehave? It is
> not explicitly checked for existence in the code.
>
> 1b. Mori-san suggested to make this attribute optional and in
> case it doesn't exist just to use uname. That sounds like a good
> idea to me.
>

Agree

> 2. Is it possible/plausible to have more than one mysql
> instance? If so, then the attribute name should include the
> instance name. Say ${INSTANCE_NAME}_mysql_replication_IP or
> something to that effect. Also, it would make for a better
> looking configuration. "IP" doesn't really say much.

Yes, I just added that code for a customer. I'll use
${INSTANCE_NAME}_replication_info.

>
> 3. This attribute is part of the configuration and supposed to
> be set up by the user. Please document that in the meta-data.
>
> Cheers,
>
> Dejan
>
> P.S. Any chance of finishing this by Friday?

This Friday, yes, it should be possible.

Regards,

Yves

>
>> Regards,
>>
>> Yves
>
>> --- ../../mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
>> +++ mysql 2012-05-04 15:58:50.318419875 -0400
>> @@ -79,6 +79,7 @@
>> OCF_RESKEY_max_slave_lag_default="3600"
>> OCF_RESKEY_evict_outdated_slaves_default="false"
>> OCF_RESKEY_reader_attribute_default="readable"
>> +OCF_RESKEY_replication_info_attribute_default="replication_info"
>>
>> : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
>> MYSQL_BINDIR=`dirname ${OCF_RESKEY_binary}`
>> @@ -109,7 +110,8 @@
>> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
>> : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
>>
>> -: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
>> +: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
>> +: ${OCF_RESKEY_replication_info_attribute=${OCF_RESKEY_replication_info_attribute_default}}
>>
>> #######################################################################
>>
>> @@ -328,7 +330,19 @@
>> </longdesc>
>> <shortdesc lang="en">Sets the node attribute that determines
>> whether a node is usable for clients to read from.</shortdesc>
>> -<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
>> +<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
>> +</parameter>
>> +
>> +<parameter name="replication_info_attribute" unique="1" required="0">
>> +<longdesc lang="en">
>> +An attribute that stores the current master IP, replication file and position.
>> +This is queried by the agent in the post-promote notification
>> +to reconnect the slaves to the new master.
>> +
>> +This parameter is only meaningful in master/slave set configurations.
>> +</longdesc>
>> +<shortdesc lang="en">Cluster attribute storing replication information</shortdesc>
>> +<content type="string" default="${OCF_RESKEY_replication_info_attribute_default}" />
>> </parameter>
>> </parameters>
>>
>> @@ -355,10 +369,12 @@
>> MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
>> MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
>> MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
>> +MYSQL_TOO_MANY_CONN_ERR=1040
>>
>> CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
>> HOSTNAME=`uname -n`
>> CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
>> +CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${OCF_RESKEY_replication_info_attribute} -s mysql_replication --query -q"
>> INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
>>
>> #######################################################################
>> @@ -468,7 +484,7 @@
>>
>> if [ $rc -eq 0 ]; then
>> # Did we receive an error other than max_connections?
>> - if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
>> + if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>> # Whoa. Replication ran into an error. This slave has
>> # diverged from its master. Make sure this resource
>> # doesn't restart in place.
>> @@ -484,7 +500,7 @@
>> fi
>>
>> # If we got max_connections, let's remove the vip
>> - if [ $last_errno -eq 1040 ]; then
>> + if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>> set_reader_attr 0
>> exit $OCF_SUCCESS
>> fi
>> @@ -496,7 +512,7 @@
>> ocf_log warn "MySQL Slave IO threads currently not running."
>>
>> # Sanity check, are we at least on the right master
>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>> + new_master_IP=`$CRM_ATTR_REPL_INFO | cut -d'|' -f1`
>>
>> if [ "$master_host" != "$new_master_IP" ]; then
>> # Not pointing to the right master, not good, removing the VIPs
>> @@ -573,7 +589,7 @@
>> local new_master_IP master_log_file master_log_pos
>> local master_params
>>
>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>> + new_master_IP=`$CRM_ATTR_REPL_INFO | cut -d'|' -f1`
>>
>> # Keep replication position
>> get_slave_info
>> @@ -585,8 +601,8 @@
>> rm -f $tmpfile
>> return
>> else
>> - master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f2`
>> - master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f3`
>> + master_log_file=`$CRM_ATTR_REPL_INFO | cut -d'|' -f2`
>> + master_log_pos=`$CRM_ATTR_REPL_INFO | cut -d'|' -f3`
>> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
>> master_params=", MASTER_LOG_FILE='$master_log_file', \
>> MASTER_LOG_POS=$master_log_pos"
>> @@ -660,7 +676,7 @@
>> while true; do
>> $MYSQL $MYSQL_OPTIONS_REPL \
>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>> - if grep 'Has read all relay log' $tmpfile>/dev/null; then
>> + if grep -i 'Has read all relay log' $tmpfile>/dev/null; then
>> ocf_log info "MySQL slave has finished processing relay log"
>> break
>> fi
>> @@ -783,7 +799,7 @@
>>
>> mysql_status() {
>> if [ ! -e $OCF_RESKEY_pid ]; then
>> - ocf_log err "MySQL is not running"
>> + ocf_log $1 "MySQL is not running"
>> return $OCF_NOT_RUNNING;
>> fi
>>
>> @@ -797,7 +813,7 @@
>> if [ $? -eq 0 ]; then
>> return $OCF_SUCCESS;
>> else
>> - ocf_log err "MySQL not running: removing old PID file"
>> + ocf_log $1 "MySQL not running: removing old PID file"
>> rm -f $OCF_RESKEY_pid
>> return $OCF_NOT_RUNNING;
>> fi
>> @@ -811,8 +827,9 @@
>> if ocf_is_probe; then
>> status_loglevel="info"
>> fi
>> -
>> +
>> mysql_status $status_loglevel
>> +
>> rc=$?
>>
>> # TODO: check max connections error
>> @@ -856,7 +873,7 @@
>> set_reader_attr 0
>> fi
>>
>> - mysql_status
>> + mysql_status info
>> if [ $? = $OCF_SUCCESS ]; then
>> ocf_log info "MySQL already running"
>> return $OCF_SUCCESS
>> @@ -930,7 +947,7 @@
>> # Let the CRM/LRM time us out if required.
>> start_wait=1
>> while [ $start_wait = 1 ]; do
>> - mysql_status
>> + mysql_status info
>> rc=$?
>> if [ $rc = $OCF_SUCCESS ]; then
>> start_wait=0
>> @@ -1019,7 +1036,7 @@
>> count=0
>> while [ $count -lt $shutdown_timeout ]
>> do
>> - mysql_status
>> + mysql_status info
>> rc=$?
>> if [ $rc = $OCF_NOT_RUNNING ]; then
>> break
>> @@ -1029,7 +1046,7 @@
>> ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
>> done
>>
>> - mysql_status
>> + mysql_status info
>> if [ $? != $OCF_NOT_RUNNING ]; then
>> ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
>> /bin/kill -KILL $pid> /dev/null
>> @@ -1044,7 +1061,7 @@
>> mysql_promote() {
>> local master_info
>>
>> - if ( ! mysql_status ); then
>> + if ( ! mysql_status err ); then
>> return $OCF_NOT_RUNNING
>> fi
>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>> @@ -1053,7 +1070,7 @@
>> # Set Master Info in CIB, cluster level attribute
>> update_data_master_status
>> master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
>> - ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication -v "$master_info"
>> + ${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${OCF_RESKEY_replication_info_attribute} -s mysql_replication -v "$master_info"
>> rm -f $tmpfile
>>
>> set_read_only off || return $OCF_ERR_GENERIC
>> @@ -1070,7 +1087,7 @@
>> }
>>
>> mysql_demote() {
>> - if ! mysql_status; then
>> + if ! mysql_status err; then
>> return $OCF_NOT_RUNNING
>> fi
>>
>> @@ -1177,6 +1194,7 @@
>> # The log directory must be a directory owned by root, with permissions 0700,
>> # and the log must be writable and not a symlink.
>> ##########################################################################
>> +DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
>> if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
>> DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
>> if [ -d "${DEBUG_LOG_DIR}" ]; then
>> @@ -1214,7 +1232,7 @@
>> case "$1" in
>> start) mysql_start;;
>> stop) mysql_stop;;
>> - status) mysql_status;;
>> + status) mysql_status err;;
>> monitor) mysql_monitor;;
>> promote) mysql_promote;;
>> demote) mysql_demote;;
>
>> _______________________________________________________
>> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
>> Home Page: http://linux-ha.org/
>
> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/
>
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
Re: Modified patch for RA [ In reply to ]
Hi Yves,

Thank you for revising the patch.
I've confirmed that this patch restores the previous log level for mysql_status.

2012/5/5 Yves Trudeau <y.trudeau@videotron.ca>:
> Hi Dejan,
>  here's another modified patch for the mysql agent of the commit version
> 4c18035 (git@github.com:y-trudeau/resource-agents.git branch mysql-repl).
>  Following a comment of Keisuke, I put back the log level for mysql_status
> in probe mode.
>
> Regards,
>
> Yves
>
> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/
>



--
Keisuke MORI
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
Modified patch for RA [ In reply to ]
Hi Dejan,
here's another modified patch for the mysql agent of the commit
version 4c18035 (git@github.com:y-trudeau/resource-agents.git branch
mysql-repl). This patch implements fallback on uname -n if the node IP
attribute is not present and uses the instance name for the replication
info attribute. I am also working with Raoul to get me back on track
with git.

Regards,

Yves
Re: Modified patch for RA [ In reply to ]
Hi Yves,

It would be good not to start a new thread for the same
discussion.

On Thu, May 10, 2012 at 05:06:25PM -0400, Yves Trudeau wrote:
> Hi Dejan,
> here's another modified patch for the mysql agent of the commit
> version 4c18035 (git@github.com:y-trudeau/resource-agents.git branch
> mysql-repl). This patch implements fallback on uname -n if the node
> IP attribute is not present and uses the instance name for the
> replication info attribute.

Hmm, it looks like there was a misunderstanding here. The
attribute named "IP" is still named "IP" :)

And we're still missing the documentation for that attribute.

> I am also working with Raoul to get me
> back on track with git.

Good!

Cheers,

Dejan

>
> Regards,
>
> Yves

> --- mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
> +++ resource-agents-prm/heartbeat/mysql 2012-05-10 11:01:02.538421042 -0400
> @@ -109,7 +109,7 @@
> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
> : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
>
> -: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
> +: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
>
> #######################################################################
>
> @@ -328,7 +328,7 @@
> </longdesc>
> <shortdesc lang="en">Sets the node attribute that determines
> whether a node is usable for clients to read from.</shortdesc>
> -<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
> +<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
> </parameter>
> </parameters>
>
> @@ -355,11 +355,13 @@
> MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
> MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
> MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
> +MYSQL_TOO_MANY_CONN_ERR=1040
>
> CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
> HOSTNAME=`uname -n`
> CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
> INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
> +CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication"
>
> #######################################################################
>
> @@ -461,14 +463,14 @@
>
> check_slave() {
> # Checks slave status
> - local rc new_master_IP
> + local rc new_master
>
> get_slave_info
> rc=$?
>
> if [ $rc -eq 0 ]; then
> # Did we receive an error other than max_connections?
> - if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
> + if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
> # Whoa. Replication ran into an error. This slave has
> # diverged from its master. Make sure this resource
> # doesn't restart in place.
> @@ -484,7 +486,7 @@
> fi
>
> # If we got max_connections, let's remove the vip
> - if [ $last_errno -eq 1040 ]; then
> + if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
> set_reader_attr 0
> exit $OCF_SUCCESS
> fi
> @@ -496,9 +498,9 @@
> ocf_log warn "MySQL Slave IO threads currently not running."
>
> # Sanity check, are we at least on the right master
> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>
> - if [ "$master_host" != "$new_master_IP" ]; then
> + if [ "$master_host" != "$new_master" ]; then
> # Not pointing to the right master, not good, removing the VIPs
> set_reader_attr 0
>
> @@ -570,23 +572,23 @@
> }
>
> set_master() {
> - local new_master_IP master_log_file master_log_pos
> + local new_master master_log_file master_log_pos
> local master_params
>
> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>
> # Keep replication position
> get_slave_info
>
> - if [ "$master_log_file" -a "$new_master_IP" = "$master_host" ]; then
> + if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
> # master_params=", MASTER_LOG_FILE='$master_log_file', \
> # MASTER_LOG_POS=$master_log_pos"
> ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
> rm -f $tmpfile
> return
> else
> - master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f2`
> - master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f3`
> + master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2`
> + master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3`
> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
> master_params=", MASTER_LOG_FILE='$master_log_file', \
> MASTER_LOG_POS=$master_log_pos"
> @@ -601,7 +603,7 @@
> # reset with RESET MASTER.
>
> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> - -e "CHANGE MASTER TO MASTER_HOST='$new_master_IP', \
> + -e "CHANGE MASTER TO MASTER_HOST='$new_master', \
> MASTER_USER='$OCF_RESKEY_replication_user', \
> MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
> rm -f $tmpfile
> @@ -628,15 +630,15 @@
> while true; do
> $MYSQL $MYSQL_OPTIONS_REPL \
> -e 'SHOW PROCESSLIST\G' > $tmpfile
> - if grep 'Waiting for master to send event' $tmpfile >/dev/null; then
> + if grep -i 'Waiting for master to send event' $tmpfile >/dev/null; then
> ocf_log info "MySQL slave has finished reading master binary log"
> break
> fi
> - if grep 'Reconnecting after a failed master event read' $tmpfile >/dev/null; then
> + if grep -i 'Reconnecting after a failed master event read' $tmpfile >/dev/null; then
> ocf_log info "Master is down, no more binary logs to come"
> break
> fi
> - if grep 'Connecting to master' $tmpfile >/dev/null; then
> + if grep -i 'Connecting to master' $tmpfile >/dev/null; then
> ocf_log info "Master is down, no more binary logs to come"
> break
> fi
> @@ -660,7 +662,7 @@
> while true; do
> $MYSQL $MYSQL_OPTIONS_REPL \
> -e 'SHOW PROCESSLIST\G' > $tmpfile
> - if grep 'Has read all relay log' $tmpfile >/dev/null; then
> + if grep -i 'Has read all relay log' $tmpfile >/dev/null; then
> ocf_log info "MySQL slave has finished processing relay log"
> break
> fi
> @@ -744,8 +746,15 @@
> # Determines what IP address is attached to the current host. The output of the
> # crm_attribute command looks like this:
> # scope=nodes name=IP value=10.2.2.161
> +# If the IP node attribute is not defined, fallback is to uname -n
> get_local_ip() {
> - $CRM_ATTR -l forever -n IP -q -G
> + local IP
> + IP=`$CRM_ATTR -l forever -n IP -q -G`
> + if [ ! $? -eq 0 ]; then
> + uname -n
> + else
> + echo $IP
> + fi
> }
>
> #######################################################################
> @@ -783,7 +792,7 @@
>
> mysql_status() {
> if [ ! -e $OCF_RESKEY_pid ]; then
> - ocf_log err "MySQL is not running"
> + ocf_log $1 "MySQL is not running"
> return $OCF_NOT_RUNNING;
> fi
>
> @@ -797,7 +806,7 @@
> if [ $? -eq 0 ]; then
> return $OCF_SUCCESS;
> else
> - ocf_log err "MySQL not running: removing old PID file"
> + ocf_log $1 "MySQL not running: removing old PID file"
> rm -f $OCF_RESKEY_pid
> return $OCF_NOT_RUNNING;
> fi
> @@ -811,8 +820,9 @@
> if ocf_is_probe; then
> status_loglevel="info"
> fi
> -
> +
> mysql_status $status_loglevel
> +
> rc=$?
>
> # TODO: check max connections error
> @@ -856,7 +866,7 @@
> set_reader_attr 0
> fi
>
> - mysql_status
> + mysql_status info
> if [ $? = $OCF_SUCCESS ]; then
> ocf_log info "MySQL already running"
> return $OCF_SUCCESS
> @@ -930,7 +940,7 @@
> # Let the CRM/LRM time us out if required.
> start_wait=1
> while [ $start_wait = 1 ]; do
> - mysql_status
> + mysql_status info
> rc=$?
> if [ $rc = $OCF_SUCCESS ]; then
> start_wait=0
> @@ -1019,7 +1029,7 @@
> count=0
> while [ $count -lt $shutdown_timeout ]
> do
> - mysql_status
> + mysql_status info
> rc=$?
> if [ $rc = $OCF_NOT_RUNNING ]; then
> break
> @@ -1029,7 +1039,7 @@
> ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
> done
>
> - mysql_status
> + mysql_status info
> if [ $? != $OCF_NOT_RUNNING ]; then
> ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
> /bin/kill -KILL $pid > /dev/null
> @@ -1044,7 +1054,7 @@
> mysql_promote() {
> local master_info
>
> - if ( ! mysql_status ); then
> + if ( ! mysql_status err ); then
> return $OCF_NOT_RUNNING
> fi
> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> @@ -1053,7 +1063,7 @@
> # Set Master Info in CIB, cluster level attribute
> update_data_master_status
> master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
> - ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication -v "$master_info"
> + ${CRM_ATTR_REPL_INFO} -v "$master_info"
> rm -f $tmpfile
>
> set_read_only off || return $OCF_ERR_GENERIC
> @@ -1070,7 +1080,7 @@
> }
>
> mysql_demote() {
> - if ! mysql_status; then
> + if ! mysql_status err; then
> return $OCF_NOT_RUNNING
> fi
>
> @@ -1177,6 +1187,7 @@
> # The log directory must be a directory owned by root, with permissions 0700,
> # and the log must be writable and not a symlink.
> ##########################################################################
> +DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
> if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
> DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
> if [ -d "${DEBUG_LOG_DIR}" ]; then
> @@ -1214,7 +1225,7 @@
> case "$1" in
> start) mysql_start;;
> stop) mysql_stop;;
> - status) mysql_status;;
> + status) mysql_status err;;
> monitor) mysql_monitor;;
> promote) mysql_promote;;
> demote) mysql_demote;;

> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/

_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
Re: Modified patch for RA [ In reply to ]
Hi Dejan,
I changed the name of the attribute to REPL_MASTER_IP and added 2
lines of comment for the get_local_ip function. Is that in line with
what you want?

Regards,

Yves

Le 2012-05-11 04:45, Dejan Muhamedagic a écrit :
> Hi Yves,
>
> It would be good not to start a new thread for the same
> discussion.
>
> On Thu, May 10, 2012 at 05:06:25PM -0400, Yves Trudeau wrote:
>> Hi Dejan,
>> here's another modified patch for the mysql agent of the commit
>> version 4c18035 (git@github.com:y-trudeau/resource-agents.git branch
>> mysql-repl). This patch implements fallback on uname -n if the node
>> IP attribute is not present and uses the instance name for the
>> replication info attribute.
>
> Hmm, it looks like there was a misunderstanding here. The
> attribute named "IP" is still named "IP" :)
>
> And we're still missing the documentation for that attribute.
>
>> I am also working with Raoul to get me
>> back on track with git.
>
> Good!
>
> Cheers,
>
> Dejan
>
>>
>> Regards,
>>
>> Yves
>
>> --- mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
>> +++ resource-agents-prm/heartbeat/mysql 2012-05-10 11:01:02.538421042 -0400
>> @@ -109,7 +109,7 @@
>> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
>> : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
>>
>> -: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
>> +: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
>>
>> #######################################################################
>>
>> @@ -328,7 +328,7 @@
>> </longdesc>
>> <shortdesc lang="en">Sets the node attribute that determines
>> whether a node is usable for clients to read from.</shortdesc>
>> -<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
>> +<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
>> </parameter>
>> </parameters>
>>
>> @@ -355,11 +355,13 @@
>> MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
>> MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
>> MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
>> +MYSQL_TOO_MANY_CONN_ERR=1040
>>
>> CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
>> HOSTNAME=`uname -n`
>> CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
>> INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
>> +CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication"
>>
>> #######################################################################
>>
>> @@ -461,14 +463,14 @@
>>
>> check_slave() {
>> # Checks slave status
>> - local rc new_master_IP
>> + local rc new_master
>>
>> get_slave_info
>> rc=$?
>>
>> if [ $rc -eq 0 ]; then
>> # Did we receive an error other than max_connections?
>> - if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
>> + if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>> # Whoa. Replication ran into an error. This slave has
>> # diverged from its master. Make sure this resource
>> # doesn't restart in place.
>> @@ -484,7 +486,7 @@
>> fi
>>
>> # If we got max_connections, let's remove the vip
>> - if [ $last_errno -eq 1040 ]; then
>> + if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>> set_reader_attr 0
>> exit $OCF_SUCCESS
>> fi
>> @@ -496,9 +498,9 @@
>> ocf_log warn "MySQL Slave IO threads currently not running."
>>
>> # Sanity check, are we at least on the right master
>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>>
>> - if [ "$master_host" != "$new_master_IP" ]; then
>> + if [ "$master_host" != "$new_master" ]; then
>> # Not pointing to the right master, not good, removing the VIPs
>> set_reader_attr 0
>>
>> @@ -570,23 +572,23 @@
>> }
>>
>> set_master() {
>> - local new_master_IP master_log_file master_log_pos
>> + local new_master master_log_file master_log_pos
>> local master_params
>>
>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>>
>> # Keep replication position
>> get_slave_info
>>
>> - if [ "$master_log_file" -a "$new_master_IP" = "$master_host" ]; then
>> + if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
>> # master_params=", MASTER_LOG_FILE='$master_log_file', \
>> # MASTER_LOG_POS=$master_log_pos"
>> ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
>> rm -f $tmpfile
>> return
>> else
>> - master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f2`
>> - master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f3`
>> + master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2`
>> + master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3`
>> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
>> master_params=", MASTER_LOG_FILE='$master_log_file', \
>> MASTER_LOG_POS=$master_log_pos"
>> @@ -601,7 +603,7 @@
>> # reset with RESET MASTER.
>>
>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>> - -e "CHANGE MASTER TO MASTER_HOST='$new_master_IP', \
>> + -e "CHANGE MASTER TO MASTER_HOST='$new_master', \
>> MASTER_USER='$OCF_RESKEY_replication_user', \
>> MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
>> rm -f $tmpfile
>> @@ -628,15 +630,15 @@
>> while true; do
>> $MYSQL $MYSQL_OPTIONS_REPL \
>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>> - if grep 'Waiting for master to send event' $tmpfile>/dev/null; then
>> + if grep -i 'Waiting for master to send event' $tmpfile>/dev/null; then
>> ocf_log info "MySQL slave has finished reading master binary log"
>> break
>> fi
>> - if grep 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
>> + if grep -i 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
>> ocf_log info "Master is down, no more binary logs to come"
>> break
>> fi
>> - if grep 'Connecting to master' $tmpfile>/dev/null; then
>> + if grep -i 'Connecting to master' $tmpfile>/dev/null; then
>> ocf_log info "Master is down, no more binary logs to come"
>> break
>> fi
>> @@ -660,7 +662,7 @@
>> while true; do
>> $MYSQL $MYSQL_OPTIONS_REPL \
>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>> - if grep 'Has read all relay log' $tmpfile>/dev/null; then
>> + if grep -i 'Has read all relay log' $tmpfile>/dev/null; then
>> ocf_log info "MySQL slave has finished processing relay log"
>> break
>> fi
>> @@ -744,8 +746,15 @@
>> # Determines what IP address is attached to the current host. The output of the
>> # crm_attribute command looks like this:
>> # scope=nodes name=IP value=10.2.2.161
>> +# If the IP node attribute is not defined, fallback is to uname -n
>> get_local_ip() {
>> - $CRM_ATTR -l forever -n IP -q -G
>> + local IP
>> + IP=`$CRM_ATTR -l forever -n IP -q -G`
>> + if [ ! $? -eq 0 ]; then
>> + uname -n
>> + else
>> + echo $IP
>> + fi
>> }
>>
>> #######################################################################
>> @@ -783,7 +792,7 @@
>>
>> mysql_status() {
>> if [ ! -e $OCF_RESKEY_pid ]; then
>> - ocf_log err "MySQL is not running"
>> + ocf_log $1 "MySQL is not running"
>> return $OCF_NOT_RUNNING;
>> fi
>>
>> @@ -797,7 +806,7 @@
>> if [ $? -eq 0 ]; then
>> return $OCF_SUCCESS;
>> else
>> - ocf_log err "MySQL not running: removing old PID file"
>> + ocf_log $1 "MySQL not running: removing old PID file"
>> rm -f $OCF_RESKEY_pid
>> return $OCF_NOT_RUNNING;
>> fi
>> @@ -811,8 +820,9 @@
>> if ocf_is_probe; then
>> status_loglevel="info"
>> fi
>> -
>> +
>> mysql_status $status_loglevel
>> +
>> rc=$?
>>
>> # TODO: check max connections error
>> @@ -856,7 +866,7 @@
>> set_reader_attr 0
>> fi
>>
>> - mysql_status
>> + mysql_status info
>> if [ $? = $OCF_SUCCESS ]; then
>> ocf_log info "MySQL already running"
>> return $OCF_SUCCESS
>> @@ -930,7 +940,7 @@
>> # Let the CRM/LRM time us out if required.
>> start_wait=1
>> while [ $start_wait = 1 ]; do
>> - mysql_status
>> + mysql_status info
>> rc=$?
>> if [ $rc = $OCF_SUCCESS ]; then
>> start_wait=0
>> @@ -1019,7 +1029,7 @@
>> count=0
>> while [ $count -lt $shutdown_timeout ]
>> do
>> - mysql_status
>> + mysql_status info
>> rc=$?
>> if [ $rc = $OCF_NOT_RUNNING ]; then
>> break
>> @@ -1029,7 +1039,7 @@
>> ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
>> done
>>
>> - mysql_status
>> + mysql_status info
>> if [ $? != $OCF_NOT_RUNNING ]; then
>> ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
>> /bin/kill -KILL $pid> /dev/null
>> @@ -1044,7 +1054,7 @@
>> mysql_promote() {
>> local master_info
>>
>> - if ( ! mysql_status ); then
>> + if ( ! mysql_status err ); then
>> return $OCF_NOT_RUNNING
>> fi
>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>> @@ -1053,7 +1063,7 @@
>> # Set Master Info in CIB, cluster level attribute
>> update_data_master_status
>> master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
>> - ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication -v "$master_info"
>> + ${CRM_ATTR_REPL_INFO} -v "$master_info"
>> rm -f $tmpfile
>>
>> set_read_only off || return $OCF_ERR_GENERIC
>> @@ -1070,7 +1080,7 @@
>> }
>>
>> mysql_demote() {
>> - if ! mysql_status; then
>> + if ! mysql_status err; then
>> return $OCF_NOT_RUNNING
>> fi
>>
>> @@ -1177,6 +1187,7 @@
>> # The log directory must be a directory owned by root, with permissions 0700,
>> # and the log must be writable and not a symlink.
>> ##########################################################################
>> +DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
>> if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
>> DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
>> if [ -d "${DEBUG_LOG_DIR}" ]; then
>> @@ -1214,7 +1225,7 @@
>> case "$1" in
>> start) mysql_start;;
>> stop) mysql_stop;;
>> - status) mysql_status;;
>> + status) mysql_status err;;
>> monitor) mysql_monitor;;
>> promote) mysql_promote;;
>> demote) mysql_demote;;
>
>> _______________________________________________________
>> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
>> Home Page: http://linux-ha.org/
>
> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/
>
Re: Modified patch for RA [ In reply to ]
Hi Yves,

On Fri, May 11, 2012 at 08:45:06AM -0400, Yves Trudeau wrote:
> Hi Dejan,
> I changed the name of the attribute to REPL_MASTER_IP

Let's quote from one of the previous emails:

2. Is it possible/plausible to have more than one mysql
instance? If so, then the attribute name should include the
instance name. Say ${INSTANCE_NAME}_mysql_replication_IP or
something to that extent. Also, it would make for a better
looking configuration. "IP" doesn't really say much.

> and added 2
> lines of comment for the get_local_ip function.

Again, a quote:

3. This attribute is part of the configuration and supposed to
be setup by the user. Please document that in the meta-data.

Note that one of the key words here is "meta-data." That is
supposed to be documentation for the users, not for developers.
Users don't normally read the code.

> Is that inline with
> what you want?

OT:

It doesn't really matter what _I_ want. We're having a discussion
here on how to improve the feature. It is just by chance that I am
right now the only one talking about it.

Cheers,

Dejan

> Regards,
>
> Yves
>
> Le 2012-05-11 04:45, Dejan Muhamedagic a écrit :
> >Hi Yves,
> >
> >It would be good not to start a new thread for the same
> >discussion.
> >
> >On Thu, May 10, 2012 at 05:06:25PM -0400, Yves Trudeau wrote:
> >>Hi Dejan,
> >> here's another modified patch for the mysql agent of the commit
> >>version 4c18035 (git@github.com:y-trudeau/resource-agents.git branch
> >>mysql-repl). This patch implements fallback on uname -n if the node
> >>IP attribute is not present and uses the instance name for the
> >>replication info attribute.
> >
> >Hmm, it looks like there was a misunderstanding here. The
> >attribute named "IP" is still named "IP" :)
> >
> >And we're still missing the documentation for that attribute.
> >
> >>I am also working with Raoul to get me
> >>back on track with git.
> >
> >Good!
> >
> >Cheers,
> >
> >Dejan
> >
> >>
> >>Regards,
> >>
> >>Yves
> >
> >>--- mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
> >>+++ resource-agents-prm/heartbeat/mysql 2012-05-10 11:01:02.538421042 -0400
> >>@@ -109,7 +109,7 @@
> >> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
> >> : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
> >>
> >>-: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
> >>+: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
> >>
> >> #######################################################################
> >>
> >>@@ -328,7 +328,7 @@
> >> </longdesc>
> >> <shortdesc lang="en">Sets the node attribute that determines
> >> whether a node is usable for clients to read from.</shortdesc>
> >>-<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
> >>+<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
> >> </parameter>
> >> </parameters>
> >>
> >>@@ -355,11 +355,13 @@
> >> MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
> >> MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
> >> MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
> >>+MYSQL_TOO_MANY_CONN_ERR=1040
> >>
> >> CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
> >> HOSTNAME=`uname -n`
> >> CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
> >> INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
> >>+CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication"
> >>
> >> #######################################################################
> >>
> >>@@ -461,14 +463,14 @@
> >>
> >> check_slave() {
> >> # Checks slave status
> >>- local rc new_master_IP
> >>+ local rc new_master
> >>
> >> get_slave_info
> >> rc=$?
> >>
> >> if [ $rc -eq 0 ]; then
> >> # Did we receive an error other than max_connections?
> >>- if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
> >>+ if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
> >> # Whoa. Replication ran into an error. This slave has
> >> # diverged from its master. Make sure this resource
> >> # doesn't restart in place.
> >>@@ -484,7 +486,7 @@
> >> fi
> >>
> >> # If we got max_connections, let's remove the vip
> >>- if [ $last_errno -eq 1040 ]; then
> >>+ if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
> >> set_reader_attr 0
> >> exit $OCF_SUCCESS
> >> fi
> >>@@ -496,9 +498,9 @@
> >> ocf_log warn "MySQL Slave IO threads currently not running."
> >>
> >> # Sanity check, are we at least on the right master
> >>- new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
> >>+ new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
> >>
> >>- if [ "$master_host" != "$new_master_IP" ]; then
> >>+ if [ "$master_host" != "$new_master" ]; then
> >> # Not pointing to the right master, not good, removing the VIPs
> >> set_reader_attr 0
> >>
> >>@@ -570,23 +572,23 @@
> >> }
> >>
> >> set_master() {
> >>- local new_master_IP master_log_file master_log_pos
> >>+ local new_master master_log_file master_log_pos
> >> local master_params
> >>
> >>- new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
> >>+ new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
> >>
> >> # Keep replication position
> >> get_slave_info
> >>
> >>- if [ "$master_log_file" -a "$new_master_IP" = "$master_host" ]; then
> >>+ if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
> >> # master_params=", MASTER_LOG_FILE='$master_log_file', \
> >> # MASTER_LOG_POS=$master_log_pos"
> >> ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
> >> rm -f $tmpfile
> >> return
> >> else
> >>- master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f2`
> >>- master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f3`
> >>+ master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2`
> >>+ master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3`
> >> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
> >> master_params=", MASTER_LOG_FILE='$master_log_file', \
> >> MASTER_LOG_POS=$master_log_pos"
> >>@@ -601,7 +603,7 @@
> >> # reset with RESET MASTER.
> >>
> >> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> >>- -e "CHANGE MASTER TO MASTER_HOST='$new_master_IP', \
> >>+ -e "CHANGE MASTER TO MASTER_HOST='$new_master', \
> >> MASTER_USER='$OCF_RESKEY_replication_user', \
> >> MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
> >> rm -f $tmpfile
> >>@@ -628,15 +630,15 @@
> >> while true; do
> >> $MYSQL $MYSQL_OPTIONS_REPL \
> >> -e 'SHOW PROCESSLIST\G'> $tmpfile
> >>- if grep 'Waiting for master to send event' $tmpfile>/dev/null; then
> >>+ if grep -i 'Waiting for master to send event' $tmpfile>/dev/null; then
> >> ocf_log info "MySQL slave has finished reading master binary log"
> >> break
> >> fi
> >>- if grep 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
> >>+ if grep -i 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
> >> ocf_log info "Master is down, no more binary logs to come"
> >> break
> >> fi
> >>- if grep 'Connecting to master' $tmpfile>/dev/null; then
> >>+ if grep -i 'Connecting to master' $tmpfile>/dev/null; then
> >> ocf_log info "Master is down, no more binary logs to come"
> >> break
> >> fi
> >>@@ -660,7 +662,7 @@
> >> while true; do
> >> $MYSQL $MYSQL_OPTIONS_REPL \
> >> -e 'SHOW PROCESSLIST\G'> $tmpfile
> >>- if grep 'Has read all relay log' $tmpfile>/dev/null; then
> >>+ if grep -i 'Has read all relay log' $tmpfile>/dev/null; then
> >> ocf_log info "MySQL slave has finished processing relay log"
> >> break
> >> fi
> >>@@ -744,8 +746,15 @@
> >> # Determines what IP address is attached to the current host. The output of the
> >> # crm_attribute command looks like this:
> >> # scope=nodes name=IP value=10.2.2.161
> >>+# If the IP node attribute is not defined, fallback is to uname -n
> >> get_local_ip() {
> >>- $CRM_ATTR -l forever -n IP -q -G
> >>+ local IP
> >>+ IP=`$CRM_ATTR -l forever -n IP -q -G`
> >>+ if [ ! $? -eq 0 ]; then
> >>+ uname -n
> >>+ else
> >>+ echo $IP
> >>+ fi
> >> }
> >>
> >> #######################################################################
> >>@@ -783,7 +792,7 @@
> >>
> >> mysql_status() {
> >> if [ ! -e $OCF_RESKEY_pid ]; then
> >>- ocf_log err "MySQL is not running"
> >>+ ocf_log $1 "MySQL is not running"
> >> return $OCF_NOT_RUNNING;
> >> fi
> >>
> >>@@ -797,7 +806,7 @@
> >> if [ $? -eq 0 ]; then
> >> return $OCF_SUCCESS;
> >> else
> >>- ocf_log err "MySQL not running: removing old PID file"
> >>+ ocf_log $1 "MySQL not running: removing old PID file"
> >> rm -f $OCF_RESKEY_pid
> >> return $OCF_NOT_RUNNING;
> >> fi
> >>@@ -811,8 +820,9 @@
> >> if ocf_is_probe; then
> >> status_loglevel="info"
> >> fi
> >>-
> >>+
> >> mysql_status $status_loglevel
> >>+
> >> rc=$?
> >>
> >> # TODO: check max connections error
> >>@@ -856,7 +866,7 @@
> >> set_reader_attr 0
> >> fi
> >>
> >>- mysql_status
> >>+ mysql_status info
> >> if [ $? = $OCF_SUCCESS ]; then
> >> ocf_log info "MySQL already running"
> >> return $OCF_SUCCESS
> >>@@ -930,7 +940,7 @@
> >> # Let the CRM/LRM time us out if required.
> >> start_wait=1
> >> while [ $start_wait = 1 ]; do
> >>- mysql_status
> >>+ mysql_status info
> >> rc=$?
> >> if [ $rc = $OCF_SUCCESS ]; then
> >> start_wait=0
> >>@@ -1019,7 +1029,7 @@
> >> count=0
> >> while [ $count -lt $shutdown_timeout ]
> >> do
> >>- mysql_status
> >>+ mysql_status info
> >> rc=$?
> >> if [ $rc = $OCF_NOT_RUNNING ]; then
> >> break
> >>@@ -1029,7 +1039,7 @@
> >> ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
> >> done
> >>
> >>- mysql_status
> >>+ mysql_status info
> >> if [ $? != $OCF_NOT_RUNNING ]; then
> >> ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
> >> /bin/kill -KILL $pid> /dev/null
> >>@@ -1044,7 +1054,7 @@
> >> mysql_promote() {
> >> local master_info
> >>
> >>- if ( ! mysql_status ); then
> >>+ if ( ! mysql_status err ); then
> >> return $OCF_NOT_RUNNING
> >> fi
> >> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> >>@@ -1053,7 +1063,7 @@
> >> # Set Master Info in CIB, cluster level attribute
> >> update_data_master_status
> >> master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
> >>- ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication -v "$master_info"
> >>+ ${CRM_ATTR_REPL_INFO} -v "$master_info"
> >> rm -f $tmpfile
> >>
> >> set_read_only off || return $OCF_ERR_GENERIC
> >>@@ -1070,7 +1080,7 @@
> >> }
> >>
> >> mysql_demote() {
> >>- if ! mysql_status; then
> >>+ if ! mysql_status err; then
> >> return $OCF_NOT_RUNNING
> >> fi
> >>
> >>@@ -1177,6 +1187,7 @@
> >> # The log directory must be a directory owned by root, with permissions 0700,
> >> # and the log must be writable and not a symlink.
> >> ##########################################################################
> >>+DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
> >> if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
> >> DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
> >> if [ -d "${DEBUG_LOG_DIR}" ]; then
> >>@@ -1214,7 +1225,7 @@
> >> case "$1" in
> >> start) mysql_start;;
> >> stop) mysql_stop;;
> >>- status) mysql_status;;
> >>+ status) mysql_status err;;
> >> monitor) mysql_monitor;;
> >> promote) mysql_promote;;
> >> demote) mysql_demote;;
> >
> >>_______________________________________________________
> >>Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> >>http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> >>Home Page: http://linux-ha.org/
> >
> >_______________________________________________________
> >Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> >http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> >Home Page: http://linux-ha.org/
> >

> --- mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
> +++ resource-agents-prm/heartbeat/mysql 2012-05-11 08:41:20.868420414 -0400
> @@ -109,7 +109,7 @@
> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
> : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
>
> -: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
> +: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
>
> #######################################################################
>
> @@ -328,7 +328,7 @@
> </longdesc>
> <shortdesc lang="en">Sets the node attribute that determines
> whether a node is usable for clients to read from.</shortdesc>
> -<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
> +<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
> </parameter>
> </parameters>
>
> @@ -355,11 +355,13 @@
> MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
> MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
> MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
> +MYSQL_TOO_MANY_CONN_ERR=1040
>
> CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
> HOSTNAME=`uname -n`
> CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
> INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
> +CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication"
>
> #######################################################################
>
> @@ -461,14 +463,14 @@
>
> check_slave() {
> # Checks slave status
> - local rc new_master_IP
> + local rc new_master
>
> get_slave_info
> rc=$?
>
> if [ $rc -eq 0 ]; then
> # Did we receive an error other than max_connections?
> - if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
> + if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
> # Whoa. Replication ran into an error. This slave has
> # diverged from its master. Make sure this resource
> # doesn't restart in place.
> @@ -484,7 +486,7 @@
> fi
>
> # If we got max_connections, let's remove the vip
> - if [ $last_errno -eq 1040 ]; then
> + if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
> set_reader_attr 0
> exit $OCF_SUCCESS
> fi
> @@ -496,9 +498,9 @@
> ocf_log warn "MySQL Slave IO threads currently not running."
>
> # Sanity check, are we at least on the right master
> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>
> - if [ "$master_host" != "$new_master_IP" ]; then
> + if [ "$master_host" != "$new_master" ]; then
> # Not pointing to the right master, not good, removing the VIPs
> set_reader_attr 0
>
> @@ -570,23 +572,23 @@
> }
>
> set_master() {
> - local new_master_IP master_log_file master_log_pos
> + local new_master master_log_file master_log_pos
> local master_params
>
> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>
> # Keep replication position
> get_slave_info
>
> - if [ "$master_log_file" -a "$new_master_IP" = "$master_host" ]; then
> + if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
> # master_params=", MASTER_LOG_FILE='$master_log_file', \
> # MASTER_LOG_POS=$master_log_pos"
> ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
> rm -f $tmpfile
> return
> else
> - master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f2`
> - master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f3`
> + master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2`
> + master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3`
> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
> master_params=", MASTER_LOG_FILE='$master_log_file', \
> MASTER_LOG_POS=$master_log_pos"
> @@ -601,7 +603,7 @@
> # reset with RESET MASTER.
>
> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> - -e "CHANGE MASTER TO MASTER_HOST='$new_master_IP', \
> + -e "CHANGE MASTER TO MASTER_HOST='$new_master', \
> MASTER_USER='$OCF_RESKEY_replication_user', \
> MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
> rm -f $tmpfile
> @@ -628,15 +630,15 @@
> while true; do
> $MYSQL $MYSQL_OPTIONS_REPL \
> -e 'SHOW PROCESSLIST\G' > $tmpfile
> - if grep 'Waiting for master to send event' $tmpfile >/dev/null; then
> + if grep -i 'Waiting for master to send event' $tmpfile >/dev/null; then
> ocf_log info "MySQL slave has finished reading master binary log"
> break
> fi
> - if grep 'Reconnecting after a failed master event read' $tmpfile >/dev/null; then
> + if grep -i 'Reconnecting after a failed master event read' $tmpfile >/dev/null; then
> ocf_log info "Master is down, no more binary logs to come"
> break
> fi
> - if grep 'Connecting to master' $tmpfile >/dev/null; then
> + if grep -i 'Connecting to master' $tmpfile >/dev/null; then
> ocf_log info "Master is down, no more binary logs to come"
> break
> fi
> @@ -660,7 +662,7 @@
> while true; do
> $MYSQL $MYSQL_OPTIONS_REPL \
> -e 'SHOW PROCESSLIST\G' > $tmpfile
> - if grep 'Has read all relay log' $tmpfile >/dev/null; then
> + if grep -i 'Has read all relay log' $tmpfile >/dev/null; then
> ocf_log info "MySQL slave has finished processing relay log"
> break
> fi
> @@ -744,8 +746,17 @@
> # Determines what IP address is attached to the current host. The output of the
> # crm_attribute command looks like this:
> # scope=nodes name=IP value=10.2.2.161
> +# If the REPL_MASTER_IP node attribute is not defined, fallback is to uname -n
> +# The REPL_MASTER_IP is the IP address that will be used for the
> +# change master to command.
> get_local_ip() {
> - $CRM_ATTR -l forever -n IP -q -G
> + local IP
> + IP=`$CRM_ATTR -l forever -n REPL_MASTER_IP -q -G`
> + if [ ! $? -eq 0 ]; then
> + uname -n
> + else
> + echo $IP
> + fi
> }
>
> #######################################################################
> @@ -783,7 +794,7 @@
>
> mysql_status() {
> if [ ! -e $OCF_RESKEY_pid ]; then
> - ocf_log err "MySQL is not running"
> + ocf_log $1 "MySQL is not running"
> return $OCF_NOT_RUNNING;
> fi
>
> @@ -797,7 +808,7 @@
> if [ $? -eq 0 ]; then
> return $OCF_SUCCESS;
> else
> - ocf_log err "MySQL not running: removing old PID file"
> + ocf_log $1 "MySQL not running: removing old PID file"
> rm -f $OCF_RESKEY_pid
> return $OCF_NOT_RUNNING;
> fi
> @@ -811,8 +822,9 @@
> if ocf_is_probe; then
> status_loglevel="info"
> fi
> -
> +
> mysql_status $status_loglevel
> +
> rc=$?
>
> # TODO: check max connections error
> @@ -856,7 +868,7 @@
> set_reader_attr 0
> fi
>
> - mysql_status
> + mysql_status info
> if [ $? = $OCF_SUCCESS ]; then
> ocf_log info "MySQL already running"
> return $OCF_SUCCESS
> @@ -930,7 +942,7 @@
> # Let the CRM/LRM time us out if required.
> start_wait=1
> while [ $start_wait = 1 ]; do
> - mysql_status
> + mysql_status info
> rc=$?
> if [ $rc = $OCF_SUCCESS ]; then
> start_wait=0
> @@ -1019,7 +1031,7 @@
> count=0
> while [ $count -lt $shutdown_timeout ]
> do
> - mysql_status
> + mysql_status info
> rc=$?
> if [ $rc = $OCF_NOT_RUNNING ]; then
> break
> @@ -1029,7 +1041,7 @@
> ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
> done
>
> - mysql_status
> + mysql_status info
> if [ $? != $OCF_NOT_RUNNING ]; then
> ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
> /bin/kill -KILL $pid > /dev/null
> @@ -1044,7 +1056,7 @@
> mysql_promote() {
> local master_info
>
> - if ( ! mysql_status ); then
> + if ( ! mysql_status err ); then
> return $OCF_NOT_RUNNING
> fi
> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> @@ -1053,7 +1065,7 @@
> # Set Master Info in CIB, cluster level attribute
> update_data_master_status
> master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
> - ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication -v "$master_info"
> + ${CRM_ATTR_REPL_INFO} -v "$master_info"
> rm -f $tmpfile
>
> set_read_only off || return $OCF_ERR_GENERIC
> @@ -1070,7 +1082,7 @@
> }
>
> mysql_demote() {
> - if ! mysql_status; then
> + if ! mysql_status err; then
> return $OCF_NOT_RUNNING
> fi
>
> @@ -1177,6 +1189,7 @@
> # The log directory must be a directory owned by root, with permissions 0700,
> # and the log must be writable and not a symlink.
> ##########################################################################
> +DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
> if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
> DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
> if [ -d "${DEBUG_LOG_DIR}" ]; then
> @@ -1214,7 +1227,7 @@
> case "$1" in
> start) mysql_start;;
> stop) mysql_stop;;
> - status) mysql_status;;
> + status) mysql_status err;;
> monitor) mysql_monitor;;
> promote) mysql_promote;;
> demote) mysql_demote;;

> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/

_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
Re: Modified patch for RA [ In reply to ]
Hi Dejan,
ok, here is the latest version using
${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP and I agree with the merits of
this :) I added a paragraph explaining the use of the attribute in the
longdesc of the meta-data.

Regards,

Yves


Le 2012-05-11 09:21, Dejan Muhamedagic a écrit :
> Hi Yves,
>
> On Fri, May 11, 2012 at 08:45:06AM -0400, Yves Trudeau wrote:
>> Hi Dejan,
>> I changed the name of the attribute to REPL_MASTER_IP
>
> Let's quote from one of the previous emails:
>
> 2. Is it possible/plausible to have more than one mysql
> instance? If so, then the attribute name should include the
> instance name. Say ${INSTANCE_NAME}_mysql_replication_IP or
> something to that extent. Also, it would make for a better
> looking configuration. "IP" doesn't really say much.
>
>> and added 2
>> lines of comment for the get_local_ip function.
>
> Again, a quote:
>
> 3. This attribute is part of the configuration and supposed to
> be setup by the user. Please document that in the meta-data.
>
> Note that one of the key words here is "meta-data." That is
> supposed to be documentation for the users, not for developers.
> Users don't normally read the code.
>
>> Is that in line with
>> what you want?
>
> OT:
>
> It doesn't really matter what _I_ want. We're having a discussion
> here on how to improve the feature. It is just by chance that I am
> right now the only one talking about it.
>
> Cheers,
>
> Dejan
>
>> Regards,
>>
>> Yves
>>
>> Le 2012-05-11 04:45, Dejan Muhamedagic a écrit :
>>> Hi Yves,
>>>
>>> It would be good not to start a new thread for the same
>>> discussion.
>>>
>>> On Thu, May 10, 2012 at 05:06:25PM -0400, Yves Trudeau wrote:
>>>> Hi Dejan,
>>>> here's another modified patch for the mysql agent of the commit
>>>> version 4c18035 (git@github.com:y-trudeau/resource-agents.git branch
>>>> mysql-repl). This patch implements fallback on uname -n if the node
>>>> IP attribute is not present and uses the instance name for the
>>>> replication info attribute.
>>>
>>> Hmm, it looks like there was a misunderstanding here. The
>>> attribute named "IP" is still named "IP" :)
>>>
>>> And we're still missing the documentation for that attribute.
>>>
>>>> I am also working with Raoul to get me
>>>> back on track with git.
>>>
>>> Good!
>>>
>>> Cheers,
>>>
>>> Dejan
>>>
>>>>
>>>> Regards,
>>>>
>>>> Yves
>>>
>>>> --- mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
>>>> +++ resource-agents-prm/heartbeat/mysql 2012-05-10 11:01:02.538421042 -0400
>>>> @@ -109,7 +109,7 @@
>>>> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
>>>> : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
>>>>
>>>> -: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
>>>> +: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
>>>>
>>>> #######################################################################
>>>>
>>>> @@ -328,7 +328,7 @@
>>>> </longdesc>
>>>> <shortdesc lang="en">Sets the node attribute that determines
>>>> whether a node is usable for clients to read from.</shortdesc>
>>>> -<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
>>>> +<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
>>>> </parameter>
>>>> </parameters>
>>>>
>>>> @@ -355,11 +355,13 @@
>>>> MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
>>>> MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
>>>> MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
>>>> +MYSQL_TOO_MANY_CONN_ERR=1040
>>>>
>>>> CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
>>>> HOSTNAME=`uname -n`
>>>> CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
>>>> INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
>>>> +CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication"
>>>>
>>>> #######################################################################
>>>>
>>>> @@ -461,14 +463,14 @@
>>>>
>>>> check_slave() {
>>>> # Checks slave status
>>>> - local rc new_master_IP
>>>> + local rc new_master
>>>>
>>>> get_slave_info
>>>> rc=$?
>>>>
>>>> if [ $rc -eq 0 ]; then
>>>> # Did we receive an error other than max_connections?
>>>> - if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
>>>> + if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>>>> # Whoa. Replication ran into an error. This slave has
>>>> # diverged from its master. Make sure this resource
>>>> # doesn't restart in place.
>>>> @@ -484,7 +486,7 @@
>>>> fi
>>>>
>>>> # If we got max_connections, let's remove the vip
>>>> - if [ $last_errno -eq 1040 ]; then
>>>> + if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>>>> set_reader_attr 0
>>>> exit $OCF_SUCCESS
>>>> fi
>>>> @@ -496,9 +498,9 @@
>>>> ocf_log warn "MySQL Slave IO threads currently not running."
>>>>
>>>> # Sanity check, are we at least on the right master
>>>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>>>> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>>>>
>>>> - if [ "$master_host" != "$new_master_IP" ]; then
>>>> + if [ "$master_host" != "$new_master" ]; then
>>>> # Not pointing to the right master, not good, removing the VIPs
>>>> set_reader_attr 0
>>>>
>>>> @@ -570,23 +572,23 @@
>>>> }
>>>>
>>>> set_master() {
>>>> - local new_master_IP master_log_file master_log_pos
>>>> + local new_master master_log_file master_log_pos
>>>> local master_params
>>>>
>>>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>>>> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>>>>
>>>> # Keep replication position
>>>> get_slave_info
>>>>
>>>> - if [ "$master_log_file" -a "$new_master_IP" = "$master_host" ]; then
>>>> + if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
>>>> # master_params=", MASTER_LOG_FILE='$master_log_file', \
>>>> # MASTER_LOG_POS=$master_log_pos"
>>>> ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
>>>> rm -f $tmpfile
>>>> return
>>>> else
>>>> - master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f2`
>>>> - master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f3`
>>>> + master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2`
>>>> + master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3`
>>>> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
>>>> master_params=", MASTER_LOG_FILE='$master_log_file', \
>>>> MASTER_LOG_POS=$master_log_pos"
>>>> @@ -601,7 +603,7 @@
>>>> # reset with RESET MASTER.
>>>>
>>>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>>>> - -e "CHANGE MASTER TO MASTER_HOST='$new_master_IP', \
>>>> + -e "CHANGE MASTER TO MASTER_HOST='$new_master', \
>>>> MASTER_USER='$OCF_RESKEY_replication_user', \
>>>> MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
>>>> rm -f $tmpfile
>>>> @@ -628,15 +630,15 @@
>>>> while true; do
>>>> $MYSQL $MYSQL_OPTIONS_REPL \
>>>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>>>> - if grep 'Waiting for master to send event' $tmpfile>/dev/null; then
>>>> + if grep -i 'Waiting for master to send event' $tmpfile>/dev/null; then
>>>> ocf_log info "MySQL slave has finished reading master binary log"
>>>> break
>>>> fi
>>>> - if grep 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
>>>> + if grep -i 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
>>>> ocf_log info "Master is down, no more binary logs to come"
>>>> break
>>>> fi
>>>> - if grep 'Connecting to master' $tmpfile>/dev/null; then
>>>> + if grep -i 'Connecting to master' $tmpfile>/dev/null; then
>>>> ocf_log info "Master is down, no more binary logs to come"
>>>> break
>>>> fi
>>>> @@ -660,7 +662,7 @@
>>>> while true; do
>>>> $MYSQL $MYSQL_OPTIONS_REPL \
>>>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>>>> - if grep 'Has read all relay log' $tmpfile>/dev/null; then
>>>> + if grep -i 'Has read all relay log' $tmpfile>/dev/null; then
>>>> ocf_log info "MySQL slave has finished processing relay log"
>>>> break
>>>> fi
>>>> @@ -744,8 +746,15 @@
>>>> # Determines what IP address is attached to the current host. The output of the
>>>> # crm_attribute command looks like this:
>>>> # scope=nodes name=IP value=10.2.2.161
>>>> +# If the IP node attribute is not defined, fallback is to uname -n
>>>> get_local_ip() {
>>>> - $CRM_ATTR -l forever -n IP -q -G
>>>> + local IP
>>>> + IP=`$CRM_ATTR -l forever -n IP -q -G`
>>>> + if [ ! $? -eq 0 ]; then
>>>> + uname -n
>>>> + else
>>>> + echo $IP
>>>> + fi
>>>> }
>>>>
>>>> #######################################################################
>>>> @@ -783,7 +792,7 @@
>>>>
>>>> mysql_status() {
>>>> if [ ! -e $OCF_RESKEY_pid ]; then
>>>> - ocf_log err "MySQL is not running"
>>>> + ocf_log $1 "MySQL is not running"
>>>> return $OCF_NOT_RUNNING;
>>>> fi
>>>>
>>>> @@ -797,7 +806,7 @@
>>>> if [ $? -eq 0 ]; then
>>>> return $OCF_SUCCESS;
>>>> else
>>>> - ocf_log err "MySQL not running: removing old PID file"
>>>> + ocf_log $1 "MySQL not running: removing old PID file"
>>>> rm -f $OCF_RESKEY_pid
>>>> return $OCF_NOT_RUNNING;
>>>> fi
>>>> @@ -811,8 +820,9 @@
>>>> if ocf_is_probe; then
>>>> status_loglevel="info"
>>>> fi
>>>> -
>>>> +
>>>> mysql_status $status_loglevel
>>>> +
>>>> rc=$?
>>>>
>>>> # TODO: check max connections error
>>>> @@ -856,7 +866,7 @@
>>>> set_reader_attr 0
>>>> fi
>>>>
>>>> - mysql_status
>>>> + mysql_status info
>>>> if [ $? = $OCF_SUCCESS ]; then
>>>> ocf_log info "MySQL already running"
>>>> return $OCF_SUCCESS
>>>> @@ -930,7 +940,7 @@
>>>> # Let the CRM/LRM time us out if required.
>>>> start_wait=1
>>>> while [ $start_wait = 1 ]; do
>>>> - mysql_status
>>>> + mysql_status info
>>>> rc=$?
>>>> if [ $rc = $OCF_SUCCESS ]; then
>>>> start_wait=0
>>>> @@ -1019,7 +1029,7 @@
>>>> count=0
>>>> while [ $count -lt $shutdown_timeout ]
>>>> do
>>>> - mysql_status
>>>> + mysql_status info
>>>> rc=$?
>>>> if [ $rc = $OCF_NOT_RUNNING ]; then
>>>> break
>>>> @@ -1029,7 +1039,7 @@
>>>> ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
>>>> done
>>>>
>>>> - mysql_status
>>>> + mysql_status info
>>>> if [ $? != $OCF_NOT_RUNNING ]; then
>>>> ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
>>>> /bin/kill -KILL $pid> /dev/null
>>>> @@ -1044,7 +1054,7 @@
>>>> mysql_promote() {
>>>> local master_info
>>>>
>>>> - if ( ! mysql_status ); then
>>>> + if ( ! mysql_status err ); then
>>>> return $OCF_NOT_RUNNING
>>>> fi
>>>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>>>> @@ -1053,7 +1063,7 @@
>>>> # Set Master Info in CIB, cluster level attribute
>>>> update_data_master_status
>>>> master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
>>>> - ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication -v "$master_info"
>>>> + ${CRM_ATTR_REPL_INFO} -v "$master_info"
>>>> rm -f $tmpfile
>>>>
>>>> set_read_only off || return $OCF_ERR_GENERIC
>>>> @@ -1070,7 +1080,7 @@
>>>> }
>>>>
>>>> mysql_demote() {
>>>> - if ! mysql_status; then
>>>> + if ! mysql_status err; then
>>>> return $OCF_NOT_RUNNING
>>>> fi
>>>>
>>>> @@ -1177,6 +1187,7 @@
>>>> # The log directory must be a directory owned by root, with permissions 0700,
>>>> # and the log must be writable and not a symlink.
>>>> ##########################################################################
>>>> +DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
>>>> if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
>>>> DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
>>>> if [ -d "${DEBUG_LOG_DIR}" ]; then
>>>> @@ -1214,7 +1225,7 @@
>>>> case "$1" in
>>>> start) mysql_start;;
>>>> stop) mysql_stop;;
>>>> - status) mysql_status;;
>>>> + status) mysql_status err;;
>>>> monitor) mysql_monitor;;
>>>> promote) mysql_promote;;
>>>> demote) mysql_demote;;
>>>
>>>> _______________________________________________________
>>>> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
>>>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
>>>> Home Page: http://linux-ha.org/
>>>
>>> _______________________________________________________
>>> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
>>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
>>> Home Page: http://linux-ha.org/
>>>
>
>> --- mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
>> +++ resource-agents-prm/heartbeat/mysql 2012-05-11 08:41:20.868420414 -0400
>> @@ -109,7 +109,7 @@
>> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
>> : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
>>
>> -: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
>> +: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
>>
>> #######################################################################
>>
>> @@ -328,7 +328,7 @@
>> </longdesc>
>> <shortdesc lang="en">Sets the node attribute that determines
>> whether a node is usable for clients to read from.</shortdesc>
>> -<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
>> +<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
>> </parameter>
>> </parameters>
>>
>> @@ -355,11 +355,13 @@
>> MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
>> MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
>> MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
>> +MYSQL_TOO_MANY_CONN_ERR=1040
>>
>> CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
>> HOSTNAME=`uname -n`
>> CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
>> INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
>> +CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication"
>>
>> #######################################################################
>>
>> @@ -461,14 +463,14 @@
>>
>> check_slave() {
>> # Checks slave status
>> - local rc new_master_IP
>> + local rc new_master
>>
>> get_slave_info
>> rc=$?
>>
>> if [ $rc -eq 0 ]; then
>> # Did we receive an error other than max_connections?
>> - if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
>> + if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>> # Whoa. Replication ran into an error. This slave has
>> # diverged from its master. Make sure this resource
>> # doesn't restart in place.
>> @@ -484,7 +486,7 @@
>> fi
>>
>> # If we got max_connections, let's remove the vip
>> - if [ $last_errno -eq 1040 ]; then
>> + if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>> set_reader_attr 0
>> exit $OCF_SUCCESS
>> fi
>> @@ -496,9 +498,9 @@
>> ocf_log warn "MySQL Slave IO threads currently not running."
>>
>> # Sanity check, are we at least on the right master
>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>>
>> - if [ "$master_host" != "$new_master_IP" ]; then
>> + if [ "$master_host" != "$new_master" ]; then
>> # Not pointing to the right master, not good, removing the VIPs
>> set_reader_attr 0
>>
>> @@ -570,23 +572,23 @@
>> }
>>
>> set_master() {
>> - local new_master_IP master_log_file master_log_pos
>> + local new_master master_log_file master_log_pos
>> local master_params
>>
>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>>
>> # Keep replication position
>> get_slave_info
>>
>> - if [ "$master_log_file" -a "$new_master_IP" = "$master_host" ]; then
>> + if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
>> # master_params=", MASTER_LOG_FILE='$master_log_file', \
>> # MASTER_LOG_POS=$master_log_pos"
>> ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
>> rm -f $tmpfile
>> return
>> else
>> - master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f2`
>> - master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f3`
>> + master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2`
>> + master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3`
>> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
>> master_params=", MASTER_LOG_FILE='$master_log_file', \
>> MASTER_LOG_POS=$master_log_pos"
>> @@ -601,7 +603,7 @@
>> # reset with RESET MASTER.
>>
>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>> - -e "CHANGE MASTER TO MASTER_HOST='$new_master_IP', \
>> + -e "CHANGE MASTER TO MASTER_HOST='$new_master', \
>> MASTER_USER='$OCF_RESKEY_replication_user', \
>> MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
>> rm -f $tmpfile
>> @@ -628,15 +630,15 @@
>> while true; do
>> $MYSQL $MYSQL_OPTIONS_REPL \
>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>> - if grep 'Waiting for master to send event' $tmpfile>/dev/null; then
>> + if grep -i 'Waiting for master to send event' $tmpfile>/dev/null; then
>> ocf_log info "MySQL slave has finished reading master binary log"
>> break
>> fi
>> - if grep 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
>> + if grep -i 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
>> ocf_log info "Master is down, no more binary logs to come"
>> break
>> fi
>> - if grep 'Connecting to master' $tmpfile>/dev/null; then
>> + if grep -i 'Connecting to master' $tmpfile>/dev/null; then
>> ocf_log info "Master is down, no more binary logs to come"
>> break
>> fi
>> @@ -660,7 +662,7 @@
>> while true; do
>> $MYSQL $MYSQL_OPTIONS_REPL \
>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>> - if grep 'Has read all relay log' $tmpfile>/dev/null; then
>> + if grep -i 'Has read all relay log' $tmpfile>/dev/null; then
>> ocf_log info "MySQL slave has finished processing relay log"
>> break
>> fi
>> @@ -744,8 +746,17 @@
>> # Determines what IP address is attached to the current host. The output of the
>> # crm_attribute command looks like this:
>> # scope=nodes name=IP value=10.2.2.161
>> +# If the REPL_MASTER_IP node attribute is not defined, fallback is to uname -n
>> +# The REPL_MASTER_IP is the IP address that will be used for the
>> +# change master to command.
>> get_local_ip() {
>> - $CRM_ATTR -l forever -n IP -q -G
>> + local IP
>> + IP=`$CRM_ATTR -l forever -n REPL_MASTER_IP -q -G`
>> + if [ ! $? -eq 0 ]; then
>> + uname -n
>> + else
>> + echo $IP
>> + fi
>> }
>>
>> #######################################################################
>> @@ -783,7 +794,7 @@
>>
>> mysql_status() {
>> if [ ! -e $OCF_RESKEY_pid ]; then
>> - ocf_log err "MySQL is not running"
>> + ocf_log $1 "MySQL is not running"
>> return $OCF_NOT_RUNNING;
>> fi
>>
>> @@ -797,7 +808,7 @@
>> if [ $? -eq 0 ]; then
>> return $OCF_SUCCESS;
>> else
>> - ocf_log err "MySQL not running: removing old PID file"
>> + ocf_log $1 "MySQL not running: removing old PID file"
>> rm -f $OCF_RESKEY_pid
>> return $OCF_NOT_RUNNING;
>> fi
>> @@ -811,8 +822,9 @@
>> if ocf_is_probe; then
>> status_loglevel="info"
>> fi
>> -
>> +
>> mysql_status $status_loglevel
>> +
>> rc=$?
>>
>> # TODO: check max connections error
>> @@ -856,7 +868,7 @@
>> set_reader_attr 0
>> fi
>>
>> - mysql_status
>> + mysql_status info
>> if [ $? = $OCF_SUCCESS ]; then
>> ocf_log info "MySQL already running"
>> return $OCF_SUCCESS
>> @@ -930,7 +942,7 @@
>> # Let the CRM/LRM time us out if required.
>> start_wait=1
>> while [ $start_wait = 1 ]; do
>> - mysql_status
>> + mysql_status info
>> rc=$?
>> if [ $rc = $OCF_SUCCESS ]; then
>> start_wait=0
>> @@ -1019,7 +1031,7 @@
>> count=0
>> while [ $count -lt $shutdown_timeout ]
>> do
>> - mysql_status
>> + mysql_status info
>> rc=$?
>> if [ $rc = $OCF_NOT_RUNNING ]; then
>> break
>> @@ -1029,7 +1041,7 @@
>> ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
>> done
>>
>> - mysql_status
>> + mysql_status info
>> if [ $? != $OCF_NOT_RUNNING ]; then
>> ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
>> /bin/kill -KILL $pid> /dev/null
>> @@ -1044,7 +1056,7 @@
>> mysql_promote() {
>> local master_info
>>
>> - if ( ! mysql_status ); then
>> + if ( ! mysql_status err ); then
>> return $OCF_NOT_RUNNING
>> fi
>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>> @@ -1053,7 +1065,7 @@
>> # Set Master Info in CIB, cluster level attribute
>> update_data_master_status
>> master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
>> - ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication -v "$master_info"
>> + ${CRM_ATTR_REPL_INFO} -v "$master_info"
>> rm -f $tmpfile
>>
>> set_read_only off || return $OCF_ERR_GENERIC
>> @@ -1070,7 +1082,7 @@
>> }
>>
>> mysql_demote() {
>> - if ! mysql_status; then
>> + if ! mysql_status err; then
>> return $OCF_NOT_RUNNING
>> fi
>>
>> @@ -1177,6 +1189,7 @@
>> # The log directory must be a directory owned by root, with permissions 0700,
>> # and the log must be writable and not a symlink.
>> ##########################################################################
>> +DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
>> if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
>> DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
>> if [ -d "${DEBUG_LOG_DIR}" ]; then
>> @@ -1214,7 +1227,7 @@
>> case "$1" in
>> start) mysql_start;;
>> stop) mysql_stop;;
>> - status) mysql_status;;
>> + status) mysql_status err;;
>> monitor) mysql_monitor;;
>> promote) mysql_promote;;
>> demote) mysql_demote;;
>
>> _______________________________________________________
>> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
>> Home Page: http://linux-ha.org/
>
> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/
>
Re: Modified patch for RA [ In reply to ]
Hi,

On Fri, May 11, 2012 at 10:25:19AM -0400, Yves Trudeau wrote:
> Hi Dejan,
> ok, here is the latest version using
> ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP

I'd vote for less "yelling" as cluster configurations are mostly
lower case.

> and I agree with the merits of
> this :) I added a paragraph explaining the use of the attribute in
> the longdesc of the meta-data.

Excellent! Looks good to me. If nobody objects, we can push this
come Monday.

BTW, on what repository is this based? Can you produce a set of
patches to be applied to upstream?

Cheers,

Dejan




> Regards,
>
> Yves
>
>
> Le 2012-05-11 09:21, Dejan Muhamedagic a écrit :
> >Hi Yves,
> >
> >On Fri, May 11, 2012 at 08:45:06AM -0400, Yves Trudeau wrote:
> >>Hi Dejan,
> >> I changed the name of the attribute to REPL_MASTER_IP
> >
> >Let's quote from one of the previous emails:
> >
> > 2. Is it possible/plausible to have more than one mysql
> > instance? If so, then the attribute name should include the
> > instance name. Say ${INSTANCE_NAME}_mysql_replication_IP or
> > something to that extent. Also, it would make for a better
> > looking configuration. "IP" doesn't really say much.
> >
> >>and added 2
> >>lines of comment for the get_local_ip function.
> >
> >Again, a quote:
> >
> > 3. This attribute is part of the configuration and supposed to
> > be setup by the user. Please document that in the meta-data.
> >
> >Note that one of the key words here is "meta-data." That is
> >supposed to be documentation for the users, not for developers.
> >Users don't normally read the code.
> >
> >>Is that in line with
> >>what you want?
> >
> >OT:
> >
> >It doesn't really matter what _I_ want. We're having a discussion
> >here on how to improve the feature. It is just by chance that I am
> >right now the only one talking about it.
> >
> >Cheers,
> >
> >Dejan
> >
> >>Regards,
> >>
> >>Yves
> >>
> >>Le 2012-05-11 04:45, Dejan Muhamedagic a écrit :
> >>>Hi Yves,
> >>>
> >>>It would be good not to start a new thread for the same
> >>>discussion.
> >>>
> >>>On Thu, May 10, 2012 at 05:06:25PM -0400, Yves Trudeau wrote:
> >>>>Hi Dejan,
> >>>> here's another modified patch for the mysql agent of the commit
> >>>>version 4c18035 (git@github.com:y-trudeau/resource-agents.git branch
> >>>>mysql-repl). This patch implements fallback on uname -n if the node
> >>>>IP attribute is not present and uses the instance name for the
> >>>>replication info attribute.
> >>>
> >>>Hmm, it looks like there was a misunderstanding here. The
> >>>attribute named "IP" is still named "IP" :)
> >>>
> >>>And we're still missing the documentation for that attribute.
> >>>
> >>>>I am also working with Raoul to get me
> >>>>back on track with git.
> >>>
> >>>Good!
> >>>
> >>>Cheers,
> >>>
> >>>Dejan
> >>>
> >>>>
> >>>>Regards,
> >>>>
> >>>>Yves
> >>>
> >>>>--- mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
> >>>>+++ resource-agents-prm/heartbeat/mysql 2012-05-10 11:01:02.538421042 -0400
> >>>>@@ -109,7 +109,7 @@
> >>>> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
> >>>> : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
> >>>>
> >>>>-: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
> >>>>+: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
> >>>>
> >>>> #######################################################################
> >>>>
> >>>>@@ -328,7 +328,7 @@
> >>>> </longdesc>
> >>>> <shortdesc lang="en">Sets the node attribute that determines
> >>>> whether a node is usable for clients to read from.</shortdesc>
> >>>>-<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
> >>>>+<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
> >>>> </parameter>
> >>>> </parameters>
> >>>>
> >>>>@@ -355,11 +355,13 @@
> >>>> MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
> >>>> MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
> >>>> MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
> >>>>+MYSQL_TOO_MANY_CONN_ERR=1040
> >>>>
> >>>> CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
> >>>> HOSTNAME=`uname -n`
> >>>> CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
> >>>> INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
> >>>>+CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication"
> >>>>
> >>>> #######################################################################
> >>>>
> >>>>@@ -461,14 +463,14 @@
> >>>>
> >>>> check_slave() {
> >>>> # Checks slave status
> >>>>- local rc new_master_IP
> >>>>+ local rc new_master
> >>>>
> >>>> get_slave_info
> >>>> rc=$?
> >>>>
> >>>> if [ $rc -eq 0 ]; then
> >>>> # Did we receive an error other than max_connections?
> >>>>- if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
> >>>>+ if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
> >>>> # Whoa. Replication ran into an error. This slave has
> >>>> # diverged from its master. Make sure this resource
> >>>> # doesn't restart in place.
> >>>>@@ -484,7 +486,7 @@
> >>>> fi
> >>>>
> >>>> # If we got max_connections, let's remove the vip
> >>>>- if [ $last_errno -eq 1040 ]; then
> >>>>+ if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
> >>>> set_reader_attr 0
> >>>> exit $OCF_SUCCESS
> >>>> fi
> >>>>@@ -496,9 +498,9 @@
> >>>> ocf_log warn "MySQL Slave IO threads currently not running."
> >>>>
> >>>> # Sanity check, are we at least on the right master
> >>>>- new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
> >>>>+ new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
> >>>>
> >>>>- if [ "$master_host" != "$new_master_IP" ]; then
> >>>>+ if [ "$master_host" != "$new_master" ]; then
> >>>> # Not pointing to the right master, not good, removing the VIPs
> >>>> set_reader_attr 0
> >>>>
> >>>>@@ -570,23 +572,23 @@
> >>>> }
> >>>>
> >>>> set_master() {
> >>>>- local new_master_IP master_log_file master_log_pos
> >>>>+ local new_master master_log_file master_log_pos
> >>>> local master_params
> >>>>
> >>>>- new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
> >>>>+ new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
> >>>>
> >>>> # Keep replication position
> >>>> get_slave_info
> >>>>
> >>>>- if [ "$master_log_file" -a "$new_master_IP" = "$master_host" ]; then
> >>>>+ if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
> >>>> # master_params=", MASTER_LOG_FILE='$master_log_file', \
> >>>> # MASTER_LOG_POS=$master_log_pos"
> >>>> ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
> >>>> rm -f $tmpfile
> >>>> return
> >>>> else
> >>>>- master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f2`
> >>>>- master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f3`
> >>>>+ master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2`
> >>>>+ master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3`
> >>>> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
> >>>> master_params=", MASTER_LOG_FILE='$master_log_file', \
> >>>> MASTER_LOG_POS=$master_log_pos"
> >>>>@@ -601,7 +603,7 @@
> >>>> # reset with RESET MASTER.
> >>>>
> >>>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> >>>>- -e "CHANGE MASTER TO MASTER_HOST='$new_master_IP', \
> >>>>+ -e "CHANGE MASTER TO MASTER_HOST='$new_master', \
> >>>> MASTER_USER='$OCF_RESKEY_replication_user', \
> >>>> MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
> >>>> rm -f $tmpfile
> >>>>@@ -628,15 +630,15 @@
> >>>> while true; do
> >>>> $MYSQL $MYSQL_OPTIONS_REPL \
> >>>> -e 'SHOW PROCESSLIST\G'> $tmpfile
> >>>>- if grep 'Waiting for master to send event' $tmpfile>/dev/null; then
> >>>>+ if grep -i 'Waiting for master to send event' $tmpfile>/dev/null; then
> >>>> ocf_log info "MySQL slave has finished reading master binary log"
> >>>> break
> >>>> fi
> >>>>- if grep 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
> >>>>+ if grep -i 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
> >>>> ocf_log info "Master is down, no more binary logs to come"
> >>>> break
> >>>> fi
> >>>>- if grep 'Connecting to master' $tmpfile>/dev/null; then
> >>>>+ if grep -i 'Connecting to master' $tmpfile>/dev/null; then
> >>>> ocf_log info "Master is down, no more binary logs to come"
> >>>> break
> >>>> fi
> >>>>@@ -660,7 +662,7 @@
> >>>> while true; do
> >>>> $MYSQL $MYSQL_OPTIONS_REPL \
> >>>> -e 'SHOW PROCESSLIST\G'> $tmpfile
> >>>>- if grep 'Has read all relay log' $tmpfile>/dev/null; then
> >>>>+ if grep -i 'Has read all relay log' $tmpfile>/dev/null; then
> >>>> ocf_log info "MySQL slave has finished processing relay log"
> >>>> break
> >>>> fi
> >>>>@@ -744,8 +746,15 @@
> >>>> # Determines what IP address is attached to the current host. The output of the
> >>>> # crm_attribute command looks like this:
> >>>> # scope=nodes name=IP value=10.2.2.161
> >>>>+# If the IP node attribute is not defined, fallback is to uname -n
> >>>> get_local_ip() {
> >>>>- $CRM_ATTR -l forever -n IP -q -G
> >>>>+ local IP
> >>>>+ IP=`$CRM_ATTR -l forever -n IP -q -G`
> >>>>+ if [ ! $? -eq 0 ]; then
> >>>>+ uname -n
> >>>>+ else
> >>>>+ echo $IP
> >>>>+ fi
> >>>> }
> >>>>
> >>>> #######################################################################
> >>>>@@ -783,7 +792,7 @@
> >>>>
> >>>> mysql_status() {
> >>>> if [ ! -e $OCF_RESKEY_pid ]; then
> >>>>- ocf_log err "MySQL is not running"
> >>>>+ ocf_log $1 "MySQL is not running"
> >>>> return $OCF_NOT_RUNNING;
> >>>> fi
> >>>>
> >>>>@@ -797,7 +806,7 @@
> >>>> if [ $? -eq 0 ]; then
> >>>> return $OCF_SUCCESS;
> >>>> else
> >>>>- ocf_log err "MySQL not running: removing old PID file"
> >>>>+ ocf_log $1 "MySQL not running: removing old PID file"
> >>>> rm -f $OCF_RESKEY_pid
> >>>> return $OCF_NOT_RUNNING;
> >>>> fi
> >>>>@@ -811,8 +820,9 @@
> >>>> if ocf_is_probe; then
> >>>> status_loglevel="info"
> >>>> fi
> >>>>-
> >>>>+
> >>>> mysql_status $status_loglevel
> >>>>+
> >>>> rc=$?
> >>>>
> >>>> # TODO: check max connections error
> >>>>@@ -856,7 +866,7 @@
> >>>> set_reader_attr 0
> >>>> fi
> >>>>
> >>>>- mysql_status
> >>>>+ mysql_status info
> >>>> if [ $? = $OCF_SUCCESS ]; then
> >>>> ocf_log info "MySQL already running"
> >>>> return $OCF_SUCCESS
> >>>>@@ -930,7 +940,7 @@
> >>>> # Let the CRM/LRM time us out if required.
> >>>> start_wait=1
> >>>> while [ $start_wait = 1 ]; do
> >>>>- mysql_status
> >>>>+ mysql_status info
> >>>> rc=$?
> >>>> if [ $rc = $OCF_SUCCESS ]; then
> >>>> start_wait=0
> >>>>@@ -1019,7 +1029,7 @@
> >>>> count=0
> >>>> while [ $count -lt $shutdown_timeout ]
> >>>> do
> >>>>- mysql_status
> >>>>+ mysql_status info
> >>>> rc=$?
> >>>> if [ $rc = $OCF_NOT_RUNNING ]; then
> >>>> break
> >>>>@@ -1029,7 +1039,7 @@
> >>>> ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
> >>>> done
> >>>>
> >>>>- mysql_status
> >>>>+ mysql_status info
> >>>> if [ $? != $OCF_NOT_RUNNING ]; then
> >>>> ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
> >>>> /bin/kill -KILL $pid> /dev/null
> >>>>@@ -1044,7 +1054,7 @@
> >>>> mysql_promote() {
> >>>> local master_info
> >>>>
> >>>>- if ( ! mysql_status ); then
> >>>>+ if ( ! mysql_status err ); then
> >>>> return $OCF_NOT_RUNNING
> >>>> fi
> >>>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> >>>>@@ -1053,7 +1063,7 @@
> >>>> # Set Master Info in CIB, cluster level attribute
> >>>> update_data_master_status
> >>>> master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
> >>>>- ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication -v "$master_info"
> >>>>+ ${CRM_ATTR_REPL_INFO} -v "$master_info"
> >>>> rm -f $tmpfile
> >>>>
> >>>> set_read_only off || return $OCF_ERR_GENERIC
> >>>>@@ -1070,7 +1080,7 @@
> >>>> }
> >>>>
> >>>> mysql_demote() {
> >>>>- if ! mysql_status; then
> >>>>+ if ! mysql_status err; then
> >>>> return $OCF_NOT_RUNNING
> >>>> fi
> >>>>
> >>>>@@ -1177,6 +1187,7 @@
> >>>> # The log directory must be a directory owned by root, with permissions 0700,
> >>>> # and the log must be writable and not a symlink.
> >>>> ##########################################################################
> >>>>+DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
> >>>> if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
> >>>> DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
> >>>> if [ -d "${DEBUG_LOG_DIR}" ]; then
> >>>>@@ -1214,7 +1225,7 @@
> >>>> case "$1" in
> >>>> start) mysql_start;;
> >>>> stop) mysql_stop;;
> >>>>- status) mysql_status;;
> >>>>+ status) mysql_status err;;
> >>>> monitor) mysql_monitor;;
> >>>> promote) mysql_promote;;
> >>>> demote) mysql_demote;;
> >>>
> >>>>_______________________________________________________
> >>>>Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> >>>>http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> >>>>Home Page: http://linux-ha.org/
> >>>
> >>>_______________________________________________________
> >>>Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> >>>http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> >>>Home Page: http://linux-ha.org/
> >>>
> >
> >>--- mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
> >>+++ resource-agents-prm/heartbeat/mysql 2012-05-11 08:41:20.868420414 -0400
> >>@@ -109,7 +109,7 @@
> >> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
> >> : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
> >>
> >>-: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
> >>+: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
> >>
> >> #######################################################################
> >>
> >>@@ -328,7 +328,7 @@
> >> </longdesc>
> >> <shortdesc lang="en">Sets the node attribute that determines
> >> whether a node is usable for clients to read from.</shortdesc>
> >>-<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
> >>+<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
> >> </parameter>
> >> </parameters>
> >>
> >>@@ -355,11 +355,13 @@
> >> MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
> >> MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
> >> MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
> >>+MYSQL_TOO_MANY_CONN_ERR=1040
> >>
> >> CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
> >> HOSTNAME=`uname -n`
> >> CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
> >> INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
> >>+CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication"
> >>
> >> #######################################################################
> >>
> >>@@ -461,14 +463,14 @@
> >>
> >> check_slave() {
> >> # Checks slave status
> >>- local rc new_master_IP
> >>+ local rc new_master
> >>
> >> get_slave_info
> >> rc=$?
> >>
> >> if [ $rc -eq 0 ]; then
> >> # Did we receive an error other than max_connections?
> >>- if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
> >>+ if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
> >> # Whoa. Replication ran into an error. This slave has
> >> # diverged from its master. Make sure this resource
> >> # doesn't restart in place.
> >>@@ -484,7 +486,7 @@
> >> fi
> >>
> >> # If we got max_connections, let's remove the vip
> >>- if [ $last_errno -eq 1040 ]; then
> >>+ if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
> >> set_reader_attr 0
> >> exit $OCF_SUCCESS
> >> fi
> >>@@ -496,9 +498,9 @@
> >> ocf_log warn "MySQL Slave IO threads currently not running."
> >>
> >> # Sanity check, are we at least on the right master
> >>- new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
> >>+ new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
> >>
> >>- if [ "$master_host" != "$new_master_IP" ]; then
> >>+ if [ "$master_host" != "$new_master" ]; then
> >> # Not pointing to the right master, not good, removing the VIPs
> >> set_reader_attr 0
> >>
> >>@@ -570,23 +572,23 @@
> >> }
> >>
> >> set_master() {
> >>- local new_master_IP master_log_file master_log_pos
> >>+ local new_master master_log_file master_log_pos
> >> local master_params
> >>
> >>- new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
> >>+ new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
> >>
> >> # Keep replication position
> >> get_slave_info
> >>
> >>- if [ "$master_log_file" -a "$new_master_IP" = "$master_host" ]; then
> >>+ if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
> >> # master_params=", MASTER_LOG_FILE='$master_log_file', \
> >> # MASTER_LOG_POS=$master_log_pos"
> >> ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
> >> rm -f $tmpfile
> >> return
> >> else
> >>- master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f2`
> >>- master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f3`
> >>+ master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2`
> >>+ master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3`
> >> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
> >> master_params=", MASTER_LOG_FILE='$master_log_file', \
> >> MASTER_LOG_POS=$master_log_pos"
> >>@@ -601,7 +603,7 @@
> >> # reset with RESET MASTER.
> >>
> >> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> >>- -e "CHANGE MASTER TO MASTER_HOST='$new_master_IP', \
> >>+ -e "CHANGE MASTER TO MASTER_HOST='$new_master', \
> >> MASTER_USER='$OCF_RESKEY_replication_user', \
> >> MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
> >> rm -f $tmpfile
> >>@@ -628,15 +630,15 @@
> >> while true; do
> >> $MYSQL $MYSQL_OPTIONS_REPL \
> >> -e 'SHOW PROCESSLIST\G'> $tmpfile
> >>- if grep 'Waiting for master to send event' $tmpfile>/dev/null; then
> >>+ if grep -i 'Waiting for master to send event' $tmpfile>/dev/null; then
> >> ocf_log info "MySQL slave has finished reading master binary log"
> >> break
> >> fi
> >>- if grep 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
> >>+ if grep -i 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
> >> ocf_log info "Master is down, no more binary logs to come"
> >> break
> >> fi
> >>- if grep 'Connecting to master' $tmpfile>/dev/null; then
> >>+ if grep -i 'Connecting to master' $tmpfile>/dev/null; then
> >> ocf_log info "Master is down, no more binary logs to come"
> >> break
> >> fi
> >>@@ -660,7 +662,7 @@
> >> while true; do
> >> $MYSQL $MYSQL_OPTIONS_REPL \
> >> -e 'SHOW PROCESSLIST\G'> $tmpfile
> >>- if grep 'Has read all relay log' $tmpfile>/dev/null; then
> >>+ if grep -i 'Has read all relay log' $tmpfile>/dev/null; then
> >> ocf_log info "MySQL slave has finished processing relay log"
> >> break
> >> fi
> >>@@ -744,8 +746,17 @@
> >> # Determines what IP address is attached to the current host. The output of the
> >> # crm_attribute command looks like this:
> >> # scope=nodes name=IP value=10.2.2.161
> >>+# If the REPL_MASTER_IP node attribute is not defined, fallback is to uname -n
> >>+# The REPL_MASTER_IP is the IP address that will be used for the
> >>+# change master to command.
> >> get_local_ip() {
> >>- $CRM_ATTR -l forever -n IP -q -G
> >>+ local IP
> >>+ IP=`$CRM_ATTR -l forever -n REPL_MASTER_IP -q -G`
> >>+ if [ ! $? -eq 0 ]; then
> >>+ uname -n
> >>+ else
> >>+ echo $IP
> >>+ fi
> >> }
> >>
> >> #######################################################################
> >>@@ -783,7 +794,7 @@
> >>
> >> mysql_status() {
> >> if [ ! -e $OCF_RESKEY_pid ]; then
> >>- ocf_log err "MySQL is not running"
> >>+ ocf_log $1 "MySQL is not running"
> >> return $OCF_NOT_RUNNING;
> >> fi
> >>
> >>@@ -797,7 +808,7 @@
> >> if [ $? -eq 0 ]; then
> >> return $OCF_SUCCESS;
> >> else
> >>- ocf_log err "MySQL not running: removing old PID file"
> >>+ ocf_log $1 "MySQL not running: removing old PID file"
> >> rm -f $OCF_RESKEY_pid
> >> return $OCF_NOT_RUNNING;
> >> fi
> >>@@ -811,8 +822,9 @@
> >> if ocf_is_probe; then
> >> status_loglevel="info"
> >> fi
> >>-
> >>+
> >> mysql_status $status_loglevel
> >>+
> >> rc=$?
> >>
> >> # TODO: check max connections error
> >>@@ -856,7 +868,7 @@
> >> set_reader_attr 0
> >> fi
> >>
> >>- mysql_status
> >>+ mysql_status info
> >> if [ $? = $OCF_SUCCESS ]; then
> >> ocf_log info "MySQL already running"
> >> return $OCF_SUCCESS
> >>@@ -930,7 +942,7 @@
> >> # Let the CRM/LRM time us out if required.
> >> start_wait=1
> >> while [ $start_wait = 1 ]; do
> >>- mysql_status
> >>+ mysql_status info
> >> rc=$?
> >> if [ $rc = $OCF_SUCCESS ]; then
> >> start_wait=0
> >>@@ -1019,7 +1031,7 @@
> >> count=0
> >> while [ $count -lt $shutdown_timeout ]
> >> do
> >>- mysql_status
> >>+ mysql_status info
> >> rc=$?
> >> if [ $rc = $OCF_NOT_RUNNING ]; then
> >> break
> >>@@ -1029,7 +1041,7 @@
> >> ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
> >> done
> >>
> >>- mysql_status
> >>+ mysql_status info
> >> if [ $? != $OCF_NOT_RUNNING ]; then
> >> ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
> >> /bin/kill -KILL $pid> /dev/null
> >>@@ -1044,7 +1056,7 @@
> >> mysql_promote() {
> >> local master_info
> >>
> >>- if ( ! mysql_status ); then
> >>+ if ( ! mysql_status err ); then
> >> return $OCF_NOT_RUNNING
> >> fi
> >> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> >>@@ -1053,7 +1065,7 @@
> >> # Set Master Info in CIB, cluster level attribute
> >> update_data_master_status
> >> master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
> >>- ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication -v "$master_info"
> >>+ ${CRM_ATTR_REPL_INFO} -v "$master_info"
> >> rm -f $tmpfile
> >>
> >> set_read_only off || return $OCF_ERR_GENERIC
> >>@@ -1070,7 +1082,7 @@
> >> }
> >>
> >> mysql_demote() {
> >>- if ! mysql_status; then
> >>+ if ! mysql_status err; then
> >> return $OCF_NOT_RUNNING
> >> fi
> >>
> >>@@ -1177,6 +1189,7 @@
> >> # The log directory must be a directory owned by root, with permissions 0700,
> >> # and the log must be writable and not a symlink.
> >> ##########################################################################
> >>+DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
> >> if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
> >> DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
> >> if [ -d "${DEBUG_LOG_DIR}" ]; then
> >>@@ -1214,7 +1227,7 @@
> >> case "$1" in
> >> start) mysql_start;;
> >> stop) mysql_stop;;
> >>- status) mysql_status;;
> >>+ status) mysql_status err;;
> >> monitor) mysql_monitor;;
> >> promote) mysql_promote;;
> >> demote) mysql_demote;;
> >
> >>_______________________________________________________
> >>Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> >>http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> >>Home Page: http://linux-ha.org/
> >
> >_______________________________________________________
> >Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> >http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> >Home Page: http://linux-ha.org/
> >

> --- mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
> +++ resource-agents-prm/heartbeat/mysql 2012-05-11 10:14:28.148420134 -0400
> @@ -109,7 +109,22 @@
> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
> : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
>
> -: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
> +: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
> +
> +#######################################################################
> +# Convenience variables
> +
> +MYSQL=$OCF_RESKEY_client_binary
> +MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
> +MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
> +MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
> +MYSQL_TOO_MANY_CONN_ERR=1040
> +
> +CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
> +HOSTNAME=`uname -n`
> +CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
> +INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
> +CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication"
>
> #######################################################################
>
> @@ -141,6 +156,13 @@
> Resource script for MySQL.
> May manage a standalone MySQL database, a clone set with externally
> managed replication, or a complete master/slave replication setup.
> +
> +While managing replication, the default behavior is to use uname -n
> +values in the change master to command. Other IPs can be specified
> +manually by adding a node attribute \${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP
> +giving the IP to use for replication. For example, if the mysql primitive
> +you are using is p_mysql, the attribute to set will be
> +p_mysql_MYSQL_MASTER_IP.
> </longdesc>
> <shortdesc lang="en">Manages a MySQL database instance</shortdesc>
> <parameters>
> @@ -328,7 +350,7 @@
> </longdesc>
> <shortdesc lang="en">Sets the node attribute that determines
> whether a node is usable for clients to read from.</shortdesc>
> -<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
> +<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
> </parameter>
> </parameters>
>
> @@ -348,20 +370,6 @@
> </resource-agent>
> END
> }
> -#######################################################################
> -# Convenience variables
> -
> -MYSQL=$OCF_RESKEY_client_binary
> -MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
> -MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
> -MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
> -
> -CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
> -HOSTNAME=`uname -n`
> -CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
> -INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
> -
> -#######################################################################
>
> # Convenience functions
>
> @@ -461,14 +469,14 @@
>
> check_slave() {
> # Checks slave status
> - local rc new_master_IP
> + local rc new_master
>
> get_slave_info
> rc=$?
>
> if [ $rc -eq 0 ]; then
> # Did we receive an error other than max_connections?
> - if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
> + if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
> # Whoa. Replication ran into an error. This slave has
> # diverged from its master. Make sure this resource
> # doesn't restart in place.
> @@ -484,7 +492,7 @@
> fi
>
> # If we got max_connections, let's remove the vip
> - if [ $last_errno -eq 1040 ]; then
> + if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
> set_reader_attr 0
> exit $OCF_SUCCESS
> fi
> @@ -496,9 +504,9 @@
> ocf_log warn "MySQL Slave IO threads currently not running."
>
> # Sanity check, are we at least on the right master
> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>
> - if [ "$master_host" != "$new_master_IP" ]; then
> + if [ "$master_host" != "$new_master" ]; then
> # Not pointing to the right master, not good, removing the VIPs
> set_reader_attr 0
>
> @@ -570,23 +578,23 @@
> }
>
> set_master() {
> - local new_master_IP master_log_file master_log_pos
> + local new_master master_log_file master_log_pos
> local master_params
>
> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>
> # Keep replication position
> get_slave_info
>
> - if [ "$master_log_file" -a "$new_master_IP" = "$master_host" ]; then
> + if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
> # master_params=", MASTER_LOG_FILE='$master_log_file', \
> # MASTER_LOG_POS=$master_log_pos"
> ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
> rm -f $tmpfile
> return
> else
> - master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f2`
> - master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f3`
> + master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2`
> + master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3`
> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
> master_params=", MASTER_LOG_FILE='$master_log_file', \
> MASTER_LOG_POS=$master_log_pos"
> @@ -601,7 +609,7 @@
> # reset with RESET MASTER.
>
> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> - -e "CHANGE MASTER TO MASTER_HOST='$new_master_IP', \
> + -e "CHANGE MASTER TO MASTER_HOST='$new_master', \
> MASTER_USER='$OCF_RESKEY_replication_user', \
> MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
> rm -f $tmpfile
> @@ -628,15 +636,15 @@
> while true; do
> $MYSQL $MYSQL_OPTIONS_REPL \
> -e 'SHOW PROCESSLIST\G' > $tmpfile
> - if grep 'Waiting for master to send event' $tmpfile >/dev/null; then
> + if grep -i 'Waiting for master to send event' $tmpfile >/dev/null; then
> ocf_log info "MySQL slave has finished reading master binary log"
> break
> fi
> - if grep 'Reconnecting after a failed master event read' $tmpfile >/dev/null; then
> + if grep -i 'Reconnecting after a failed master event read' $tmpfile >/dev/null; then
> ocf_log info "Master is down, no more binary logs to come"
> break
> fi
> - if grep 'Connecting to master' $tmpfile >/dev/null; then
> + if grep -i 'Connecting to master' $tmpfile >/dev/null; then
> ocf_log info "Master is down, no more binary logs to come"
> break
> fi
> @@ -660,7 +668,7 @@
> while true; do
> $MYSQL $MYSQL_OPTIONS_REPL \
> -e 'SHOW PROCESSLIST\G' > $tmpfile
> - if grep 'Has read all relay log' $tmpfile >/dev/null; then
> + if grep -i 'Has read all relay log' $tmpfile >/dev/null; then
> ocf_log info "MySQL slave has finished processing relay log"
> break
> fi
> @@ -744,8 +752,17 @@
> # Determines what IP address is attached to the current host. The output of the
> # crm_attribute command looks like this:
> # scope=nodes name=IP value=10.2.2.161
> +# If the ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP node attribute is not defined, fallback is to uname -n
> +# The ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP is the IP address that will be used for the
> +# change master to command.
> get_local_ip() {
> - $CRM_ATTR -l forever -n IP -q -G
> + local IP
> + IP=`$CRM_ATTR -l forever -n ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP -q -G`
> + if [ ! $? -eq 0 ]; then
> + uname -n
> + else
> + echo $IP
> + fi
> }
>
> #######################################################################
> @@ -783,7 +800,7 @@
>
> mysql_status() {
> if [ ! -e $OCF_RESKEY_pid ]; then
> - ocf_log err "MySQL is not running"
> + ocf_log $1 "MySQL is not running"
> return $OCF_NOT_RUNNING;
> fi
>
> @@ -797,7 +814,7 @@
> if [ $? -eq 0 ]; then
> return $OCF_SUCCESS;
> else
> - ocf_log err "MySQL not running: removing old PID file"
> + ocf_log $1 "MySQL not running: removing old PID file"
> rm -f $OCF_RESKEY_pid
> return $OCF_NOT_RUNNING;
> fi
> @@ -811,8 +828,9 @@
> if ocf_is_probe; then
> status_loglevel="info"
> fi
> -
> +
> mysql_status $status_loglevel
> +
> rc=$?
>
> # TODO: check max connections error
> @@ -856,7 +874,7 @@
> set_reader_attr 0
> fi
>
> - mysql_status
> + mysql_status info
> if [ $? = $OCF_SUCCESS ]; then
> ocf_log info "MySQL already running"
> return $OCF_SUCCESS
> @@ -930,7 +948,7 @@
> # Let the CRM/LRM time us out if required.
> start_wait=1
> while [ $start_wait = 1 ]; do
> - mysql_status
> + mysql_status info
> rc=$?
> if [ $rc = $OCF_SUCCESS ]; then
> start_wait=0
> @@ -1019,7 +1037,7 @@
> count=0
> while [ $count -lt $shutdown_timeout ]
> do
> - mysql_status
> + mysql_status info
> rc=$?
> if [ $rc = $OCF_NOT_RUNNING ]; then
> break
> @@ -1029,7 +1047,7 @@
> ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
> done
>
> - mysql_status
> + mysql_status info
> if [ $? != $OCF_NOT_RUNNING ]; then
> ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
> /bin/kill -KILL $pid > /dev/null
> @@ -1044,7 +1062,7 @@
> mysql_promote() {
> local master_info
>
> - if ( ! mysql_status ); then
> + if ( ! mysql_status err ); then
> return $OCF_NOT_RUNNING
> fi
> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> @@ -1053,7 +1071,7 @@
> # Set Master Info in CIB, cluster level attribute
> update_data_master_status
> master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
> - ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication -v "$master_info"
> + ${CRM_ATTR_REPL_INFO} -v "$master_info"
> rm -f $tmpfile
>
> set_read_only off || return $OCF_ERR_GENERIC
> @@ -1070,7 +1088,7 @@
> }
>
> mysql_demote() {
> - if ! mysql_status; then
> + if ! mysql_status err; then
> return $OCF_NOT_RUNNING
> fi
>
> @@ -1177,6 +1195,7 @@
> # The log directory must be a directory owned by root, with permissions 0700,
> # and the log must be writable and not a symlink.
> ##########################################################################
> +DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
> if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
> DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
> if [ -d "${DEBUG_LOG_DIR}" ]; then
> @@ -1214,7 +1233,7 @@
> case "$1" in
> start) mysql_start;;
> stop) mysql_stop;;
> - status) mysql_status;;
> + status) mysql_status err;;
> monitor) mysql_monitor;;
> promote) mysql_promote;;
> demote) mysql_demote;;

> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/

_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
Re: Modified patch for RA [ In reply to ]
Hi Dejan,

Le 2012-05-11 11:04, Dejan Muhamedagic a écrit :
> Hi,
>
> On Fri, May 11, 2012 at 10:25:19AM -0400, Yves Trudeau wrote:
>> Hi Dejan,
>> ok, here is the latest version using
>> ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP
>
> I'd vote for less "yelling" as cluster configurations are mostly
> lower case.

:) Ok, never thought about the impact of the upper case...

>
>> and I agree with the merits of
>> this :) I added a paragraph explaining the use of the attribute in
>> the longdesc of the meta-data.
>
> Excellent! Looks good to me. If nobody objects, we can push this
> come Monday.
>
> BTW, on what repository is this based? Can you produce a set of
> patches to be applied to upstream?

A set of patches... here's _one_ patch that brings the latest commit of
ClusterLabs/resource-agents up to my version. I also changed the
attribute name to lower case.

Regards,

Yves





>
> Cheers,
>
> Dejan
>
>
>
>
>> Regards,
>>
>> Yves
>>
>>
>> Le 2012-05-11 09:21, Dejan Muhamedagic a écrit :
>>> Hi Yves,
>>>
>>> On Fri, May 11, 2012 at 08:45:06AM -0400, Yves Trudeau wrote:
>>>> Hi Dejan,
>>>> I changed the name of the attribute to REPL_MASTER_IP
>>>
>>> Let's quote from one of the previous emails:
>>>
>>> 2. Is it possible/plausible to have more than one mysql
>>> instance? If so, then the attribute name should include the
>>> instance name. Say ${INSTANCE_NAME}_mysql_replication_IP or
>>> something to that extent. Also, it would make for a better
>>> looking configuration. "IP" doesn't really say much.
>>>
>>>> and added 2
>>>> lines of comment for the get_local_ip function.
>>>
>>> Again, a quote:
>>>
>>> 3. This attribute is part of the configuration and supposed to
>>> be set up by the user. Please document that in the meta-data.
>>>
>>> Note that one of the key words here is "meta-data." That is
>>> supposed to be documentation for the users, not for developers.
>>> Users don't normally read the code.
>>>
>>>> Is that in line with
>>>> what you want?
>>>
>>> OT:
>>>
>>> It doesn't really matter what _I_ want. We're having a discussion
>>> here on how to improve the feature. It is just by chance that I am
>>> right now the only one talking about it.
>>>
>>> Cheers,
>>>
>>> Dejan
>>>
>>>> Regards,
>>>>
>>>> Yves
>>>>
>>>> Le 2012-05-11 04:45, Dejan Muhamedagic a écrit :
>>>>> Hi Yves,
>>>>>
>>>>> It would be good not to start a new thread for the same
>>>>> discussion.
>>>>>
>>>>> On Thu, May 10, 2012 at 05:06:25PM -0400, Yves Trudeau wrote:
>>>>>> Hi Dejan,
>>>>>> here's another modified patch for the mysql agent of the commit
>>>>>> version 4c18035 (git@github.com:y-trudeau/resource-agents.git branch
>>>>>> mysql-repl). This patch implements fallback on uname -n if the node
>>>>>> IP attribute is not present and uses the instance name for the
>>>>>> replication info attribute.
>>>>>
>>>>> Hmm, it looks like there was a misunderstanding here. The
>>>>> attribute named "IP" is still named "IP" :)
>>>>>
>>>>> And we're still missing the documentation for that attribute.
>>>>>
>>>>>> I am also working with Raoul to get me
>>>>>> back on track with git.
>>>>>
>>>>> Good!
>>>>>
>>>>> Cheers,
>>>>>
>>>>> Dejan
>>>>>
>>>>>>
>>>>>> Regards,
>>>>>>
>>>>>> Yves
>>>>>
>>>>>> --- mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
>>>>>> +++ resource-agents-prm/heartbeat/mysql 2012-05-10 11:01:02.538421042 -0400
>>>>>> @@ -109,7 +109,7 @@
>>>>>> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
>>>>>> : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
>>>>>>
>>>>>> -: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
>>>>>> +: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
>>>>>>
>>>>>> #######################################################################
>>>>>>
>>>>>> @@ -328,7 +328,7 @@
>>>>>> </longdesc>
>>>>>> <shortdesc lang="en">Sets the node attribute that determines
>>>>>> whether a node is usable for clients to read from.</shortdesc>
>>>>>> -<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
>>>>>> +<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
>>>>>> </parameter>
>>>>>> </parameters>
>>>>>>
>>>>>> @@ -355,11 +355,13 @@
>>>>>> MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
>>>>>> MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
>>>>>> MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
>>>>>> +MYSQL_TOO_MANY_CONN_ERR=1040
>>>>>>
>>>>>> CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
>>>>>> HOSTNAME=`uname -n`
>>>>>> CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
>>>>>> INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
>>>>>> +CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication"
>>>>>>
>>>>>> #######################################################################
>>>>>>
>>>>>> @@ -461,14 +463,14 @@
>>>>>>
>>>>>> check_slave() {
>>>>>> # Checks slave status
>>>>>> - local rc new_master_IP
>>>>>> + local rc new_master
>>>>>>
>>>>>> get_slave_info
>>>>>> rc=$?
>>>>>>
>>>>>> if [ $rc -eq 0 ]; then
>>>>>> # Did we receive an error other than max_connections?
>>>>>> - if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
>>>>>> + if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>>>>>> # Whoa. Replication ran into an error. This slave has
>>>>>> # diverged from its master. Make sure this resource
>>>>>> # doesn't restart in place.
>>>>>> @@ -484,7 +486,7 @@
>>>>>> fi
>>>>>>
>>>>>> # If we got max_connections, let's remove the vip
>>>>>> - if [ $last_errno -eq 1040 ]; then
>>>>>> + if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>>>>>> set_reader_attr 0
>>>>>> exit $OCF_SUCCESS
>>>>>> fi
>>>>>> @@ -496,9 +498,9 @@
>>>>>> ocf_log warn "MySQL Slave IO threads currently not running."
>>>>>>
>>>>>> # Sanity check, are we at least on the right master
>>>>>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>>>>>> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>>>>>>
>>>>>> - if [ "$master_host" != "$new_master_IP" ]; then
>>>>>> + if [ "$master_host" != "$new_master" ]; then
>>>>>> # Not pointing to the right master, not good, removing the VIPs
>>>>>> set_reader_attr 0
>>>>>>
>>>>>> @@ -570,23 +572,23 @@
>>>>>> }
>>>>>>
>>>>>> set_master() {
>>>>>> - local new_master_IP master_log_file master_log_pos
>>>>>> + local new_master master_log_file master_log_pos
>>>>>> local master_params
>>>>>>
>>>>>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>>>>>> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>>>>>>
>>>>>> # Keep replication position
>>>>>> get_slave_info
>>>>>>
>>>>>> - if [ "$master_log_file" -a "$new_master_IP" = "$master_host" ]; then
>>>>>> + if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
>>>>>> # master_params=", MASTER_LOG_FILE='$master_log_file', \
>>>>>> # MASTER_LOG_POS=$master_log_pos"
>>>>>> ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
>>>>>> rm -f $tmpfile
>>>>>> return
>>>>>> else
>>>>>> - master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f2`
>>>>>> - master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f3`
>>>>>> + master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2`
>>>>>> + master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3`
>>>>>> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
>>>>>> master_params=", MASTER_LOG_FILE='$master_log_file', \
>>>>>> MASTER_LOG_POS=$master_log_pos"
>>>>>> @@ -601,7 +603,7 @@
>>>>>> # reset with RESET MASTER.
>>>>>>
>>>>>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>>>>>> - -e "CHANGE MASTER TO MASTER_HOST='$new_master_IP', \
>>>>>> + -e "CHANGE MASTER TO MASTER_HOST='$new_master', \
>>>>>> MASTER_USER='$OCF_RESKEY_replication_user', \
>>>>>> MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
>>>>>> rm -f $tmpfile
>>>>>> @@ -628,15 +630,15 @@
>>>>>> while true; do
>>>>>> $MYSQL $MYSQL_OPTIONS_REPL \
>>>>>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>>>>>> - if grep 'Waiting for master to send event' $tmpfile>/dev/null; then
>>>>>> + if grep -i 'Waiting for master to send event' $tmpfile>/dev/null; then
>>>>>> ocf_log info "MySQL slave has finished reading master binary log"
>>>>>> break
>>>>>> fi
>>>>>> - if grep 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
>>>>>> + if grep -i 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
>>>>>> ocf_log info "Master is down, no more binary logs to come"
>>>>>> break
>>>>>> fi
>>>>>> - if grep 'Connecting to master' $tmpfile>/dev/null; then
>>>>>> + if grep -i 'Connecting to master' $tmpfile>/dev/null; then
>>>>>> ocf_log info "Master is down, no more binary logs to come"
>>>>>> break
>>>>>> fi
>>>>>> @@ -660,7 +662,7 @@
>>>>>> while true; do
>>>>>> $MYSQL $MYSQL_OPTIONS_REPL \
>>>>>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>>>>>> - if grep 'Has read all relay log' $tmpfile>/dev/null; then
>>>>>> + if grep -i 'Has read all relay log' $tmpfile>/dev/null; then
>>>>>> ocf_log info "MySQL slave has finished processing relay log"
>>>>>> break
>>>>>> fi
>>>>>> @@ -744,8 +746,15 @@
>>>>>> # Determines what IP address is attached to the current host. The output of the
>>>>>> # crm_attribute command looks like this:
>>>>>> # scope=nodes name=IP value=10.2.2.161
>>>>>> +# If the IP node attribute is not defined, fallback is to uname -n
>>>>>> get_local_ip() {
>>>>>> - $CRM_ATTR -l forever -n IP -q -G
>>>>>> + local IP
>>>>>> + IP=`$CRM_ATTR -l forever -n IP -q -G`
>>>>>> + if [ ! $? -eq 0 ]; then
>>>>>> + uname -n
>>>>>> + else
>>>>>> + echo $IP
>>>>>> + fi
>>>>>> }
>>>>>>
>>>>>> #######################################################################
>>>>>> @@ -783,7 +792,7 @@
>>>>>>
>>>>>> mysql_status() {
>>>>>> if [ ! -e $OCF_RESKEY_pid ]; then
>>>>>> - ocf_log err "MySQL is not running"
>>>>>> + ocf_log $1 "MySQL is not running"
>>>>>> return $OCF_NOT_RUNNING;
>>>>>> fi
>>>>>>
>>>>>> @@ -797,7 +806,7 @@
>>>>>> if [ $? -eq 0 ]; then
>>>>>> return $OCF_SUCCESS;
>>>>>> else
>>>>>> - ocf_log err "MySQL not running: removing old PID file"
>>>>>> + ocf_log $1 "MySQL not running: removing old PID file"
>>>>>> rm -f $OCF_RESKEY_pid
>>>>>> return $OCF_NOT_RUNNING;
>>>>>> fi
>>>>>> @@ -811,8 +820,9 @@
>>>>>> if ocf_is_probe; then
>>>>>> status_loglevel="info"
>>>>>> fi
>>>>>> -
>>>>>> +
>>>>>> mysql_status $status_loglevel
>>>>>> +
>>>>>> rc=$?
>>>>>>
>>>>>> # TODO: check max connections error
>>>>>> @@ -856,7 +866,7 @@
>>>>>> set_reader_attr 0
>>>>>> fi
>>>>>>
>>>>>> - mysql_status
>>>>>> + mysql_status info
>>>>>> if [ $? = $OCF_SUCCESS ]; then
>>>>>> ocf_log info "MySQL already running"
>>>>>> return $OCF_SUCCESS
>>>>>> @@ -930,7 +940,7 @@
>>>>>> # Let the CRM/LRM time us out if required.
>>>>>> start_wait=1
>>>>>> while [ $start_wait = 1 ]; do
>>>>>> - mysql_status
>>>>>> + mysql_status info
>>>>>> rc=$?
>>>>>> if [ $rc = $OCF_SUCCESS ]; then
>>>>>> start_wait=0
>>>>>> @@ -1019,7 +1029,7 @@
>>>>>> count=0
>>>>>> while [ $count -lt $shutdown_timeout ]
>>>>>> do
>>>>>> - mysql_status
>>>>>> + mysql_status info
>>>>>> rc=$?
>>>>>> if [ $rc = $OCF_NOT_RUNNING ]; then
>>>>>> break
>>>>>> @@ -1029,7 +1039,7 @@
>>>>>> ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
>>>>>> done
>>>>>>
>>>>>> - mysql_status
>>>>>> + mysql_status info
>>>>>> if [ $? != $OCF_NOT_RUNNING ]; then
>>>>>> ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
>>>>>> /bin/kill -KILL $pid> /dev/null
>>>>>> @@ -1044,7 +1054,7 @@
>>>>>> mysql_promote() {
>>>>>> local master_info
>>>>>>
>>>>>> - if ( ! mysql_status ); then
>>>>>> + if ( ! mysql_status err ); then
>>>>>> return $OCF_NOT_RUNNING
>>>>>> fi
>>>>>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>>>>>> @@ -1053,7 +1063,7 @@
>>>>>> # Set Master Info in CIB, cluster level attribute
>>>>>> update_data_master_status
>>>>>> master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
>>>>>> - ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication -v "$master_info"
>>>>>> + ${CRM_ATTR_REPL_INFO} -v "$master_info"
>>>>>> rm -f $tmpfile
>>>>>>
>>>>>> set_read_only off || return $OCF_ERR_GENERIC
>>>>>> @@ -1070,7 +1080,7 @@
>>>>>> }
>>>>>>
>>>>>> mysql_demote() {
>>>>>> - if ! mysql_status; then
>>>>>> + if ! mysql_status err; then
>>>>>> return $OCF_NOT_RUNNING
>>>>>> fi
>>>>>>
>>>>>> @@ -1177,6 +1187,7 @@
>>>>>> # The log directory must be a directory owned by root, with permissions 0700,
>>>>>> # and the log must be writable and not a symlink.
>>>>>> ##########################################################################
>>>>>> +DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
>>>>>> if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
>>>>>> DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
>>>>>> if [ -d "${DEBUG_LOG_DIR}" ]; then
>>>>>> @@ -1214,7 +1225,7 @@
>>>>>> case "$1" in
>>>>>> start) mysql_start;;
>>>>>> stop) mysql_stop;;
>>>>>> - status) mysql_status;;
>>>>>> + status) mysql_status err;;
>>>>>> monitor) mysql_monitor;;
>>>>>> promote) mysql_promote;;
>>>>>> demote) mysql_demote;;
>>>>>
>>>>>> _______________________________________________________
>>>>>> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
>>>>>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
>>>>>> Home Page: http://linux-ha.org/
>>>>>
>>>>> _______________________________________________________
>>>>> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
>>>>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
>>>>> Home Page: http://linux-ha.org/
>>>>>
>>>
>>>> --- mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
>>>> +++ resource-agents-prm/heartbeat/mysql 2012-05-11 08:41:20.868420414 -0400
>>>> @@ -109,7 +109,7 @@
>>>> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
>>>> : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
>>>>
>>>> -: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
>>>> +: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
>>>>
>>>> #######################################################################
>>>>
>>>> @@ -328,7 +328,7 @@
>>>> </longdesc>
>>>> <shortdesc lang="en">Sets the node attribute that determines
>>>> whether a node is usable for clients to read from.</shortdesc>
>>>> -<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
>>>> +<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
>>>> </parameter>
>>>> </parameters>
>>>>
>>>> @@ -355,11 +355,13 @@
>>>> MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
>>>> MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
>>>> MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
>>>> +MYSQL_TOO_MANY_CONN_ERR=1040
>>>>
>>>> CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
>>>> HOSTNAME=`uname -n`
>>>> CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
>>>> INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
>>>> +CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication"
>>>>
>>>> #######################################################################
>>>>
>>>> @@ -461,14 +463,14 @@
>>>>
>>>> check_slave() {
>>>> # Checks slave status
>>>> - local rc new_master_IP
>>>> + local rc new_master
>>>>
>>>> get_slave_info
>>>> rc=$?
>>>>
>>>> if [ $rc -eq 0 ]; then
>>>> # Did we receive an error other than max_connections?
>>>> - if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
>>>> + if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>>>> # Whoa. Replication ran into an error. This slave has
>>>> # diverged from its master. Make sure this resource
>>>> # doesn't restart in place.
>>>> @@ -484,7 +486,7 @@
>>>> fi
>>>>
>>>> # If we got max_connections, let's remove the vip
>>>> - if [ $last_errno -eq 1040 ]; then
>>>> + if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>>>> set_reader_attr 0
>>>> exit $OCF_SUCCESS
>>>> fi
>>>> @@ -496,9 +498,9 @@
>>>> ocf_log warn "MySQL Slave IO threads currently not running."
>>>>
>>>> # Sanity check, are we at least on the right master
>>>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>>>> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>>>>
>>>> - if [ "$master_host" != "$new_master_IP" ]; then
>>>> + if [ "$master_host" != "$new_master" ]; then
>>>> # Not pointing to the right master, not good, removing the VIPs
>>>> set_reader_attr 0
>>>>
>>>> @@ -570,23 +572,23 @@
>>>> }
>>>>
>>>> set_master() {
>>>> - local new_master_IP master_log_file master_log_pos
>>>> + local new_master master_log_file master_log_pos
>>>> local master_params
>>>>
>>>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>>>> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>>>>
>>>> # Keep replication position
>>>> get_slave_info
>>>>
>>>> - if [ "$master_log_file" -a "$new_master_IP" = "$master_host" ]; then
>>>> + if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
>>>> # master_params=", MASTER_LOG_FILE='$master_log_file', \
>>>> # MASTER_LOG_POS=$master_log_pos"
>>>> ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
>>>> rm -f $tmpfile
>>>> return
>>>> else
>>>> - master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f2`
>>>> - master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f3`
>>>> + master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2`
>>>> + master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3`
>>>> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
>>>> master_params=", MASTER_LOG_FILE='$master_log_file', \
>>>> MASTER_LOG_POS=$master_log_pos"
>>>> @@ -601,7 +603,7 @@
>>>> # reset with RESET MASTER.
>>>>
>>>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>>>> - -e "CHANGE MASTER TO MASTER_HOST='$new_master_IP', \
>>>> + -e "CHANGE MASTER TO MASTER_HOST='$new_master', \
>>>> MASTER_USER='$OCF_RESKEY_replication_user', \
>>>> MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
>>>> rm -f $tmpfile
>>>> @@ -628,15 +630,15 @@
>>>> while true; do
>>>> $MYSQL $MYSQL_OPTIONS_REPL \
>>>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>>>> - if grep 'Waiting for master to send event' $tmpfile>/dev/null; then
>>>> + if grep -i 'Waiting for master to send event' $tmpfile>/dev/null; then
>>>> ocf_log info "MySQL slave has finished reading master binary log"
>>>> break
>>>> fi
>>>> - if grep 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
>>>> + if grep -i 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
>>>> ocf_log info "Master is down, no more binary logs to come"
>>>> break
>>>> fi
>>>> - if grep 'Connecting to master' $tmpfile>/dev/null; then
>>>> + if grep -i 'Connecting to master' $tmpfile>/dev/null; then
>>>> ocf_log info "Master is down, no more binary logs to come"
>>>> break
>>>> fi
>>>> @@ -660,7 +662,7 @@
>>>> while true; do
>>>> $MYSQL $MYSQL_OPTIONS_REPL \
>>>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>>>> - if grep 'Has read all relay log' $tmpfile>/dev/null; then
>>>> + if grep -i 'Has read all relay log' $tmpfile>/dev/null; then
>>>> ocf_log info "MySQL slave has finished processing relay log"
>>>> break
>>>> fi
>>>> @@ -744,8 +746,17 @@
>>>> # Determines what IP address is attached to the current host. The output of the
>>>> # crm_attribute command looks like this:
>>>> # scope=nodes name=IP value=10.2.2.161
>>>> +# If the REPL_MASTER_IP node attribute is not defined, fallback is to uname -n
>>>> +# The REPL_MASTER_IP is the IP address that will be used for the
>>>> +# change master to command.
>>>> get_local_ip() {
>>>> - $CRM_ATTR -l forever -n IP -q -G
>>>> + local IP
>>>> + IP=`$CRM_ATTR -l forever -n REPL_MASTER_IP -q -G`
>>>> + if [ ! $? -eq 0 ]; then
>>>> + uname -n
>>>> + else
>>>> + echo $IP
>>>> + fi
>>>> }
>>>>
>>>> #######################################################################
>>>> @@ -783,7 +794,7 @@
>>>>
>>>> mysql_status() {
>>>> if [ ! -e $OCF_RESKEY_pid ]; then
>>>> - ocf_log err "MySQL is not running"
>>>> + ocf_log $1 "MySQL is not running"
>>>> return $OCF_NOT_RUNNING;
>>>> fi
>>>>
>>>> @@ -797,7 +808,7 @@
>>>> if [ $? -eq 0 ]; then
>>>> return $OCF_SUCCESS;
>>>> else
>>>> - ocf_log err "MySQL not running: removing old PID file"
>>>> + ocf_log $1 "MySQL not running: removing old PID file"
>>>> rm -f $OCF_RESKEY_pid
>>>> return $OCF_NOT_RUNNING;
>>>> fi
>>>> @@ -811,8 +822,9 @@
>>>> if ocf_is_probe; then
>>>> status_loglevel="info"
>>>> fi
>>>> -
>>>> +
>>>> mysql_status $status_loglevel
>>>> +
>>>> rc=$?
>>>>
>>>> # TODO: check max connections error
>>>> @@ -856,7 +868,7 @@
>>>> set_reader_attr 0
>>>> fi
>>>>
>>>> - mysql_status
>>>> + mysql_status info
>>>> if [ $? = $OCF_SUCCESS ]; then
>>>> ocf_log info "MySQL already running"
>>>> return $OCF_SUCCESS
>>>> @@ -930,7 +942,7 @@
>>>> # Let the CRM/LRM time us out if required.
>>>> start_wait=1
>>>> while [ $start_wait = 1 ]; do
>>>> - mysql_status
>>>> + mysql_status info
>>>> rc=$?
>>>> if [ $rc = $OCF_SUCCESS ]; then
>>>> start_wait=0
>>>> @@ -1019,7 +1031,7 @@
>>>> count=0
>>>> while [ $count -lt $shutdown_timeout ]
>>>> do
>>>> - mysql_status
>>>> + mysql_status info
>>>> rc=$?
>>>> if [ $rc = $OCF_NOT_RUNNING ]; then
>>>> break
>>>> @@ -1029,7 +1041,7 @@
>>>> ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
>>>> done
>>>>
>>>> - mysql_status
>>>> + mysql_status info
>>>> if [ $? != $OCF_NOT_RUNNING ]; then
>>>> ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
>>>> /bin/kill -KILL $pid> /dev/null
>>>> @@ -1044,7 +1056,7 @@
>>>> mysql_promote() {
>>>> local master_info
>>>>
>>>> - if ( ! mysql_status ); then
>>>> + if ( ! mysql_status err ); then
>>>> return $OCF_NOT_RUNNING
>>>> fi
>>>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>>>> @@ -1053,7 +1065,7 @@
>>>> # Set Master Info in CIB, cluster level attribute
>>>> update_data_master_status
>>>> master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
>>>> - ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication -v "$master_info"
>>>> + ${CRM_ATTR_REPL_INFO} -v "$master_info"
>>>> rm -f $tmpfile
>>>>
>>>> set_read_only off || return $OCF_ERR_GENERIC
>>>> @@ -1070,7 +1082,7 @@
>>>> }
>>>>
>>>> mysql_demote() {
>>>> - if ! mysql_status; then
>>>> + if ! mysql_status err; then
>>>> return $OCF_NOT_RUNNING
>>>> fi
>>>>
>>>> @@ -1177,6 +1189,7 @@
>>>> # The log directory must be a directory owned by root, with permissions 0700,
>>>> # and the log must be writable and not a symlink.
>>>> ##########################################################################
>>>> +DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
>>>> if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
>>>> DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
>>>> if [ -d "${DEBUG_LOG_DIR}" ]; then
>>>> @@ -1214,7 +1227,7 @@
>>>> case "$1" in
>>>> start) mysql_start;;
>>>> stop) mysql_stop;;
>>>> - status) mysql_status;;
>>>> + status) mysql_status err;;
>>>> monitor) mysql_monitor;;
>>>> promote) mysql_promote;;
>>>> demote) mysql_demote;;
>>>
>>>> _______________________________________________________
>>>> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
>>>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
>>>> Home Page: http://linux-ha.org/
>>>
>>> _______________________________________________________
>>> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
>>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
>>> Home Page: http://linux-ha.org/
>>>
>
>> --- mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
>> +++ resource-agents-prm/heartbeat/mysql 2012-05-11 10:14:28.148420134 -0400
>> @@ -109,7 +109,22 @@
>> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
>> : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
>>
>> -: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
>> +: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
>> +
>> +#######################################################################
>> +# Convenience variables
>> +
>> +MYSQL=$OCF_RESKEY_client_binary
>> +MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
>> +MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
>> +MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
>> +MYSQL_TOO_MANY_CONN_ERR=1040
>> +
>> +CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
>> +HOSTNAME=`uname -n`
>> +CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
>> +INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
>> +CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication"
>>
>> #######################################################################
>>
>> @@ -141,6 +156,13 @@
>> Resource script for MySQL.
>> May manage a standalone MySQL database, a clone set with externally
>> managed replication, or a complete master/slave replication setup.
>> +
>> +While managing replication, the default behavior is to use uname -n
>> +values in the change master to command. Other IPs can be specified
>> +manually by adding a node attribute \${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP
>> +giving the IP to use for replication. For example, if the mysql primitive
>> +you are using is p_mysql, the attribute to set will be
>> +p_mysql_MYSQL_MASTER_IP.
>> </longdesc>
>> <shortdesc lang="en">Manages a MySQL database instance</shortdesc>
>> <parameters>
>> @@ -328,7 +350,7 @@
>> </longdesc>
>> <shortdesc lang="en">Sets the node attribute that determines
>> whether a node is usable for clients to read from.</shortdesc>
>> -<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
>> +<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
>> </parameter>
>> </parameters>
>>
>> @@ -348,20 +370,6 @@
>> </resource-agent>
>> END
>> }
>> -#######################################################################
>> -# Convenience variables
>> -
>> -MYSQL=$OCF_RESKEY_client_binary
>> -MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
>> -MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
>> -MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
>> -
>> -CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
>> -HOSTNAME=`uname -n`
>> -CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
>> -INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
>> -
>> -#######################################################################
>>
>> # Convenience functions
>>
>> @@ -461,14 +469,14 @@
>>
>> check_slave() {
>> # Checks slave status
>> - local rc new_master_IP
>> + local rc new_master
>>
>> get_slave_info
>> rc=$?
>>
>> if [ $rc -eq 0 ]; then
>> # Did we receive an error other than max_connections?
>> - if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
>> + if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>> # Whoa. Replication ran into an error. This slave has
>> # diverged from its master. Make sure this resource
>> # doesn't restart in place.
>> @@ -484,7 +492,7 @@
>> fi
>>
>> # If we got max_connections, let's remove the vip
>> - if [ $last_errno -eq 1040 ]; then
>> + if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>> set_reader_attr 0
>> exit $OCF_SUCCESS
>> fi
>> @@ -496,9 +504,9 @@
>> ocf_log warn "MySQL Slave IO threads currently not running."
>>
>> # Sanity check, are we at least on the right master
>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>>
>> - if [ "$master_host" != "$new_master_IP" ]; then
>> + if [ "$master_host" != "$new_master" ]; then
>> # Not pointing to the right master, not good, removing the VIPs
>> set_reader_attr 0
>>
>> @@ -570,23 +578,23 @@
>> }
>>
>> set_master() {
>> - local new_master_IP master_log_file master_log_pos
>> + local new_master master_log_file master_log_pos
>> local master_params
>>
>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>>
>> # Keep replication position
>> get_slave_info
>>
>> - if [ "$master_log_file" -a "$new_master_IP" = "$master_host" ]; then
>> + if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
>> # master_params=", MASTER_LOG_FILE='$master_log_file', \
>> # MASTER_LOG_POS=$master_log_pos"
>> ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
>> rm -f $tmpfile
>> return
>> else
>> - master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f2`
>> - master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f3`
>> + master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2`
>> + master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3`
>> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
>> master_params=", MASTER_LOG_FILE='$master_log_file', \
>> MASTER_LOG_POS=$master_log_pos"
>> @@ -601,7 +609,7 @@
>> # reset with RESET MASTER.
>>
>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>> - -e "CHANGE MASTER TO MASTER_HOST='$new_master_IP', \
>> + -e "CHANGE MASTER TO MASTER_HOST='$new_master', \
>> MASTER_USER='$OCF_RESKEY_replication_user', \
>> MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
>> rm -f $tmpfile
>> @@ -628,15 +636,15 @@
>> while true; do
>> $MYSQL $MYSQL_OPTIONS_REPL \
>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>> - if grep 'Waiting for master to send event' $tmpfile>/dev/null; then
>> + if grep -i 'Waiting for master to send event' $tmpfile>/dev/null; then
>> ocf_log info "MySQL slave has finished reading master binary log"
>> break
>> fi
>> - if grep 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
>> + if grep -i 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
>> ocf_log info "Master is down, no more binary logs to come"
>> break
>> fi
>> - if grep 'Connecting to master' $tmpfile>/dev/null; then
>> + if grep -i 'Connecting to master' $tmpfile>/dev/null; then
>> ocf_log info "Master is down, no more binary logs to come"
>> break
>> fi
>> @@ -660,7 +668,7 @@
>> while true; do
>> $MYSQL $MYSQL_OPTIONS_REPL \
>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>> - if grep 'Has read all relay log' $tmpfile>/dev/null; then
>> + if grep -i 'Has read all relay log' $tmpfile>/dev/null; then
>> ocf_log info "MySQL slave has finished processing relay log"
>> break
>> fi
>> @@ -744,8 +752,17 @@
>> # Determines what IP address is attached to the current host. The output of the
>> # crm_attribute command looks like this:
>> # scope=nodes name=IP value=10.2.2.161
>> +# If the ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP node attribute is not defined, fallback is to uname -n
>> +# The ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP is the IP address that will be used for the
>> +# change master to command.
>> get_local_ip() {
>> - $CRM_ATTR -l forever -n IP -q -G
>> + local IP
>> + IP=`$CRM_ATTR -l forever -n ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP -q -G`
>> + if [ ! $? -eq 0 ]; then
>> + uname -n
>> + else
>> + echo $IP
>> + fi
>> }
>>
>> #######################################################################
>> @@ -783,7 +800,7 @@
>>
>> mysql_status() {
>> if [ ! -e $OCF_RESKEY_pid ]; then
>> - ocf_log err "MySQL is not running"
>> + ocf_log $1 "MySQL is not running"
>> return $OCF_NOT_RUNNING;
>> fi
>>
>> @@ -797,7 +814,7 @@
>> if [ $? -eq 0 ]; then
>> return $OCF_SUCCESS;
>> else
>> - ocf_log err "MySQL not running: removing old PID file"
>> + ocf_log $1 "MySQL not running: removing old PID file"
>> rm -f $OCF_RESKEY_pid
>> return $OCF_NOT_RUNNING;
>> fi
>> @@ -811,8 +828,9 @@
>> if ocf_is_probe; then
>> status_loglevel="info"
>> fi
>> -
>> +
>> mysql_status $status_loglevel
>> +
>> rc=$?
>>
>> # TODO: check max connections error
>> @@ -856,7 +874,7 @@
>> set_reader_attr 0
>> fi
>>
>> - mysql_status
>> + mysql_status info
>> if [ $? = $OCF_SUCCESS ]; then
>> ocf_log info "MySQL already running"
>> return $OCF_SUCCESS
>> @@ -930,7 +948,7 @@
>> # Let the CRM/LRM time us out if required.
>> start_wait=1
>> while [ $start_wait = 1 ]; do
>> - mysql_status
>> + mysql_status info
>> rc=$?
>> if [ $rc = $OCF_SUCCESS ]; then
>> start_wait=0
>> @@ -1019,7 +1037,7 @@
>> count=0
>> while [ $count -lt $shutdown_timeout ]
>> do
>> - mysql_status
>> + mysql_status info
>> rc=$?
>> if [ $rc = $OCF_NOT_RUNNING ]; then
>> break
>> @@ -1029,7 +1047,7 @@
>> ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
>> done
>>
>> - mysql_status
>> + mysql_status info
>> if [ $? != $OCF_NOT_RUNNING ]; then
>> ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
>> /bin/kill -KILL $pid> /dev/null
>> @@ -1044,7 +1062,7 @@
>> mysql_promote() {
>> local master_info
>>
>> - if ( ! mysql_status ); then
>> + if ( ! mysql_status err ); then
>> return $OCF_NOT_RUNNING
>> fi
>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>> @@ -1053,7 +1071,7 @@
>> # Set Master Info in CIB, cluster level attribute
>> update_data_master_status
>> master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
>> - ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication -v "$master_info"
>> + ${CRM_ATTR_REPL_INFO} -v "$master_info"
>> rm -f $tmpfile
>>
>> set_read_only off || return $OCF_ERR_GENERIC
>> @@ -1070,7 +1088,7 @@
>> }
>>
>> mysql_demote() {
>> - if ! mysql_status; then
>> + if ! mysql_status err; then
>> return $OCF_NOT_RUNNING
>> fi
>>
>> @@ -1177,6 +1195,7 @@
>> # The log directory must be a directory owned by root, with permissions 0700,
>> # and the log must be writable and not a symlink.
>> ##########################################################################
>> +DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
>> if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
>> DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
>> if [ -d "${DEBUG_LOG_DIR}" ]; then
>> @@ -1214,7 +1233,7 @@
>> case "$1" in
>> start) mysql_start;;
>> stop) mysql_stop;;
>> - status) mysql_status;;
>> + status) mysql_status err;;
>> monitor) mysql_monitor;;
>> promote) mysql_promote;;
>> demote) mysql_demote;;
>
>> _______________________________________________________
>> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
>> Home Page: http://linux-ha.org/
>
> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/
>
Re: Modified patch for RA [ In reply to ]
Hi Yves,

On Fri, May 11, 2012 at 11:27:23AM -0400, Yves Trudeau wrote:
> Hi Dejan,
>
> Le 2012-05-11 11:04, Dejan Muhamedagic a écrit :
>> Hi,
>>
>> On Fri, May 11, 2012 at 10:25:19AM -0400, Yves Trudeau wrote:
>>> Hi Dejan,
>>> ok, here's the latest version using
>>> ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP
>>
>> I'd vote for less "yelling" as cluster configurations are mostly
>> lower case.
>
> :) Ok, never thought about the impact of the upper case...
>
>>
>>> and I agree with the merits of
>>> this :) I added a paragraph explaining the use of the attribute in
>>> the longdesc of the meta-data.
>>
>> Excellent! Looks good to me. If nobody objects, we can push this
>> come Monday.
>>
>> BTW, on what repository is this based? Can you produce a set of
>> patches to be applied to upstream?
>
> set of patches... here's _one_ patch bringing the latest commit of
> ClusterLabs/resource-agents to my version. I also lowered the case of
> the attribute.

Good!

One thing I forgot though: Can you also provide a commit
message, i.e. a patch description (what changed, what's new,
etc.).

Cheers,

Dejan


> Regards,
>
> Yves
>
>
>
>
>
>>
>> Cheers,
>>
>> Dejan
>>
>>
>>
>>
>>> Regards,
>>>
>>> Yves
>>>
>>>
>>> Le 2012-05-11 09:21, Dejan Muhamedagic a écrit :
>>>> Hi Yves,
>>>>
>>>> On Fri, May 11, 2012 at 08:45:06AM -0400, Yves Trudeau wrote:
>>>>> Hi Dejan,
>>>>> I changed the name of the attribute to REPL_MASTER_IP
>>>>
>>>> Let's quote from one of the previous emails:
>>>>
>>>> 2. Is it possible/plausible to have more than one mysql
>>>> instance? If so, then the attribute name should include the
>>>> instance name. Say ${INSTANCE_NAME}_mysql_replication_IP or
>>>> something to that extent. Also, it would make for a better
>>>> looking configuration. "IP" doesn't really say much.
>>>>
>>>>> and added 2
>>>>> lines of comment for the get_local_ip function.
>>>>
>>>> Again, a quote:
>>>>
>>>> 3. This attribute is part of the configuration and supposed to
>>>> be set up by the user. Please document that in the meta-data.
>>>>
>>>> Note that one of the key words here is "meta-data." That is
>>>> supposed to be documentation for the users, not for developers.
>>>> Users don't normally read the code.
>>>>
>>>>> Is that in line with
>>>>> what you want?
>>>>
>>>> OT:
>>>>
>>>> It doesn't really matter what _I_ want. We're having a discussion
>>>> here on how to improve the feature. It is just by chance that I am
>>>> right now the only one talking about it.
>>>>
>>>> Cheers,
>>>>
>>>> Dejan
>>>>
>>>>> Regards,
>>>>>
>>>>> Yves
>>>>>
>>>>> Le 2012-05-11 04:45, Dejan Muhamedagic a écrit :
>>>>>> Hi Yves,
>>>>>>
>>>>>> It would be good not to start a new thread for the same
>>>>>> discussion.
>>>>>>
>>>>>> On Thu, May 10, 2012 at 05:06:25PM -0400, Yves Trudeau wrote:
>>>>>>> Hi Dejan,
>>>>>>> here's another modified patch for the mysql agent of the commit
>>>>>>> version 4c18035 (git@github.com:y-trudeau/resource-agents.git branch
>>>>>>> mysql-repl). This patch implements fallback on uname -n if the node
>>>>>>> IP attribute is not present and uses the instance name for the
>>>>>>> replication info attribute.
>>>>>>
>>>>>> Hmm, it looks like there was a misunderstanding here. The
>>>>>> attribute named "IP" is still named "IP" :)
>>>>>>
>>>>>> And we're still missing the documentation for that attribute.
>>>>>>
>>>>>>> I am also working with Raoul to get me
>>>>>>> back on track with git.
>>>>>>
>>>>>> Good!
>>>>>>
>>>>>> Cheers,
>>>>>>
>>>>>> Dejan
>>>>>>
>>>>>>>
>>>>>>> Regards,
>>>>>>>
>>>>>>> Yves
>>>>>>
>>>>>>> --- mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
>>>>>>> +++ resource-agents-prm/heartbeat/mysql 2012-05-10 11:01:02.538421042 -0400
>>>>>>> @@ -109,7 +109,7 @@
>>>>>>> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
>>>>>>> : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
>>>>>>>
>>>>>>> -: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
>>>>>>> +: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
>>>>>>>
>>>>>>> #######################################################################
>>>>>>>
>>>>>>> @@ -328,7 +328,7 @@
>>>>>>> </longdesc>
>>>>>>> <shortdesc lang="en">Sets the node attribute that determines
>>>>>>> whether a node is usable for clients to read from.</shortdesc>
>>>>>>> -<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
>>>>>>> +<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
>>>>>>> </parameter>
>>>>>>> </parameters>
>>>>>>>
>>>>>>> @@ -355,11 +355,13 @@
>>>>>>> MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
>>>>>>> MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
>>>>>>> MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
>>>>>>> +MYSQL_TOO_MANY_CONN_ERR=1040
>>>>>>>
>>>>>>> CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
>>>>>>> HOSTNAME=`uname -n`
>>>>>>> CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
>>>>>>> INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
>>>>>>> +CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication"
>>>>>>>
>>>>>>> #######################################################################
>>>>>>>
>>>>>>> @@ -461,14 +463,14 @@
>>>>>>>
>>>>>>> check_slave() {
>>>>>>> # Checks slave status
>>>>>>> - local rc new_master_IP
>>>>>>> + local rc new_master
>>>>>>>
>>>>>>> get_slave_info
>>>>>>> rc=$?
>>>>>>>
>>>>>>> if [ $rc -eq 0 ]; then
>>>>>>> # Did we receive an error other than max_connections?
>>>>>>> - if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
>>>>>>> + if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>>>>>>> # Whoa. Replication ran into an error. This slave has
>>>>>>> # diverged from its master. Make sure this resource
>>>>>>> # doesn't restart in place.
>>>>>>> @@ -484,7 +486,7 @@
>>>>>>> fi
>>>>>>>
>>>>>>> # If we got max_connections, let's remove the vip
>>>>>>> - if [ $last_errno -eq 1040 ]; then
>>>>>>> + if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>>>>>>> set_reader_attr 0
>>>>>>> exit $OCF_SUCCESS
>>>>>>> fi
>>>>>>> @@ -496,9 +498,9 @@
>>>>>>> ocf_log warn "MySQL Slave IO threads currently not running."
>>>>>>>
>>>>>>> # Sanity check, are we at least on the right master
>>>>>>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>>>>>>> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>>>>>>>
>>>>>>> - if [ "$master_host" != "$new_master_IP" ]; then
>>>>>>> + if [ "$master_host" != "$new_master" ]; then
>>>>>>> # Not pointing to the right master, not good, removing the VIPs
>>>>>>> set_reader_attr 0
>>>>>>>
>>>>>>> @@ -570,23 +572,23 @@
>>>>>>> }
>>>>>>>
>>>>>>> set_master() {
>>>>>>> - local new_master_IP master_log_file master_log_pos
>>>>>>> + local new_master master_log_file master_log_pos
>>>>>>> local master_params
>>>>>>>
>>>>>>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>>>>>>> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>>>>>>>
>>>>>>> # Keep replication position
>>>>>>> get_slave_info
>>>>>>>
>>>>>>> - if [ "$master_log_file" -a "$new_master_IP" = "$master_host" ]; then
>>>>>>> + if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
>>>>>>> # master_params=", MASTER_LOG_FILE='$master_log_file', \
>>>>>>> # MASTER_LOG_POS=$master_log_pos"
>>>>>>> ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
>>>>>>> rm -f $tmpfile
>>>>>>> return
>>>>>>> else
>>>>>>> - master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f2`
>>>>>>> - master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f3`
>>>>>>> + master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2`
>>>>>>> + master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3`
>>>>>>> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
>>>>>>> master_params=", MASTER_LOG_FILE='$master_log_file', \
>>>>>>> MASTER_LOG_POS=$master_log_pos"
>>>>>>> @@ -601,7 +603,7 @@
>>>>>>> # reset with RESET MASTER.
>>>>>>>
>>>>>>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>>>>>>> - -e "CHANGE MASTER TO MASTER_HOST='$new_master_IP', \
>>>>>>> + -e "CHANGE MASTER TO MASTER_HOST='$new_master', \
>>>>>>> MASTER_USER='$OCF_RESKEY_replication_user', \
>>>>>>> MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
>>>>>>> rm -f $tmpfile
>>>>>>> @@ -628,15 +630,15 @@
>>>>>>> while true; do
>>>>>>> $MYSQL $MYSQL_OPTIONS_REPL \
>>>>>>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>>>>>>> - if grep 'Waiting for master to send event' $tmpfile>/dev/null; then
>>>>>>> + if grep -i 'Waiting for master to send event' $tmpfile>/dev/null; then
>>>>>>> ocf_log info "MySQL slave has finished reading master binary log"
>>>>>>> break
>>>>>>> fi
>>>>>>> - if grep 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
>>>>>>> + if grep -i 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
>>>>>>> ocf_log info "Master is down, no more binary logs to come"
>>>>>>> break
>>>>>>> fi
>>>>>>> - if grep 'Connecting to master' $tmpfile>/dev/null; then
>>>>>>> + if grep -i 'Connecting to master' $tmpfile>/dev/null; then
>>>>>>> ocf_log info "Master is down, no more binary logs to come"
>>>>>>> break
>>>>>>> fi
>>>>>>> @@ -660,7 +662,7 @@
>>>>>>> while true; do
>>>>>>> $MYSQL $MYSQL_OPTIONS_REPL \
>>>>>>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>>>>>>> - if grep 'Has read all relay log' $tmpfile>/dev/null; then
>>>>>>> + if grep -i 'Has read all relay log' $tmpfile>/dev/null; then
>>>>>>> ocf_log info "MySQL slave has finished processing relay log"
>>>>>>> break
>>>>>>> fi
>>>>>>> @@ -744,8 +746,15 @@
>>>>>>> # Determines what IP address is attached to the current host. The output of the
>>>>>>> # crm_attribute command looks like this:
>>>>>>> # scope=nodes name=IP value=10.2.2.161
>>>>>>> +# If the IP node attribute is not defined, fallback is to uname -n
>>>>>>> get_local_ip() {
>>>>>>> - $CRM_ATTR -l forever -n IP -q -G
>>>>>>> + local IP
>>>>>>> + IP=`$CRM_ATTR -l forever -n IP -q -G`
>>>>>>> + if [ ! $? -eq 0 ]; then
>>>>>>> + uname -n
>>>>>>> + else
>>>>>>> + echo $IP
>>>>>>> + fi
>>>>>>> }
>>>>>>>
>>>>>>> #######################################################################
>>>>>>> @@ -783,7 +792,7 @@
>>>>>>>
>>>>>>> mysql_status() {
>>>>>>> if [ ! -e $OCF_RESKEY_pid ]; then
>>>>>>> - ocf_log err "MySQL is not running"
>>>>>>> + ocf_log $1 "MySQL is not running"
>>>>>>> return $OCF_NOT_RUNNING;
>>>>>>> fi
>>>>>>>
>>>>>>> @@ -797,7 +806,7 @@
>>>>>>> if [ $? -eq 0 ]; then
>>>>>>> return $OCF_SUCCESS;
>>>>>>> else
>>>>>>> - ocf_log err "MySQL not running: removing old PID file"
>>>>>>> + ocf_log $1 "MySQL not running: removing old PID file"
>>>>>>> rm -f $OCF_RESKEY_pid
>>>>>>> return $OCF_NOT_RUNNING;
>>>>>>> fi
>>>>>>> @@ -811,8 +820,9 @@
>>>>>>> if ocf_is_probe; then
>>>>>>> status_loglevel="info"
>>>>>>> fi
>>>>>>> -
>>>>>>> +
>>>>>>> mysql_status $status_loglevel
>>>>>>> +
>>>>>>> rc=$?
>>>>>>>
>>>>>>> # TODO: check max connections error
>>>>>>> @@ -856,7 +866,7 @@
>>>>>>> set_reader_attr 0
>>>>>>> fi
>>>>>>>
>>>>>>> - mysql_status
>>>>>>> + mysql_status info
>>>>>>> if [ $? = $OCF_SUCCESS ]; then
>>>>>>> ocf_log info "MySQL already running"
>>>>>>> return $OCF_SUCCESS
>>>>>>> @@ -930,7 +940,7 @@
>>>>>>> # Let the CRM/LRM time us out if required.
>>>>>>> start_wait=1
>>>>>>> while [ $start_wait = 1 ]; do
>>>>>>> - mysql_status
>>>>>>> + mysql_status info
>>>>>>> rc=$?
>>>>>>> if [ $rc = $OCF_SUCCESS ]; then
>>>>>>> start_wait=0
>>>>>>> @@ -1019,7 +1029,7 @@
>>>>>>> count=0
>>>>>>> while [ $count -lt $shutdown_timeout ]
>>>>>>> do
>>>>>>> - mysql_status
>>>>>>> + mysql_status info
>>>>>>> rc=$?
>>>>>>> if [ $rc = $OCF_NOT_RUNNING ]; then
>>>>>>> break
>>>>>>> @@ -1029,7 +1039,7 @@
>>>>>>> ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
>>>>>>> done
>>>>>>>
>>>>>>> - mysql_status
>>>>>>> + mysql_status info
>>>>>>> if [ $? != $OCF_NOT_RUNNING ]; then
>>>>>>> ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
>>>>>>> /bin/kill -KILL $pid> /dev/null
>>>>>>> @@ -1044,7 +1054,7 @@
>>>>>>> mysql_promote() {
>>>>>>> local master_info
>>>>>>>
>>>>>>> - if ( ! mysql_status ); then
>>>>>>> + if ( ! mysql_status err ); then
>>>>>>> return $OCF_NOT_RUNNING
>>>>>>> fi
>>>>>>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>>>>>>> @@ -1053,7 +1063,7 @@
>>>>>>> # Set Master Info in CIB, cluster level attribute
>>>>>>> update_data_master_status
>>>>>>> master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
>>>>>>> - ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication -v "$master_info"
>>>>>>> + ${CRM_ATTR_REPL_INFO} -v "$master_info"
>>>>>>> rm -f $tmpfile
>>>>>>>
>>>>>>> set_read_only off || return $OCF_ERR_GENERIC
>>>>>>> @@ -1070,7 +1080,7 @@
>>>>>>> }
>>>>>>>
>>>>>>> mysql_demote() {
>>>>>>> - if ! mysql_status; then
>>>>>>> + if ! mysql_status err; then
>>>>>>> return $OCF_NOT_RUNNING
>>>>>>> fi
>>>>>>>
>>>>>>> @@ -1177,6 +1187,7 @@
>>>>>>> # The log directory must be a directory owned by root, with permissions 0700,
>>>>>>> # and the log must be writable and not a symlink.
>>>>>>> ##########################################################################
>>>>>>> +DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
>>>>>>> if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
>>>>>>> DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
>>>>>>> if [ -d "${DEBUG_LOG_DIR}" ]; then
>>>>>>> @@ -1214,7 +1225,7 @@
>>>>>>> case "$1" in
>>>>>>> start) mysql_start;;
>>>>>>> stop) mysql_stop;;
>>>>>>> - status) mysql_status;;
>>>>>>> + status) mysql_status err;;
>>>>>>> monitor) mysql_monitor;;
>>>>>>> promote) mysql_promote;;
>>>>>>> demote) mysql_demote;;
>>>>>>
>>>>>>> _______________________________________________________
>>>>>>> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
>>>>>>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
>>>>>>> Home Page: http://linux-ha.org/
>>>>>>
>>>>>> _______________________________________________________
>>>>>> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
>>>>>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
>>>>>> Home Page: http://linux-ha.org/
>>>>>>
>>>>
>>>>> --- mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
>>>>> +++ resource-agents-prm/heartbeat/mysql 2012-05-11 08:41:20.868420414 -0400
>>>>> @@ -109,7 +109,7 @@
>>>>> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
>>>>> : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
>>>>>
>>>>> -: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
>>>>> +: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
>>>>>
>>>>> #######################################################################
>>>>>
>>>>> @@ -328,7 +328,7 @@
>>>>> </longdesc>
>>>>> <shortdesc lang="en">Sets the node attribute that determines
>>>>> whether a node is usable for clients to read from.</shortdesc>
>>>>> -<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
>>>>> +<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
>>>>> </parameter>
>>>>> </parameters>
>>>>>
>>>>> @@ -355,11 +355,13 @@
>>>>> MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
>>>>> MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
>>>>> MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
>>>>> +MYSQL_TOO_MANY_CONN_ERR=1040
>>>>>
>>>>> CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
>>>>> HOSTNAME=`uname -n`
>>>>> CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
>>>>> INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
>>>>> +CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication"
>>>>>
>>>>> #######################################################################
>>>>>
>>>>> @@ -461,14 +463,14 @@
>>>>>
>>>>> check_slave() {
>>>>> # Checks slave status
>>>>> - local rc new_master_IP
>>>>> + local rc new_master
>>>>>
>>>>> get_slave_info
>>>>> rc=$?
>>>>>
>>>>> if [ $rc -eq 0 ]; then
>>>>> # Did we receive an error other than max_connections?
>>>>> - if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
>>>>> + if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>>>>> # Whoa. Replication ran into an error. This slave has
>>>>> # diverged from its master. Make sure this resource
>>>>> # doesn't restart in place.
>>>>> @@ -484,7 +486,7 @@
>>>>> fi
>>>>>
>>>>> # If we got max_connections, let's remove the vip
>>>>> - if [ $last_errno -eq 1040 ]; then
>>>>> + if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>>>>> set_reader_attr 0
>>>>> exit $OCF_SUCCESS
>>>>> fi
>>>>> @@ -496,9 +498,9 @@
>>>>> ocf_log warn "MySQL Slave IO threads currently not running."
>>>>>
>>>>> # Sanity check, are we at least on the right master
>>>>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>>>>> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>>>>>
>>>>> - if [ "$master_host" != "$new_master_IP" ]; then
>>>>> + if [ "$master_host" != "$new_master" ]; then
>>>>> # Not pointing to the right master, not good, removing the VIPs
>>>>> set_reader_attr 0
>>>>>
>>>>> @@ -570,23 +572,23 @@
>>>>> }
>>>>>
>>>>> set_master() {
>>>>> - local new_master_IP master_log_file master_log_pos
>>>>> + local new_master master_log_file master_log_pos
>>>>> local master_params
>>>>>
>>>>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>>>>> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>>>>>
>>>>> # Keep replication position
>>>>> get_slave_info
>>>>>
>>>>> - if [ "$master_log_file" -a "$new_master_IP" = "$master_host" ]; then
>>>>> + if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
>>>>> # master_params=", MASTER_LOG_FILE='$master_log_file', \
>>>>> # MASTER_LOG_POS=$master_log_pos"
>>>>> ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
>>>>> rm -f $tmpfile
>>>>> return
>>>>> else
>>>>> - master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f2`
>>>>> - master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f3`
>>>>> + master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2`
>>>>> + master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3`
>>>>> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
>>>>> master_params=", MASTER_LOG_FILE='$master_log_file', \
>>>>> MASTER_LOG_POS=$master_log_pos"
>>>>> @@ -601,7 +603,7 @@
>>>>> # reset with RESET MASTER.
>>>>>
>>>>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>>>>> - -e "CHANGE MASTER TO MASTER_HOST='$new_master_IP', \
>>>>> + -e "CHANGE MASTER TO MASTER_HOST='$new_master', \
>>>>> MASTER_USER='$OCF_RESKEY_replication_user', \
>>>>> MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
>>>>> rm -f $tmpfile
>>>>> @@ -628,15 +630,15 @@
>>>>> while true; do
>>>>> $MYSQL $MYSQL_OPTIONS_REPL \
>>>>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>>>>> - if grep 'Waiting for master to send event' $tmpfile>/dev/null; then
>>>>> + if grep -i 'Waiting for master to send event' $tmpfile>/dev/null; then
>>>>> ocf_log info "MySQL slave has finished reading master binary log"
>>>>> break
>>>>> fi
>>>>> - if grep 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
>>>>> + if grep -i 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
>>>>> ocf_log info "Master is down, no more binary logs to come"
>>>>> break
>>>>> fi
>>>>> - if grep 'Connecting to master' $tmpfile>/dev/null; then
>>>>> + if grep -i 'Connecting to master' $tmpfile>/dev/null; then
>>>>> ocf_log info "Master is down, no more binary logs to come"
>>>>> break
>>>>> fi
>>>>> @@ -660,7 +662,7 @@
>>>>> while true; do
>>>>> $MYSQL $MYSQL_OPTIONS_REPL \
>>>>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>>>>> - if grep 'Has read all relay log' $tmpfile>/dev/null; then
>>>>> + if grep -i 'Has read all relay log' $tmpfile>/dev/null; then
>>>>> ocf_log info "MySQL slave has finished processing relay log"
>>>>> break
>>>>> fi
>>>>> @@ -744,8 +746,17 @@
>>>>> # Determines what IP address is attached to the current host. The output of the
>>>>> # crm_attribute command looks like this:
>>>>> # scope=nodes name=IP value=10.2.2.161
>>>>> +# If the REPL_MASTER_IP node attribute is not defined, fallback is to uname -n
>>>>> +# The REPL_MASTER_IP is the IP address that will be used for the
>>>>> +# change master to command.
>>>>> get_local_ip() {
>>>>> - $CRM_ATTR -l forever -n IP -q -G
>>>>> + local IP
>>>>> + IP=`$CRM_ATTR -l forever -n REPL_MASTER_IP -q -G`
>>>>> + if [ ! $? -eq 0 ]; then
>>>>> + uname -n
>>>>> + else
>>>>> + echo $IP
>>>>> + fi
>>>>> }
>>>>>
>>>>> #######################################################################
>>>>> @@ -783,7 +794,7 @@
>>>>>
>>>>> mysql_status() {
>>>>> if [ ! -e $OCF_RESKEY_pid ]; then
>>>>> - ocf_log err "MySQL is not running"
>>>>> + ocf_log $1 "MySQL is not running"
>>>>> return $OCF_NOT_RUNNING;
>>>>> fi
>>>>>
>>>>> @@ -797,7 +808,7 @@
>>>>> if [ $? -eq 0 ]; then
>>>>> return $OCF_SUCCESS;
>>>>> else
>>>>> - ocf_log err "MySQL not running: removing old PID file"
>>>>> + ocf_log $1 "MySQL not running: removing old PID file"
>>>>> rm -f $OCF_RESKEY_pid
>>>>> return $OCF_NOT_RUNNING;
>>>>> fi
>>>>> @@ -811,8 +822,9 @@
>>>>> if ocf_is_probe; then
>>>>> status_loglevel="info"
>>>>> fi
>>>>> -
>>>>> +
>>>>> mysql_status $status_loglevel
>>>>> +
>>>>> rc=$?
>>>>>
>>>>> # TODO: check max connections error
>>>>> @@ -856,7 +868,7 @@
>>>>> set_reader_attr 0
>>>>> fi
>>>>>
>>>>> - mysql_status
>>>>> + mysql_status info
>>>>> if [ $? = $OCF_SUCCESS ]; then
>>>>> ocf_log info "MySQL already running"
>>>>> return $OCF_SUCCESS
>>>>> @@ -930,7 +942,7 @@
>>>>> # Let the CRM/LRM time us out if required.
>>>>> start_wait=1
>>>>> while [ $start_wait = 1 ]; do
>>>>> - mysql_status
>>>>> + mysql_status info
>>>>> rc=$?
>>>>> if [ $rc = $OCF_SUCCESS ]; then
>>>>> start_wait=0
>>>>> @@ -1019,7 +1031,7 @@
>>>>> count=0
>>>>> while [ $count -lt $shutdown_timeout ]
>>>>> do
>>>>> - mysql_status
>>>>> + mysql_status info
>>>>> rc=$?
>>>>> if [ $rc = $OCF_NOT_RUNNING ]; then
>>>>> break
>>>>> @@ -1029,7 +1041,7 @@
>>>>> ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
>>>>> done
>>>>>
>>>>> - mysql_status
>>>>> + mysql_status info
>>>>> if [ $? != $OCF_NOT_RUNNING ]; then
>>>>> ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
>>>>> /bin/kill -KILL $pid> /dev/null
>>>>> @@ -1044,7 +1056,7 @@
>>>>> mysql_promote() {
>>>>> local master_info
>>>>>
>>>>> - if ( ! mysql_status ); then
>>>>> + if ( ! mysql_status err ); then
>>>>> return $OCF_NOT_RUNNING
>>>>> fi
>>>>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>>>>> @@ -1053,7 +1065,7 @@
>>>>> # Set Master Info in CIB, cluster level attribute
>>>>> update_data_master_status
>>>>> master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
>>>>> - ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication -v "$master_info"
>>>>> + ${CRM_ATTR_REPL_INFO} -v "$master_info"
>>>>> rm -f $tmpfile
>>>>>
>>>>> set_read_only off || return $OCF_ERR_GENERIC
>>>>> @@ -1070,7 +1082,7 @@
>>>>> }
>>>>>
>>>>> mysql_demote() {
>>>>> - if ! mysql_status; then
>>>>> + if ! mysql_status err; then
>>>>> return $OCF_NOT_RUNNING
>>>>> fi
>>>>>
>>>>> @@ -1177,6 +1189,7 @@
>>>>> # The log directory must be a directory owned by root, with permissions 0700,
>>>>> # and the log must be writable and not a symlink.
>>>>> ##########################################################################
>>>>> +DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
>>>>> if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
>>>>> DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
>>>>> if [ -d "${DEBUG_LOG_DIR}" ]; then
>>>>> @@ -1214,7 +1227,7 @@
>>>>> case "$1" in
>>>>> start) mysql_start;;
>>>>> stop) mysql_stop;;
>>>>> - status) mysql_status;;
>>>>> + status) mysql_status err;;
>>>>> monitor) mysql_monitor;;
>>>>> promote) mysql_promote;;
>>>>> demote) mysql_demote;;
>>>>
>>>>> _______________________________________________________
>>>>> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
>>>>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
>>>>> Home Page: http://linux-ha.org/
>>>>
>>>> _______________________________________________________
>>>> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
>>>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
>>>> Home Page: http://linux-ha.org/
>>>>
>>
>>> --- mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
>>> +++ resource-agents-prm/heartbeat/mysql 2012-05-11 10:14:28.148420134 -0400
>>> @@ -109,7 +109,22 @@
>>> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
>>> : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
>>>
>>> -: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
>>> +: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
>>> +
>>> +#######################################################################
>>> +# Convenience variables
>>> +
>>> +MYSQL=$OCF_RESKEY_client_binary
>>> +MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
>>> +MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
>>> +MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
>>> +MYSQL_TOO_MANY_CONN_ERR=1040
>>> +
>>> +CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
>>> +HOSTNAME=`uname -n`
>>> +CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
>>> +INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
>>> +CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication"
>>>
>>> #######################################################################
>>>
>>> @@ -141,6 +156,13 @@
>>> Resource script for MySQL.
>>> May manage a standalone MySQL database, a clone set with externally
>>> managed replication, or a complete master/slave replication setup.
>>> +
>>> +While managing replication, the default behavior is to use uname -n
>>> +values in the change master to command. Other IPs can be specified
>>> +manually by adding a node attribute \${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP
>>> +giving the IP to use for replication. For example, if the mysql primitive
>>> +you are using is p_mysql, the attribute to set will be
>>> +p_mysql_MYSQL_MASTER_IP.
>>> </longdesc>
>>> <shortdesc lang="en">Manages a MySQL database instance</shortdesc>
>>> <parameters>
>>> @@ -328,7 +350,7 @@
>>> </longdesc>
>>> <shortdesc lang="en">Sets the node attribute that determines
>>> whether a node is usable for clients to read from.</shortdesc>
>>> -<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
>>> +<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
>>> </parameter>
>>> </parameters>
>>>
>>> @@ -348,20 +370,6 @@
>>> </resource-agent>
>>> END
>>> }
>>> -#######################################################################
>>> -# Convenience variables
>>> -
>>> -MYSQL=$OCF_RESKEY_client_binary
>>> -MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
>>> -MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
>>> -MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
>>> -
>>> -CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
>>> -HOSTNAME=`uname -n`
>>> -CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
>>> -INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
>>> -
>>> -#######################################################################
>>>
>>> # Convenience functions
>>>
>>> @@ -461,14 +469,14 @@
>>>
>>> check_slave() {
>>> # Checks slave status
>>> - local rc new_master_IP
>>> + local rc new_master
>>>
>>> get_slave_info
>>> rc=$?
>>>
>>> if [ $rc -eq 0 ]; then
>>> # Did we receive an error other than max_connections?
>>> - if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
>>> + if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>>> # Whoa. Replication ran into an error. This slave has
>>> # diverged from its master. Make sure this resource
>>> # doesn't restart in place.
>>> @@ -484,7 +492,7 @@
>>> fi
>>>
>>> # If we got max_connections, let's remove the vip
>>> - if [ $last_errno -eq 1040 ]; then
>>> + if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
>>> set_reader_attr 0
>>> exit $OCF_SUCCESS
>>> fi
>>> @@ -496,9 +504,9 @@
>>> ocf_log warn "MySQL Slave IO threads currently not running."
>>>
>>> # Sanity check, are we at least on the right master
>>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>>> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>>>
>>> - if [ "$master_host" != "$new_master_IP" ]; then
>>> + if [ "$master_host" != "$new_master" ]; then
>>> # Not pointing to the right master, not good, removing the VIPs
>>> set_reader_attr 0
>>>
>>> @@ -570,23 +578,23 @@
>>> }
>>>
>>> set_master() {
>>> - local new_master_IP master_log_file master_log_pos
>>> + local new_master master_log_file master_log_pos
>>> local master_params
>>>
>>> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
>>> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>>>
>>> # Keep replication position
>>> get_slave_info
>>>
>>> - if [ "$master_log_file" -a "$new_master_IP" = "$master_host" ]; then
>>> + if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
>>> # master_params=", MASTER_LOG_FILE='$master_log_file', \
>>> # MASTER_LOG_POS=$master_log_pos"
>>> ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
>>> rm -f $tmpfile
>>> return
>>> else
>>> - master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f2`
>>> - master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f3`
>>> + master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2`
>>> + master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3`
>>> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
>>> master_params=", MASTER_LOG_FILE='$master_log_file', \
>>> MASTER_LOG_POS=$master_log_pos"
>>> @@ -601,7 +609,7 @@
>>> # reset with RESET MASTER.
>>>
>>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>>> - -e "CHANGE MASTER TO MASTER_HOST='$new_master_IP', \
>>> + -e "CHANGE MASTER TO MASTER_HOST='$new_master', \
>>> MASTER_USER='$OCF_RESKEY_replication_user', \
>>> MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
>>> rm -f $tmpfile
>>> @@ -628,15 +636,15 @@
>>> while true; do
>>> $MYSQL $MYSQL_OPTIONS_REPL \
>>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>>> - if grep 'Waiting for master to send event' $tmpfile>/dev/null; then
>>> + if grep -i 'Waiting for master to send event' $tmpfile>/dev/null; then
>>> ocf_log info "MySQL slave has finished reading master binary log"
>>> break
>>> fi
>>> - if grep 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
>>> + if grep -i 'Reconnecting after a failed master event read' $tmpfile>/dev/null; then
>>> ocf_log info "Master is down, no more binary logs to come"
>>> break
>>> fi
>>> - if grep 'Connecting to master' $tmpfile>/dev/null; then
>>> + if grep -i 'Connecting to master' $tmpfile>/dev/null; then
>>> ocf_log info "Master is down, no more binary logs to come"
>>> break
>>> fi
>>> @@ -660,7 +668,7 @@
>>> while true; do
>>> $MYSQL $MYSQL_OPTIONS_REPL \
>>> -e 'SHOW PROCESSLIST\G'> $tmpfile
>>> - if grep 'Has read all relay log' $tmpfile>/dev/null; then
>>> + if grep -i 'Has read all relay log' $tmpfile>/dev/null; then
>>> ocf_log info "MySQL slave has finished processing relay log"
>>> break
>>> fi
>>> @@ -744,8 +752,17 @@
>>> # Determines what IP address is attached to the current host. The output of the
>>> # crm_attribute command looks like this:
>>> # scope=nodes name=IP value=10.2.2.161
>>> +# If the ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP node attribute is not defined, fallback is to uname -n
>>> +# The ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP is the IP address that will be used for the
>>> +# change master to command.
>>> get_local_ip() {
>>> - $CRM_ATTR -l forever -n IP -q -G
>>> + local IP
>>> + IP=`$CRM_ATTR -l forever -n ${INSTANCE_ATTR_NAME}_MYSQL_MASTER_IP -q -G`
>>> + if [ ! $? -eq 0 ]; then
>>> + uname -n
>>> + else
>>> + echo $IP
>>> + fi
>>> }
>>>
>>> #######################################################################
>>> @@ -783,7 +800,7 @@
>>>
>>> mysql_status() {
>>> if [ ! -e $OCF_RESKEY_pid ]; then
>>> - ocf_log err "MySQL is not running"
>>> + ocf_log $1 "MySQL is not running"
>>> return $OCF_NOT_RUNNING;
>>> fi
>>>
>>> @@ -797,7 +814,7 @@
>>> if [ $? -eq 0 ]; then
>>> return $OCF_SUCCESS;
>>> else
>>> - ocf_log err "MySQL not running: removing old PID file"
>>> + ocf_log $1 "MySQL not running: removing old PID file"
>>> rm -f $OCF_RESKEY_pid
>>> return $OCF_NOT_RUNNING;
>>> fi
>>> @@ -811,8 +828,9 @@
>>> if ocf_is_probe; then
>>> status_loglevel="info"
>>> fi
>>> -
>>> +
>>> mysql_status $status_loglevel
>>> +
>>> rc=$?
>>>
>>> # TODO: check max connections error
>>> @@ -856,7 +874,7 @@
>>> set_reader_attr 0
>>> fi
>>>
>>> - mysql_status
>>> + mysql_status info
>>> if [ $? = $OCF_SUCCESS ]; then
>>> ocf_log info "MySQL already running"
>>> return $OCF_SUCCESS
>>> @@ -930,7 +948,7 @@
>>> # Let the CRM/LRM time us out if required.
>>> start_wait=1
>>> while [ $start_wait = 1 ]; do
>>> - mysql_status
>>> + mysql_status info
>>> rc=$?
>>> if [ $rc = $OCF_SUCCESS ]; then
>>> start_wait=0
>>> @@ -1019,7 +1037,7 @@
>>> count=0
>>> while [ $count -lt $shutdown_timeout ]
>>> do
>>> - mysql_status
>>> + mysql_status info
>>> rc=$?
>>> if [ $rc = $OCF_NOT_RUNNING ]; then
>>> break
>>> @@ -1029,7 +1047,7 @@
>>> ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
>>> done
>>>
>>> - mysql_status
>>> + mysql_status info
>>> if [ $? != $OCF_NOT_RUNNING ]; then
>>> ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
>>> /bin/kill -KILL $pid> /dev/null
>>> @@ -1044,7 +1062,7 @@
>>> mysql_promote() {
>>> local master_info
>>>
>>> - if ( ! mysql_status ); then
>>> + if ( ! mysql_status err ); then
>>> return $OCF_NOT_RUNNING
>>> fi
>>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>>> @@ -1053,7 +1071,7 @@
>>> # Set Master Info in CIB, cluster level attribute
>>> update_data_master_status
>>> master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
>>> - ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication -v "$master_info"
>>> + ${CRM_ATTR_REPL_INFO} -v "$master_info"
>>> rm -f $tmpfile
>>>
>>> set_read_only off || return $OCF_ERR_GENERIC
>>> @@ -1070,7 +1088,7 @@
>>> }
>>>
>>> mysql_demote() {
>>> - if ! mysql_status; then
>>> + if ! mysql_status err; then
>>> return $OCF_NOT_RUNNING
>>> fi
>>>
>>> @@ -1177,6 +1195,7 @@
>>> # The log directory must be a directory owned by root, with permissions 0700,
>>> # and the log must be writable and not a symlink.
>>> ##########################################################################
>>> +DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
>>> if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
>>> DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
>>> if [ -d "${DEBUG_LOG_DIR}" ]; then
>>> @@ -1214,7 +1233,7 @@
>>> case "$1" in
>>> start) mysql_start;;
>>> stop) mysql_stop;;
>>> - status) mysql_status;;
>>> + status) mysql_status err;;
>>> monitor) mysql_monitor;;
>>> promote) mysql_promote;;
>>> demote) mysql_demote;;
>>
>>> _______________________________________________________
>>> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
>>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
>>> Home Page: http://linux-ha.org/
>>
>> _______________________________________________________
>> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
>> Home Page: http://linux-ha.org/
>>

> --- mysql.latest 2012-05-11 11:21:05.848420091 -0400
> +++ resource-agents-prm/heartbeat/mysql 2012-05-11 11:19:33.088420104 -0400
> @@ -11,6 +11,7 @@
> # Sebastian Reitenbach: add OpenBSD defaults, more cleanup
> # Narayan Newton: add Gentoo/Debian defaults
> # Marian Marinov, Florian Haas: add replication capability
> +# Yves Trudeau, Baron Schwartz: add VIP support and improve replication
> #
> # Support: linux-ha@lists.linux-ha.org
> # License: GNU General Public License (GPL)
> @@ -38,6 +39,7 @@
> # OCF_RESKEY_log
> # OCF_RESKEY_pid
> # OCF_RESKEY_socket
> +
> #######################################################################
> # Initialization:
>
> @@ -76,6 +78,7 @@
> OCF_RESKEY_replication_port_default="3306"
> OCF_RESKEY_max_slave_lag_default="3600"
> OCF_RESKEY_evict_outdated_slaves_default="false"
> +OCF_RESKEY_reader_attribute_default="readable"
>
> : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
> MYSQL_BINDIR=`dirname ${OCF_RESKEY_binary}`
> @@ -106,6 +109,23 @@
> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
> : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
>
> +: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
> +
> +#######################################################################
> +# Convenience variables
> +
> +MYSQL=$OCF_RESKEY_client_binary
> +MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
> +MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
> +MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
> +MYSQL_TOO_MANY_CONN_ERR=1040
> +
> +CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
> +HOSTNAME=`uname -n`
> +CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
> +INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
> +CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s mysql_replication"
> +
> #######################################################################
>
> usage() {
> @@ -133,9 +153,16 @@
> <version>1.0</version>
>
> <longdesc lang="en">
> -Resource script for MySQL.
> +Resource script for MySQL.
> May manage a standalone MySQL database, a clone set with externally
> managed replication, or a complete master/slave replication setup.
> +
> +While managing replication, the default behavior is to use uname -n
> +values in the change master to command. Other IPs can be specified
> +manually by adding a node attribute \${INSTANCE_ATTR_NAME}_mysql_master_IP
> +giving the IP to use for replication. For example, if the mysql primitive
> +you are using is p_mysql, the attribute to set will be
> +p_mysql_mysql_master_IP.
> </longdesc>
> <shortdesc lang="en">Manages a MySQL database instance</shortdesc>
> <parameters>
> @@ -222,7 +249,7 @@
>
> <parameter name="test_user" unique="0" required="0">
> <longdesc lang="en">
> -MySQL test user
> +MySQL test user, must have select privilege on test_table
> </longdesc>
> <shortdesc lang="en">MySQL test user</shortdesc>
> <content type="string" default="${OCF_RESKEY_test_user_default}" />
> @@ -259,7 +286,8 @@
> MySQL replication, for setting and resetting the master host, and for
> setting and unsetting read-only mode. Because of that, this user must
> have SUPER, REPLICATION SLAVE, REPLICATION CLIENT, and PROCESS
> -privileges on all nodes within the cluster.
> +privileges on all nodes within the cluster. Mandatory if you define
> +a master-slave resource.
> </longdesc>
> <shortdesc lang="en">MySQL replication user</shortdesc>
> <content type="string" default="${OCF_RESKEY_replication_user_default}" />
> @@ -268,6 +296,7 @@
> <parameter name="replication_passwd" unique="0" required="0">
> <longdesc lang="en">
> MySQL replication password. Used for replication client and slave.
> +Mandatory if you define a master-slave resource.
> </longdesc>
> <shortdesc lang="en">MySQL replication user password</shortdesc>
> <content type="string" default="${OCF_RESKEY_replication_passwd_default}" />
> @@ -307,6 +336,22 @@
> <content type="boolean" default="${OCF_RESKEY_evict_outdated_slaves_default}" />
> </parameter>
>
> +<parameter name="reader_attribute" unique="1" required="0">
> +<longdesc lang="en">
> +An attribute that the RA can manage to specify whether a node
> +can be read from. This node attribute will be 1 if it's fine to
> +read from the node, and 0 otherwise (for example, when a slave
> +has lagged too far behind the master).
> +
> +A typical example for the use of this attribute would be to tie
> +a set of IP addresses to MySQL slaves that can be read from.
> +
> +This parameter is only meaningful in master/slave set configurations.
> +</longdesc>
> +<shortdesc lang="en">Sets the node attribute that determines
> +whether a node is usable for clients to read from.</shortdesc>
> +<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
> +</parameter>
> </parameters>
>
> <actions>
> @@ -325,19 +370,6 @@
> </resource-agent>
> END
> }
> -#######################################################################
> -# Convenience variables
> -
> -MYSQL=$OCF_RESKEY_client_binary
> -MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
> -MYSQL_OPTIONS_REPL="--user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
> -
> -CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
> -HOSTNAME=`uname -n`
> -CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME -l forever"
> -INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
> -
> -#######################################################################
>
> # Convenience functions
>
> @@ -354,28 +386,17 @@
> else
> ro_val="off"
> fi
> - local mysql_options
> - mysql_options="$MYSQL_OPTIONS_LOCAL"
> - if [ -n $OCF_RESKEY_replication_user ]; then
> - mysql_options="$mysql_options $MYSQL_OPTIONS_REPL"
> - fi
> - ocf_run $MYSQL $mysql_options \
> + ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> -e "SET GLOBAL read_only=${ro_val}"
> }
>
> get_read_only() {
> # Check if read-only is set
> - local mysql_options
> local read_only_state
>
> - mysql_options="$MYSQL_OPTIONS_LOCAL"
> - if [ -n $OCF_RESKEY_replication_user ]; then
> - mysql_options="$mysql_options $MYSQL_OPTIONS_REPL"
> - fi
> -
> - read_only_state=`$MYSQL $mysql_options \
> + read_only_state=`$MYSQL $MYSQL_OPTIONS_REPL \
> -e "SHOW VARIABLES" | grep read_only | awk '{print $2}'`
> -
> +
> if [ "$read_only_state" = "ON" ]; then
> return 0
> else
> @@ -389,7 +410,6 @@
> # SLAVE STATUS creates an empty result set, 0 otherwise.
> local rc
> local tmpfile
> - local mysql_options
>
> # Check whether this machine should be slave
> if ! ocf_is_ms || ! get_read_only; then
> @@ -398,10 +418,8 @@
>
> tmpfile=`mktemp ${HA_RSCTMP}/is_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
>
> - mysql_options="$MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL"
> -
> - $MYSQL $mysql_options \
> - -e 'SHOW SLAVE STATUS\G' > $tmpfile
> + $MYSQL $MYSQL_OPTIONS_REPL \
> + -e 'SHOW SLAVE STATUS\G' > $tmpfile
>
> # "SHOW SLAVE STATUS" returns an empty set if instance is not a
> # replication slave
> @@ -425,10 +443,8 @@
>
> tmpfile=`mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
>
> - mysql_options="$MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL"
> -
> - $MYSQL $mysql_options \
> - -e 'SHOW SLAVE STATUS\G' > $tmpfile
> + $MYSQL $MYSQL_OPTIONS_REPL \
> + -e 'SHOW SLAVE STATUS\G' > $tmpfile
>
> if [ -s $tmpfile ]; then
> master_host=`parse_slave_info Master_Host $tmpfile`
> @@ -440,7 +456,6 @@
> slave_io=`parse_slave_info Slave_IO_Running $tmpfile`
> last_errno=`parse_slave_info Last_Errno $tmpfile`
> secs_behind=`parse_slave_info Seconds_Behind_Master $tmpfile`
> -
> ocf_log debug "MySQL instance running as a replication slave"
> else
> # Instance produced an empty "SHOW SLAVE STATUS" output --
> @@ -454,19 +469,32 @@
>
> check_slave() {
> # Checks slave status
> - local rc
> + local rc new_master
>
> get_slave_info
> rc=$?
>
> if [ $rc -eq 0 ]; then
> - if [ $last_errno -ne 0 ]; then
> + # Did we receive an error other than max_connections?
> + if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
> # Whoa. Replication ran into an error. This slave has
> # diverged from its master. Make sure this resource
> # doesn't restart in place.
> ocf_log err "MySQL instance configured for replication, but replication has failed."
> ocf_log err "See $tmpfile for details"
> - exit $OCF_ERR_INSTALLED
> +
> + # Just pull the reader VIP away, killing MySQL here would be pretty evil
> + # on a loaded server
> +
> + set_reader_attr 0
> + exit $OCF_SUCCESS
> +
> + fi
> +
> + # If we got max_connections, let's remove the vip
> + if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
> + set_reader_attr 0
> + exit $OCF_SUCCESS
> fi
>
> if [ "$slave_io" != 'Yes' ]; then
> @@ -474,15 +502,35 @@
> # temporarily shut down, and the slave may just be
> # reconnecting. A warning can't hurt, though.
> ocf_log warn "MySQL Slave IO threads currently not running."
> +
> + # Sanity check, are we at least on the right master
> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
> +
> + if [ "$master_host" != "$new_master" ]; then
> + # Not pointing to the right master, not good, removing the VIPs
> + set_reader_attr 0
> +
> + exit $OCF_SUCCESS
> + fi
> +
> fi
>
> if [ "$slave_sql" != 'Yes' ]; then
> # We don't have a replication SQL thread running. Not a
> - # good thing. Try to recoved by restarting the resource in
> - # place.
> + # good thing. Try to recover by restarting the SQL thread
> + # and remove reader vip. Prevent MySQL restart.
> ocf_log err "MySQL Slave SQL threads currently not running."
> ocf_log err "See $tmpfile for details"
> - exit $OCF_ERR_GENERIC
> +
> + # Remove reader vip
> + set_reader_attr 0
> +
> + # try to restart slave
> + ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> + -e "START SLAVE"
> +
> + # Return success to prevent a restart
> + exit $OCF_SUCCESS
> fi
>
> if ocf_is_true $OCF_RESKEY_evict_outdated_slaves; then
> @@ -491,9 +539,13 @@
> if [ $secs_behind -gt $OCF_RESKEY_max_slave_lag ]; then
> ocf_log err "MySQL Slave is $secs_behind seconds behind master (allowed maximum: $OCF_RESKEY_max_slave_lag)."
> ocf_log err "See $tmpfile for details"
> +
> + # Remove reader vip
> + set_reader_attr 0
> +
> exit $OCF_ERR_INSTALLED
> fi
> - elif ocf_is_ms; then
> + elif ocf_is_ms; then
> # Even if we're not set to evict lagging slaves, we can
> # still use the seconds behind master value to set our
> # master preference.
> @@ -506,11 +558,19 @@
> $CRM_MASTER -v $master_pref
> fi
>
> + # is the slave ok to have a VIP on it
> + if [ $secs_behind -gt $OCF_RESKEY_max_slave_lag ]; then
> + set_reader_attr 0
> + else
> + set_reader_attr 1
> + fi
> +
> ocf_log debug "MySQL instance running as a replication slave"
> rm -f $tmpfile
> else
> # Instance produced an empty "SHOW SLAVE STATUS" output --
> # instance is not a slave
> + # TODO: handle the case where get_slave_info fails with a too-many-connections error
> rm -f $tmpfile
> ocf_log err "check_slave invoked on an instance that is not a replication slave."
> exit $OCF_ERR_GENERIC
> @@ -518,26 +578,26 @@
> }
>
> set_master() {
> - local new_master_host master_log_file master_log_pos
> + local new_master master_log_file master_log_pos
> local master_params
>
> - new_master_host=$1
> + new_master=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f1`
>
> # Keep replication position
> get_slave_info
>
> - if [ "$master_log_file" -a "$new_master_host" = "$master_host" ]; then
> -# master_params=", MASTER_LOG_FILE='$master_log_file', \
> -# MASTER_LOG_POS=$master_log_pos"
> + if [ "$master_log_file" -a "$new_master" = "$master_host" ]; then
> + # master_params=", MASTER_LOG_FILE='$master_log_file', \
> + # MASTER_LOG_POS=$master_log_pos"
> ocf_log info "Kept master pos for $master_host : $master_log_file:$master_log_pos"
> rm -f $tmpfile
> return
> else
> - master_log_file=`$CRM_ATTR -n $new_master_host-log-file-${INSTANCE_ATTR_NAME} -q -G`
> - master_log_pos=`$CRM_ATTR -n $new_master_host-log-pos-${INSTANCE_ATTR_NAME} -q -G`
> + master_log_file=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f2`
> + master_log_pos=`$CRM_ATTR_REPL_INFO --query -q | cut -d'|' -f3`
> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
> master_params=", MASTER_LOG_FILE='$master_log_file', \
> - MASTER_LOG_POS=$master_log_pos"
> + MASTER_LOG_POS=$master_log_pos"
> ocf_log info "Restored master pos for $new_master_host : $master_log_file:$master_log_pos"
> fi
> fi
> @@ -548,11 +608,10 @@
> # from the laste master the slave replicated from, or freshly
> # reset with RESET MASTER.
>
> - ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \
> - -e "CHANGE MASTER TO MASTER_HOST='$new_master_host', \
> - MASTER_USER='$OCF_RESKEY_replication_user', \
> - MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
> -
> + ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> + -e "CHANGE MASTER TO MASTER_HOST='$new_master', \
> + MASTER_USER='$OCF_RESKEY_replication_user', \
> + MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
> rm -f $tmpfile
> }
>
> @@ -569,15 +628,37 @@
> return $OCF_SUCCESS
> fi
>
> - local mysql_options
> - mysql_options="$MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL"
> -
> local tmpfile
> tmpfile=`mktemp ${HA_RSCTMP}/unset_master.${OCF_RESOURCE_INSTANCE}.XXXXXX`
>
> - # First, stop the slave I/O thread and wait for relay log
> + # At this point, the master is read-only, so there should not be many binlogs left to transfer.
> + # Let's wait for the last bits.
> + while true; do
> + $MYSQL $MYSQL_OPTIONS_REPL \
> + -e 'SHOW PROCESSLIST\G' > $tmpfile
> + if grep -i 'Waiting for master to send event' $tmpfile >/dev/null; then
> + ocf_log info "MySQL slave has finished reading master binary log"
> + break
> + fi
> + if grep -i 'Reconnecting after a failed master event read' $tmpfile >/dev/null; then
> + ocf_log info "Master is down, no more binary logs to come"
> + break
> + fi
> + if grep -i 'Connecting to master' $tmpfile >/dev/null; then
> + ocf_log info "Master is down, no more binary logs to come"
> + break
> + fi
> + if ! grep 'system user' $tmpfile >/dev/null; then
> + ocf_log info "Slave is not running - not waiting to finish"
> + break
> + fi
> +
> + sleep 1
> + done
> +
> + # Now, stop the slave I/O thread and wait for relay log
> # processing to complete
> - ocf_run $MYSQL $mysql_options \
> + ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> -e "STOP SLAVE IO_THREAD"
> if [ $? -gt 0 ]; then
> ocf_log err "Error stopping slave IO thread"
> @@ -585,9 +666,9 @@
> fi
>
> while true; do
> - $MYSQL $mysql_options \
> + $MYSQL $MYSQL_OPTIONS_REPL \
> -e 'SHOW PROCESSLIST\G' > $tmpfile
> - if grep '[Hh]as read all relay log' $tmpfile >/dev/null; then
> + if grep -i 'Has read all relay log' $tmpfile >/dev/null; then
> ocf_log info "MySQL slave has finished processing relay log"
> break
> fi
> @@ -597,45 +678,93 @@
> fi
> ocf_log info "Waiting for MySQL slave to finish processing relay log"
> sleep 1
> - done
> + done
> rm -f $tmpfile
>
> # Now, stop all slave activity and unset the master host
> - ocf_run $MYSQL $mysql_options \
> + ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> -e "STOP SLAVE"
> if [ $? -gt 0 ]; then
> ocf_log err "Error stopping rest slave threads"
> exit $OCF_ERR_GENERIC
> fi
>
> - #Save current state
> - get_slave_info
> - $CRM_ATTR -n $master_host-log-file-${INSTANCE_ATTR_NAME} -v $master_log_file
> - $CRM_ATTR -n $master_host-log-pos-${INSTANCE_ATTR_NAME} -v $master_log_pos
> - rm -f $tmpfile
> -
> - ocf_run $MYSQL $mysql_options \
> - -e "CHANGE MASTER TO MASTER_HOST=''"
> + ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> + -e "RESET SLAVE;"
> if [ $? -gt 0 ]; then
> ocf_log err "Failed to set master"
> exit $OCF_ERR_GENERIC
> fi
> }
>
> -# Start replication as slave. Master hostname as parameter
> +# Start replication as slave
> start_slave() {
> - local master_host
> -
> - master_host="$1"
> -
> - # Remove state attributes - it will be invalid after START SLAVE
> - $CRM_ATTR -n $master_host-log-file-${INSTANCE_ATTR_NAME} -D
> - $CRM_ATTR -n $master_host-log-pos-${INSTANCE_ATTR_NAME} -D
>
> - ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \
> + ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> -e "START SLAVE"
> }
>
> +# Set the attribute controlling the readers VIP
> +set_reader_attr() {
> + local curr_attr_value
> +
> + curr_attr_value=$(get_reader_attr)
> +
> + if [ "$curr_attr_value" -ne "$1" ]; then
> + $CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} -v $1
> + fi
> +
> +}
> +
> +# get the attribute controlling the readers VIP
> +get_reader_attr() {
> + local attr_value
> + local rc
> +
> + attr_value=`$CRM_ATTR -l reboot --name ${OCF_RESKEY_reader_attribute} --query -q`
> + rc=$?
> + if [ "$rc" -eq "0" ]; then
> + echo $attr_value
> + else
> + echo -1
> + fi
> +
> +}
> +
> +# Stores data for MASTER STATUS from MySQL
> +update_data_master_status() {
> +
> + master_status_file="${HA_RSCTMP}/master_status.${OCF_RESOURCE_INSTANCE}"
> +
> + $MYSQL $MYSQL_OPTIONS_REPL -e "SHOW MASTER STATUS\G" > $master_status_file
> +}
> +
> +
> +# Returns the specified value from the stored copy of SHOW MASTER STATUS.
> +# Must be called after update_data_master_status, which writes the file read here.
> +# Arguments:
> +# $1 The value to get.
> +get_master_status() {
> + awk -v var="$1" '$1 == var ":" {print substr($0, index($0, ":") + 2)}' "$master_status_file"
> +}
> +
> +
> +# Determines what IP address is attached to the current host. The output of the
> +# crm_attribute command looks like this:
> +# scope=nodes name=IP value=10.2.2.161
> +# If the ${INSTANCE_ATTR_NAME}_mysql_master_IP node attribute is not defined, the fallback is uname -n.
> +# The ${INSTANCE_ATTR_NAME}_mysql_master_IP attribute is the IP address that will be used for the
> +# CHANGE MASTER TO command.
> +get_local_ip() {
> + local IP
> + IP=`$CRM_ATTR -l forever -n ${INSTANCE_ATTR_NAME}_mysql_master_IP -q -G`
> + if [ ! $? -eq 0 ]; then
> + uname -n
> + else
> + echo $IP
> + fi
> +}
> +
> #######################################################################
>
> # Functions invoked by resource manager actions
> @@ -670,12 +799,8 @@
> }
>
> mysql_status() {
> - # Set the log level of the error message (default:err)
> - local loglevel
> - loglevel=${1:-err}
> -
> if [ ! -e $OCF_RESKEY_pid ]; then
> - ocf_log $loglevel "MySQL is not running"
> + ocf_log $1 "MySQL is not running"
> return $OCF_NOT_RUNNING;
> fi
>
> @@ -689,7 +814,7 @@
> if [ $? -eq 0 ]; then
> return $OCF_SUCCESS;
> else
> - ocf_log $loglevel "MySQL not running: removing old PID file"
> + ocf_log $1 "MySQL not running: removing old PID file"
> rm -f $OCF_RESKEY_pid
> return $OCF_NOT_RUNNING;
> fi
> @@ -703,10 +828,13 @@
> if ocf_is_probe; then
> status_loglevel="info"
> fi
> -
> +
> mysql_status $status_loglevel
> +
> rc=$?
>
> + # TODO: check max connections error
> +
> # If status returned an error, return that immediately
> if [ $rc -ne $OCF_SUCCESS ]; then
> return $rc
> @@ -720,16 +848,13 @@
> check_slave
> fi
>
> - local mysql_options
> - mysql_options="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
> -
> # Check for test table
> - ocf_run -q $MYSQL $mysql_options \
> + ocf_run -q $MYSQL $MYSQL_OPTIONS_TEST \
> -e "SELECT COUNT(*) FROM $OCF_RESKEY_test_table"
> rc=$?
>
> if [ $rc -ne 0 ]; then
> - ocf_log err "Failed to select from $OCF_RESKEY_test_table";
> + ocf_log err "Failed to select from $test_table";
> return $OCF_ERR_GENERIC;
> fi
> fi
> @@ -744,6 +869,11 @@
> }
>
> mysql_start() {
> + if ocf_is_ms; then
> + # Initialize the ReaderVIP attribute, monitor will enable it
> + set_reader_attr 0
> + fi
> +
> mysql_status info
> if [ $? = $OCF_SUCCESS ]; then
> ocf_log info "MySQL already running"
> @@ -784,10 +914,10 @@
> # already existed, check whether it is writable by the configured
> # user
> for dir in $pid_dir $socket_dir; do
> - if ! su -s /bin/sh - $OCF_RESKEY_user -c "test -w $dir"; then
> - ocf_log err "Directory $dir is not writable by $OCF_RESKEY_user"
> - exit $OCF_ERR_PERM;
> - fi
> + if ! su -s /bin/sh - $OCF_RESKEY_user -c "test -w $dir"; then
> + ocf_log err "Directory $dir is not writable by $OCF_RESKEY_user"
> + exit $OCF_ERR_PERM;
> + fi
> done
>
> # Uncomment to perform permission clensing
> @@ -802,11 +932,11 @@
> fi
>
> ${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \
> - --pid-file=$OCF_RESKEY_pid \
> - --socket=$OCF_RESKEY_socket \
> - --datadir=$OCF_RESKEY_datadir \
> - --user=$OCF_RESKEY_user $OCF_RESKEY_additional_parameters \
> - $mysql_extra_params >/dev/null 2>&1 &
> + --pid-file=$OCF_RESKEY_pid \
> + --socket=$OCF_RESKEY_socket \
> + --datadir=$OCF_RESKEY_datadir \
> + --user=$OCF_RESKEY_user $OCF_RESKEY_additional_parameters \
> + $mysql_extra_params >/dev/null 2>&1 &
> rc=$?
>
> if [ $rc != 0 ]; then
> @@ -823,8 +953,8 @@
> if [ $rc = $OCF_SUCCESS ]; then
> start_wait=0
>
> - elif [ $rc != $OCF_NOT_RUNNING ]; then
> - ocf_log err "MySQL start failed: $rc"
> + elif [ $rc != $OCF_NOT_RUNNING ]; then
> + ocf_log info "MySQL start failed: $rc"
> return $rc
> fi
> sleep 2
> @@ -836,30 +966,15 @@
> # already promoted a master. So, we simply start in read only
> # mode.
> set_read_only on
> -
> +
> # Now, let's see whether there is a master. We might be a new
> # node that is just joining the cluster, and the CRM may have
> # promoted a master before.
> master_host=`echo $OCF_RESKEY_CRM_meta_notify_master_uname|tr -d " "`
> if [ "$master_host" -a "$master_host" != ${HOSTNAME} ]; then
> ocf_log info "Changing MySQL configuration to replicate from $master_host."
> - set_master $master_host
> - ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \
> - -e 'START SLAVE'
> - if [ $? -ne 0 ]; then
> - ocf_log err "Failed to start slave"
> - return $OCF_ERR_GENERIC
> - fi
> - else
> - ocf_log info "No MySQL master present, clearing replication state"
> - unset_master
> - fi
> -
> - master_host=`echo $OCF_RESKEY_CRM_meta_notify_master_uname`
> - if [ "$master_host" -a "$master_host" != ${HOSTNAME} ]; then
> - ocf_log info "Changing MySQL configuration to replicate from $master_host."
> - set_master $master_host
> - start_slave $master_host
> + set_master
> + start_slave
> if [ $? -ne 0 ]; then
> ocf_log err "Failed to start slave"
> return $OCF_ERR_GENERIC
> @@ -874,6 +989,7 @@
> # preference set by the administrator. We choose a low
> # greater-than-zero preference.
> $CRM_MASTER -v 1
> +
> fi
>
> # Initial monitor action
> @@ -886,7 +1002,7 @@
> ocf_log err "Failed initial monitor action"
> return $rc
> fi
> -
> +
> ocf_log info "MySQL started"
> return $OCF_SUCCESS
> }
> @@ -896,12 +1012,14 @@
> if ocf_is_ms; then
> # clear preference for becoming master
> $CRM_MASTER -D
> +
> + # Remove VIP capability
> + set_reader_attr 0
> fi
>
> - mysql_status info
> - rc=$?
> - if [ $rc = $OCF_NOT_RUNNING ]; then
> - return $OCF_SUCCESS
> + if [ ! -f $OCF_RESKEY_pid ]; then
> + ocf_log info "MySQL is not running"
> + return $OCF_SUCCESS
> fi
>
> pid=`cat $OCF_RESKEY_pid 2> /dev/null `
> @@ -911,14 +1029,14 @@
> ocf_log err "MySQL couldn't be stopped"
> return $OCF_ERR_GENERIC
> fi
> -
> # stop waiting
> shutdown_timeout=15
> if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
> shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5))
> fi
> count=0
> - while [ $count -lt $shutdown_timeout ]; do
> + while [ $count -lt $shutdown_timeout ]
> + do
> mysql_status info
> rc=$?
> if [ $rc = $OCF_NOT_RUNNING ]; then
> @@ -926,12 +1044,12 @@
> fi
> count=`expr $count + 1`
> sleep 1
> - ocf_log info "MySQL still hasn't stopped yet. Waiting..."
> + ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
> done
>
> mysql_status info
> if [ $? != $OCF_NOT_RUNNING ]; then
> - ocf_log warn "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
> + ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
> /bin/kill -KILL $pid > /dev/null
> fi
>
> @@ -942,11 +1060,20 @@
> }
>
> mysql_promote() {
> - if ( ! mysql_status ); then
> + local master_info
> +
> + if ( ! mysql_status err ); then
> return $OCF_NOT_RUNNING
> fi
> - ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \
> + ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> -e "STOP SLAVE"
> +
> + # Set Master Info in CIB, cluster level attribute
> + update_data_master_status
> + master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
> + ${CRM_ATTR_REPL_INFO} -v "$master_info"
> + rm -f $tmpfile
> +
> set_read_only off || return $OCF_ERR_GENERIC
>
> # Existing master gets a higher-than-default master preference, so
> @@ -954,20 +1081,17 @@
> # unnecessarily
> $CRM_MASTER -v $((${OCF_RESKEY_max_slave_lag}+1))
>
> + # A master can accept reads
> + set_reader_attr 1
> +
> return $OCF_SUCCESS
> }
>
> mysql_demote() {
> - if ! mysql_status; then
> + if ! mysql_status err; then
> return $OCF_NOT_RUNNING
> fi
>
> - set_read_only on
> - if [ $? -ne 0 ]; then
> - ocf_log err "Failed to set read-only";
> - return $OCF_ERR_GENERIC;
> - fi
> -
> # Return master preference to default, so the cluster manager gets
> # a chance to select a new master
> $CRM_MASTER -v 1
> @@ -987,68 +1111,105 @@
> ocf_log debug "Received $type_op notification."
>
> case "$type_op" in
> - 'pre-promote')
> - # A new master is about to being promoted. It's not in
> - # read-write mode yet (that only occurs when it actually
> - # executes the promote action), so we can now safely
> - # connect to it and wait for it to start replicating.
> - local master_host
> - local master_status
> - master_host=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname`
> -
> - if ( ! mysql_status ); then
> - return $OCF_NOT_RUNNING
> - fi
> - if [ -z "$master_host" ]; then
> - ocf_log err "Unable to determine master host!"
> - return $OCF_ERR_GENERIC
> - fi
> + 'pre-promote')
> + # Nothing to do now here, new replication info not yet published
>
> - if [ $master_host = ${HOSTNAME} ]; then
> - ocf_log info "This will be new master"
> - else
> - ocf_log info "Changing MySQL configuration to replicate from $master_host"
> - set_master $master_host
> - if [ $? -ne 0 ]; then
> - return $OCF_ERR_GENERIC
> + ;;
> + 'post-promote')
> + # The master has completed its promotion. Now is a good
> + # time to check whether our replication slave is working
> + # correctly.
> + master_host=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname|tr -d " "`
> + if [ "$master_host" = ${HOSTNAME} ]; then
> + ocf_log info "This will be the new master, ignoring post-promote notification."
> else
> - return $OCF_SUCCESS
> + ocf_log info "Resetting replication"
> + unset_master
> + if [ $? -ne 0 ]; then
> + return $OCF_ERR_GENERIC
> + fi
> +
> + ocf_log info "Changing MySQL configuration to replicate from $master_host"
> + set_master
> + if [ $? -ne 0 ]; then
> + return $OCF_ERR_GENERIC
> + fi
> +
> + start_slave
> + if [ $? -ne 0 ]; then
> + ocf_log err "Failed to start slave"
> + return $OCF_ERR_GENERIC
> + fi
> fi
> - fi
> - ;;
> - 'post-promote')
> - # The master has completed its promotion. Now is a good
> - # time to check whether our replication slave is working
> - # correctly.
> - master_host=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname`
> - if [ "$master_host" = ${HOSTNAME} ]; then
> - ocf_log info "Ignoring post-promote notification for my own promotion."
> return $OCF_SUCCESS
> - fi
> - start_slave $master_host
> - if [ $? -ne 0 ]; then
> - ocf_log err "Failed to start slave"
> - return $OCF_ERR_GENERIC
> - fi
> ;;
> - 'post-demote')
> - demote_host=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname`
> - if [ $demote_host = ${HOSTNAME} ]; then
> - ocf_log info "Ignoring post-demote notification for my own demotion."
> + 'pre-demote')
> + demote_host=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " "`
> + if [ $demote_host = ${HOSTNAME} ]; then
> + ocf_log info "post-demote notification for $demote_host"
> + set_read_only on
> + if [ $? -ne 0 ]; then
> + ocf_log err "Failed to set read-only";
> + return $OCF_ERR_GENERIC;
> + fi
> +
> + # Must kill all existing user threads because they are still Read/write
> + # in order for the slaves to complete the read of binlogs
> + local tmpfile
> + tmpfile=`mktemp ${HA_RSCTMP}/threads.${OCF_RESOURCE_INSTANCE}.XXXXXX`
> + $MYSQL $MYSQL_OPTIONS_REPL \
> + -e "SHOW PROCESSLIST" > $tmpfile
> +
> + for thread in `awk '$0 !~ /Binlog Dump|system user|event_scheduler|SHOW PROCESSLIST/ && $0 ~ /^[0-9]/ {print $1}' $tmpfile`
> + do
> + ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> + -e "KILL ${thread}"
> + done
> + else
> + ocf_log info "Ignoring post-demote notification execpt for my own demotion."
> + fi
> return $OCF_SUCCESS
> - fi
> - ocf_log info "post-demote notification for $demote_host."
> - # The former master has just been gracefully demoted.
> - unset_master
> ;;
> - *)
> - return $OCF_SUCCESS
> + 'post-demote')
> + demote_host=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " "`
> + if [ $demote_host = ${HOSTNAME} ]; then
> + ocf_log info "Ignoring post-demote notification for my own demotion."
> + return $OCF_SUCCESS
> + fi
> + ocf_log info "post-demote notification for $demote_host."
> + # The former master has just been gracefully demoted.
> + unset_master
> + ;;
> + *)
> + return $OCF_SUCCESS
> ;;
> esac
> }
>
> #######################################################################
>
> +
> +##########################################################################
> +# If DEBUG_LOG is set, make this resource agent easy to debug: set up the
> +# debug log and direct all output to it. Otherwise, redirect to /dev/null.
> +# The log directory must be a directory owned by root, with permissions 0700,
> +# and the log must be writable and not a symlink.
> +##########################################################################
> +DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
> +if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
> + DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
> + if [ -d "${DEBUG_LOG_DIR}" ]; then
> + exec 9>>"$DEBUG_LOG"
> + exec 2>&9
> + date >&9
> + echo "$*" >&9
> + env | grep OCF_ | sort >&9
> + set -x
> + else
> + exec 9>/dev/null
> + fi
> +fi
> +
> case "$1" in
> meta-data) meta_data
> exit $OCF_SUCCESS;;
> @@ -1072,7 +1233,7 @@
> case "$1" in
> start) mysql_start;;
> stop) mysql_stop;;
> - status) mysql_status;;
> + status) mysql_status err;;
> monitor) mysql_monitor;;
> promote) mysql_promote;;
> demote) mysql_demote;;

> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/

_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
Re: Modified patch for RA [ In reply to ]
Hi Yves,

what about this commit?

https://github.com/bmildren/resource-agents/commit/72031e5a6a644ce6d9a1ed2ec31d4dfb9bae294b

I think that this might still be missing.

If not, please leave a comment below.

Thanks,
Raoul
--
____________________________________________________________________
DI (FH) Raoul Bhatia M.Sc. email. r.bhatia@ipax.at
Technischer Leiter

IPAX - Aloy Bhatia Hava OG web. http://www.ipax.at
Barawitzkagasse 10/2/2/11 email. office@ipax.at
1190 Wien tel. +43 1 3670030
FN 277995t HG Wien fax. +43 1 3670030 15
____________________________________________________________________


_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
Re: Modified patch for RA [ In reply to ]
Hi Raoul and Dejan,
I completely forgot about this one but I am wondering about the
impacts. I have many setups in production and none reported any problem
related to this. The fix is pretty easy, though. Dejan, is it too late to
submit a patch?

Regards,

Yves


Le 2012-05-18 02:22, Raoul Bhatia [IPAX] a écrit :
> Hi Yves,
>
> what about this commit?
>
> https://github.com/bmildren/resource-agents/commit/72031e5a6a644ce6d9a1ed2ec31d4dfb9bae294b
>
>
> I think that this might still be missing.
>
> If not, please leave a comment below.
>
> Thanks,
> Raoul
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
Re: Modified patch for RA [ In reply to ]
On Fri, May 18, 2012 at 10:08:54AM -0400, Yves Trudeau wrote:
> Hi Raoul and Dejan,
> I completely forgot about this one but I am wondering about the
> impacts. I have many setups in production and none reported any
> problem related to this. The fix pretty easy though, Dejan, is it
> too late to submit a patch?

No, it's not too late if the patch is fine. Up to you. It's very
mysql-specific, so I cannot offer much help.
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
Re: Modified patch for RA [ In reply to ]
Hi Dejan,
here's the patch for Raoul's comments. The patch is against
git://github.com/ClusterLabs/resource-agents.git commit bc1991fd0e

commit message:

Better reset slave handling and cleanup for get_slave_info


Regards,

Yves

Le 2012-05-18 11:13, Dejan Muhamedagic a écrit :
> On Fri, May 18, 2012 at 10:08:54AM -0400, Yves Trudeau wrote:
>> Hi Raoul and Dejan,
>> I completely forgot about this one but I am wondering about the
>> impacts. I have many setups in production and none reported any
>> problem related to this. The fix pretty easy though, Dejan, is it
>> too late to submit a patch?
>
> No, if the patch is fine. Up to you. It's very mysql-specific,
> cannot offer much help.
> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/
>
Re: Modified patch for RA [ In reply to ]
Hi Yves,

On Fri, May 18, 2012 at 11:39:14AM -0400, Yves Trudeau wrote:
> Hi Dejan,
> here's the patch for Raoul comments. The patch is over
> git://github.com/ClusterLabs/resource-agents.git commit bc1991fd0e
>
> commit message:
>
> Better reset slave handling and cleanup for get_slave_info

OK. What is the relation between this patch and the pull request
proposed by bmildren in github (if any)? Has the handling of this
changed with your previous patch?

Thanks,

Dejan

>
> Regards,
>
> Yves
>
> Le 2012-05-18 11:13, Dejan Muhamedagic a écrit :
> >On Fri, May 18, 2012 at 10:08:54AM -0400, Yves Trudeau wrote:
> >>Hi Raoul and Dejan,
> >> I completely forgot about this one but I am wondering about the
> >>impacts. I have many setups in production and none reported any
> >>problem related to this. The fix pretty easy though, Dejan, is it
> >>too late to submit a patch?
> >
> >No, if the patch is fine. Up to you. It's very mysql-specific,
> >cannot offer much help.
> >_______________________________________________________
> >Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> >http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> >Home Page: http://linux-ha.org/
> >

> --- mysql.bc1991fd0e 2012-05-18 11:30:51.358422574 -0400
> +++ resource-agents-prm/heartbeat/mysql 2012-05-18 11:32:38.418420112 -0400
> @@ -413,23 +413,27 @@
>
> # Check whether this machine should be slave
> if ! ocf_is_ms || ! get_read_only; then
> - return 1;
> + return 1
> fi
> +
> + get_slave_info
> + rc=$?
>
> - tmpfile=`mktemp ${HA_RSCTMP}/is_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
> -
> - $MYSQL $MYSQL_OPTIONS_REPL \
> - -e 'SHOW SLAVE STATUS\G' > $tmpfile
> -
> - # "SHOW SLAVE STATUS" returns an empty set if instance is not a
> - # replication slave
> - if [ -s $tmpfile ]; then
> - rm -f $tmpfile
> - return 0
> + if [ $rc -eq 0 ]; then
> + # show slave status is not empty
> + # Is there a master_log_file defined? (master_log_file is deleted
> + # by reset slave
> + if [ "$master_log_file" ]; then
> + return 0
> + else
> + return 1
> + fi
> + else
> + # "SHOW SLAVE STATUS" returns an empty set if instance is not a
> + # replication slave
> + return 1
> fi
> -
> - rm -f $tmpfile
> - return 1
> +
> }
>
> parse_slave_info() {
> @@ -440,31 +444,36 @@
> get_slave_info() {
> # Warning: this sets $tmpfile and LEAVE this file! You must delete it after use!
> local mysql_options
> +
> + if [ "$master_log_file" -a "$master_host" ]; then
> + # variables are already defined, get_slave_info has been run before
> + return $OCF_SUCCESS
> + else
> + tmpfile=`mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
>
> - tmpfile=`mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
> + $MYSQL $MYSQL_OPTIONS_REPL \
> + -e 'SHOW SLAVE STATUS\G' > $tmpfile
>
> - $MYSQL $MYSQL_OPTIONS_REPL \
> - -e 'SHOW SLAVE STATUS\G' > $tmpfile
> + if [ -s $tmpfile ]; then
> + master_host=`parse_slave_info Master_Host $tmpfile`
> + master_user=`parse_slave_info Master_User $tmpfile`
> + master_port=`parse_slave_info Master_Port $tmpfile`
> + master_log_file=`parse_slave_info Master_Log_File $tmpfile`
> + master_log_pos=`parse_slave_info Read_Master_Log_Pos $tmpfile`
> + slave_sql=`parse_slave_info Slave_SQL_Running $tmpfile`
> + slave_io=`parse_slave_info Slave_IO_Running $tmpfile`
> + last_errno=`parse_slave_info Last_Errno $tmpfile`
> + secs_behind=`parse_slave_info Seconds_Behind_Master $tmpfile`
> + ocf_log debug "MySQL instance running as a replication slave"
> + else
> + # Instance produced an empty "SHOW SLAVE STATUS" output --
> + # instance is not a slave
> + ocf_log err "check_slave invoked on an instance that is not a replication slave."
> + return $OCF_ERR_GENERIC
> + fi
>
> - if [ -s $tmpfile ]; then
> - master_host=`parse_slave_info Master_Host $tmpfile`
> - master_user=`parse_slave_info Master_User $tmpfile`
> - master_port=`parse_slave_info Master_Port $tmpfile`
> - master_log_file=`parse_slave_info Master_Log_File $tmpfile`
> - master_log_pos=`parse_slave_info Read_Master_Log_Pos $tmpfile`
> - slave_sql=`parse_slave_info Slave_SQL_Running $tmpfile`
> - slave_io=`parse_slave_info Slave_IO_Running $tmpfile`
> - last_errno=`parse_slave_info Last_Errno $tmpfile`
> - secs_behind=`parse_slave_info Seconds_Behind_Master $tmpfile`
> - ocf_log debug "MySQL instance running as a replication slave"
> - else
> - # Instance produced an empty "SHOW SLAVE STATUS" output --
> - # instance is not a slave
> - ocf_log err "check_slave invoked on an instance that is not a replication slave."
> - return $OCF_ERR_GENERIC
> + return $OCF_SUCCESS
> fi
> -
> - return $OCF_SUCCESS
> }
>
> check_slave() {
> @@ -692,7 +701,7 @@
> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> -e "RESET SLAVE;"
> if [ $? -gt 0 ]; then
> - ocf_log err "Failed to set master"
> + ocf_log err "Failed to reset slave"
> exit $OCF_ERR_GENERIC
> fi
> }

> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/

_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
Re: Modified patch for RA [ In reply to ]
Hi Dejan,
yes, the existence of master_log_file is now checked since reset
slave doesn't clear master_host.

Regards,

Yves

Le 2012-05-18 12:00, Dejan Muhamedagic a écrit :
> Hi Yves,
>
> On Fri, May 18, 2012 at 11:39:14AM -0400, Yves Trudeau wrote:
>> Hi Dejan,
>> here's the patch for Raoul comments. The patch is over
>> git://github.com/ClusterLabs/resource-agents.git commit bc1991fd0e
>>
>> commit message:
>>
>> Better reset slave handling and cleanup for get_slave_info
>
> OK. What is the relation between this patch and the pull request
> proposed by bmildren in github (if any)? Has the handling of this
> changed with your previous patch?
>
> Thanks,
>
> Dejan
>
>>
>> Regards,
>>
>> Yves
>>
>> Le 2012-05-18 11:13, Dejan Muhamedagic a écrit :
>>> On Fri, May 18, 2012 at 10:08:54AM -0400, Yves Trudeau wrote:
>>>> Hi Raoul and Dejan,
>>>> I completely forgot about this one but I am wondering about the
>>>> impacts. I have many setups in production and none reported any
>>>> problem related to this. The fix pretty easy though, Dejan, is it
>>>> too late to submit a patch?
>>>
>>> No, if the patch is fine. Up to you. It's very mysql-specific,
>>> cannot offer much help.
>>> _______________________________________________________
>>> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
>>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
>>> Home Page: http://linux-ha.org/
>>>
>
>> --- mysql.bc1991fd0e 2012-05-18 11:30:51.358422574 -0400
>> +++ resource-agents-prm/heartbeat/mysql 2012-05-18 11:32:38.418420112 -0400
>> @@ -413,23 +413,27 @@
>>
>> # Check whether this machine should be slave
>> if ! ocf_is_ms || ! get_read_only; then
>> - return 1;
>> + return 1
>> fi
>> +
>> + get_slave_info
>> + rc=$?
>>
>> - tmpfile=`mktemp ${HA_RSCTMP}/is_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
>> -
>> - $MYSQL $MYSQL_OPTIONS_REPL \
>> - -e 'SHOW SLAVE STATUS\G'> $tmpfile
>> -
>> - # "SHOW SLAVE STATUS" returns an empty set if instance is not a
>> - # replication slave
>> - if [ -s $tmpfile ]; then
>> - rm -f $tmpfile
>> - return 0
>> + if [ $rc -eq 0 ]; then
>> + # show slave status is not empty
>> + # Is there a master_log_file defined? (master_log_file is deleted
>> + # by reset slave
>> + if [ "$master_log_file" ]; then
>> + return 0
>> + else
>> + return 1
>> + fi
>> + else
>> + # "SHOW SLAVE STATUS" returns an empty set if instance is not a
>> + # replication slave
>> + return 1
>> fi
>> -
>> - rm -f $tmpfile
>> - return 1
>> +
>> }
>>
>> parse_slave_info() {
>> @@ -440,31 +444,36 @@
>> get_slave_info() {
>> # Warning: this sets $tmpfile and LEAVE this file! You must delete it after use!
>> local mysql_options
>> +
>> + if [ "$master_log_file" -a "$master_host" ]; then
>> + # variables are already defined, get_slave_info has been run before
>> + return $OCF_SUCCESS
>> + else
>> + tmpfile=`mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
>>
>> - tmpfile=`mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
>> + $MYSQL $MYSQL_OPTIONS_REPL \
>> + -e 'SHOW SLAVE STATUS\G'> $tmpfile
>>
>> - $MYSQL $MYSQL_OPTIONS_REPL \
>> - -e 'SHOW SLAVE STATUS\G'> $tmpfile
>> + if [ -s $tmpfile ]; then
>> + master_host=`parse_slave_info Master_Host $tmpfile`
>> + master_user=`parse_slave_info Master_User $tmpfile`
>> + master_port=`parse_slave_info Master_Port $tmpfile`
>> + master_log_file=`parse_slave_info Master_Log_File $tmpfile`
>> + master_log_pos=`parse_slave_info Read_Master_Log_Pos $tmpfile`
>> + slave_sql=`parse_slave_info Slave_SQL_Running $tmpfile`
>> + slave_io=`parse_slave_info Slave_IO_Running $tmpfile`
>> + last_errno=`parse_slave_info Last_Errno $tmpfile`
>> + secs_behind=`parse_slave_info Seconds_Behind_Master $tmpfile`
>> + ocf_log debug "MySQL instance running as a replication slave"
>> + else
>> + # Instance produced an empty "SHOW SLAVE STATUS" output --
>> + # instance is not a slave
>> + ocf_log err "check_slave invoked on an instance that is not a replication slave."
>> + return $OCF_ERR_GENERIC
>> + fi
>>
>> - if [ -s $tmpfile ]; then
>> - master_host=`parse_slave_info Master_Host $tmpfile`
>> - master_user=`parse_slave_info Master_User $tmpfile`
>> - master_port=`parse_slave_info Master_Port $tmpfile`
>> - master_log_file=`parse_slave_info Master_Log_File $tmpfile`
>> - master_log_pos=`parse_slave_info Read_Master_Log_Pos $tmpfile`
>> - slave_sql=`parse_slave_info Slave_SQL_Running $tmpfile`
>> - slave_io=`parse_slave_info Slave_IO_Running $tmpfile`
>> - last_errno=`parse_slave_info Last_Errno $tmpfile`
>> - secs_behind=`parse_slave_info Seconds_Behind_Master $tmpfile`
>> - ocf_log debug "MySQL instance running as a replication slave"
>> - else
>> - # Instance produced an empty "SHOW SLAVE STATUS" output --
>> - # instance is not a slave
>> - ocf_log err "check_slave invoked on an instance that is not a replication slave."
>> - return $OCF_ERR_GENERIC
>> + return $OCF_SUCCESS
>> fi
>> -
>> - return $OCF_SUCCESS
>> }
>>
>> check_slave() {
>> @@ -692,7 +701,7 @@
>> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
>> -e "RESET SLAVE;"
>> if [ $? -gt 0 ]; then
>> - ocf_log err "Failed to set master"
>> + ocf_log err "Failed to reset slave"
>> exit $OCF_ERR_GENERIC
>> fi
>> }
>
>> _______________________________________________________
>> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
>> Home Page: http://linux-ha.org/
>
> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/
>
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
Re: Modified patch for RA [ In reply to ]
Hi Yves,

On Fri, May 18, 2012 at 12:18:13PM -0400, Yves Trudeau wrote:
> Hi Dejan,
> yes, the existance of master_log_file is now checked since reset
> slave doesn't clear master_host.

Good. Patch applied upstream now. Thanks!

Dejan

P.S. BTW, usually not best to top-post.

> Regards,
>
> Yves
>
> Le 2012-05-18 12:00, Dejan Muhamedagic a écrit :
> > Hi Yves,
> >
> > On Fri, May 18, 2012 at 11:39:14AM -0400, Yves Trudeau wrote:
> >> Hi Dejan,
> >> here's the patch for Raoul comments. The patch is over
> >> git://github.com/ClusterLabs/resource-agents.git commit bc1991fd0e
> >>
> >> commit message:
> >>
> >> Better reset slave handling and cleanup for get_slave_info
> >
> > OK. What is the relation between this patch and the pull request
> > proposed by bmildren in github (if any)? Has the handling of this
> > changed with your previous patch?
> >
> > Thanks,
> >
> > Dejan
> >
> >>
> >> Regards,
> >>
> >> Yves
> >>
> >> Le 2012-05-18 11:13, Dejan Muhamedagic a écrit :
> >>> On Fri, May 18, 2012 at 10:08:54AM -0400, Yves Trudeau wrote:
> >>>> Hi Raoul and Dejan,
> >>>> I completely forgot about this one but I am wondering about the
> >>>> impacts. I have many setups in production and none reported any
> >>>> problem related to this. The fix pretty easy though, Dejan, is it
> >>>> too late to submit a patch?
> >>>
> >>> No, if the patch is fine. Up to you. It's very mysql-specific,
> >>> cannot offer much help.
> >>> _______________________________________________________
> >>> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> >>> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> >>> Home Page: http://linux-ha.org/
> >>>
> >
> >> --- mysql.bc1991fd0e 2012-05-18 11:30:51.358422574 -0400
> >> +++ resource-agents-prm/heartbeat/mysql 2012-05-18 11:32:38.418420112 -0400
> >> @@ -413,23 +413,27 @@
> >>
> >> # Check whether this machine should be slave
> >> if ! ocf_is_ms || ! get_read_only; then
> >> - return 1;
> >> + return 1
> >> fi
> >> +
> >> + get_slave_info
> >> + rc=$?
> >>
> >> - tmpfile=`mktemp ${HA_RSCTMP}/is_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
> >> -
> >> - $MYSQL $MYSQL_OPTIONS_REPL \
> >> - -e 'SHOW SLAVE STATUS\G'> $tmpfile
> >> -
> >> - # "SHOW SLAVE STATUS" returns an empty set if instance is not a
> >> - # replication slave
> >> - if [ -s $tmpfile ]; then
> >> - rm -f $tmpfile
> >> - return 0
> >> + if [ $rc -eq 0 ]; then
> >> + # show slave status is not empty
> >> + # Is there a master_log_file defined? (master_log_file is deleted
> >> + # by reset slave
> >> + if [ "$master_log_file" ]; then
> >> + return 0
> >> + else
> >> + return 1
> >> + fi
> >> + else
> >> + # "SHOW SLAVE STATUS" returns an empty set if instance is not a
> >> + # replication slave
> >> + return 1
> >> fi
> >> -
> >> - rm -f $tmpfile
> >> - return 1
> >> +
> >> }
> >>
> >> parse_slave_info() {
> >> @@ -440,31 +444,36 @@
> >> get_slave_info() {
> >> # Warning: this sets $tmpfile and LEAVE this file! You must delete it after use!
> >> local mysql_options
> >> +
> >> + if [ "$master_log_file" -a "$master_host" ]; then
> >> + # variables are already defined, get_slave_info has been run before
> >> + return $OCF_SUCCESS
> >> + else
> >> + tmpfile=`mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
> >>
> >> - tmpfile=`mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
> >> + $MYSQL $MYSQL_OPTIONS_REPL \
> >> + -e 'SHOW SLAVE STATUS\G'> $tmpfile
> >>
> >> - $MYSQL $MYSQL_OPTIONS_REPL \
> >> - -e 'SHOW SLAVE STATUS\G'> $tmpfile
> >> + if [ -s $tmpfile ]; then
> >> + master_host=`parse_slave_info Master_Host $tmpfile`
> >> + master_user=`parse_slave_info Master_User $tmpfile`
> >> + master_port=`parse_slave_info Master_Port $tmpfile`
> >> + master_log_file=`parse_slave_info Master_Log_File $tmpfile`
> >> + master_log_pos=`parse_slave_info Read_Master_Log_Pos $tmpfile`
> >> + slave_sql=`parse_slave_info Slave_SQL_Running $tmpfile`
> >> + slave_io=`parse_slave_info Slave_IO_Running $tmpfile`
> >> + last_errno=`parse_slave_info Last_Errno $tmpfile`
> >> + secs_behind=`parse_slave_info Seconds_Behind_Master $tmpfile`
> >> + ocf_log debug "MySQL instance running as a replication slave"
> >> + else
> >> + # Instance produced an empty "SHOW SLAVE STATUS" output --
> >> + # instance is not a slave
> >> + ocf_log err "check_slave invoked on an instance that is not a replication slave."
> >> + return $OCF_ERR_GENERIC
> >> + fi
> >>
> >> - if [ -s $tmpfile ]; then
> >> - master_host=`parse_slave_info Master_Host $tmpfile`
> >> - master_user=`parse_slave_info Master_User $tmpfile`
> >> - master_port=`parse_slave_info Master_Port $tmpfile`
> >> - master_log_file=`parse_slave_info Master_Log_File $tmpfile`
> >> - master_log_pos=`parse_slave_info Read_Master_Log_Pos $tmpfile`
> >> - slave_sql=`parse_slave_info Slave_SQL_Running $tmpfile`
> >> - slave_io=`parse_slave_info Slave_IO_Running $tmpfile`
> >> - last_errno=`parse_slave_info Last_Errno $tmpfile`
> >> - secs_behind=`parse_slave_info Seconds_Behind_Master $tmpfile`
> >> - ocf_log debug "MySQL instance running as a replication slave"
> >> - else
> >> - # Instance produced an empty "SHOW SLAVE STATUS" output --
> >> - # instance is not a slave
> >> - ocf_log err "check_slave invoked on an instance that is not a replication slave."
> >> - return $OCF_ERR_GENERIC
> >> + return $OCF_SUCCESS
> >> fi
> >> -
> >> - return $OCF_SUCCESS
> >> }
> >>
> >> check_slave() {
> >> @@ -692,7 +701,7 @@
> >> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> >> -e "RESET SLAVE;"
> >> if [ $? -gt 0 ]; then
> >> - ocf_log err "Failed to set master"
> >> + ocf_log err "Failed to reset slave"
> >> exit $OCF_ERR_GENERIC
> >> fi
> >> }
> >
> >> _______________________________________________________
> >> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> >> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> >> Home Page: http://linux-ha.org/
> >
> > _______________________________________________________
> > Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> > http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> > Home Page: http://linux-ha.org/
> >
> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/