Hi Yves,
On Fri, May 04, 2012 at 04:29:34PM -0400, Yves Trudeau wrote:
> Hi Dejan,
> here's another modified patch for the mysql agent of the commit
> version 4c18035 (git@github.com:y-trudeau/resource-agents.git branch
> mysql-repl). Following a comment of Keisuke, I put back the log level
> for mysql_status in probe mode.
Cool!
So, according to the discussion at github, you insist that the
replication IP needs to be specified in the node's static
attributes. I'm not very happy with it, because it sets a
precedent, but I won't insist on it. Let's hope for the best.
There are a few concerns which went unanswered so far:
1a. In case this attribute is not set, would replication fail
properly (with an informative error message) or misbehave? It is
not explicitly checked for existence in the code.
1b. Mori-san suggested to make this attribute optional and in
case it doesn't exist just to use uname. That sounds like a good
idea to me.
2. Is it possible/plausible to have more than one mysql
instance? If so, then the attribute name should include the
instance name. Say ${INSTANCE_NAME}_mysql_replication_IP or
something to that effect. Also, it would make for a better
looking configuration. "IP" doesn't really say much.
3. This attribute is part of the configuration and is supposed to
be set up by the user. Please document that in the meta-data.
Cheers,
Dejan
P.S. Any chance of finishing this by Friday?
> Regards,
>
> Yves
> --- ../../mysql.4c18035b7 2012-05-04 16:23:45.238421759 -0400
> +++ mysql 2012-05-04 15:58:50.318419875 -0400
> @@ -79,6 +79,7 @@
> OCF_RESKEY_max_slave_lag_default="3600"
> OCF_RESKEY_evict_outdated_slaves_default="false"
> OCF_RESKEY_reader_attribute_default="readable"
> +OCF_RESKEY_replication_info_attribute_default="replication_info"
>
> : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
> MYSQL_BINDIR=`dirname ${OCF_RESKEY_binary}`
> @@ -109,7 +110,8 @@
> : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
> : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
>
> -: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_evict_reader_attribute_default}}
> +: ${OCF_RESKEY_reader_attribute=${OCF_RESKEY_reader_attribute_default}}
> +: ${OCF_RESKEY_replication_info_attribute=${OCF_RESKEY_replication_info_attribute_default}}
>
> #######################################################################
>
> @@ -328,7 +330,19 @@
> </longdesc>
> <shortdesc lang="en">Sets the node attribute that determines
> whether a node is usable for clients to read from.</shortdesc>
> -<content type="boolean" default="${OCF_RESKEY_reader_attribute_default}" />
> +<content type="string" default="${OCF_RESKEY_reader_attribute_default}" />
> +</parameter>
> +
> +<parameter name="replication_info_attribute" unique="1" required="0">
> +<longdesc lang="en">
> +An attribute that stores the current master IP, replication file and position.
> +This is queried by the agent in the post-promote notification
> +to reconnect the slaves to the new master.
> +
> +This parameter is only meaningful in master/slave set configurations.
> +</longdesc>
> +<shortdesc lang="en">Cluster attribute storing replication information</shortdesc>
> +<content type="string" default="${OCF_RESKEY_replication_info_attribute_default}" />
> </parameter>
> </parameters>
>
> @@ -355,10 +369,12 @@
> MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10"
> MYSQL_OPTIONS_REPL="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
> MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
> +MYSQL_TOO_MANY_CONN_ERR=1040
>
> CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
> HOSTNAME=`uname -n`
> CRM_ATTR="${HA_SBIN_DIR}/crm_attribute -N $HOSTNAME "
> +CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${OCF_RESKEY_replication_info_attribute} -s mysql_replication --query -q"
> INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
>
> #######################################################################
> @@ -468,7 +484,7 @@
>
> if [ $rc -eq 0 ]; then
> # Did we receive an error other than max_connections?
> - if [ $last_errno -ne 0 -a $last_errno -ne 1040 ]; then
> + if [ $last_errno -ne 0 -a $last_errno -ne "$MYSQL_TOO_MANY_CONN_ERR" ]; then
> # Whoa. Replication ran into an error. This slave has
> # diverged from its master. Make sure this resource
> # doesn't restart in place.
> @@ -484,7 +500,7 @@
> fi
>
> # If we got max_connections, let's remove the vip
> - if [ $last_errno -eq 1040 ]; then
> + if [ $last_errno -eq "$MYSQL_TOO_MANY_CONN_ERR" ]; then
> set_reader_attr 0
> exit $OCF_SUCCESS
> fi
> @@ -496,7 +512,7 @@
> ocf_log warn "MySQL Slave IO threads currently not running."
>
> # Sanity check, are we at least on the right master
> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
> + new_master_IP=`$CRM_ATTR_REPL_INFO | cut -d'|' -f1`
>
> if [ "$master_host" != "$new_master_IP" ]; then
> # Not pointing to the right master, not good, removing the VIPs
> @@ -573,7 +589,7 @@
> local new_master_IP master_log_file master_log_pos
> local master_params
>
> - new_master_IP=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f1`
> + new_master_IP=`$CRM_ATTR_REPL_INFO | cut -d'|' -f1`
>
> # Keep replication position
> get_slave_info
> @@ -585,8 +601,8 @@
> rm -f $tmpfile
> return
> else
> - master_log_file=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f2`
> - master_log_pos=`${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication --query -q | cut -d'|' -f3`
> + master_log_file=`$CRM_ATTR_REPL_INFO | cut -d'|' -f2`
> + master_log_pos=`$CRM_ATTR_REPL_INFO | cut -d'|' -f3`
> if [ -n "$master_log_file" -a -n "$master_log_pos" ]; then
> master_params=", MASTER_LOG_FILE='$master_log_file', \
> MASTER_LOG_POS=$master_log_pos"
> @@ -660,7 +676,7 @@
> while true; do
> $MYSQL $MYSQL_OPTIONS_REPL \
> -e 'SHOW PROCESSLIST\G' > $tmpfile
> - if grep 'Has read all relay log' $tmpfile >/dev/null; then
> + if grep -i 'Has read all relay log' $tmpfile >/dev/null; then
> ocf_log info "MySQL slave has finished processing relay log"
> break
> fi
> @@ -783,7 +799,7 @@
>
> mysql_status() {
> if [ ! -e $OCF_RESKEY_pid ]; then
> - ocf_log err "MySQL is not running"
> + ocf_log $1 "MySQL is not running"
> return $OCF_NOT_RUNNING;
> fi
>
> @@ -797,7 +813,7 @@
> if [ $? -eq 0 ]; then
> return $OCF_SUCCESS;
> else
> - ocf_log err "MySQL not running: removing old PID file"
> + ocf_log $1 "MySQL not running: removing old PID file"
> rm -f $OCF_RESKEY_pid
> return $OCF_NOT_RUNNING;
> fi
> @@ -811,8 +827,9 @@
> if ocf_is_probe; then
> status_loglevel="info"
> fi
> -
> +
> mysql_status $status_loglevel
> +
> rc=$?
>
> # TODO: check max connections error
> @@ -856,7 +873,7 @@
> set_reader_attr 0
> fi
>
> - mysql_status
> + mysql_status info
> if [ $? = $OCF_SUCCESS ]; then
> ocf_log info "MySQL already running"
> return $OCF_SUCCESS
> @@ -930,7 +947,7 @@
> # Let the CRM/LRM time us out if required.
> start_wait=1
> while [ $start_wait = 1 ]; do
> - mysql_status
> + mysql_status info
> rc=$?
> if [ $rc = $OCF_SUCCESS ]; then
> start_wait=0
> @@ -1019,7 +1036,7 @@
> count=0
> while [ $count -lt $shutdown_timeout ]
> do
> - mysql_status
> + mysql_status info
> rc=$?
> if [ $rc = $OCF_NOT_RUNNING ]; then
> break
> @@ -1029,7 +1046,7 @@
> ocf_log debug "MySQL still hasn't stopped yet. Waiting..."
> done
>
> - mysql_status
> + mysql_status info
> if [ $? != $OCF_NOT_RUNNING ]; then
> ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
> /bin/kill -KILL $pid > /dev/null
> @@ -1044,7 +1061,7 @@
> mysql_promote() {
> local master_info
>
> - if ( ! mysql_status ); then
> + if ( ! mysql_status err ); then
> return $OCF_NOT_RUNNING
> fi
> ocf_run $MYSQL $MYSQL_OPTIONS_REPL \
> @@ -1053,7 +1070,7 @@
> # Set Master Info in CIB, cluster level attribute
> update_data_master_status
> master_info="$(get_local_ip)|$(get_master_status File)|$(get_master_status Position)"
> - ${HA_SBIN_DIR}/crm_attribute --type crm_config --name replication_info -s mysql_replication -v "$master_info"
> + ${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${OCF_RESKEY_replication_info_attribute} -s mysql_replication -v "$master_info"
> rm -f $tmpfile
>
> set_read_only off || return $OCF_ERR_GENERIC
> @@ -1070,7 +1087,7 @@
> }
>
> mysql_demote() {
> - if ! mysql_status; then
> + if ! mysql_status err; then
> return $OCF_NOT_RUNNING
> fi
>
> @@ -1177,6 +1194,7 @@
> # The log directory must be a directory owned by root, with permissions 0700,
> # and the log must be writable and not a symlink.
> ##########################################################################
> +DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log"
> if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then
> DEBUG_LOG_DIR="${DEBUG_LOG%/*}"
> if [ -d "${DEBUG_LOG_DIR}" ]; then
> @@ -1214,7 +1232,7 @@
> case "$1" in
> start) mysql_start;;
> stop) mysql_stop;;
> - status) mysql_status;;
> + status) mysql_status err;;
> monitor) mysql_monitor;;
> promote) mysql_promote;;
> demote) mysql_demote;;
> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/