Hi Tomo,
On Wed, Aug 17, 2011 at 10:44:26AM +0900, nozawat wrote:
> Hi Dejan
>
> I made a resource agent of rsyslog for the cause in syslog-ng.
Many thanks for the contribution, and sorry for the delay. Please
find the review below. I think it looks quite good.
BTW, is there any chance you could create an ocft test file? That
should be easy with rsyslog.
Cheers,
Dejan
> #!/bin/bash
> #
> # Description: Manages a rsyslog instance, provided by NTT OSSC as an
> # OCF High-Availability resource under Heartbeat/LinuxHA control
> #
> # Copyright (c) 2011 NIPPON TELEGRAPH AND TELEPHONE CORPORATION
> #
> ##############################################################################
> # OCF parameters:
> # OCF_RESKEY_rsyslog_binary : Path to rsyslog binary.
> # Default is "/sbin/rsyslogd"
> # OCF_RESKEY_configfile : Configuration file
> # OCF_RESKEY_start_opts : Startup options
> # OCF_RESKEY_kill_term_timeout: Number of seconds to await to confirm a
> # normal stop method
> #
> # Only OCF_RESKEY_configfile must be specified. Each of the other
> # parameters has a default value, or derives its value from
> # OCF_RESKEY_configfile, when no explicit value is given.
> #
> # Further information for setup:
> # There are sample configurations at the end of this file.
> #
> ###############################################################################
>
> : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
> . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
>
> usage()
> {
> cat <<-!
> usage: $0 action
>
> action:
> start : start a new rsyslog instance
>
> stop : stop the running rsyslog instance
>
> status : return the status of rsyslog, run or down
>
> monitor : return TRUE if the rsyslog appears to be working.
>
> meta-data : show meta data message
>
> validate-all: validate the instance parameters
> !
> return $OCF_ERR_UNIMPLEMENTED
> }
>
> metadata_rsyslog()
> {
> cat <<END
> <?xml version="1.0"?>
> <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
> <resource-agent name="rsyslog">
> <version>1.0</version>
>
> <longdesc lang="en">
> This script manages a rsyslog instance as an HA resource.
> </longdesc>
> <shortdesc lang="en">rsyslog resource agent</shortdesc>
>
> <parameters>
>
> <parameter name="configfile" unique="0" required="1">
This parameter should be unique too (unique="1").
> <longdesc lang="en">
> This parameter specifies a configuration file
> for a rsyslog instance managed by this RA.
> </longdesc>
> <shortdesc>Configuration file</shortdesc>
> <content type="string" default=""/>
> </parameter>
>
> <parameter name="rsyslog_binary" unique="0">
> <longdesc lang="en">
> This parameter specifies rsyslog's executable file.
> </longdesc>
> <shortdesc>rsyslog executable</shortdesc>
> <content type="string" default="/sbin/rsyslogd"/>
> </parameter>
>
> <parameter name="start_opts" unique="0">
> <longdesc lang="en">
> This parameter specifies startup options for a
> rsyslog instance managed by this RA. When no value is given, no startup
> options is used. Don't use option '-F'. It causes a stuck of a start action.
> </longdesc>
> <shortdesc>Start options</shortdesc>
> <content type="string" default=""/>
> </parameter>
>
> <parameter name="kill_term_timeout" unique="0">
> <longdesc lang="en">
> On a stop action, a normal stop method(pkill -TERM) is firstly used.
> And then the confirmation of its completion is waited for
> the specified seconds by this parameter.
> The default value is 10.
> </longdesc>
> <shortdesc>Number of seconds to await to confirm a normal stop method</shortdesc>
> <content type="integer" default="10"/>
> </parameter>
It is typical to use 2/3 of the stop operation's timeout meta
parameter (or something similar) as the default; see some other
resource agents. I consider this parameter overkill.
> </parameters>
>
> <actions>
> <action name="start" timeout="60s" />
> <action name="stop" timeout="120s" />
> <action name="status" timeout="60s" />
> <action name="monitor" depth="0" timeout="60s" interval="60s" />
I don't have much experience with rsyslog, but aren't these
suggested timeouts a bit too long?
> <action name="meta-data" timeout="5s" />
> <action name="validate-all" timeout="5"/>
> </actions>
> </resource-agent>
> END
> return $OCF_SUCCESS
> }
>
> monitor_rsyslog()
> {
> set -- $(pgrep -f "$PROCESS_PATTERN" 2>/dev/null)
> case $# in
> 0) ocf_log debug "No rsyslog process for $CONFIGFILE"
> return $OCF_NOT_RUNNING;;
> 1) return $OCF_SUCCESS;;
> esac
> ocf_log warn "Multiple rsyslog process for $CONFIGFILE"
> return $OCF_SUCCESS
> }
>
> start_rsyslog()
> {
> monitor_rsyslog
> if [[ $? = "$OCF_SUCCESS" ]]; then
> return $OCF_SUCCESS
> fi
>
> ocf_run "$RSYSLOG_EXE" -f "$CONFIGFILE" $START_OPTS
> ocf_status=$?
local ocf_status=$?
> if [[ "$ocf_status" != "$OCF_SUCCESS" ]]; then
It is usual to use test(1) '[' rather than the bash '[['. That is
not so important, though, if you want to keep it, since the agent
is already /bin/bash.
> return $OCF_ERR_GENERIC
> fi
>
> while true; do
> monitor_rsyslog
> if [[ $? = "$OCF_SUCCESS" ]]; then
> return $OCF_SUCCESS
> fi
> sleep 1
> done
> }
>
> stop_rsyslog()
> {
> pkill -TERM -f "$PROCESS_PATTERN"
>
> typeset lapse_sec=0
> while pgrep -f "$PROCESS_PATTERN" > /dev/null; do
> sleep 1
> lapse_sec=$(( lapse_sec + 1 ))
> ocf_log debug "stop_rsyslog[$RSYSLOG_NAME]: stop NORM $lapse_sec/$KILL_TERM_TIMEOUT"
> if [ $lapse_sec -ge $KILL_TERM_TIMEOUT ]; then
> break
> fi
> done
>
> lapse_sec=0
> while pgrep -f "$PROCESS_PATTERN" > /dev/null; do
> pkill -KILL -f "$PROCESS_PATTERN"
> sleep 1
> lapse_sec=$(( lapse_sec + 1 ))
> ocf_log debug "stop_rsyslog[$RSYSLOG_NAME]: suspend rsyslog by SIGKILL ($lapse_sec/@@@)"
> done
>
> return $OCF_SUCCESS
> }
>
> status_rsyslog()
> {
> monitor_rsyslog
> rc=$?
> if [ $rc = $OCF_SUCCESS ]; then
> echo "rsyslog service is running."
> elif [ $rc = $OCF_NOT_RUNNING ]; then
> echo "rsyslog service is stopped."
> else
> echo "Mutiple rsyslog process for $CONFIGFILE."
This branch is essentially unreachable (the multiple-process case is
already handled in monitor_rsyslog, which returns OCF_SUCCESS for it).
> fi
> return $rc
> }
>
> validate_all_rsyslog()
> {
> ocf_log info "validate_all_rsyslog[$RSYSLOG_NAME]"
> return $OCF_SUCCESS
> }
>
> if [[ "$1" = "meta-data" ]]; then
> metadata_rsyslog
> exit $?
> fi
>
> CONFIGFILE="${OCF_RESKEY_configfile}"
> if [[ -z "$CONFIGFILE" ]]; then
> ocf_log err "undefined parameter:configfile"
> exit $OCF_ERR_CONFIGURED
> fi
>
> RSYSLOG_NAME=${CONFIGFILE##*/}
> RSYSLOG_NAME=${RSYSLOG_NAME%.*}
You can drop this variable completely; ocf_log/debug should log
the RA instance name.
> RSYSLOG_EXE="${OCF_RESKEY_rsyslog_binary-/sbin/rsyslogd}"
> if [[ ! -x "$RSYSLOG_EXE" ]]; then
> ocf_log err "Invalid value:rsyslog_binary:$RSYSLOG_EXE"
> exit $OCF_ERR_CONFIGURED
> fi
>
> KILL_TERM_TIMEOUT="${OCF_RESKEY_kill_term_timeout-10}"
> if ! ocf_is_decimal "$KILL_TERM_TIMEOUT"; then
> ocf_log err "Invalid value:kill_term_timeout:$KILL_TERM_TIMEOUT"
> exit $OCF_ERR_CONFIGURED
> fi
>
> START_OPTS=${OCF_RESKEY_start_opts}
> PROCESS_PATTERN="$RSYSLOG_EXE -f $CONFIGFILE"
>
> COMMAND=$1
>
> case "$COMMAND" in
> start)
> ocf_log debug "[$RSYSLOG_NAME] Enter rsyslog start"
> start_rsyslog
> func_status=$?
> ocf_log debug "[$RSYSLOG_NAME] Leave rsyslog start $func_status"
> exit $func_status
> ;;
> stop)
> ocf_log debug "[$RSYSLOG_NAME] Enter rsyslog stop"
> stop_rsyslog
> func_status=$?
> ocf_log debug "[$RSYSLOG_NAME] Leave rsyslog stop $func_status"
> exit $func_status
> ;;
> status)
> status_rsyslog
> exit $?
> ;;
> monitor)
> monitor_rsyslog
> func_status=$?
> exit $func_status
> ;;
> validate-all)
> validate_all_rsyslog
> exit $?
> ;;
> *)
> usage
> ;;
> esac
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/