#!/bin/bash

# lhbadm - handle some common heartbeat/lustre failover ops

# Search the system sbin/bin directories first, then whatever PATH the
# caller supplied, and finally the heartbeat library directories.
PATH="/sbin:/usr/sbin:/usr/bin:${PATH}:/usr/lib64/heartbeat:/usr/lib/heartbeat"

# Program name used in the usage text and in diagnostic messages.
readonly prog=lhbadm

# Print an error message, prefixed with the program name, and exit.
# Arguments: $* - words of the message (joined with single spaces)
# Outputs:   the message on stderr
# Returns:   never; exits the current shell (or subshell) with status 1
die ()
{
    # Diagnostics go to stderr so that callers capturing stdout, such as
    # s=$(status), do not mistake the error text for a result.
    printf '%s\n' "$prog: $*" >&2
    exit 1
}

# Print a warning message, prefixed with the program name, and continue.
# Arguments: $* - words of the message (joined with single spaces)
# Outputs:   the message on stderr
warn ()
{
    # Warnings are diagnostics, so keep them out of stdout.
    printf '%s\n' "$prog: $*" >&2
}

# Show the command synopsis on stdout, then exit with status 1.
usage ()
{
    printf '%s\n' \
        "Usage: $prog status|lstatus|failback|failover" \
        "  status -   print one-line heartbeat-lustre status" \
        "  failover - fail all my active resources over to partner" \
        "  failback - fail my normal resources back"
    exit 1
}

# Summarise which Lustre targets are running on this node.
#
# Every label printed by "ldev -l" and by "ldev -f" is queried with
# "service lustre status <label>"; a target counts as running when that
# command prints exactly "running".
#
# Outputs: one word on stdout:
#   none    - no target from either list is running
#   local   - every "ldev -l" target is running, no "ldev -f" target is
#   foreign - every "ldev -f" target is running, no "ldev -l" target is
#   all     - every target from both lists is running
#   partial - any other combination
test_mounts ()
{
    local label
    local lcount=0
    local fcount=0
    local ltot=0
    local ftot=0

    # The label lists are split on whitespace on purpose: one word per target.
    for label in $(ldev -l); do
        ltot=$((ltot + 1))
        if [[ "$(service lustre status "$label")" == "running" ]]; then
            lcount=$((lcount + 1))
        fi
    done
    for label in $(ldev -f); do
        ftot=$((ftot + 1))
        if [[ "$(service lustre status "$label")" == "running" ]]; then
            fcount=$((fcount + 1))
        fi
    done

    # Order matters: "none" has to be tested first, because an empty list
    # would otherwise satisfy the "every target is running" comparisons.
    if (( lcount + fcount == 0 )); then
        echo none
    elif (( lcount == ltot && fcount == 0 )); then
        echo local
    elif (( lcount == 0 && fcount == ftot )); then
        echo foreign
    elif (( lcount == ltot && fcount == ftot )); then
        echo all
    else
        echo partial
    fi
}

# Print the combined heartbeat/Lustre status as "<heartbeat>-<lustre>".
#
# The heartbeat part is the output of "cl_status rscstatus".  The Lustre
# part is the output of "service lustre status", except that when it is
# "running" the finer-grained result of test_mounts is used instead.
#
# Outputs: the status string on stdout
# Returns: calls die if "cl_status rscstatus" fails
status ()
{
    local rstat fstat

    rstat=$(cl_status rscstatus) || die "cl_status rscstatus failed"
    fstat=$(service lustre status)

    if [[ "$fstat" == "running" ]]; then
        fstat=$(test_mounts)
    fi

    # Quoted so the text is printed verbatim, with no glob expansion.
    printf '%s\n' "$rstat-$fstat"
}

# Block until heartbeat has left the "transition" resource state.
#
# Sleeps 5 seconds, then polls "cl_status rscstatus"; repeats for as long
# as the reported state is "transition".
#
# Returns: calls die if "cl_status rscstatus" fails
wait_for_transition ()
{
    # Keep the polled state out of the global namespace.
    local state

    while sleep 5; do
        state=$(cl_status rscstatus) || die "cl_status rscstatus failed"
        [[ "$state" == "transition" ]] || break
    done
}

# Put this node into standby for all resources ("hb_standby all"), logging
# the status before and after through logger.
#
# Arguments: $* - free-form reason recorded in the log (at least one word)
# Returns:   calls die when not run as root, when no reason is given, or
#            when hb_standby fails
failover ()
{
    local snapshot

    if [[ "$(id -un)" != "root" ]]; then
        die "failover requires root privileges"
    fi
    if (( $# == 0 )); then
        die "please include a descriptive reason for the logs"
    fi

    snapshot=$(status)
    logger -s -t Lustre-ha -p user.err \
        "failover start, status=$snapshot, reason: $*"

    # hb_standby output (stdout and stderr) is discarded.
    if ! hb_standby all >/dev/null 2>&1; then
        die "hb_standby all failed"
    fi
    wait_for_transition

    snapshot=$(status)
    logger -s -t Lustre-ha -p user.err "failover complete, status=$snapshot"
}

# Take this node's resources back ("hb_takeover local"), logging the
# status before and after through logger.
#
# Arguments: $* - free-form reason recorded in the log (at least one word)
# Returns:   calls die when not run as root, when no reason is given, or
#            when hb_takeover fails
failback ()
{
    local s

    [[ "$(id -un)" == "root" ]] || die "failback requires root privileges"
    (( $# > 0 )) || die "please include a descriptive reason for the logs"

    s=$(status)
    logger -s -t Lustre-ha -p user.err "failback start, status=$s, reason: $*"

    hb_takeover local || die "hb_takeover local failed"
    wait_for_transition

    s=$(status)
    # Name the operation that actually ran, so the start and complete log
    # entries for a failback can be matched up.
    logger -s -t Lustre-ha -p user.err "failback complete, status=$s"
}


#
# MAIN
#

# Require a subcommand, a heartbeat installation, and a successful
# "cl_status hbstatus" before dispatching.
[ $# -eq 0 ] && usage
[ -x /usr/bin/cl_status ] || die "Heartbeat is not installed"
# On failure, report whatever cl_status printed on stdout.
hstat=$(cl_status hbstatus) || die "$hstat"

case "$1" in
    status)   status ;;
    # NOTE(review): no lstatus function is defined in the visible part of
    # this file -- confirm it exists, or this arm fails at run time.
    lstatus)  lstatus ;;
    # "$@" forwards the reason words untouched; an unquoted $* would
    # re-split them and glob-expand characters such as "*".
    failback) shift; failback "$@" ;;
    failover) shift; failover "$@" ;;
    *) usage ;;
esac

#  vi: ts=4 sw=4 expandtab
