#!/bin/bash

# Namespace that holds the SR-IOV network operator resources. A non-empty
# value already present in the environment is kept; otherwise the default
# below is assigned.
: "${SRIOV_NAMESPACE:=openshift-sriov-network-operator}"

# Prints the command given as "$@" behind a "> " prompt, followed by a
# blank line, then evaluates it and finishes with another blank line.
# The command's own exit status is not propagated.
echo_and_eval () {
    printf '> %s\n\n' "$*"
    eval "$@"
    printf '\n'
}

# Evaluates the command given as "$@" with stdout and stderr captured
# in a temporary file.
#
# Outputs: nothing on success (the captured output is discarded); on
#          failure an "ERROR: Could not run ..." header, the captured
#          output indented by two spaces, and a blank line.
# Returns: 0 if the command succeeded, 1 otherwise (the command's own
#          exit code is not preserved).
try_eval () {
	# Keep the bookkeeping variables out of the caller's scope.
	local tmpfile status=0

	if ! tmpfile=$(mktemp); then
		# Without a capture file the redirect below would fail and sed
		# would end up reading stdin, so report and bail out early.
		echo "ERROR: Could not run '$*':"
		echo "  could not create temporary file"
		echo ""
		return 1
	fi

	if ! eval "$@" >"$tmpfile" 2>&1; then
		status=1
		echo "ERROR: Could not run '$*':"
		sed -e 's/^/  /' "$tmpfile"
		echo ""
	fi

	rm -f -- "$tmpfile"
	return "$status"
}

# Reports the SR-IOV sync status of every node listed in
# $logdir/meta/nodeinfo (one "nodename IP" pair per line).
#
# For each node the syncStatus of its sriovnetworknodestate object in
# $SRIOV_NAMESPACE is queried with oc and one summary line is printed.
# Nodes whose status is "Succeeded" are appended to the global array
# sriovNodes; for "Failed" nodes the lastSyncError text is printed too.
# Any other reply (including a failed query) is reported as "not
# configured as a SR-IOV node".
do_operator () {
	local node addr stat err

	while read -r node addr; do
		# Ignore blank lines instead of querying an empty node name.
		[ -n "$node" ] || continue

		# stderr is dropped on purpose: nodes without a state object
		# simply fall through to the "not configured" branch.
		stat=$(oc get sriovnetworknodestate "$node" -n "$SRIOV_NAMESPACE" --template '{{.status.syncStatus}}' 2>/dev/null)

		# Cluster-provided text is always passed as a printf argument,
		# never as the format, so '%' or '\' in it is printed literally.
		case "$stat" in
			Succeeded)
				printf '%s (%s) syncStatus: succeeded\n' "$node" "$addr"
				sriovNodes+=("$node")
				;;
			InProgress)
				printf '%s (%s) syncStatus: in progress\n' "$node" "$addr"
				;;
			Failed)
				err=$(oc get sriovnetworknodestate "$node" -n "$SRIOV_NAMESPACE" --template '{{.status.lastSyncError}}')
				printf '%s (%s) syncStatus error: %s\n' "$node" "$addr" "$err"
				;;
			*)
				printf '%s (%s) is not configured as a SR-IOV node\n' "$node" "$addr"
				;;
		esac
	done < "$logdir/meta/nodeinfo"
}

# TODO: collect node info with debug pod
#
# Walks $logdir/meta/nodeinfo (one "nodename IP" pair per line) and, for
# every entry whose IP shows up as an "inet <IP>/" address in the local
# `ip addr show` output, collects the device and system logs for it via
# log_nodedevices and log_nodesystem.
do_node () {
	local node addr

	while read -r node addr; do
		# -F: the address is literal text, not a regular expression.
		if ip addr show | grep -qF -- "inet $addr/"; then
			printf 'checking devices on node %s\n' "$node"

			# Grab node device log
			log_nodedevices "$node"

			# Grab node system log
			log_nodesystem "$node"
		fi
	done < "$logdir/meta/nodeinfo"
}

# Writes the cluster's node list to $logdir/meta/nodeinfo, one line per
# InternalIP address in the form "nodename IP".
log_nodeinfo () {
	# Single-quoted so that $name stays a Go-template variable and is
	# not expanded by the shell.
	local template='{{range .items}}{{$name := .metadata.name}}{{range .status.addresses}}{{if eq .type "InternalIP"}}{{$name}} {{.address}}{{"\n"}}{{end}}{{end}}{{end}}'

	# The target is quoted so a log directory containing whitespace works.
	oc get nodes --template "$template" > "$logdir/meta/nodeinfo"
}

# Writes the "default" sriovoperatorconfig from $SRIOV_NAMESPACE to
# $logdir/meta/operatorconfig as a single record in the form
# "enableInjector enableOperatorWebhook logLevel".
log_operatorconfig () {
	local template='{{.spec.enableInjector}} {{.spec.enableOperatorWebhook}} {{.spec.logLevel}}'

	# The target is quoted so a log directory containing whitespace works.
	oc get sriovoperatorconfig default -n "$SRIOV_NAMESPACE" --template "$template" > "$logdir/meta/operatorconfig"
}

# Writes every sriovnetworknodepolicy in $SRIOV_NAMESPACE except the one
# named "default" to $logdir/meta/policy, one per line in the form
# "name deviceType linkType numVfs resourceName isRdma priority".
log_policy () {
	local template='{{range .items}}{{if ne .metadata.name "default"}}{{.metadata.name}} {{.spec.deviceType}} {{.spec.linkType}} {{.spec.numVfs}} {{.spec.resourceName}} {{.spec.isRdma}} {{.spec.priority}} {{"\n"}}{{end}}{{end}}'

	# The target is quoted so a log directory containing whitespace works.
	oc get sriovnetworknodepolicy -n "$SRIOV_NAMESPACE" --template "$template" > "$logdir/meta/policy"
}

# Appends the PF interfaces listed in each node's sriovnetworknodestate
# spec to $logdir/meta/state, one per line in the form
# "nodename interfacename pciaddress linktype numvfs".
#
# Nodes are taken from $logdir/meta/nodeinfo ("nodename IP" per line).
# Nodes for which the interface count cannot be obtained contribute no
# lines. The state file is always created, even if it stays empty.
log_sriovstate () {
	local node addr len i iface
	local state_file="$logdir/meta/state"

	# Create the file up front so that readers can open it even when no
	# node reports any interface.
	: >> "$state_file"

	while read -r node addr; do
		# stderr is dropped on purpose: nodes without a state object are
		# expected and simply yield an empty count.
		len=$(oc get sriovnetworknodestate "$node" -n "$SRIOV_NAMESPACE" --template '{{len .spec.interfaces}}' 2>/dev/null)

		# Anything that is not a plain number means "nothing to list".
		[[ "$len" =~ ^[0-9]+$ ]] || continue

		for ((i = 0; i < len; i++)); do
			iface="(index .spec.interfaces $i)"
			oc get sriovnetworknodestate "$node" -n "$SRIOV_NAMESPACE" --template "{{.metadata.name}} {{${iface}.name}} {{${iface}.pciAddress}} {{${iface}.linkType}} {{${iface}.numVfs}}{{\"\n\"}}" >> "$state_file"
		done
	done < "$logdir/meta/nodeinfo"
}

# Collects system-level information for one node into
# $logdir/nodes/<node>/ (files: dmesg, cmdline, udev).
#
# Arguments: $1 - node name, used as the directory name.
#
# Each file receives the command's stdout and stderr, so it holds either
# the collected data or the error message explaining why it is missing.
# try_eval is deliberately not used here: it discards the command's
# output on success, which would leave these files empty.
log_nodesystem () {
	local node=$1
	local lognode="$logdir/nodes/$node"
	mkdir -p "$lognode"

	dmesg                                       > "$lognode/dmesg"   2>&1
	cat /proc/cmdline                           > "$lognode/cmdline" 2>&1
	cat /etc/udev/rules.d/10-nm-unmanaged.rules > "$lognode/udev"    2>&1
}

# Collects network-device information for one node into
# $logdir/nodes/<node>/.
#
# Arguments: $1 - node name; selects the matching rows of
#                 $logdir/meta/state and names the output directory.
#
# Files written:
#   sys-class-net           listing of /sys/class/net/ followed by the
#                           output of `ip link show`
#   sys-class-net-<device>  per-device details for every state row whose
#                           first column equals the node name
log_nodedevices () {
	local node=$1
	local lognode="$logdir/nodes/$node"
	local dev_node dev_name dev_pci dev_link dev_numvfs dev_quoted
	mkdir -p "$lognode"

	# Both listings share one file: the first command truncates it, the
	# second appends, so neither overwrites the other.
	ls -al /sys/class/net/ >  "$lognode/sys-class-net"
	ip link show           >> "$lognode/sys-class-net"

	# State rows are "node name pci linktype numvfs"; only the first two
	# columns are used here.
	while read -r dev_node dev_name dev_pci dev_link dev_numvfs; do
		[ "$node" == "$dev_node" ] || continue

		# The commands are evaluated by echo_and_eval, so shell-quote the
		# device name. Ordinary names are unchanged by %q.
		printf -v dev_quoted '%q' "$dev_name"

		{
			echo_and_eval "ip -d link show $dev_quoted"
			echo_and_eval "ls -al /sys/class/net/ | grep $dev_quoted"
			echo_and_eval "ls -al /sys/class/net/$dev_quoted/device"
			echo_and_eval "ls -al /sys/class/net/$dev_quoted/device/ | grep virtfn"
		} >> "$lognode/sys-class-net-$dev_name"
	done < "$logdir/meta/state"
}

# Runs the collection steps (node list, operator config, policies, node
# state) in order, then prints the operator analysis under its heading.
# The node analysis is only announced as a TODO; it is not run.
do_operator_and_nodes () {
	local collector
	for collector in log_nodeinfo log_operatorconfig log_policy log_sriovstate; do
		"$collector"
	done

	printf '\n%s\n' "Analyzing SR-IOV operator APIs"
	do_operator

	printf '\n%s\n' "TODO: Analyzing nodes"
}

help() {
    # Display Help: every argument below becomes one output line; empty
    # arguments produce the blank separator lines.
    printf '%s\n' \
        "" \
        "This script queries and displays some important sriov network information from the cluster:" \
        "- TODO(zshi): Add information on what this script collects and arguments that it accepts if any." \
        "" \
        "" \
        "Usage: oc rsh -n <NETWORK-TOOLS-NAMESPACE> <network-tools-podname> sriov_network_info" \
        "or" \
        "oc adm network-tools -- sriov_network_info" \
        "or" \
        "podman run <IMAGE_ID> sriov_network_info" \
        ""
}

# Entry point: creates a temporary log directory, runs the collection
# and analysis (teeing its output to <logdir>/log), and packs the
# directory into ./openshift-sriov-debug-<ISO date>.tgz.
#
# Globals: logdir (written) - path of the temporary log directory; it is
#          intentionally not local because the log_* / do_* helpers read it.
#          self (read, optional) - path of the script to store in the dump.
# Outputs: the log directory path, the analysis output, and the name of
#          the resulting archive.
# Returns: 1 if the log directory cannot be created.
main () {
	local dumpname

	logdir=$(mktemp --tmpdir -d openshift-sriov-debug-XXXXXXXXX) || {
		echo "ERROR: could not create temporary log directory" >&2
		return 1
	}
	echo "$logdir"
	mkdir "$logdir/meta" "$logdir/master" "$logdir/nodes"

	# Keep a copy of this script with the dump. $self is honoured when a
	# caller provides it; otherwise fall back to the file defining main.
	cp -- "${self:-${BASH_SOURCE[0]}}" "$logdir/meta/debug.sh"

	do_operator_and_nodes |& tee "$logdir/log"

	dumpname="openshift-sriov-debug-$(date --iso-8601).tgz"
	# "&&" so that a failed cd never archives the wrong directory. The
	# transform renames the leading "." to a friendly top-level folder.
	(cd "$logdir" && tar -cf - --transform='s/^\./openshift-sriov-debug/' .) | gzip -c > "$dumpname"
	echo ""
	echo "Output is in $dumpname"
}

# Option handling: -h prints the usage text and exits. getopts runs in
# silent mode (leading ':'), and any other option is ignored.
while getopts ":h" option; do
    if [ "$option" = "h" ]; then
        # display Help
        help
        exit
    fi
done

main
