#!/bin/bash

#######################################
# Report whether every pod of the perf-node-gather DaemonSet is ready.
# Queries perf-node-gather-daemonset in the perf-node-gather namespace and
# compares .status.desiredNumberScheduled with .status.numberReady.
# Globals:   none (working variables are local; the caller's IFS is untouched)
# Arguments: none
# Returns:   0 when at least one pod is desired and all desired pods are
#            ready; 1 otherwise, including when the query fails or prints
#            something that is not a pair of non-negative integers.
#######################################
check_node_gather_pods_ready() {
    local status desired ready

    # A failed query means "not ready", never "ready".
    status=$(oc get ds perf-node-gather-daemonset \
        -o=custom-columns=DESIRED:.status.desiredNumberScheduled,READY:.status.numberReady \
        --no-headers -n perf-node-gather) || return 1

    # Scope the field separator to this single read so the caller's IFS
    # is preserved.
    IFS=$' \t' read -r desired ready _ <<< "$status"

    # Empty or placeholder output (e.g. "<none>") must not be treated as
    # the numeric comparison 0 == 0.
    [[ $desired =~ ^[0-9]+$ && $ready =~ ^[0-9]+$ ]] || return 1

    # A desired count of 0 means nothing has been scheduled (yet), so there
    # is nothing that could be ready to collect from.
    [[ $desired != 0 && $ready == "$desired" ]]
}

# Split unquoted expansions on newlines only for the remainder of the script.
IFS=$'\n'

# Output layout and the manifests describing the temporary node-gather
# resources.
BASE_COLLECTION_PATH="/must-gather"
NODES_PATH="${BASE_COLLECTION_PATH}/nodes"
mkdir -p "${NODES_PATH}"
NAMESPACE_MANIFEST="/etc/node-gather/namespace.yaml"
SERVICEACCOUNT_MANIFEST="/etc/node-gather/serviceaccount.yaml"
DAEMONSET_MANIFEST="/etc/node-gather/daemonset.yaml"
NAMESPACE=$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace)

# `hostname -I` prints every configured address separated by spaces and
# followed by a trailing space. Keep only the first address so the field
# selector below receives a single, clean IP.
IFS=$' \t' read -r POD_IP _ < <(hostname -I)

# Find the pod this script runs in (matched by IP) and reuse the image of its
# first init container for the DaemonSet.
POD_NAME=$(oc get pods --field-selector="status.podIP=${POD_IP}" -n "$NAMESPACE" -o'custom-columns=name:metadata.name' --no-headers)
MUST_GATHER_IMAGE=$(oc get pod -n "$NAMESPACE" "$POD_NAME" -o jsonpath="{.spec.initContainers[0].image}")

# Keep going on failure (node journal logs can still be collected), but make
# the cause visible instead of silently substituting an empty image.
if [[ -z "$MUST_GATHER_IMAGE" ]]; then
    echo "WARNING: could not determine the must-gather image; MUST_GATHER_IMAGE in ${DAEMONSET_MANIFEST} will be replaced with an empty string." >&2
fi

# Replace the MUST_GATHER_IMAGE placeholder in the DaemonSet manifest in place.
sed -i -e "s#MUST_GATHER_IMAGE#${MUST_GATHER_IMAGE}#" "$DAEMONSET_MANIFEST"

# Create the namespace, service account (granted the privileged SCC) and the
# DaemonSet itself.
oc create -f "$NAMESPACE_MANIFEST"
oc create -f "$SERVICEACCOUNT_MANIFEST"
oc adm policy add-scc-to-user privileged -n perf-node-gather -z perf-node-gather
oc create -f "$DAEMONSET_MANIFEST"

# Poll check_node_gather_pods_ready once per second until it succeeds, giving
# up after 300 one-second waits.
COUNTER=0
while ! check_node_gather_pods_ready && (( COUNTER != 300 )); do
    COUNTER=$(( COUNTER + 1 ))
    sleep 1
done

# For every perf-node-gather pod that is not in the Running phase, append a
# line to skipped_nodes.txt naming the node reported in the pod's
# .spec.nodeName.
while IFS= read -r skipped_node || [[ -n "$skipped_node" ]]; do
    # Ignore blank rows.
    [[ -n "$skipped_node" ]] || continue
    echo "Failed to collect perf-node-gather data from node ${skipped_node} due to pod scheduling failure." >> "${NODES_PATH}/skipped_nodes.txt"
done < <(oc get pod -o=custom-columns=NODE:.spec.nodeName --no-headers --field-selector=status.phase!=Running -n perf-node-gather)

# For each Running perf-node-gather pod, run lspci, lscpu and read
# /proc/cmdline inside the pod, appending the output to files under a
# directory named after the pod's node. Errors from the commands are
# discarded.
mapfile -t running_pods < <(oc get pod -o=custom-columns=NODE:.spec.nodeName,NAME:.metadata.name --no-headers --field-selector=status.phase=Running -n perf-node-gather)
for entry in "${running_pods[@]}"; do
    # Ignore blank rows.
    [[ -n "$entry" ]] || continue

    # Each row is "<node name> <pod name>", separated by whitespace.
    IFS=$' \t' read -r node pod _ <<< "$entry"

    NODE_PATH="${NODES_PATH}/${node}"
    mkdir -p "${NODE_PATH}"

    oc exec "$pod" -n perf-node-gather -- lspci -nvv 2>/dev/null >> "${NODE_PATH}/lspci"
    oc exec "$pod" -n perf-node-gather -- lscpu -e 2>/dev/null >> "${NODE_PATH}/lscpu"
    oc exec "$pod" -n perf-node-gather -- cat /proc/cmdline 2>/dev/null >> "${NODE_PATH}/proc_cmdline"
done

# Collect journal logs for the specified units from every node.
# Each node/unit collection runs as a background job so the nodes are queried
# in parallel; the final `wait` blocks until all of them have finished, so no
# log file is still being written when the script moves on and exits.
NODE_UNITS=(kubelet)
mapfile -t ALL_NODES < <(oc get nodes --no-headers -o custom-columns=':metadata.name')
for NODE in "${ALL_NODES[@]}"; do
    # Ignore blank rows.
    [[ -n "$NODE" ]] || continue

    NODE_PATH="${NODES_PATH}/${NODE}"
    mkdir -p "${NODE_PATH}"
    for UNIT in "${NODE_UNITS[@]}"; do
        oc adm node-logs "$NODE" -u "$UNIT" > "${NODE_PATH}/${NODE}_logs_${UNIT}" &
    done
done
wait

# Delete the node-gather resources: DaemonSet first, then the service
# account, then the namespace.
for manifest in "$DAEMONSET_MANIFEST" "$SERVICEACCOUNT_MANIFEST" "$NAMESPACE_MANIFEST"; do
    oc delete -f "$manifest"
done
