#!/bin/bash
# Fail immediately if one of the commands in the script fails
set -e
# Input:
#  - $1:  output-dir
#  - $2:  version1-dir (the latest GA version)
#  - $3:  version2-dir (the latest patch)
#  - $4:  distribution name (e.g. eap6.x, brms-engine)
#  - $5:  version of the patch (e.g. 6.1.1 or 6.1.2)
#  - $6:  directory where to place the generated updates-list.txt (this will be input for future patches)
#  - $7:  directory with updates-list.txt files from previous patches
#  - $8:  comma-separated list of relative paths which should be included as a whole (instead of doing a diff inside)
#  - $9:  directory with checksums.txt file from previous patches
#  - $10: directory where to put the newly created aggregated checksums.txt files
#
# Output:
#  - <output-dir>
#      -- remove-list.txt
#      -- updates-list.txt
#      -- checksums.txt # contains aggregated checksums of the files in the latest patch + all previous versions
#      -- new-content/ # directory with updates relative paths to the version1-dir/version2-dir root

# Algorithm:
#  1) Create list of all nested files inside both version1-dir and version2-dir
#  2) Compute the base remove-list.txt. Contains files that are in version1, but not in version2 (those need to be removed)
#  3) Create the diff (the files added/updated in version2 in comparison to version1)
#      - go over file-list-dir2
#       - if the same file does not exist in the dir1, add it to the result (+ update remove-list and updates-list)
#       - if the same file exists in dir1, create md5sum and compare
#           - if the checksum is the same no need to include the file, otherwise include it in the result (+ update remove-list and updates-list)

# example params ./create-diff.sh <output-dir> <distro1> <distro2> brms-eap6.x 6.1.1 <updates-lists-dir> <prev-updates-lists-dir> <list-of-paths-to-copy-as-wholes> <prev-checksums-dir> <aggregated-checksums-dir>

### Helper functions ###

# Includes the specified file as part of the patch diff.
#
# The file is copied from VERSION2_DIR into "${OUTPUT_DIR}/new-content" with its
# relative parent directories preserved, and its path is appended to both the
# updates list and the remove list.
#
# Globals (read): VERSION2_DIR, OUTPUT_DIR, UPDATES_LIST, REMOVE_LIST
# $1: path of the file, relative to VERSION2_DIR
# Returns: 0 on success, 1 if the file could not be copied (nothing is recorded then)
function copy_file_into_diff_dir {
    local file="$1"
    local target_dir

    # Resolve the destination to an absolute path first: the copy runs from inside
    # VERSION2_DIR, so a relative OUTPUT_DIR would otherwise point to the wrong place.
    target_dir=$(CDPATH= cd -- "${OUTPUT_DIR}/new-content" && pwd) || return 1

    # "cp --parents" keeps exactly the path given on the command line, hence the cd.
    # Running it in a subshell leaves the caller's working directory untouched,
    # even when the copy fails.
    (
        CDPATH= cd -- "${VERSION2_DIR}" &&
            cp --parents -- "${file}" "${target_dir}"
    ) || return 1

    printf '%s\n' "${file}" >> "${UPDATES_LIST}"
    printf '%s\n' "${file}" >> "${REMOVE_LIST}"
}

# Creates a checksums file with one "<path>=<md5 checksum>" line per listed file.
#
# Lines are appended to the resulting file, so the caller decides whether it
# starts out empty. Paths are read line by line, which keeps names containing
# spaces or glob characters intact and makes the function independent of the
# caller's IFS. Empty lines in the list are ignored.
#
# $1: basedir the relative paths are resolved against
# $2: file with the list of relative paths (one per line)
# $3: resulting file
# Returns: 0 on success, 1 if the list or one of the listed files cannot be read
function create_checksums_file {
    local basedir="$1"
    local relative_paths_file="$2"
    local result_file="$3"
    local relative_path
    local checksum

    # the "|| [ -n ... ]" part also picks up a last line without trailing newline
    while IFS= read -r relative_path || [ -n "${relative_path}" ]; do
        [ -n "${relative_path}" ] || continue
        # Feeding the file through stdin keeps the file name out of the md5sum
        # output, which is then always "<checksum>  -".
        if ! checksum=$(md5sum < "${basedir}/${relative_path}"); then
            echo "Can not compute checksum of ${basedir}/${relative_path}" >&2
            return 1
        fi
        printf '%s=%s\n' "${relative_path}" "${checksum%% *}" >> "${result_file}"
    done < "${relative_paths_file}"
}

# Merges the specified checksums files into a single, sorted resulting file.
#
# Every input line has the form "<path>=<checksum>[,<checksum>...]". The result
# contains one line per distinct path; its checksums are those of the first file
# followed by the ones from the second file that are not listed yet (a checksum
# is never duplicated). Paths are compared as exact strings, so "core.jar" and
# "lib/core.jar" (or "a" and "ab") are never confused with each other.
# The line is split at its last "=", because checksums never contain one.
# Empty lines are ignored; a line without "=" is treated as a path with no checksums.
#
# An input that does not exist (or is not readable) is skipped with a warning,
# the result then contains just the entries of the other input.
#
# $1: first checksum file
# $2: second checksum file
# $3: resulting file
function merge_checksum_files {
    local first_file="$1"
    local second_file="$2"
    local result_file="$3"
    local tmp_file="${result_file}.tmp"
    local -a inputs=()
    local candidate

    for candidate in "${first_file}" "${second_file}"; do
        if [ -d "${candidate}" ] || [ ! -r "${candidate}" ]; then
            echo "WARN: checksums file '${candidate}' is not readable, skipping it" >&2
            continue
        fi
        # awk treats operands looking like "name=value" as variable assignments
        # and ones starting with "-" as options; an explicit "./" prevents both
        case "${candidate}" in
            /*) ;;
            *) candidate="./${candidate}" ;;
        esac
        inputs+=("${candidate}")
    done

    rm -f -- "${tmp_file}"
    if [ "${#inputs[@]}" -eq 0 ]; then
        : > "${tmp_file}"
    else
        awk '
            # Records all checksums of one entry, keeping the order of first occurrence.
            function remember(path, sums,    parts, n, i) {
                if (!(path in merged)) {
                    merged[path] = ""
                    order[++total] = path
                }
                n = split(sums, parts, ",")
                for (i = 1; i <= n; i++) {
                    if (parts[i] == "" || ((path, parts[i]) in seen)) {
                        continue
                    }
                    seen[path, parts[i]] = 1
                    merged[path] = (merged[path] == "" ? parts[i] : merged[path] "," parts[i])
                }
            }
            $0 == "" { next }
            {
                if (match($0, /=[^=]*$/)) {
                    remember(substr($0, 1, RSTART - 1), substr($0, RSTART + 1))
                } else {
                    remember($0, "")
                }
            }
            END {
                for (i = 1; i <= total; i++) {
                    print order[i] "=" merged[order[i]]
                }
            }
        ' "${inputs[@]}" > "${tmp_file}" || return 1
    fi

    # The result is written only after all inputs were read, so it may safely be
    # the same file as one of the inputs.
    sort "${tmp_file}" > "${result_file}" || return 1
    rm -f -- "${tmp_file}"
}

# Positional parameters; their meaning is described in the header of this script.
OUTPUT_DIR="$1"
VERSION1_DIR="$2"
VERSION2_DIR="$3"
DISTRO_NAME="$4"
VERSION="$5" # e.g. 6.1.1 or 6.1.2
UPDATES_LISTS_DIR="$6"
PREVIOUS_UPDATES_LISTS_DIR="$7" # dir with updates-list.txt files from previous patches
WHOLE_DIRS_UPDATE="$8" # comma-separated list
PREVIOUS_CHECKSUMS_DIR="$9"
AGGREGATED_CHECKSUMS_DIR="${10}"

# TODO handle properly input params
# just a trivial check to see if at least the output dir and both distribution dirs were specified
# (the output dir is removed and re-created below, so an empty value must never get that far)
if [ -z "${OUTPUT_DIR}" ]; then
    echo "Output dir needs to be specified as the first parameter, no param specified!" >&2
    exit 1
fi

if [ -z "${VERSION1_DIR}" ]; then
    echo "Version1 (older) distribution dir needs to be specified as the second parameter, no param specified!" >&2
    exit 1
fi

if [ -z "${VERSION2_DIR}" ]; then
    echo "Version2 (newer) distribution dir needs to be specified as the third parameter, no param specified!" >&2
    exit 1
fi

# Scratch directory for intermediate lists; it is re-created on every run.
TMP_DIR="target/create-diff-tmp"
rm -rf "${TMP_DIR}"
mkdir --parents "${TMP_DIR}"

# Start from a clean output directory (":?" aborts instead of running rm with an empty path).
rm -rf "${OUTPUT_DIR:?}"
mkdir --parents "${OUTPUT_DIR}"

echo "Output dir: ${OUTPUT_DIR}"
echo "Version1 (older) dir: ${VERSION1_DIR}"
echo "Version2 (newer) dir: ${VERSION2_DIR}"

# Files are later copied while the working directory is VERSION2_DIR, so the
# output dir has to be absolute to stay valid from there.
OUTPUT_DIR="$(CDPATH= cd -- "${OUTPUT_DIR}" && pwd)"

# make sure there is at least empty file, if no differences found
REMOVE_LIST="${OUTPUT_DIR}/remove-list.txt"
: > "${REMOVE_LIST}"
CHECKSUMS_FILE="${OUTPUT_DIR}/checksums.txt"

UPDATES_LIST="${TMP_DIR}/updates-list.txt"
rm -f "${UPDATES_LIST}"
touch "${UPDATES_LIST}"

mkdir "${OUTPUT_DIR}/new-content"

# for each specified path: remove the path in old location, move the path from new location to new-content dir
# and update remove-list and updates-list
#
# The comma-separated list is read entry by entry (on a dedicated file descriptor,
# so nothing inside the loop can swallow the remaining entries). Surrounding
# whitespace is trimmed and empty entries are skipped - an empty entry would
# otherwise make the "rm -rf" below wipe the whole distribution directory.
while IFS= read -r RELATIVE_PATH <&3; do
    RELATIVE_PATH="${RELATIVE_PATH#"${RELATIVE_PATH%%[![:space:]]*}"}"
    RELATIVE_PATH="${RELATIVE_PATH%"${RELATIVE_PATH##*[![:space:]]}"}"
    [ -n "${RELATIVE_PATH}" ] || continue

    echo "Including the entire directory ${RELATIVE_PATH} (instead of creating diff)"
    mkdir -p "${OUTPUT_DIR}/new-content/${RELATIVE_PATH}"
    # Move every direct child, hidden files included. A plain "dir/*" glob skips
    # dotfiles (which the rm below would then silently delete) and fails on an
    # empty directory.
    find "${VERSION2_DIR}/${RELATIVE_PATH}" -mindepth 1 -maxdepth 1 \
        -exec mv -t "${OUTPUT_DIR}/new-content/${RELATIVE_PATH}" -- {} +
    # the path must not take part in the file-by-file diff, so drop it from both distributions
    rm -rf "${VERSION2_DIR:?}/${RELATIVE_PATH}"
    rm -rf "${VERSION1_DIR:?}/${RELATIVE_PATH}"
    printf '%s\n' "${RELATIVE_PATH}" >> "${UPDATES_LIST}"
    printf '%s\n' "${RELATIVE_PATH}" >> "${REMOVE_LIST}"
done 3< <(printf '%s\n' "${WHOLE_DIRS_UPDATE}" | tr ',' '\n')

# sorted lists of all files (as paths relative to the distribution root) in both versions
find "${VERSION1_DIR}" -type f -printf '%P\n' | sort > "${TMP_DIR}/version1-file-list.txt"
find "${VERSION2_DIR}" -type f -printf '%P\n' | sort > "${TMP_DIR}/version2-file-list.txt"

# files only in version1 (older) needs to be added to the remove list
# grep exits with 1 when it selects no line at all (i.e. no file was removed in
# version2). That is a valid outcome and must not abort the script ("set -e");
# only real errors (exit status 2 and above) are fatal.
grep -Fxv -f "${TMP_DIR}/version2-file-list.txt" "${TMP_DIR}/version1-file-list.txt" >> "${REMOVE_LIST}" || [ $? -eq 1 ]

# create the actual patch dir contents (things new/updated in version2)
IFS=$'\n'       # make newlines the only separator (unquoted expansions further down in this script rely on it)

# The copy below runs from inside VERSION2_DIR, so the destination has to be absolute.
NEW_CONTENT_DIR="$(CDPATH= cd -- "${OUTPUT_DIR}/new-content" && pwd)"

# updates-list files of the previous patches for this distribution; looked up just once.
# An empty array means "nothing to check", which also keeps grep from waiting on stdin.
PREVIOUS_UPDATES_LISTS=()
if [ -d "${PREVIOUS_UPDATES_LISTS_DIR}" ]; then
    while IFS= read -r -d '' PREVIOUS_UPDATES_LIST; do
        PREVIOUS_UPDATES_LISTS+=("${PREVIOUS_UPDATES_LIST}")
    done < <(find "${PREVIOUS_UPDATES_LISTS_DIR}" -name "${DISTRO_NAME}*" ! -type d -print0)
fi

# The list is read line by line on a dedicated file descriptor: file names are taken
# literally (no glob expansion) and commands in the loop body can not consume the list.
while IFS= read -r FILE <&3; do
    [ -n "${FILE}" ] || continue
    # if the file is not in version1, include it directly and go to next file
    if [ ! -f "${VERSION1_DIR}/${FILE}" ]; then
        # we need to preserve just the right level of parent dirs, so the "cd" (in a subshell) is bit of hack
        (
            CDPATH= cd -- "${VERSION2_DIR}" &&
                cp --parents -- "${FILE}" "${NEW_CONTENT_DIR}"
        )
        # this file should _not_ go into remove-list as it is just in new distro (nothing to remove from the old one)
        printf '%s\n' "${FILE}" >> "${UPDATES_LIST}"
        continue
    fi
    # now the file is both in version1 and version2. Detect if it changed and if so, include it in the patch dir
    MD5SUM1=$(md5sum < "${VERSION1_DIR}/${FILE}" | awk '{print $1}')
    MD5SUM2=$(md5sum < "${VERSION2_DIR}/${FILE}" | awk '{print $1}')
    # add the file in case the checksums don't match
    if [ "${MD5SUM1}" != "${MD5SUM2}" ]; then
        copy_file_into_diff_dir "${FILE}"
    # _or_ if the file was already updated as part of a previous patch
    # (that way we make sure the file will be upgraded in the older version, even if it did not change between the two versions being diffed)
    elif [ "${#PREVIOUS_UPDATES_LISTS[@]}" -gt 0 ] && grep -Fxq -- "${FILE}" "${PREVIOUS_UPDATES_LISTS[@]}"; then
        copy_file_into_diff_dir "${FILE}"
    fi
    # any other file did not change and is therefore not part of the patch
done 3< "${TMP_DIR}/version2-file-list.txt"

# Aggregated checksums files of the previous patches for this distribution (if there are any).
# The lookup result is kept in an array, so "no file found" and "several files found"
# are handled explicitly instead of being passed on as an empty / multi-line file name.
PREVIOUS_CHECKSUMS_FILES=()
if [ -d "${PREVIOUS_CHECKSUMS_DIR}" ]; then
    while IFS= read -r -d '' PREVIOUS_CHECKSUMS_FILE; do
        PREVIOUS_CHECKSUMS_FILES+=("${PREVIOUS_CHECKSUMS_FILE}")
    done < <(find "${PREVIOUS_CHECKSUMS_DIR}" -name "${DISTRO_NAME}*" ! -type d -print0 | sort -z)
fi

# checksums of the current patch are needed in both cases
VERSION2_CHECKSUMS_FILE="${TMP_DIR}/version2-checksums.txt"
rm -f "${CHECKSUMS_FILE}"
: > "${VERSION2_CHECKSUMS_FILE}"
create_checksums_file "${VERSION2_DIR}" "${TMP_DIR}/version2-file-list.txt" "${VERSION2_CHECKSUMS_FILE}"

if [ "${#PREVIOUS_CHECKSUMS_FILES[@]}" -gt 0 ]; then
    # previous checksums available, just append the checksums from the current patch (the previous checksums already contain
    # everything except the checksums for the current patch)
    # NOTE(review): when several files match, all of them are merged in - confirm this is the intended behaviour
    ACCUMULATED_CHECKSUMS_FILE="${TMP_DIR}/accumulated-checksums.txt"
    cp "${VERSION2_CHECKSUMS_FILE}" "${ACCUMULATED_CHECKSUMS_FILE}"
    for PREVIOUS_CHECKSUMS_FILE in "${PREVIOUS_CHECKSUMS_FILES[@]}"; do
        merge_checksum_files "${PREVIOUS_CHECKSUMS_FILE}" "${ACCUMULATED_CHECKSUMS_FILE}" "${CHECKSUMS_FILE}"
        cp "${CHECKSUMS_FILE}" "${ACCUMULATED_CHECKSUMS_FILE}"
    done
else
    # no previous checksums available; we need to include the checksums from both the latest release and the current patch
    VERSION1_CHECKSUMS_FILE="${TMP_DIR}/version1-checksums.txt"
    : > "${VERSION1_CHECKSUMS_FILE}"
    create_checksums_file "${VERSION1_DIR}" "${TMP_DIR}/version1-file-list.txt" "${VERSION1_CHECKSUMS_FILE}"
    merge_checksum_files "${VERSION1_CHECKSUMS_FILE}" "${VERSION2_CHECKSUMS_FILE}" "${CHECKSUMS_FILE}"
fi

# the checksums.txt will be needed in newer patches as well, so copy it in defined location which will then be included
# in one of the assemblies
cp "${CHECKSUMS_FILE}" "${AGGREGATED_CHECKSUMS_DIR}/${DISTRO_NAME}-${VERSION}-aggregated-checksums.txt"

# sort the remove-list so that it is easily comparable with the updates-list
sort "${REMOVE_LIST}" > "${TMP_DIR}/sorted-remove-list.txt"
cp "${TMP_DIR}/sorted-remove-list.txt" "${REMOVE_LIST}"
sort "${UPDATES_LIST}" > "${TMP_DIR}/sorted-updates-list.txt"
cp "${TMP_DIR}/sorted-updates-list.txt" "${UPDATES_LIST}"
# the updates-list of this patch is an input for future patches
cp "${UPDATES_LIST}" "${UPDATES_LISTS_DIR}/${DISTRO_NAME}-${VERSION}-updates-list.txt"

# everything that was updated by one of the previous patches needs to be removed as well;
# there are no previous updates-lists to include if this is the first patch being generated
PREVIOUS_LISTS_TO_REMOVE=()
if [ -d "${PREVIOUS_UPDATES_LISTS_DIR}" ]; then
    while IFS= read -r -d '' PREVIOUS_LIST_TO_REMOVE; do
        PREVIOUS_LISTS_TO_REMOVE+=("${PREVIOUS_LIST_TO_REMOVE}")
    done < <(find "${PREVIOUS_UPDATES_LISTS_DIR}" -name "${DISTRO_NAME}*" ! -type d -print0)
fi

TMP_REMOVE_LIST="${TMP_DIR}/remove-list-with-duplicates.txt"
# replace the "-redhat-<number>" suffix with wildcard (*) to make sure we remove all possible versions introduced by one-offs
# (the suffix is matched within a single path segment only and the dot of ".jar" is matched literally)
sed -e 's/-redhat-[^/]*\.jar/*.jar/g' -- "${REMOVE_LIST}" "${PREVIOUS_LISTS_TO_REMOVE[@]}" |
    sort -u > "${TMP_REMOVE_LIST}"

# remove-list can not contain entries which are also part of the latest patch distro (it is both useless and also buggy)
# grep exits with 1 when no line is left; an empty remove-list is fine and must not
# turn into a failure of the whole script
grep -F -x -v -f "${TMP_DIR}/version2-file-list.txt" "${TMP_REMOVE_LIST}" > "${REMOVE_LIST}" || [ $? -eq 1 ]
