# This script recurses through the folders below where it is run (or under the specified --root) and finds all
# the Zip files. It then checks that all the JAR files in the zip files are Redhat versions.
# It also checks that there are not multiple copies of a jar (RH and non-RH).
# If it finds either of these issues (or finds no Zip files at all) it will issue a sys.exit(1), which can be
# used to fail the build.
import sys
import re
import os
import argparse

from zipfile import ZipFile
from collections import Counter

# Splits a jar file name at its version number. The capture group is greedy, so it holds
# everything before the last "-<digits>.<digits>" sequence, i.e. the library name
# (for example "kafka_2.12" for "kafka_2.12-2.1.0.redhat-00001.jar").
# A raw string is used so that "\d" and "\." reach the regex engine untouched instead of
# being treated as (invalid) string escape sequences.
splitter = re.compile(r"(.*)-\d+\.\d+\.*\d*")

def find_zips(filepath):
    """Return the paths of all Zip archives found at or below ``filepath``.

    The directory tree is walked with ``os.walk`` and every file whose name
    ends in ``.zip`` is collected.

    Args:
        filepath: The root directory to start scanning from.

    Returns:
        A list of paths (root-relative if ``filepath`` is relative), in the
        order ``os.walk`` yields them. The list is empty when no archive is
        found.
    """
    zip_files = []

    for dirpath, _dirnames, filenames in os.walk(filepath):
        for filename in filenames:
            # Test the extension rather than a substring so that side files such as
            # "dist.zip.sha1" are not mistaken for archives and handed to ZipFile.
            if filename.endswith(".zip"):
                zip_files.append(os.path.join(dirpath, filename))

    return zip_files

def check_zip(filename, exclude_jars=None):
    """Check the jars inside one Zip archive and return a list of error messages.

    Two checks are performed on every ``.jar`` entry that is not under a path
    containing ``licenses``:

    * every jar file name must contain ``redhat``;
    * no library (the part of the jar name before its version number) may be
      present more than once.

    A group of same-library jars is not reported as a duplicate when any jar in
    the group contains one of the excluded (partial) names. ``sources``,
    ``javadoc`` and ``scaladoc`` are always excluded.

    Args:
        filename: Path of the Zip archive to inspect.
        exclude_jars: Optional list of additional (partial) jar names whose
            duplicate groups should be ignored.

    Returns:
        A list of human readable error strings; empty when the archive passes.

    Raises:
        RuntimeError: If a jar file name does not contain a version number the
            ``splitter`` pattern can recognise.
    """
    excluded_jar_names = ["sources", "javadoc", "scaladoc"]
    if exclude_jars:
        excluded_jar_names.extend(exclude_jars)

    with ZipFile(filename) as output_zip:
        entries = output_zip.namelist()

    # Keep only real jar files, identified by the extension of the entry's base name.
    # Directory entries (empty base name) and files that merely have "jar" somewhere in
    # their path are dropped here, so no positional skipping of list items is needed.
    jar_list = []
    for entry in entries:
        if "licenses" in entry:
            continue
        jar_name = entry.split("/")[-1]
        if jar_name.endswith(".jar"):
            jar_list.append(jar_name)

    # Group the jars by library name (the text before the version number) so that
    # duplicates are found by exact library name rather than by substring.
    jars_by_lib = {}
    for jarname in jar_list:
        name_parts = splitter.split(jarname)
        if len(name_parts) < 2:
            raise RuntimeError("Could not parse jar file name: '{}'".format(jarname))
        jars_by_lib.setdefault(name_parts[1], []).append(jarname)

    non_RH_libs = [jar for jar in jar_list if "redhat" not in jar]

    errors = []

    if non_RH_libs:
        errors.append("Found non-Redhat Libraries in {}: {}".format(filename, sorted(non_RH_libs)))

    dup_jars = []
    for lib_jars in jars_by_lib.values():
        if len(lib_jars) < 2:
            continue

        # If any of the jars for this lib name matches an excluded name
        # (e.g. a "-sources" jar next to the main jar) the whole group is ignored.
        has_excluded = any(name in jarname for jarname in lib_jars for name in excluded_jar_names)
        if not has_excluded:
            dup_jars.extend(lib_jars)

    if dup_jars:
        errors.append("Found duplicate entries in {}: {}".format(filename, dup_jars))

    return errors

def dup_list(arg_str):
    """Turn a whitespace separated string into a list of its tokens.

    Used as the ``type`` converter for the ``--exclude_jars`` option, so a value
    such as ``"kafka zookeeper"`` becomes ``["kafka", "zookeeper"]``.
    """
    tokens = arg_str.split()
    return tokens


def create_parser():
    """Build the command line parser for this script.

    Options:
        --root / -r: Folder to scan for Zip archives. Defaults to ``"."``, which
            is the current working directory.
        --exclude_jars / -ej: Space separated list of (partial) jar names; it is
            converted to a list by ``dup_list`` and is ``None`` when not given.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(description='Output checking script for the AMQ Streams Distributables')

    # The default "." is resolved against the current working directory, not against
    # the location of this script, so the help text says exactly that.
    parser.add_argument("--root", "-r", required=False, default=".",
                        help="The root folder to scan down from. Defaults to the current working directory")

    parser.add_argument("--exclude_jars", "-ej", required=False, type=dup_list,
                        help="A space separated list of (partial) jar names to ignore from the duplicates list")

    return parser

if __name__ == "__main__":

    # Read the command line options and locate every archive below the chosen root.
    cli_args = create_parser().parse_args()
    zip_files = find_zips(cli_args.root)

    # Finding nothing to check is treated as a failure.
    if not zip_files:
        print("No archive files found below this directory")
        sys.exit(1)

    print("Checking the following files: {}".format(zip_files))

    # Gather the problems reported for each archive into one flat list.
    errors = [
        problem
        for zip_file in zip_files
        for problem in check_zip(zip_file, exclude_jars=cli_args.exclude_jars)
    ]

    if not errors:
        print("All Archives ({}) passed the tests".format(zip_files))
        sys.exit(0)

    # Report every problem before signalling failure through the exit code.
    for error in errors:
        print(error)
    sys.exit(1)