import csv
import os

from google.cloud import storage

# Google Cloud Storage bucket whose folder and file names are checked.
BUCKET_NAME = "cyark-data-platform.appspot.com"

# CSV report destinations: one for invalid folder names, one for invalid file names and their problems.
INVALID_FOLDERS_NAMES_OUTPUT_PATH = "Output/invalid_folder_names.csv"
INVALID_FILE_NAMES_OUTPUT_PATH = "Output/invalid_file_names.csv"

# Data-type tokens accepted in a file name. Kept as a list because its str() form is embedded verbatim in the
# generated problem descriptions.
VALID_GOOGLE_CLOUD_FILE_DATA_TYPES = [
    "lidar_terrestrial",
    "lidar_aerial",
    "photogrammetry_terrestrial",
    "photogrammetry_aerial",
    "data_derivatives",
]

def is_valid_doi(doi: str) -> bool:
    """
    Checks that a string follows the XXXX-XXXX DOI layout.

    A valid DOI is exactly nine characters long: four alphanumeric characters, a hyphen, and four more
    alphanumeric characters.

    :param doi: The candidate DOI string
    :return: True when the string matches the layout, False otherwise
    """
    if len(doi) == 9:
        # with the length fixed at nine, the three slices below cover the whole string
        head, separator, tail = doi[:4], doi[4], doi[5:]
        return separator == '-' and head.isalnum() and tail.isalnum()

    return False


def is_valid_file_name(filename: str) -> bool:
    """
    Checks that a file path follows the expected fixed-position naming structure.

    The path is read by position: characters 0-8 are the directory DOI, characters 10-18 are the DOI prefix of the
    file, everything from character 20 up to the last four characters is the data type, and the last four characters
    are the extension. The characters at positions 9 and 19 are not inspected.

    :param filename: The file path string to be checked
    :return: True when the DOI prefix matches the directory, the data type is one of
             VALID_GOOGLE_CLOUD_FILE_DATA_TYPES and the extension is ".zip"; False otherwise
    """
    folder_doi = filename[:9]
    prefix_doi = filename[10:19]

    # the DOI prefix of the file must repeat the directory it lives in
    if folder_doi != prefix_doi:
        return False

    data_type = filename[20:-4]
    extension = filename[-4:]

    return data_type in VALID_GOOGLE_CLOUD_FILE_DATA_TYPES and extension == ".zip"


def get_problem_descriptions(filename: str) -> list[str]:
    """
    Analyzes a file path and describes every naming rule it breaks.

    The path is read by position: characters 0-8 are the directory DOI, characters 10-18 are the DOI prefix of the
    file, everything from character 20 up to the last four characters is the data type, and the last four characters
    are the extension. All three checks always run, so one file can yield several descriptions.

    :param filename: The file path string to be analyzed
    :return: The list of problem descriptions, empty when no rule is broken
    """
    folder_doi = filename[:9]
    prefix_doi = filename[10:19]
    data_type = filename[20:-4]
    extension = filename[-4:]

    problem_descriptions = []

    # the DOI prefix of the file must repeat the directory it lives in
    if folder_doi != prefix_doi:
        problem_descriptions.append(
            f"Different DOI prefix: In file {filename}, file prefix is {prefix_doi} and directory name is "
            f"{folder_doi}.  File prefix must match the directory name."
        )

    # the data type must be one of the known tokens
    if data_type not in VALID_GOOGLE_CLOUD_FILE_DATA_TYPES:
        problem_descriptions.append(
            f"Invalid File Data Type: In file {filename}, data type is {data_type}. Valid data types are as "
            f"follows: {VALID_GOOGLE_CLOUD_FILE_DATA_TYPES}."
        )

    # only zip archives are accepted
    if extension != ".zip":
        problem_descriptions.append(
            f"Invalid File Extension: In file {filename}, file extension is {extension}. Valid file extensions are "
            f"as follows: [.zip]."
        )

    return problem_descriptions


def get_file_name_to_problem_description_dict(filenames: list[str]) -> dict[str, list[str]]:
    """
    Builds a mapping from each filename to the list of problems found in it.

    :param filenames: The list of file names to analyze
    :return: A dict keyed by filename whose values are the problem descriptions produced by
             get_problem_descriptions for that filename
    """
    return {name: get_problem_descriptions(name) for name in filenames}


def write_invalid_folder_names_to_file(file_path: str, invalid_folder_names: list[str]) -> None:
    """
    Writes the list of invalid folder names to the given file path as a single-column CSV.

    The file, and any missing parent directory, is created if it does not exist; an existing file is overwritten.
    The first row is the header "Invalid Folder Names", followed by one row per folder name.

    :param file_path: The string path of the file to be written.
    :param invalid_folder_names: The list of invalid folder names to be written
    :return: None
    """
    # open() does not create directories, so make sure the target directory exists first
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # the with-block closes the file even if a write fails; an explicit UTF-8 encoding keeps non-ASCII folder
    # names from failing under a narrower locale default
    with open(file_path, 'w', newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["Invalid Folder Names"])
        writer.writerows([invalid_folder] for invalid_folder in invalid_folder_names)


def write_invalid_file_names_to_file(file_path: str, invalid_files_to_problems_dict: dict[str, list[str]]) -> None:
    """
    Writes the dict of invalid files and their respective problem descriptions to the given file path as a CSV.

    The file, and any missing parent directory, is created if it does not exist; an existing file is overwritten.
    The first row is the header "File Path", "Problem". Each file then gets one row holding its path and first
    problem, followed by one row per additional problem with the path column left empty. A file whose problem list
    is empty still gets a single row with an empty problem column.

    :param file_path: The string path of the file to be written.
    :param invalid_files_to_problems_dict: The dict of invalid file names and problems to be written
    :return: None
    """
    # open() does not create directories, so make sure the target directory exists first
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # the with-block closes the file even if a write fails; an explicit UTF-8 encoding keeps non-ASCII file
    # names from failing under a narrower locale default
    with open(file_path, 'w', newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["File Path", "Problem"])

        for invalid_filename, problems_list in invalid_files_to_problems_dict.items():
            # fall back to one empty problem so an empty list cannot raise IndexError
            first_problem, *other_problems = problems_list or [""]

            writer.writerow([invalid_filename, first_problem])
            writer.writerows(["", problem] for problem in other_problems)


def print_results(invalid_folders_in: list[str], problems_in: dict[str, list[str]]) -> None:
    """
    Prints the results of the bucket check to the console.

    The invalid folders are listed first, one per line, followed by a blank line. Each invalid filename is then
    printed with a trailing colon, followed by its problem descriptions, one per line. Both section headings name
    the CSV path the same results are written to.

    :param invalid_folders_in: The list of invalid folders to print
    :param problems_in: The dict of filenames and list of problem descriptions to print
    :return: None
    """
    # invalid folders section
    print(f"Invalid Folders (results outputted to {INVALID_FOLDERS_NAMES_OUTPUT_PATH}):")
    for folder_name in invalid_folders_in:
        print(folder_name)

    print()

    # invalid files section
    print(f"Problems (results outputted to {INVALID_FILE_NAMES_OUTPUT_PATH}):")
    for invalid_filename, problems_list in problems_in.items():
        print(f"{invalid_filename}:")

        for problem in problems_list:
            print(problem)


def main() -> None:
    """
    Checks every top-level folder and file in the bucket and reports the invalid names.

    The first path component of every blob name is treated as a folder. Folders that are not valid DOIs are
    reported as invalid folders; inside valid folders every blob is checked with is_valid_file_name. The results
    are written to the two CSV output paths and printed to the console.

    :return: None
    """
    client = storage.Client()
    bucket = client.get_bucket(BUCKET_NAME)

    # dict.fromkeys de-duplicates in constant time per blob while keeping first-seen order
    folders = list(dict.fromkeys(blob.name.split('/')[0] for blob in bucket.list_blobs()))

    invalid_folders = []
    invalid_files = []
    for folder in folders:
        if not is_valid_doi(folder):
            invalid_folders.append(folder)
            continue

        folder_prefix = folder + "/"
        for blob in client.list_blobs(BUCKET_NAME, prefix=folder_prefix):
            # Skip only the object named exactly "<folder>/" (presumably the folder placeholder object). The
            # previous code discarded whichever blob was listed first, which dropped a real file when no such
            # object existed and raised IndexError when the listing was empty.
            if blob.name == folder_prefix:
                continue

            if not is_valid_file_name(blob.name):
                invalid_files.append(blob.name)

    problems = get_file_name_to_problem_description_dict(invalid_files)

    write_invalid_folder_names_to_file(INVALID_FOLDERS_NAMES_OUTPUT_PATH, invalid_folders)
    write_invalid_file_names_to_file(INVALID_FILE_NAMES_OUTPUT_PATH, problems)

    print_results(invalid_folders, problems)


if __name__ == '__main__':
    main()


