import getopt
import shutil
import threading
import os
import sys

from google.cloud import storage
import dropbox


class Transfer:
    """
    Copies the files of a Google Cloud Storage bucket into a Dropbox folder.

    Files are moved in batches: each batch is downloaded from the bucket into a local directory, uploaded from there
    to Dropbox, and then removed from the local directory again.
    """
    BUCKET_NAME = "cyark-data-platform.appspot.com"
    dbx = None
    local_destination_path = "./"
    max_batch_size_gb = 0
    # Class-level defaults only. __init__ rebinds fresh per-instance lists so that instances never share state.
    file_blobs = []
    files_to_download = []
    num_files_to_download = 0

    # NOTE(review): a credential is committed in source here. It is kept unchanged so existing callers keep working,
    # but it should be rotated and loaded from configuration instead.
    ACCESS_TOKEN = "WhGkJyUkU6UAAAAAAAAAAWcFuGG3G2DMfS1Lr5anC4Vg1DqU1z6r1yvj3dIjysFi"
    DROPBOX_DESTINATION = "/CyArk Cloud/OpenHeritage3D/"
    UPLOAD_SIZE = 64 * 1024 * 1024

    def __init__(self, max_batch_size_gb_in: float = 1, local_destination_path_in: str = "./",
                 is_downloading_all_in: bool = False):
        """
        Default constructor for Transfer object.

        Connects to Dropbox and to the Google Cloud Storage bucket, lists the bucket's file blobs and works out which
        of them have to be transferred. Files larger than the max batch size are reported and left out.

        :param max_batch_size_gb_in: The max size of each batch in gigabytes. Default value is 1 gigabyte.
        :param local_destination_path_in: The path to the local directory, where Google Bucket files will be downloaded
                                          to and uploaded from to Dropbox. The downloaded files are deleted again
                                          after each batch, so creating a new directory is advised.
        :param is_downloading_all_in: Whether to transfer all files, regardless if they have been successfully
                                      transferred already
        """
        self.max_batch_size_gb = max_batch_size_gb_in
        self.local_destination_path = local_destination_path_in
        self.num_files_to_download = 0

        # instantiate dropbox object
        self.dbx = dropbox.Dropbox(self.ACCESS_TOKEN)

        # instantiate google cloud storage object
        client = storage.Client()
        cyark_bucket = client.get_bucket(self.BUCKET_NAME)

        # get only the file blobs (i.e. name does not end with '/'); the lists are created per instance
        self.file_blobs = [blob for blob in cyark_bucket.list_blobs() if not blob.name.endswith('/')]
        self.files_to_download = [blob.name for blob in self.file_blobs]

        if not is_downloading_all_in:
            (missing_files, invalid_files) = self.get_file_discrepancies()
            self.files_to_download = missing_files + invalid_files

        self.files_to_download = self._without_oversized(self.files_to_download)

    def _without_oversized(self, file_names: list[str], report: bool = True) -> list[str]:
        """
        Returns the given file names without those whose blob is larger than the max batch size.

        :param file_names: The names of the files that are candidates for a transfer
        :param report: Whether to print a message for every file that is left out
        :return: A new list with the remaining names, in their original order
        """
        max_batch_size_bytes = self.max_batch_size_gb * 1e9
        candidates = set(file_names)
        oversized = set()
        for blob in self.file_blobs:
            if (blob.size > max_batch_size_bytes) and (blob.name in candidates):
                if report:
                    print(blob.name + " (" + "{:.2f}".format(blob.size / 1e9) + " GB) is too big. Skipping")
                oversized.add(blob.name)

        return [name for name in file_names if name not in oversized]

    def perform_transfer(self) -> None:
        """
        Performs a full transfer of the files in Google Cloud to Dropbox. Only transfers the files that have been added
        to the list of files to transfer. After attempting a full transfer, searches for files that were not
        successfully transferred and attempts to again transfer the missing files and invalid files. This is repeated
        until no such file is left. Files larger than the max batch size are never retried, so they cannot keep the
        loop alive. Note: a file that fails on every attempt will keep this method running.

        :return: None
        """
        while self.files_to_download:
            self.num_files_to_download = len(self.files_to_download)
            print("Downloading " + str(self.num_files_to_download) + " files.")

            # get only the blobs that need to be downloaded
            pending_names = set(self.files_to_download)
            blobs_to_download = [blob for blob in self.file_blobs if blob.name in pending_names]

            self.transfer_to_dropbox(blobs_to_download)

            (missing_files, invalid_files) = self.get_file_discrepancies()
            # The discrepancy report covers every blob, so the size limit has to be applied again here.
            self.files_to_download = self._without_oversized(missing_files + invalid_files, report=False)

    def transfer_to_dropbox(self, files: "list[storage.blob.Blob]") -> None:
        """
        Attempts to transfer the list of files from Google Cloud to Dropbox. Performs transfers in batches with a max
        size of the max batch size. Files are added to the current batch in the given order; as soon as the next file
        would push the batch over the limit, the batch is transferred and a new one is started. A single file that is
        larger than the limit forms a batch of its own. Note: it is not guaranteed that each file will be successfully
        transferred. Larger files have a greater chance of having connection issues during a transfer.

        :param files: The list of blobs which represent the files in Google Cloud to transfer to Dropbox
        :return: None
        """
        max_batch_size_bytes = self.max_batch_size_gb * 1e9

        local_file_paths = []
        upload_results = []

        def flush(batch: list) -> None:
            # Transfer one batch and merge its per-file results into the running totals.
            batch_paths, batch_results = self.transfer_batch(batch)
            local_file_paths.extend(batch_paths)
            upload_results.extend(batch_results)

        current_batch = []
        current_batch_size = 0
        for file in files:
            # Never flush an empty batch: that only happens when the very first file alone exceeds the limit.
            if current_batch and (current_batch_size + file.size > max_batch_size_bytes):
                flush(current_batch)
                current_batch = []
                current_batch_size = 0
            current_batch.append(file)
            current_batch_size += file.size

        if current_batch:
            flush(current_batch)

        print("Finished downloading {} blobs".format(len(local_file_paths)))
        print("Finished uploading {} files".format(len(upload_results)))

    def transfer_batch(self, batch: "list[storage.blob.Blob]") -> tuple[list[str], list]:
        """
        Attempts to transfer the given batch of files from Google Cloud to Dropbox. The batch should be within the max
        batch size. First downloads the batch from Google Cloud to the location of the local destination path, then
        uploads the downloaded files to Dropbox, and finally deletes the downloaded files from the local destination
        path. Note: it is not guaranteed that each file in the batch will be successfully transferred.

        :param batch: The list of blobs representing the batch of files in Google Cloud to transfer to Dropbox
        :return: A tuple in the format: tuple[local_file_paths, upload_results]. local_file_paths lists the names of
                 the files that were downloaded, upload_results lists the result of each finished upload
        """
        if not batch:
            return [], []

        local_file_paths = self.download_batch(batch)
        upload_results = self.upload_batch(local_file_paths)
        self.clear_local_files(local_file_paths)
        return local_file_paths, upload_results

    @staticmethod
    def _run_in_thread_batches(worker, items: list, results: list, max_threads: int) -> None:
        """
        Runs worker(file=item, results=results) in its own thread for every item and waits for all of them.

        Threads are started in groups of max_threads; a group has to finish completely before the next one starts.

        :param worker: The callable to run, accepting the keyword arguments "file" and "results"
        :param items: The items to hand to the worker, one per thread
        :param results: The shared list every worker receives as "results"
        :param max_threads: The max number of threads running at the same time
        :return: None
        """
        threads = []
        for item in items:
            thread = threading.Thread(target=worker, kwargs={"file": item, "results": results})
            threads.append(thread)
            thread.start()
            if len(threads) == max_threads:
                # finish up threads in groups of size max_threads before starting more
                for running_thread in threads:
                    running_thread.join()
                threads = []

        # wait for the last of the threads to be finished
        for running_thread in threads:
            running_thread.join()

    def download_batch(self, batch: "list[storage.blob.Blob]", max_threads: int = 10) -> list[str]:
        """
        Attempts to download the given batch of files from Google Cloud to the local destination path. The batch should
        be within the max batch size. Utilizes multi-threading to speed up the download process, with a default of max
        10 threads. Returns a list with the names of the downloaded files, relative to the local destination path.
        Note: it is not guaranteed that each file in the batch will be successfully downloaded; a file whose download
        raises is simply missing from the returned list.

        :param batch: The list of blobs representing the batch of files in Google Cloud to download to the local
                      destination path
        :param max_threads: The max number of threads to use in the download process
        :return: A list of strings with the names of the downloaded files, relative to the local destination path
        """
        def download_file(file: "storage.blob.Blob", results: list[str]) -> None:
            """
            Attempts to download the given file from Google Cloud to the local destination path. Adds the name of the
            downloaded file to the results list.

            :param file: The blob which represents the file in Google Cloud to download
            :param results: The list of results to add the new file name to
            :return: None
            """
            print("Downloading " + file.name + " (" + "{:.2f}".format(file.size / 1e9) + " GB)")

            local_path = os.path.join(self.local_destination_path, file.name)
            directory_name = os.path.dirname(local_path)
            if directory_name:
                # exist_ok: several threads may try to create the same directory at the same time
                os.makedirs(directory_name, exist_ok=True)

            file.download_to_filename(local_path)
            results.append(file.name)
            print("Downloaded " + file.name + " (" + str(len(results)) + "/" + str(self.num_files_to_download) +
                  " files)")

        print("Downloading from Google Cloud to '" + self.local_destination_path + "': ")

        downloaded_file_paths = []
        self._run_in_thread_batches(download_file, batch, downloaded_file_paths, max_threads)

        self.num_files_to_download -= len(downloaded_file_paths)

        print()

        return downloaded_file_paths

    def upload_batch(self, batch: list[str], max_threads: int = 10) -> list:
        """
        Attempts to upload the given batch of files from the local destination path to Dropbox. It does not matter how
        large the batch size is. Utilizes multi-threading to speed up the uploading process, with a default of max 10
        threads. Existing Dropbox files at the same path are overwritten, so that files with a wrong size can be
        repaired. Note: it is not guaranteed that each file in the batch will be successfully uploaded; a file whose
        upload raises contributes no result.

        :param batch: A list of strings with the file names, relative to the local destination path
        :param max_threads: The max number of threads to use in the upload process
        :return: A list with the value Dropbox returned for each finished upload
        """
        def upload_file(file: str, results: list) -> None:
            """
            Attempts to upload the given file from the local destination path to Dropbox. Files up to UPLOAD_SIZE bytes
            are sent in one request, larger ones through an upload session in chunks of UPLOAD_SIZE bytes. Adds the
            result of the finished upload to the results list.

            :param file: The name of the local file to upload to Dropbox, relative to the local destination path
            :param results: The list of results to add the upload result to
            :return: None
            """
            local_path = os.path.join(self.local_destination_path, file)
            dropbox_path = self.DROPBOX_DESTINATION + file
            # Overwrite instead of the default "add" mode, which refuses to replace a file with different content.
            write_mode = dropbox.files.WriteMode.overwrite

            # The context manager closes the file even when a Dropbox call raises.
            with open(local_path, 'rb') as f:
                file_size = os.path.getsize(local_path)

                print("Uploading " + file + " (" + "{:.2f}".format(file_size / 1e9) + " GB)")
                if file_size <= self.UPLOAD_SIZE:
                    upload_result = self.dbx.files_upload(f.read(), dropbox_path, mode=write_mode)
                    print(upload_result)
                else:
                    upload_session_start_result = self.dbx.files_upload_session_start(f.read(self.UPLOAD_SIZE))
                    cursor = dropbox.files.UploadSessionCursor(session_id=upload_session_start_result.session_id,
                                                               offset=f.tell())
                    commit = dropbox.files.CommitInfo(path=dropbox_path, mode=write_mode)

                    while True:
                        percentage = f.tell() / file_size * 100
                        print(file + " " + "{:.2f}".format(percentage) + "% uploaded")
                        if (file_size - f.tell()) <= self.UPLOAD_SIZE:
                            # the remaining bytes fit into one chunk: send them and commit the file
                            upload_result = self.dbx.files_upload_session_finish(f.read(self.UPLOAD_SIZE), cursor,
                                                                                 commit)
                            break
                        self.dbx.files_upload_session_append_v2(f.read(self.UPLOAD_SIZE), cursor)
                        cursor.offset = f.tell()

            # exactly one result per successfully uploaded file
            results.append(upload_result)
            print(file)

        print("Uploading to Dropbox folder'" + self.DROPBOX_DESTINATION + "': ")

        upload_file_results = []
        self._run_in_thread_batches(upload_file, batch, upload_file_results, max_threads)

        print()

        return upload_file_results

    def clear_local_files(self, batch: list[str]) -> None:
        """
        Deletes the files in the batch, which are in the local destination path. A file that cannot be deleted is
        reported and skipped.

        :param batch: A list of strings with the file names, relative to the local destination path
        :return: None
        """
        print("Cleaning Target File: " + self.local_destination_path)
        for batch_file in batch:
            file_path = os.path.join(self.local_destination_path, batch_file)
            try:
                if os.path.isfile(file_path) or os.path.islink(file_path):
                    os.unlink(file_path)
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)
            except OSError as e:
                print('Failed to delete %s. Reason: %s' % (file_path, e))

    def get_file_discrepancies(self) -> tuple[list[str], list[str]]:
        """
        Returns a tuple consisting of a list of files that are in Google Cloud, but are not in Dropbox, and a list of
        files that have different sizes in Google Cloud and Dropbox. Also prints both lists.

        :return: A tuple in the format: tuple[missing_from_dropbox_files, wrong_size_files].
                 missing_from_dropbox_files and wrong_size_files are lists of strings
        """
        # Map every Dropbox file, by its path relative to the destination folder, to its size
        prefix_length = len(self.DROPBOX_DESTINATION)
        dropbox_file_sizes = {}
        result = self.dbx.files_list_folder(self.DROPBOX_DESTINATION, recursive=True)
        while True:
            for dropbox_file in self.process_entries(result.entries):
                # setdefault keeps the first entry seen for a path
                dropbox_file_sizes.setdefault(dropbox_file.path_display[prefix_length:], dropbox_file.size)
            if not result.has_more:
                break
            result = self.dbx.files_list_folder_continue(result.cursor)

        missing_from_dropbox_files = []
        wrong_size_files = []
        # for each Google Cloud Storage file, look for the corresponding file in dropbox
        for blob in self.file_blobs:
            if blob.name not in dropbox_file_sizes:
                missing_from_dropbox_files.append(blob.name)
            elif blob.size != dropbox_file_sizes[blob.name]:
                wrong_size_files.append(blob.name)

        if not missing_from_dropbox_files and not wrong_size_files:
            print("Everything was transfer successfully")
        else:
            if missing_from_dropbox_files:
                print("Missing files (The following files are not in Dropbox but are in Google Cloud Storage):")
                for missing_from_dropbox_file in missing_from_dropbox_files:
                    print(missing_from_dropbox_file)
                print()

            if wrong_size_files:
                print("Files with wrong size (The following files have different sizes in Dropbox and Google Cloud "
                      "Storage):")
                for wrong_size_file in wrong_size_files:
                    print(wrong_size_file)

        return missing_from_dropbox_files, wrong_size_files

    @staticmethod
    def process_entries(entries: "list[dropbox.files.Metadata]") -> "list[dropbox.files.FileMetadata]":
        """
        Takes the entries of a folder listing and returns a list comprised of the elements that are instances of
        FileMetadata. In other words, returns a list of elements which represent files in Dropbox.

        :param entries: A list of folder listing entries to process
        :return: A list comprised of the elements of the given list that are instances of FileMetadata
        """
        return [entry for entry in entries if isinstance(entry, dropbox.files.FileMetadata)]


if __name__ == '__main__':
    # Usage: [-a | --All] [max_batch_size_gb [local_destination_directory]]
    try:
        opts, args = getopt.getopt(sys.argv[1:], "a", ["All"])
    except getopt.GetoptError as err:
        # unknown option: report it instead of dying with a traceback
        print("Error: " + str(err))
        sys.exit(2)

    max_batch_size = 0
    # NOTE(review): kept as a module-level name that ends with "/". Check every reader of this name in the file
    # before moving it into a function or changing its format.
    local_destination_path = "./"
    if len(args) == 0:
        # Without positional arguments only a Transfer object is constructed; perform_transfer is never called.
        Transfer(sys.maxsize)
        sys.exit()
    if len(args) > 2:
        print("Error: Wrong Usage!")
        sys.exit(1)

    try:
        max_batch_size = float(args[0])
    except ValueError:
        # the batch size has to be a number
        print("Error: Wrong Usage!")
        sys.exit(1)
    if len(args) == 2:
        local_destination_path = args[1] + "/"

    # Neither option takes a value, so getopt reports them as ('-a', '') or ('--All', '').
    is_downloading_all = any(option in ("-a", "--All") for option, _ in opts)
    if is_downloading_all:
        print("Downloading Full Google Cloud Storage Content")

    transfer = Transfer(max_batch_size, local_destination_path, is_downloading_all)
    transfer.perform_transfer()

    print("Transfer Complete.")
