import pandas as pd
import csv
import database_parser


def read_dois(file_path: str = r'CSV_files/Projects_MASTER.csv') -> list[str]:
    """
    Returns the values of the 'doi' column of a Projects Master csv file.

    :param file_path: The string path of the csv file to read. Defaults to the
        Projects Master csv file, so existing no-argument callers are unaffected
    :return: A list with one entry per row of the 'doi' column, in file order
    :raises KeyError: If the csv file has no 'doi' column
    """
    data = pd.read_csv(file_path)

    # A single column is a 1-D Series, so it can be converted to a list
    # directly; no transpose is needed.
    # NOTE(review): pandas reads empty cells as NaN, so blank doi cells would
    # come back as floats rather than strings - confirm the file has none.
    return data['doi'].tolist()


def get_missing_datasets_dois() -> list[str]:
    """
    Returns a list of dois that are in the Project Master csv file but are missing from the Datasets csv file.

    Dois that appear in the Datasets csv file but not in the Project Master csv file are ignored
    rather than raising an error, and every occurrence of a doi that has datasets is excluded.

    :return: A list of dois with no datasets, in the order they appear in the Project Master csv file
    """
    dois = read_dois()
    doi_to_datasets_dict = database_parser.get_doi_to_datasets_dict("CSV_files/Datasets.csv")

    # Filter with a set instead of calling list.remove() per key: remove() raises
    # ValueError for a key that is not in the list and only drops the first
    # occurrence of a duplicated doi.
    dois_with_datasets = set(doi_to_datasets_dict.keys())

    return [doi for doi in dois if doi not in dois_with_datasets]


def write_to_file(file_path: str, missing_datasets_dois_list: list[str]) -> None:
    """
    Writes the given list of missing dataset dois to the given file path.

    The output is a single-column csv file: a "doi" header row followed by one
    row per doi. Any existing file at the path is overwritten.

    :param file_path: The string file path to write to
    :param missing_datasets_dois_list: The list of missing dataset dois to write
    :return: None
    """
    # The context manager closes the file even if a write raises.
    # newline="" lets the csv module control line endings itself.
    with open(file_path, 'w', newline="") as f:
        writer = csv.writer(f)

        writer.writerow(["doi"])
        writer.writerows([doi] for doi in missing_datasets_dois_list)


if __name__ == '__main__':
    # Script entry point: report the dois that have no datasets on stdout,
    # then save the same list as a csv file.
    missing_dois = get_missing_datasets_dois()
    print(missing_dois)
    print(f"There are {len(missing_dois)} dois that have no datasets")
    write_to_file("Output/missing_datasets_dois.csv", missing_dois)
