import csv
import datetime
import os.path
import sys
import database_parser
from datetime import date


# Output directory used when no directory is supplied on the command line.
DEFAULT_DIRECTORY = "xml_files"

# Positions of the fields inside one organization record: name, url, then the
# authority / collector / funder / partner / contributor entries.
(
    ORGANIZATION_NAME_INDEX,
    ORGANIZATION_URL_INDEX,
    IS_AUTHORITY_INDEX,
    IS_COLLECTOR_INDEX,
    IS_FUNDER_INDEX,
    IS_PARTNER_INDEX,
    IS_CONTRIBUTOR_INDEX,
) = range(7)

# Positions of the fields inside one project record (one row of the projects
# master reader), numbered consecutively from 0.
(
    DOI_INDEX,
    PROJECT_NAME_INDEX,
    COUNTRY_INDEX,
    LATITUDE_INDEX,
    LONGITUDE_INDEX,
    STATUS_INDEX,
    PROJECT_DESCRIPTION_INDEX,
    SITE_DESCRIPTION_INDEX,
    COLLECTION_DATE_START_INDEX,
    COLLECTION_DATE_END_INDEX,
    PUBLISH_DATE_INDEX,
    LICENSE_TYPE_INDEX,
    LICENSE_LINK_INDEX,
    EXTERNAL_PROJECT_LINK_INDEX,
    ADDITIONAL_INFO_LINK_INDEX,
    KEYWORDS_INDEX,
    POTREE_VIEWER_INDEX,
) = range(17)


def generate_files(project_master_path: str, project_entities_path: str, organizations_path: str, directory_in: str) \
                   -> None:
    """
    Generates xml files of doi projects by parsing the data in the database csv files. The xml files will have a format
    that can be uploaded to datacite. One file named "<doi>.xml" is written per project row.

    :param project_master_path: The path to the project_master csv file
    :param project_entities_path: The path to the project_entities csv file
    :param organizations_path:  The path to the organization csv file
    :param directory_in: The path to the directory that the generated files will be placed in. The directory must
                         already exist. A trailing path separator is accepted but not required.
    :return: None
    """
    doi_to_ids_dict = database_parser.get_doi_to_ids_dict(project_entities_path)
    id_to_organization_dict = database_parser.get_id_to_organization_dict(organizations_path)
    doi_to_organizations_dict = database_parser.get_doi_to_organizations_dict(doi_to_ids_dict, id_to_organization_dict)

    projects_master_reader = database_parser.get_projects_master_reader(project_master_path)

    # Skip the header row. A bare next() raises StopIteration on an empty file, so a default is supplied to make the
    # "file is empty" case an ordinary early return.
    header = next(projects_master_reader, None)
    if header is None:
        return

    for project in projects_master_reader:
        doi = project[DOI_INDEX]

        # Projects without any known organization get an empty list
        organizations = doi_to_organizations_dict[doi] if doi in doi_to_organizations_dict else []

        xml_str = generate_xml(project, organizations)

        # NOTE(review): the file is written as Latin1 although the generated xml declares encoding="UTF-8".
        # This may be a deliberate round-trip of Latin1-decoded input; confirm before changing it.
        xml_path = os.path.join(directory_in, doi + ".xml")
        with open(xml_path, 'w', newline="", encoding='Latin1') as xml_file:
            xml_file.write(xml_str)


def generate_xml(project_in: list[str], organizations: list[list[str]]) -> str:
    """
    Generates an xml string for the given project data for uploading to datacite.

    :param project_in: A list containing strings, which represent different attributes and data of the project. The
                       position of each attribute is given by the *_INDEX constants of this module
    :param organizations: A list of organization data, which are list of strings that represent different attributes
                          and data. Each list of strings are in the following format: list[ORGANIZATION_NAME,
                          ORGANIZATION_URL, IS_AUTHORITY, IS_COLLECTOR, IS_FUNDER, IS_PARTNER, IS_CONTRIBUTOR]
    :return: The xml string of the project data
    """
    # The publication year is the last component of a slash date (MM/DD/YYYY, the slash format get_dates_str also
    # accepts) and the first component of a dash date (YYYY-MM-DD).
    publish_date = project_in[PUBLISH_DATE_INDEX]
    if "/" in publish_date:
        publication_year = publish_date.split("/")[-1]
    else:
        publication_year = publish_date.split("-")[0]

    # No usable year recorded: fall back to the current year
    if publication_year == "":
        publication_year = str(date.today().year)

    sections = [
        # xml file header
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",

        # resource start
        ("<resource xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instan"
         "ce\" xmlns=\"http://datacite.org/schema/kernel-4\" xsi:schema"
         "Location=\"http://datacite.org/schema/kernel-4 http://schema."
         "datacite.org/meta/kernel-4/metadata.xsd\">\n"),

        get_identifier_str(project_in[DOI_INDEX]),
        get_creators_str(organizations),
        get_title_str(project_in[PROJECT_NAME_INDEX]),

        "\t<publisher>OpenHeritage3D</publisher>\n",
        "\t<publicationYear>" + publication_year + "</publicationYear>\n",
        "\t<resourceType resourceTypeGeneral=\"Dataset\">Dataset</resourceType>\n",

        get_subjects_str(project_in[KEYWORDS_INDEX]),

        # The dates must go through get_dates_str; appending the raw values as a tuple raises a TypeError.
        get_dates_str(project_in[COLLECTION_DATE_START_INDEX],
                      project_in[COLLECTION_DATE_END_INDEX],
                      project_in[PUBLISH_DATE_INDEX]),

        # Elements that are always emitted empty
        "\t<sizes/>\n",
        "\t<formats/>\n",
        "\t<version/>\n",

        get_rights_str(project_in[LICENSE_TYPE_INDEX], project_in[LICENSE_LINK_INDEX]),

        get_description_str(project_in[PROJECT_DESCRIPTION_INDEX], project_in[SITE_DESCRIPTION_INDEX],
                            project_in[EXTERNAL_PROJECT_LINK_INDEX], project_in[ADDITIONAL_INFO_LINK_INDEX]),

        get_geo_locations_str(project_in[LATITUDE_INDEX], project_in[LONGITUDE_INDEX]),

        "</resource>\n",
    ]

    return "".join(sections)


# Columns of a single-project doi csv file that hold the field name and the field value.
FIELD_NAME_COL_INDEX, VALUE_COL_INDEX = 0, 3

# Rows of a single-project doi csv file on which each project attribute is found,
# numbered consecutively starting at row 2.
(
    DOI_ROW_INDEX,
    PROJECT_NAME_ROW_INDEX,
    COUNTRY_ROW_INDEX,
    LATITUDE_ROW_INDEX,
    LONGITUDE_ROW_INDEX,
    STATUS_ROW_INDEX,
    PROJECT_DESCRIPTION_ROW_INDEX,
    SITE_DESCRIPTION_ROW_INDEX,
    COLLECTION_DATE_START_ROW_INDEX,
    COLLECTION_DATE_END_ROW_INDEX,
    PUBLISH_DATE_ROW_INDEX,
    LICENSE_TYPE_ROW_INDEX,
    LICENSE_LINK_ROW_INDEX,
    EXTERNAL_PROJECT_LINK_ROW_INDEX,
    ADDITIONAL_INFO_LINK_ROW_INDEX,
    KEYWORDS_ROW_INDEX,
) = range(2, 18)


def generate_xml_from_csv(doi_csv_path: str) -> str:
    """
    Generates and returns an xml string from the data in the given doi csv file.

    The file is read with the Latin1 encoding. Each csv row contributes one field name (column
    FIELD_NAME_COL_INDEX) and one value (column VALUE_COL_INDEX); the project attributes are then looked up by row
    position using the *_ROW_INDEX constants. Organizations are read in groups of four rows, starting at the first row
    whose field name is "organizationName" (or at the first row of the file if there is no such row), until a group
    with an empty name and url is reached.

    :param doi_csv_path: The path the doi csv to parse and generate an xml string for
    :return: The xml string of the doi csv file
    """
    fields = []
    values = []

    # Read everything up front inside a context manager so the file is closed even if the parsing below raises.
    with open(doi_csv_path, 'r', encoding='Latin1') as doi_csv:
        for row in csv.reader(doi_csv):
            # Blank or short rows are recorded as "" rather than skipped, so that row positions stay aligned with
            # the *_ROW_INDEX constants.
            fields.append(row[FIELD_NAME_COL_INDEX] if len(row) > FIELD_NAME_COL_INDEX else "")
            values.append(row[VALUE_COL_INDEX] if len(row) > VALUE_COL_INDEX else "")

    # Creators: locate the first organization row
    try:
        organizations_index = fields.index("organizationName")
    except ValueError:
        organizations_index = 0

    organizations = []
    for i in range(organizations_index, len(fields), 4):
        # [name, url]; padded so a group cut off by the end of the file still has both entries
        organization = (values[i:i + 2] + ["", ""])[:2]

        # Check if we are done with organizations
        if organization == ['', '']:
            break

        organization_type = values[i + 2] if i + 2 < len(values) else ""
        is_contributor = organization_type == "Contributor"

        if organization in organizations:
            if is_contributor:
                # move organization to beginning of the list
                organizations.remove(organization)
                organizations.insert(0, organization)
        elif is_contributor:
            organizations.insert(0, organization)
        else:
            organizations.append(organization)

    # Publication year: current year if no publish date, otherwise taken from MM/DD/YYYY or YYYY-MM-DD
    publish_date = values[PUBLISH_DATE_ROW_INDEX]
    if publish_date == "":
        publication_year = str(date.today().year)
    elif "/" in publish_date:
        _month, _day, publication_year = publish_date.split("/")
    else:
        publication_year, _month, _day = publish_date.split("-")

    sections = [
        # xml file header
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",

        # resource start
        ("<resource xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instan"
         "ce\" xmlns=\"http://datacite.org/schema/kernel-4\" xsi:schema"
         "Location=\"http://datacite.org/schema/kernel-4 http://schema."
         "datacite.org/meta/kernel-4/metadata.xsd\">\n"),

        get_identifier_str(values[DOI_ROW_INDEX]),
        get_creators_str(organizations),
        get_title_str(values[PROJECT_NAME_ROW_INDEX]),

        "\t<publisher>OpenHeritage3D</publisher>\n",
        "\t<publicationYear>" + publication_year + "</publicationYear>\n",
        "\t<resourceType resourceTypeGeneral=\"Dataset\">Dataset</resourceType>\n",

        get_subjects_str(values[KEYWORDS_ROW_INDEX]),

        get_dates_str(values[COLLECTION_DATE_START_ROW_INDEX], values[COLLECTION_DATE_END_ROW_INDEX],
                      values[PUBLISH_DATE_ROW_INDEX]),

        # Elements that are always emitted empty
        "\t<sizes/>\n",
        "\t<formats/>\n",
        "\t<version/>\n",

        get_rights_str(values[LICENSE_TYPE_ROW_INDEX], values[LICENSE_LINK_ROW_INDEX]),

        get_description_str(values[PROJECT_DESCRIPTION_ROW_INDEX], values[SITE_DESCRIPTION_ROW_INDEX],
                            values[EXTERNAL_PROJECT_LINK_ROW_INDEX], values[ADDITIONAL_INFO_LINK_ROW_INDEX]),

        get_geo_locations_str(values[LATITUDE_ROW_INDEX], values[LONGITUDE_ROW_INDEX]),

        "</resource>\n",
    ]

    return "".join(sections)


def get_identifier_str(doi: str) -> str:
    """
    Builds the xml identifier element for a doi.

    The doi is upper-cased and prefixed with "10.26301/".

    :param doi: A string of the doi
    :return: The xml string of the doi identifier
    """
    full_doi = "10.26301/" + doi.upper()
    return f'\t<identifier identifierType="DOI">{full_doi}</identifier>\n'


def get_creators_str(organizations_list: list[list[str]]) -> str:
    """
    Builds the xml creators element of the doi project, with one creator entry per organization.

    When no organizations are given, a single placeholder creator named "Creators Not Specified" with the identifier
    "No URL" is emitted instead.

    :param organizations_list: A list of organization data, which are list of strings that represent different
                               attributes and data. Only the entries at ORGANIZATION_NAME_INDEX and
                               ORGANIZATION_URL_INDEX of each organization are read here
    :return: The xml string of the creators of the doi project
    """
    def creator_entry(name_type: str, name: str, identifier: str) -> str:
        # One <creator> element holding a name and its identifier
        return "".join([
            "\t\t<creator>\n",
            "\t\t\t<creatorName nameType=\"", name_type, "\">", name, "</creatorName>\n",
            "\t\t\t<nameIdentifier nameIdentifierScheme=\"Other\" schemeURI=\"\">", identifier,
            "</nameIdentifier>\n",
            "\t\t</creator>\n",
        ])

    entries = ["\t<creators>\n"]

    if not organizations_list:
        entries.append(creator_entry("Organizational", "Creators Not Specified", "No URL"))
    else:
        # NOTE(review): real organizations are emitted with nameType "Personal" while the placeholder uses
        # "Organizational" - confirm this is intended.
        for org in organizations_list:
            entries.append(creator_entry("Personal", org[ORGANIZATION_NAME_INDEX], org[ORGANIZATION_URL_INDEX]))

    entries.append("\t</creators>\n")

    return "".join(entries)


def get_title_str(title: str) -> str:
    """
    Builds the xml titles element of the doi project, containing a single title of type "Other".

    :param title: The string of the doi project title
    :return: The xml string of the doi project title
    """
    title_element = "\t\t<title titleType=\"Other\">" + title + "</title>\n"

    return "".join(["\t<titles>\n", title_element, "\t</titles>\n"])


def get_subjects_str(keywords: str) -> str:
    """
    Returns the xml string of the doi project subjects.

    The keywords are split on ", ". Each keyword is stripped of surrounding whitespace and blank keywords are
    dropped, so an empty keywords string yields a subjects element without any subject entries.

    :param keywords: A single string holding the keywords of the doi project, separated by ", "
    :return: The xml string of the doi project subjects
    """
    subject_open = ("\t\t<subject schemeURI=\"http://www.oecd.org/science/inno\" valueURI="
                    "\"http://www.oecd.org/science/inno/38235147.pdf\">")

    stripped_keywords = (keyword.strip() for keyword in keywords.split(', '))

    subject_lines = [subject_open + keyword + "</subject>\n" for keyword in stripped_keywords if keyword]

    return "\t<subjects>\n" + "".join(subject_lines) + "\t</subjects>\n"


def get_dates_str(date_start: str, date_end: str, publish_date: str) -> str:
    """
    Returns the xml string of the start date, end date, and publish date.

    Dates containing "/" are treated as MM/DD/YYYY and converted to YYYY-MM-DD; any other value is emitted unchanged.
    The start and end dates are omitted when they hold the null marker (see the comment in the body). An empty
    publish date is replaced by today's date.

    :param date_start: A string representing the start date of the doi project
    :param date_end: A string representing the end date of the doi project
    :param publish_date: A string representing the publish date of the doi project
    :return: The xml string of the start date, end date, and publish date
    """
    # Marker for a missing collection date: two backslash characters followed by "N"
    null_marker = '\\\\N'

    dates_str = "\t<dates>\n"

    if date_start != null_marker:
        dates_str += "\t\t<date dateType=\"Collected\" dateInformation=\"Start\">" + \
                     _slash_date_to_iso(date_start) + "</date>\n"

    if date_end != null_marker:
        dates_str += "\t\t<date dateType=\"Collected\" dateInformation=\"End\">" + \
                     _slash_date_to_iso(date_end) + "</date>\n"

    if publish_date == "":
        # isoformat() already yields YYYY-MM-DD; splitting and re-joining it by hand is how month and day used to
        # end up swapped.
        collection_publish_date = datetime.date.today().isoformat()
    else:
        collection_publish_date = _slash_date_to_iso(publish_date)
    dates_str += "\t\t<date dateType=\"Submitted\" dateInformation=\"Publish\">" + collection_publish_date + "</date>\n"

    dates_str += "\t</dates>\n"

    return dates_str


def _slash_date_to_iso(date_str: str) -> str:
    """
    Converts a MM/DD/YYYY date to YYYY-MM-DD, zero-padding month and day to two digits. Strings without a "/" are
    returned unchanged.

    :param date_str: The date string to convert
    :return: The converted date string
    """
    if "/" not in date_str:
        return date_str

    month, day, year = date_str.split("/")
    return year + "-" + month.zfill(2) + "-" + day.zfill(2)


def get_rights_str(license_code: str, license_link: str) -> str:
    """
    Returns an xml string of the doi project rights, from the given license code and license link.

    The rights text is the full license name for the code, followed by the license version and "International". The
    version is the last path component of the license link, or the one before it when the link ends in "legalcode".
    Trailing "/" characters on the link are ignored when looking for the version.

    :param license_code: The string of the license code of the doi project. One of "CC BY", "CC BY-NC",
                         "CC BY-NC-ND" or "CC BY-NC-SA"
    :param license_link: The string of the license link of the doi project
    :return: The xml string of the doi project rights. Empty string if the license code is not recognized.
    """
    license_names = {
        "CC BY": "Creative Commons Attribution ",
        "CC BY-NC": "Creative Commons Attribution Non Commercial ",
        "CC BY-NC-ND": "Creative Commons Attribution Non Commercial No Derivatives ",
        "CC BY-NC-SA": "Creative Commons Attribution Non Commercial Share Alike ",
    }

    license_name = license_names.get(license_code)
    if license_name is None:
        return ""

    # A link such as ".../by/4.0/" would otherwise end in an empty component and lose its version
    link_parts = license_link.rstrip('/').split('/')
    version = link_parts[-1]
    if version == "legalcode" and len(link_parts) > 1:
        version = link_parts[-2]

    # The rightsURI keeps the link exactly as given
    return ("\t<rightsList>\n"
            "\t\t<rights rightsURI=\"" + license_link + "\">" +
            license_name + version + " International</rights>\n"
            "\t</rightsList>\n")


def get_description_str(project_description: str, site_description: str, external_project_link: str,
                        additional_info_link: str) -> str:
    """
    Builds the xml descriptions element of the doi project as a single "Abstract" description.

    Only the first line of the project description and of the site description is used. Both have their tags
    stripped and are joined by a space. The external project link and the additional info link are appended, each
    with a label, when they are not empty.

    :param project_description: The string of the doi project description
    :param site_description: The string of the doi site description
    :param external_project_link: The string of the doi external project link
    :param additional_info_link: The string of the doi additional info link
    :return: The xml string of the description of the doi project
    """
    first_project_line = project_description.split("\n")[0]
    first_site_line = site_description.split("\n")[0]

    pieces = [
        "\t<descriptions>\n",
        "\t\t<description descriptionType=\"Abstract\">",
        remove_tags_from_description(first_project_line),
        " ",
        remove_tags_from_description(first_site_line),
    ]

    # Optional links
    if external_project_link != "":
        pieces.append(" External Project Link: " + external_project_link)
    if additional_info_link != "":
        pieces.append(" Additional Info Link: " + additional_info_link)

    pieces.append("</description>\n")
    pieces.append("\t</descriptions>\n")

    return "".join(pieces)


def remove_tags_from_description(description: str) -> str:
    """
    Returns a version of the description string without any of the tags. Tags, which are most commonly used in html
    formatting are defined as the text between and including "<" and ">" characters.

    A "<" that is never closed and a ">" that was never opened are removed on their own, so the result contains
    neither character.

    :param description: The string of the original doi project description
    :return: A version of the description string without any of the tags
    """
    kept_chars = []
    position = 0
    length = len(description)

    while position < length:
        char = description[position]

        if char == '<':
            # Search from the character after the "<" so the result is an index into the whole string
            closing_index = description.find('>', position + 1)
            if closing_index == -1:
                # unclosed "<": drop just the bracket
                position += 1
            else:
                # skip the whole tag, brackets included
                position = closing_index + 1
        elif char == '>':
            # ">" without an opening "<": drop just the bracket
            position += 1
        else:
            kept_chars.append(char)
            position += 1

    return "".join(kept_chars)


def get_geo_locations_str(latitude: str, longitude: str) -> str:
    """
    Returns the xml string of the geographical locations of the doi project. Checks to make sure that the given
    parameters are numeric values that can represent real latitude and longitude values. Returns an empty string if
    invalid values are given, including values that are not numbers at all (such as "nan").

    :param latitude: A string of the latitude value. Must be a numeric value between -90 and 90
    :param longitude: A string of the longitude value. Must be a numeric value between -180 and 180
    :return: The xml string of the geographical locations of the doi project. Empty string if parameters are invalid.
    """
    # check if valid lat/long value
    try:
        latitude_value = float(latitude)
        longitude_value = float(longitude)
    except (TypeError, ValueError):
        return ""

    # Written as "inside the range" checks so that NaN, for which every comparison is false, is rejected as well
    if not (-90 <= latitude_value <= 90 and -180 <= longitude_value <= 180):
        return ""

    # The values are emitted exactly as they were given, not reformatted
    return ("\t<geoLocations>\n"
            "\t\t<geoLocation>\n"
            "\t\t\t<geoLocationPoint>\n"
            "\t\t\t\t<pointLatitude>" + latitude + "</pointLatitude>\n"
            "\t\t\t\t<pointLongitude>" + longitude + "</pointLongitude>\n"
            "\t\t\t</geoLocationPoint>\n"
            "\t\t</geoLocation>\n"
            "\t</geoLocations>\n")


if __name__ == '__main__':
    # Usage: no argument writes into DEFAULT_DIRECTORY, one argument names the output directory
    if len(sys.argv) in (1, 2):
        directory = sys.argv[1] if len(sys.argv) == 2 else DEFAULT_DIRECTORY

        # The output directory has to exist before any file is generated into it
        os.makedirs(directory, exist_ok=True)

        generate_files("CSV_files/Projects_MASTER.csv", "CSV_files/Project_Entities.csv",
                       "CSV_files/Organizations.csv", directory + "/")
    else:
        print("Error: too many arguments. Provide 0 or 1 arguments")
