Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
Size: Mime:
# -*- coding: utf-8 -*-

"""This module contains the metadata (and other) models that are used in the ``kiara_plugin.onboarding`` package.

Those models are convenience wrappers that make it easier for *kiara* to find, create, manage and version metadata -- but also
other type of models -- that is attached to data, as well as *kiara* modules.

Metadata models must be a sub-class of [kiara.metadata.MetadataModel][kiara.metadata.MetadataModel]. Other models usually
sub-class a pydantic BaseModel or implement custom base classes.
"""
import os.path
from abc import abstractmethod
from typing import List, Tuple, Union

from kiara.exceptions import KiaraException
from kiara.models import KiaraModel
from kiara.models.filesystem import FolderImportConfig, KiaraFile, KiaraFileBundle


class OnboardDataModel(KiaraModel):

    _kiara_model_id: str = None  # type: ignore

    @classmethod
    def get_config_fields(cls) -> List[str]:
        return sorted(cls.__fields__.keys())

    @classmethod
    @abstractmethod
    def accepts_uri(cls, uri: str) -> Tuple[bool, str]:
        pass

    @classmethod
    def accepts_bundle_uri(cls, uri: str) -> Tuple[bool, str]:
        return cls.accepts_uri(uri)

    @abstractmethod
    def retrieve(
        self, uri: str, file_name: Union[None, str], attach_metadata: bool
    ) -> KiaraFile:
        pass

    def retrieve_bundle(
        self, uri: str, import_config: FolderImportConfig, attach_metadata: bool
    ) -> KiaraFileBundle:
        raise NotImplementedError()


class FileFromLocalModel(OnboardDataModel):

    _kiara_model_id: str = "onboarding.file.from.local_file"

    @classmethod
    def accepts_uri(cls, uri: str) -> Tuple[bool, str]:

        if os.path.isfile(os.path.abspath(uri)):
            return True, "local file exists and is file"
        else:
            return False, "local file does not exist or is not a file"

    @classmethod
    def accepts_bundle_uri(cls, uri: str) -> Tuple[bool, str]:

        if os.path.isdir(os.path.abspath(uri)):
            return True, "local folder exists and is folder"
        else:
            return False, "local folder does not exist or is not a folder"

    def retrieve(
        self, uri: str, file_name: Union[None, str], attach_metadata: bool
    ) -> KiaraFile:

        if not os.path.exists(os.path.abspath(uri)):
            raise KiaraException(
                f"Can't create file from path '{uri}': path does not exist."
            )
        if not os.path.isfile(os.path.abspath(uri)):
            raise KiaraException(
                f"Can't create file from path '{uri}': path is not a file."
            )

        return KiaraFile.load_file(uri)

    def retrieve_bundle(
        self, uri: str, import_config: FolderImportConfig, attach_metadata: bool
    ) -> KiaraFileBundle:

        if not os.path.exists(os.path.abspath(uri)):
            raise KiaraException(
                f"Can't create file from path '{uri}': path does not exist."
            )
        if not os.path.isdir(os.path.abspath(uri)):
            raise KiaraException(
                f"Can't create file from path '{uri}': path is not a directory."
            )

        return KiaraFileBundle.import_folder(source=uri, import_config=import_config)


class FileFromRemoteModel(OnboardDataModel):

    _kiara_model_id: str = "onboarding.file.from.url"

    @classmethod
    def accepts_uri(cls, uri: str) -> Tuple[bool, str]:

        accepted_protocols = ["http", "https"]
        for protocol in accepted_protocols:
            if uri.startswith(f"{protocol}://"):
                return True, "url is valid (starts with http or https)"

        return False, "url is not valid (does not start with http or https)"

    def retrieve(
        self, uri: str, file_name: Union[None, str], attach_metadata: bool
    ) -> KiaraFile:
        from kiara_plugin.onboarding.utils.download import download_file

        result_file: KiaraFile = download_file(  # type: ignore
            url=uri, file_name=file_name, attach_metadata=attach_metadata
        )
        return result_file

    def retrieve_bundle(
        self, uri: str, import_config: FolderImportConfig, attach_metadata: bool
    ) -> KiaraFileBundle:
        from kiara_plugin.onboarding.utils.download import download_file_bundle

        result_bundle = download_file_bundle(
            url=uri, import_config=import_config, attach_metadata=attach_metadata
        )
        return result_bundle


class FileFromZenodoModel(OnboardDataModel):

    _kiara_model_id: str = "onboarding.file.from.zenodo"

    @classmethod
    def accepts_uri(cls, uri: str) -> Tuple[bool, str]:

        if uri.startswith("zenodo:"):
            return True, "url is valid (follows format 'zenodo:<doi>')"

        elif "/zenodo." in uri:
            return True, "url is valid (contains '/zenodo.')"

        return False, "url is not valid (does not follow format 'zenodo:<doi>')"

    def retrieve(
        self, uri: str, file_name: Union[None, str], attach_metadata: bool
    ) -> KiaraFile:

        import pyzenodo3

        from kiara_plugin.onboarding.utils.download import download_file

        if uri.startswith("zenodo:"):
            doi = uri[len("zenodo:") :]
        elif "/zenodo." in uri:
            doi = uri

        tokens = doi.split("/zenodo.")
        if len(tokens) != 2:
            raise KiaraException(
                msg=f"Can't parse Zenodo DOI from URI for single file download: {doi}"
            )

        path_components = tokens[1].split("/", maxsplit=1)
        if len(path_components) != 2:
            raise KiaraException(
                msg=f"Can't parse Zenodo DOI from URI for single file download: {doi}"
            )

        file_path = path_components[1]
        _doi = f"{tokens[0]}/zenodo.{path_components[0]}"

        zen = pyzenodo3.Zenodo()
        record = zen.find_record_by_doi(_doi)

        match = None
        for _available_file in record.data["files"]:
            if file_path == _available_file["key"]:
                match = _available_file
                break

        if not match:
            msg = "Available files:\n"
            for key in record.data["files"]:
                msg += f"  - {key['key']}\n"
            raise KiaraException(
                msg=f"Can't find file '{file_path}' in Zenodo record. {msg}"
            )

        url = match["links"]["self"]
        checksum = match["checksum"][4:]

        file_name = file_path.split("/")[-1]

        file_model: KiaraFile
        file_model, md5_digest = download_file(  # type: ignore
            url=url,
            target=None,
            file_name=file_name,
            attach_metadata=attach_metadata,
            return_md5_hash=True,
        )

        if checksum != md5_digest:
            raise KiaraException(
                msg=f"Can't download file '{file_name}', invalid checksum: {checksum} != {md5_digest}"
            )

        if attach_metadata:
            file_model.metadata["zenodo_record_data"] = record.data

        return file_model

    def retrieve_bundle(
        self, uri: str, import_config: FolderImportConfig, attach_metadata: bool
    ) -> KiaraFileBundle:

        import shutil

        import pyzenodo3

        from kiara_plugin.onboarding.utils.download import download_file

        if uri.startswith("zenodo:"):
            doi = uri[len("zenodo:") :]
        elif "/zenodo." in uri:
            doi = uri

        tokens = doi.split("/zenodo.")
        if len(tokens) != 2:
            raise KiaraException(
                msg=f"Can't parse Zenodo DOI from URI for single file download: {doi}"
            )

        path_components = tokens[1].split("/", maxsplit=1)

        if len(path_components) == 2:
            zid = path_components[0]
            file_path = path_components[1]
        else:
            zid = path_components[0]
            file_path = None

        _doi = f"{tokens[0]}/zenodo.{zid}"

        if not file_path:

            zen = pyzenodo3.Zenodo()

            record = zen.find_record_by_doi(_doi)

            path = KiaraFileBundle.create_tmp_dir()
            shutil.rmtree(path, ignore_errors=True)
            path.mkdir()

            for file_data in record.data["files"]:
                url = file_data["links"]["self"]
                file_name = file_data["key"]
                checksum = file_data["checksum"][4:]

                target = os.path.join(path, file_name)
                file_model: KiaraFile
                file_model, md5_digest = download_file(  # type: ignore
                    url=url,
                    target=target,
                    file_name=file_name,
                    attach_metadata=attach_metadata,
                    return_md5_hash=True,
                )

                if checksum != md5_digest:
                    raise KiaraException(
                        msg=f"Can't download file '{file_name}', invalid checksum: {checksum} != {md5_digest}"
                    )

            bundle = KiaraFileBundle.import_folder(path.as_posix())
            if attach_metadata:
                bundle.metadata["zenodo_record_data"] = record.data

        else:

            zen = pyzenodo3.Zenodo()
            record = zen.find_record_by_doi(_doi)

            match = None
            for _available_file in record.data["files"]:
                if file_path == _available_file["key"]:
                    match = _available_file
                    break

            if not match:
                msg = "Available files:\n"
                for key in record.data["files"]:
                    msg += f"  - {key['key']}\n"
                raise KiaraException(
                    msg=f"Can't find file '{file_path}' in Zenodo record. {msg}"
                )

            url = match["links"]["self"]
            checksum = match["checksum"][4:]

            file_name = file_path.split("/")[-1]

            file_model, md5_digest = download_file(  # type: ignore
                url=url,
                target=None,
                file_name=file_name,
                attach_metadata=attach_metadata,
                return_md5_hash=True,
            )

            if checksum != md5_digest:
                raise KiaraException(
                    msg=f"Can't download file '{file_name}', invalid checksum: {checksum} != {md5_digest}"
                )

            bundle = KiaraFileBundle.from_archive_file(
                archive_file=file_model, import_config=import_config
            )
            if attach_metadata:
                bundle.metadata["zenodo_record_data"] = record.data

        return bundle


class FileFromZoteroModel(OnboardDataModel):

    _kiara_model_id = "onboarding.file.from.zotero"

    @classmethod
    def accepts_uri(cls, uri: str) -> Tuple[bool, str]:
        if uri.startswith("zotero:"):
            return True, "uri is a zotero uri"
        else:
            return False, "uri is not a zotero uri, must start with 'zotero:'"


class FileFromGithubModel(OnboardDataModel):

    _kiara_model_id: str = "onboarding.file.from.github"

    @classmethod
    def accepts_uri(cls, uri: str) -> Tuple[bool, str]:

        if uri.startswith("gh:") or uri.startswith("github:"):
            return True, "uri is a github uri"

        return False, "uri is not a github uri, must start with 'gh:' or 'github:'"

    def retrieve(
        self, uri: str, file_name: Union[None, str], attach_metadata: bool
    ) -> KiaraFile:
        from kiara_plugin.onboarding.utils.download import download_file

        tokens = uri.split(":")[1].split("/", maxsplit=3)
        if len(tokens) != 4:
            raise KiaraException(
                msg=f"Can't parse github uri '{uri}' for single file download. Required format: 'gh:<user>/<repo>/<branch_or_tag>/<path>'"
            )

        url = f"https://raw.githubusercontent.com/{tokens[0]}/{tokens[1]}/{tokens[2]}/{tokens[3]}"

        result_file: KiaraFile = download_file(  # type: ignore
            url=url, attach_metadata=attach_metadata
        )
        return result_file

    def retrieve_bundle(
        self, uri: str, import_config: FolderImportConfig, attach_metadata: bool
    ) -> KiaraFileBundle:

        from kiara_plugin.onboarding.utils.download import download_file

        tokens = uri.split(":")[1].split("/", maxsplit=3)
        if len(tokens) == 3:
            sub_path = None
        elif len(tokens) != 4:
            raise KiaraException(
                msg=f"Can't parse github uri '{uri}' for single file download. Required format: 'gh:<user>/<repo>/<branch_or_tag>/<path>'"
            )
        else:
            sub_path = tokens[3]

        url = f"https://github.com/{tokens[0]}/{tokens[1]}/archive/refs/heads/{tokens[2]}.zip"
        file_name = f"{tokens[1]}-{tokens[2]}.zip"

        archive_zip: KiaraFile
        archive_zip = download_file(  # type: ignore
            url=url,
            attach_metadata=attach_metadata,
            file_name=file_name,
            return_md5_hash=False,
        )

        base_sub_path = f"{tokens[1]}-{tokens[2]}"

        if sub_path:
            if import_config.sub_path:
                new_sub_path = "/".join([base_sub_path, sub_path, import_config.sub_path])  # type: ignore
            else:
                new_sub_path = "/".join([base_sub_path, sub_path])
        elif import_config.sub_path:
            new_sub_path = "/".join([base_sub_path, import_config.sub_path])
        else:
            new_sub_path = base_sub_path

        import_config_new = import_config.copy(update={"sub_path": new_sub_path})

        result_bundle = KiaraFileBundle.from_archive_file(
            archive_file=archive_zip, import_config=import_config_new
        )

        return result_bundle