Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
kiara-plugin.tabular / tabular / models / __init__.py
Size: Mime:
# -*- coding: utf-8 -*-

"""This module contains the metadata (and other) models that are used in the ``kiara_plugin.tabular`` package.

Those models are convenience wrappers that make it easier for *kiara* to find, create, manage and version metadata -- but also
other types of models -- that is attached to data, as well as *kiara* modules.

Metadata models must be a sub-class of [kiara.metadata.MetadataModel][kiara.metadata.MetadataModel]. Other models usually
sub-class a pydantic BaseModel or implement custom base classes.
"""
from typing import TYPE_CHECKING, Any, Dict, List, Union

from pydantic import BaseModel, Field

from kiara.models import KiaraModel

if TYPE_CHECKING:
    from kiara_plugin.tabular.models.table import KiaraTable


class StorageBackend(BaseModel):
    """Describes the storage backend type that is used, and (optionally) some backend-specific properties."""

    # Name of the backend. No default is declared, so a value has to be given.
    name: str = Field(description="The name of the storage backend.")

    # Arbitrary backend-specific settings. 'default_factory' gives every
    # instance its own fresh empty dict when nothing is supplied.
    properties: Dict[str, Any] = Field(
        default_factory=dict,
        description="Backend-specific properties.",
    )


class ColumnSchema(BaseModel):
    """Describes properties of a single column of the 'table' data type."""

    # Backend-specific type name of the column. No default is declared.
    type_name: str = Field(description="The type name of the column (backend-specific).")

    # Additional, nested column metadata. Defaults to a fresh empty dict
    # per instance.
    metadata: Dict[str, Dict[str, Any]] = Field(
        default_factory=dict,
        description="Other metadata for the column.",
    )

    def _retrieve_data_to_hash(self) -> Any:
        """Return the data this model is hashed on: its full ``dict()`` export."""
        exported = self.dict()
        return exported


class TableMetadata(KiaraModel):
    """Describes properties for the 'table' data type."""

    # Field declarations. Their relative order is the same as before; only
    # their position relative to the factory method changed.
    column_names: List[str] = Field(description="The name of the columns of the table.")
    column_schema: Dict[str, ColumnSchema] = Field(
        description="The schema description of the table."
    )
    backend: StorageBackend = Field(description="The storage backend that is used.")
    rows: int = Field(description="The number of rows the table contains.")
    size: Union[int, None] = Field(
        description="The tables size in bytes.", default=None
    )

    @classmethod
    def create_from_table(cls, table: "KiaraTable") -> "TableMetadata":
        """Assemble the metadata describing ``table``.

        Args:
            table: the table to describe. This method reads its
                ``arrow_table``, ``column_names`` and ``num_rows`` attributes
                and calls ``get_column_metadata`` once per column.

        Returns:
            A metadata instance (of ``cls``) whose ``column_schema`` maps each
            column name of the Arrow schema to a ``ColumnSchema`` instance,
            and whose ``backend`` is an "arrow" ``StorageBackend`` carrying
            the per-column Arrow type id and byte size.
        """
        arrow_table = table.arrow_table
        arrow_schema = arrow_table.schema

        per_column_schema: Dict[str, ColumnSchema] = {}
        column_types: Dict[str, Any] = {}

        for name in arrow_schema.names:
            arrow_type = arrow_schema.field(name).type
            column_md = table.get_column_metadata(column_name=name)

            column_types[name] = {
                "type_id": arrow_type.id,
                "size": arrow_table[name].nbytes,
            }

            # The result below is created with 'construct()', which neither
            # validates nor coerces its arguments. Passing plain dicts here
            # would leave dicts in 'column_schema' and make
            # '_retrieve_data_to_hash' fail on 'v.dict()'. So real
            # 'ColumnSchema' objects are created -- also without validation,
            # so the column metadata is stored exactly as it was returned.
            per_column_schema[name] = ColumnSchema.construct(
                type_name=str(arrow_type),
                metadata=column_md,
            )

        backend = StorageBackend(
            name="arrow", properties={"column_types": column_types}
        )

        # 'cls' instead of the hard-coded class name, so that a sub-class
        # calling this factory receives an instance of itself.
        return cls.construct(
            column_names=table.column_names,
            column_schema=per_column_schema,
            backend=backend,
            rows=table.num_rows,
            size=arrow_table.nbytes,
        )

    def _retrieve_data_to_hash(self) -> Any:
        """Return the data this model is hashed on.

        Only the exported column schemas, the row count and the size are
        included; ``column_names`` and ``backend`` are not part of the result.
        The 'column_schemas' key (plural) is kept as-is so the returned
        structure does not change.
        """
        return {
            "column_schemas": {k: v.dict() for k, v in self.column_schema.items()},
            "rows": self.rows,
            "size": self.size,
        }