Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
tdw-catalog / tdw_catalog / data_dictionary.py
Size: Mime:
import json
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Union

from tdw_catalog.errors import CatalogInvalidArgumentException, CatalogUnknownException, _convert_error
from tdw_catalog import Catalog
from tdw_catalog.utils import ColumnType

if TYPE_CHECKING:
    from tdw_catalog import glossary_term
    from tdw_catalog.lineage.column_relationship import LineageColumn
    from tdw_catalog.dataset import Dataset


def _add_index(serializedColumn: dict, position: int) -> dict:
    res = dict(serializedColumn)
    res["position"] = position
    return res

class Column():
    """
    A single :class:`.Column` within a :class:`.DataDictionary`

    Attributes
    __________
    key : str
        The column name for this :class:`.Column`, within the actual :class:`.Warehouse` where the data lives
    type : ColumnType
        The data type for this :class:`.Column`. Available types can be found in :class:`.ColumnType`.
    name: Optional[str]
        An optional friendly name for this :class:`.Column`, which is visually used in place of the ``key`` throughout the :class:`.Catalog`
    description: Optional[str]
        An optional description for this :class:`.Column`
    """
    _dataset: 'Dataset'
    _id: str
    _key: str
    _type: ColumnType
    _name: Optional[str]
    _description: Optional[str]
    _glossary_term_ids: Optional[List[str]]

    @staticmethod
    def _parse_currency_symbol(meta: Any) -> Optional[str]:
        """
        Extract the currency symbol from a serialized ``meta`` payload.

        Returns the value stored under ``"symbol"`` when ``meta`` is a JSON
        object, ``""`` when the object has no ``"symbol"`` entry, and ``None``
        when ``meta`` is absent, is not valid JSON, or does not decode to an
        object.
        """
        try:
            decoded = json.loads(meta)
        except (TypeError, ValueError):
            # TypeError: meta is missing/None or otherwise not str/bytes.
            # ValueError: covers json.JSONDecodeError for malformed JSON.
            return None
        if not isinstance(decoded, dict):
            return None
        return decoded.get("symbol", "")

    @classmethod
    def _from_property(cls, dataset: 'Dataset', p: Dict[str, Any]):
        """
        Build a column object from a serialized property dictionary.

        Parameters
        ----------
        dataset : Dataset
            The :class:`.Dataset` the column belongs to. Its ``is_connected``
            flag selects between the regular and the metadata-only column classes.
        p : Dict[str, Any]
            The serialized property. ``"id"``, ``"key"`` and ``"type"`` are
            required; ``"title"``, ``"description"``, ``"glossary_term_ids"`` and
            (for currency columns) ``"meta"`` are optional.

        Returns
        -------
        Column
            A :class:`.Column`, :class:`.CurrencyColumn`, :class:`.MetadataOnlyColumn`
            or :class:`.MetadataOnlyCurrencyColumn`

        Raises
        ------
        CatalogUnknownException
            If ``p["type"]`` does not name a member of :class:`.ColumnType`
        """
        # create the correct Column class based on whether the dataset is connected,
        # and whether it's a currency column
        is_currency = p["type"] == "currency"
        if is_currency:
            c = CurrencyColumn(
            ) if dataset.is_connected else MetadataOnlyCurrencyColumn()
        else:
            c = Column() if dataset.is_connected else MetadataOnlyColumn()
        # then, fill in the column fields
        c._dataset = dataset
        c._id = p["id"]
        c._key = p["key"]
        try:
            c._type = ColumnType[p["type"].upper()]
        except KeyError as err:
            raise CatalogUnknownException(
                message=
                "Unable to parse data dictionary. Unknown column type: {ctype}"
                .format(ctype=p["type"])) from err
        c._name = p.get("title")
        c._description = p.get("description")
        c._glossary_term_ids = p.get("glossary_term_ids", [])
        if is_currency:
            # A missing or malformed "meta" payload yields a symbol of None
            # rather than aborting the parse of the whole dictionary.
            c._symbol = cls._parse_currency_symbol(p.get("meta"))
        return c

    def __init__(self,
                 key: str = None,
                 type: ColumnType = None,
                 name: Optional[str] = None,
                 description: Optional[str] = None):
        """
        Initializes a fresh :class:`.Column`, for inclusion in a :class:`.MetadataOnlyDataDictionary`

        Parameters
        ----------
        key : str
            The underlying column name for this :class:`.Column`, within the actual :class:`.Warehouse` where this data lives
        type : ColumnType
            The data type for this :class:`.Column`. Available types can be found in :class:`.ColumnType`.
        name: Optional[str]
            A friendly name for this :class:`.Column`, which is visually used in place of the ``key`` throughout the :class:`.Catalog`
        description: Optional[str]
            An optional description for this :class:`.Column`
        """
        # NOTE(review): ``_dataset`` is only assigned by ``_from_property``; a
        # column built directly through this constructor has none, so
        # ``apply_glossary_term`` will fail on it - confirm whether the owning
        # dictionary is expected to attach one.
        self._id = None
        self._key = key
        self._type = type
        self._name = name
        self._description = description
        self._glossary_term_ids = None

    @property
    def key(self) -> str:
        return self._key

    @property
    def type(self) -> ColumnType:
        return self._type

    @property
    def name(self) -> Optional[str]:
        return self._name

    @name.setter
    def name(self, name: Optional[str]):
        self._name = name

    @property
    def description(self) -> Optional[str]:
        return self._description

    @description.setter
    def description(self, description: Optional[str]):
        self._description = description

    def serialize(self) -> Dict:
        """
        Returns this :class:`.Column` as a plain dictionary. An unset
        ``id``, ``name`` (emitted as ``"title"``) or ``description`` is
        emitted as an empty string.
        """
        return {
            "id": "" if self._id is None else self._id,
            "key": self._key,
            "title": "" if self._name is None else self._name,
            "description":
            "" if self._description is None else self._description,
            "type": self._type,
            "glossary_term_ids": self._glossary_term_ids,
        }

    def apply_glossary_term(
            self, glossary_term: 'glossary_term.GlossaryTerm') -> None:
        """
        Apply a :class:`.GlossaryTerm` to this :class:`.Column`. The
        containing :class:`.DataDictionary` must be saved for the change
        to take permanent effect. Applying a term which is already applied
        has no effect.

        Parameters
        ----------
        glossary_term : GlossaryTerm
            The :class:`.GlossaryTerm` to classify this :class:`.Column` with

        Returns
        -------
        None

        Raises
        ------
        CatalogInvalidArgumentException
            If the :class:`.Organization` of the :class:`.GlossaryTerm` does not
            match the :class:`.Organization` which the :class:`.Dataset` was retrieved
            from.
        """
        if glossary_term.organization_id != self._dataset._context_organization.id:
            raise CatalogInvalidArgumentException(
                message=
                "Organization ID of the supplied GlossaryTerm does not match the Organization which this Dataset was retrieved from."
            )
        applied = [] if self._glossary_term_ids is None else self._glossary_term_ids
        if glossary_term.id not in applied:
            # Build a new list instead of appending in place, so a list shared
            # with the payload this column was parsed from is never mutated.
            applied = applied + [glossary_term.id]
        self._glossary_term_ids = applied

    def remove_glossary_term(
            self, glossary_term: 'glossary_term.GlossaryTerm') -> None:
        """
        Remove a :class:`.GlossaryTerm` from this :class:`.Column`. The
        containing :class:`.DataDictionary` must be saved for the change
        to take permanent effect.

        Parameters
        ----------
        glossary_term : GlossaryTerm
            The :class:`.GlossaryTerm` to be removed from this :class:`.Column`

        Returns
        -------
        None
        """
        if self._glossary_term_ids is None:
            return
        self._glossary_term_ids = [
            t for t in self._glossary_term_ids if t != glossary_term.id
        ]

    def list_glossary_terms(self) -> 'List[glossary_term.GlossaryTerm]':
        """
        Return a list of :class:`.GlossaryTerm`\\ s that have been applied to this :class:`.Column`

        Parameters
        ----------
        None

        Returns
        -------
        List[glossary_term.GlossaryTerm]
          The list of :class:`.GlossaryTerm`\\ s that have been applied to this :class:`.Column`

        Raises
        ------
        CatalogPermissionDeniedException
          If the caller does not have permission to list :class:`.GlossaryTerm`\\ s on a :class:`.Dataset`\\ 's :class:`.Column`\\ s
        CatalogInternalException
          If call to the :class:`.Catalog` server fails
        """
        # Imported here rather than at module level; the module-level import
        # of glossary_term is guarded by TYPE_CHECKING.
        from tdw_catalog import glossary_term
        if self._glossary_term_ids is None:
            return []
        # One GlossaryTerm.get call is issued per applied term id.
        return [
            glossary_term.GlossaryTerm.get(client=self._dataset._client,
                                           id=term_id)
            for term_id in self._glossary_term_ids
        ]

    def _to_lineage_column(self) -> 'LineageColumn':
        """
        Returns a ``LineageColumn`` built from this column's ``key``, ``name`` and ``type``.
        """
        from tdw_catalog.lineage.column_relationship import LineageColumn
        return LineageColumn(self.key, self.name, self.type)


class CurrencyColumn(Column):
    """
    A :class:`.Column` holding currency values. In addition to everything a
    regular :class:`.Column` carries, it records a currency symbol (such as $)

    Attributes
    __________
    symbol : Optional[str]
        An optional currency symbol (e.g. ``'$'``)
    """
    _symbol: Optional[str]

    def __init__(self,
                 key: str = None,
                 type: ColumnType = None,
                 name: Optional[str] = None,
                 description: Optional[str] = None,
                 symbol: Optional[str] = None):
        """
        Initializes a fresh :class:`.CurrencyColumn`, for inclusion in a :class:`.MetadataOnlyDataDictionary`

        Parameters
        ----------
        key : str
            The underlying column name for this :class:`.Column`, within the actual :class:`.Warehouse` where this data lives
        type : ColumnType
            The data type for this :class:`.Column`. Available types can be found in :class:`.ColumnType`.
        name: Optional[str]
            A friendly name for this :class:`.Column`, which is visually used in place of the ``key`` throughout the :class:`.Catalog`
        description: Optional[str]
            An optional description for this :class:`.Column`
        symbol : Optional[str]
            An optional currency symbol (e.g. ``'$'``)
        """
        super().__init__(key=key,
                         type=type,
                         name=name,
                         description=description)
        self._symbol = symbol

    @property
    def symbol(self) -> Optional[str]:
        return self._symbol

    @symbol.setter
    def symbol(self, symbol: str):
        self._symbol = symbol

    def serialize(self) -> Dict:
        """
        Returns the base :class:`.Column` serialization plus a ``"meta"`` entry:
        a JSON-encoded object whose ``"symbol"`` is the currency symbol, or an
        empty string when no symbol is set.
        """
        serialized = super().serialize()
        current_symbol = self.symbol
        meta = {"symbol": "" if current_symbol is None else current_symbol}
        serialized["meta"] = json.dumps(meta)
        return serialized


class MetadataOnlyColumn(Column):
    """
    Identical to :class:`.Column`, but within a :class:`.MetadataOnlyDataDictionary` attached
    to a :class:`.Dataset` which is not connected to data. When not connected, all aspects of
    a data dictionary can be freely modified (including ``key`` and ``type``), as there is no
    underlying data providing/constraining the dictionary.

    Attributes
    __________
    key : str
        The column name for this :class:`.Column`, within the actual :class:`.Warehouse` where the data lives.
        Unlike on :class:`.Column`, this attribute can be assigned.
    type : ColumnType
        The data type for this :class:`.Column`. Available types can be found in :class:`.ColumnType`.
        Unlike on :class:`.Column`, this attribute can be assigned.
    name: Optional[str]
        An optional friendly name for this :class:`.Column`, which is visually used in place of the ``key`` throughout the :class:`.Catalog`
    description: Optional[str]
        An optional description for this :class:`.Column`
    """

    # ``Column.key`` only has a getter. ``@Column.key.setter`` creates a new
    # property that reuses that getter and adds this setter, making ``key``
    # writable on this subclass only.
    @Column.key.setter
    def key(self, key: str):
        self._key = key

    # Same technique as ``key`` above: keep the inherited getter, add a setter.
    @Column.type.setter
    def type(self, type: ColumnType):
        self._type = type


class MetadataOnlyCurrencyColumn(CurrencyColumn, MetadataOnlyColumn):
    """
    The :class:`.MetadataOnlyColumn` version of :class:`.CurrencyColumn`.

    Combines the ``symbol`` support of :class:`.CurrencyColumn` with the
    assignable ``key`` and ``type`` of :class:`.MetadataOnlyColumn` through
    multiple inheritance; it defines no members of its own.

    Attributes
    __________
    symbol : Optional[str]
        The currency symbol
    """
    pass


class DataDictionary():
    """
    A :class:`.DataDictionary` describes the schema of data represented
    by a :class:`.Dataset` as a sequence of :class:`.Column`\\ s, each with
    a ``key``, ``title``, ``type``, and optional ``description``.

    A :class:`.DataDictionary` behaves as a ``dict`` - columns can be accessed
    via their key as follows: ``data_dictionary["column_name"]``. Iterating
    over it yields the column keys, in column order.

    Attributes
    __________
    last_updated_at: datetime
        The last time this :class:`.DataDictionary` was updated, either by hand
        (for :class:`.Dataset`\\ s which are not connected) or
        via a schedule metrics collection (for :class:`.ConnectedDataset`\\ s  which are)
    columns: List[Column]
        The list of :class:`.Column`\\ s which make up this :class:`.DataDictionary`
    """
    _dataset: 'Dataset'
    _last_updated_at: datetime
    _version_id: Optional[str]
    _columns: List[Column]

    def __init__(self, dataset: 'Dataset', last_updated_at: datetime,
                 version_id: Optional[str], columns: List[Column]):
        """
        Initializes a :class:`.DataDictionary`

        Parameters
        ----------
        dataset : Dataset
            The :class:`.Dataset` this :class:`.DataDictionary` belongs to
        last_updated_at : datetime
            The last time this :class:`.DataDictionary` was updated
        version_id : Optional[str]
            The version identifier passed along when saving the dictionary of a connected :class:`.Dataset`
        columns : List[Column]
            The :class:`.Column`\\ s which make up this :class:`.DataDictionary`
        """
        self._dataset = dataset
        self._last_updated_at = last_updated_at
        self._version_id = version_id
        self._columns = columns

    @property
    def last_updated_at(self) -> datetime:
        """
        Returns the last time this :class:`.DataDictionary` was modified
        """
        return self._last_updated_at

    def columns(self) -> List[Column]:
        """
        Returns all :class:`.Column`\\ s in this :class:`.DataDictionary`
        """
        return self._columns

    def __getitem__(self,
                    key: str) -> Optional[Union[Column, CurrencyColumn]]:
        """
        Access a column in this dictionary using its key. Returns the first
        :class:`.Column` whose ``key`` matches, or ``None`` if there is no
        such column (no ``KeyError`` is raised).
        """
        return next((c for c in self._columns if c.key == key), None)

    def __iter__(self) -> Iterator[str]:
        """
        Iterates over the keys of the :class:`.Column`\\ s in this
        :class:`.DataDictionary`, in column order.
        """
        # Defined explicitly: without __iter__, iteration falls back to calling
        # __getitem__(0), __getitem__(1), ... until IndexError, which
        # __getitem__ never raises, so the loop would never terminate.
        # A snapshot list is used so the dictionary may be modified while iterating.
        return iter([c.key for c in self._columns])

    def __len__(self) -> int:
        """
        Returns the number of :class:`.Column`\\ s in this :class:`.DataDictionary`
        """
        return len(self._columns)

    def has_key(self, key: str) -> bool:
        """
        Returns ``true`` if and only if a :class:`.Column` with the given ``key`` exists in this :class:`.DataDictionary`
        """
        return any(c.key == key for c in self._columns)

    def __contains__(self, key: str) -> bool:
        """
        Returns ``true`` if and only if a :class:`.Column` with the given ``key`` exists in this :class:`.DataDictionary`
        """
        return self.has_key(key)

    def save(self):
        """
        Update this :class:`.DataDictionary`, saving all changes to its schema

        Raises
        ------
        CatalogPermissionDeniedException
            If the caller is not allowed to update this :class:`.DataDictionary`
        CatalogException
            If call to the :class:`.Catalog` server fails
        """
        dataset = self._dataset
        # Each serialized column carries its index in the list as "position".
        properties = [
            _add_index(col.serialize(), i)
            for i, col in enumerate(self._columns)
        ]
        organization = dataset._context_organization
        organization_id = organization.id if organization is not None else None
        if dataset.is_connected:
            # Connected datasets update the properties of a specific version.
            dataset._client._update_properties(
                dataset_id=dataset.id,
                version_id=self._version_id,
                properties=properties,
                organization_id=organization_id)
        else:
            # Datasets which are not connected have their properties replaced.
            dataset._client._replace_properties(
                dataset_id=dataset.id,
                properties=properties,
                organization_id=organization_id)


class MetadataOnlyDataDictionary(DataDictionary):
    """
    A :class:`.MetadataOnlyDataDictionary` is identical to a :class:`.DataDictionary`,
    but is attached to a :class:`.Dataset` which is not connected to data.

    Because the :class:`.Dataset` is not connected, all aspects of the dictionary
    can be modified freely, including column keys, types, etc. (because they are
    not constrained by existing underlying data).

    A :class:`.MetadataOnlyDataDictionary` behaves as a ``dict`` - columns can be accessed
    (and overwritten) via their key as follows: ``data_dictionary["column_name"] = ...``.

    Attributes
    __________
    last_updated_at: datetime
        The last time this :class:`.DataDictionary` was updated, either by hand
        (for :class:`.Dataset`\\ s which are not connected) or
        via a schedule metrics collection (for :class:`.ConnectedDataset`\\ s  which are)
    columns: List[MetadataOnlyColumn]
        The list of :class:`.MetadataOnlyColumn`\\ s which make up this :class:`.DataDictionary`
    """

    def __delitem__(self, key: str):
        """
        Removes a specific :class:`.Column` from this :class:`.MetadataOnlyDataDictionary` by key.
        Nothing happens if no :class:`.Column` has that key.

        Parameters
        ----------
        key : str
            The key of the :class:`.Column`
        """
        self.remove(key)

    def remove(self, key: str) -> 'MetadataOnlyDataDictionary':
        """
        Removes a specific :class:`.Column` from this :class:`.MetadataOnlyDataDictionary` by key.
        Nothing happens if no :class:`.Column` has that key.

        Parameters
        ----------
        key : str
            The key of the :class:`.Column`

        Returns
        -------
        MetadataOnlyDataDictionary
            A reference to itself for method chaining
        """
        self._columns = [c for c in self._columns if c.key != key]
        return self

    def __setitem__(self, key: str, item: Column):
        """
        Overwrites a specific :class:`.Column` in this :class:`.MetadataOnlyDataDictionary` by key.
        Any existing :class:`.Column` with the same key as ``item`` is removed, and
        ``item`` is appended at the end.

        Parameters
        ----------
        key : str
            The key of the :class:`.Column`. The :class:`.Column` is stored under
            ``item.key``; this subscript is not otherwise consulted.
        item: Column
            The :class:`.Column` to insert
        """
        self.remove(item.key)
        # add() takes (col, index); the dictionary itself must not be passed
        # as the column.
        self.add(item)

    def add(self,
            col: Column,
            index: Optional[int] = None) -> 'MetadataOnlyDataDictionary':
        """
        Appends a specific :class:`.Column` to this :class:`.MetadataOnlyDataDictionary`,
        or inserts it at a specific ``index``.

        Parameters
        ----------
        col: Column
            The :class:`.Column` to insert
        index: Optional[int]
            The optional index to insert the new :class:`.Column` at

        Returns
        -------
        MetadataOnlyDataDictionary
            A reference to itself for method chaining
        """
        if index is None:
            self._columns.append(col)
        else:
            self._columns.insert(index, col)
        return self

    def columns(self) -> List[MetadataOnlyColumn]:
        """
        Returns all :class:`.Column`\\ s in this :class:`.MetadataOnlyDataDictionary`
        """
        return self._columns

    def clear(self) -> 'MetadataOnlyDataDictionary':
        """
        Removes all :class:`.Column`\\ s from this :class:`.MetadataOnlyDataDictionary`

        Returns
        -------
        MetadataOnlyDataDictionary
            A reference to itself for method chaining
        """
        self._columns = []

        return self