Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
ray / _private / accelerators / accelerator.py
Size: Mime:
from abc import ABC, abstractmethod
from typing import Dict, List, Optional, Tuple

# See https://github.com/ray-project/ray/issues/54868.
#
# Planned behavior change: in the future, Ray will avoid overriding the
# accelerator-id environment variables when the number of accelerators is zero.
# For example, once the new behavior is in effect, a user passing `num_gpus=0`
# to `ray.init()` will no longer cause `CUDA_VISIBLE_DEVICES` to be set to an
# empty string.
#
# The environment variable named below is a temporary switch for that behavior.
# To avoid a breaking change it is treated as True by default, which keeps the
# previous behavior (the ids variable is still overridden on zero accelerators).
# NOTE(review): this constant only holds the variable's *name*; where it is read
# and how its value is parsed is not visible in this file.
#
RAY_ACCEL_ENV_VAR_OVERRIDE_ON_ZERO_ENV_VAR = "RAY_ACCEL_ENV_VAR_OVERRIDE_ON_ZERO"


class AcceleratorManager(ABC):
    """Interface that an accelerator family implements to be supported by Ray.

    Every hook is a static method. The abstract hooks must be supplied by each
    concrete manager; the remaining hooks come with a default implementation
    that returns None and can be overridden when the information is available.
    """

    @staticmethod
    @abstractmethod
    def get_resource_name() -> str:
        """Return the resource name that represents this accelerator family.

        Returns:
            The resource name, e.g. "GPU" for NVIDIA GPUs.
        """

    @staticmethod
    @abstractmethod
    def get_visible_accelerator_ids_env_var() -> str:
        """Return the env var controlling which accelerators of this family are visible.

        Returns:
            The name of the env var used to set the visible accelerator ids,
            e.g. CUDA_VISIBLE_DEVICES for NVIDIA GPUs.
        """

    @staticmethod
    @abstractmethod
    def get_current_node_num_accelerators() -> int:
        """Return how many accelerators of this family the current node has in total.

        Returns:
            The detected total count of accelerators of this family,
            or 0 when the current node has none of them.
        """

    @staticmethod
    @abstractmethod
    def get_current_node_accelerator_type() -> Optional[str]:
        """Return the accelerator type of this family found on the current node.

        Ray currently supports only a single accelerator type per
        accelerator family on each node.

        Only rely on the result when get_current_node_num_accelerators() > 0.

        Returns:
            The detected accelerator type of this family, e.g. H100 for
            NVIDIA GPU. None when the type is unknown or the node has no
            accelerators of this family.
        """

    @staticmethod
    @abstractmethod
    def get_current_node_additional_resources() -> Optional[Dict[str, float]]:
        """Return extra logical resources that the current node should expose.

        Some accelerator types need additional resources to be taken into
        account (e.g. TPUs provide the TPU pod type and the TPU name); this
        hook is where those additional logical resources are supplied.

        Returns:
            A dictionary of the additional resources that may be necessary
            for a particular accelerator type.
        """

    @staticmethod
    @abstractmethod
    def validate_resource_request_quantity(
        quantity: float,
    ) -> Tuple[bool, Optional[str]]:
        """Check whether a requested quantity of this accelerator resource is valid.

        Args:
            quantity: The requested resource quantity to validate.

        Returns:
            A (valid, error_message) tuple. The first element tells whether
            the given quantity is valid; the second element is the error
            message when the given quantity is invalid.
        """

    @staticmethod
    @abstractmethod
    def get_current_process_visible_accelerator_ids() -> Optional[List[str]]:
        """Return the ids of this family's accelerators visible to the current process.

        Returns:
            The list of visible accelerator ids,
            or None if all accelerators are visible.
        """

    @staticmethod
    @abstractmethod
    def set_current_process_visible_accelerator_ids(ids: List[str]) -> None:
        """Set which accelerators of this family are visible to the current process.

        Args:
            ids: The ids of the accelerators of this family to make visible.
        """

    @staticmethod
    def get_ec2_instance_num_accelerators(
        instance_type: str, instances: dict
    ) -> Optional[int]:
        """Return the number of this family's accelerators on an ec2 instance type.

        This default implementation always reports the count as unknown.

        Args:
            instance_type: The ec2 instance type.
            instances: Map from ec2 instance type to the instance metadata
                returned by ec2 `describe-instance-types`.

        Returns:
            The number of accelerators of this family on an ec2 instance of
            the given type, or None if it's unknown.
        """
        return None

    @staticmethod
    def get_ec2_instance_accelerator_type(
        instance_type: str, instances: dict
    ) -> Optional[str]:
        """Return the accelerator type of this family on an ec2 instance type.

        This default implementation always reports the type as unknown.

        Args:
            instance_type: The ec2 instance type.
            instances: Map from ec2 instance type to the instance metadata
                returned by ec2 `describe-instance-types`.

        Returns:
            The accelerator type of this family on an ec2 instance of the
            given type, or None if it's unknown.
        """
        return None

    @staticmethod
    def get_current_node_accelerator_labels() -> Optional[Dict[str, str]]:
        """Return the accelerator related Ray node labels of the current node.

        This default implementation provides no labels and returns None.

        Returns:
            A dictionary mapping accelerator related label keys to values.
        """
        return None