machinelearning/layer1.py · hemamaps/boto

hemamaps / boto python

Repository URL to install this package:
Version: 2.42.0

/ machinelearning / layer1.py

# Copyright (c) 2015 Amazon.com, Inc. or its affiliates.  All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#

import boto
from boto.compat import json, urlsplit
from boto.connection import AWSQueryConnection
from boto.regioninfo import RegionInfo
from boto.exception import JSONResponseError
from boto.machinelearning import exceptions


class MachineLearningConnection(AWSQueryConnection):
    """
    Definition of the public APIs exposed by Amazon Machine Learning.

    The API methods of this class each assemble a dictionary of request
    parameters, serialize it with ``json.dumps`` and pass it, together
    with the service action name, to ``self.make_request``.
    """
    # Service-level constants. How each one is consumed is decided by the
    # base class / ``make_request``, which are not visible in this part of
    # the file -- NOTE(review): confirm before relying on the notes below.
    APIVersion = "2014-12-12"
    AuthServiceName = 'machinelearning'
    # Region name and endpoint used by ``__init__`` when the caller does
    # not supply a ``region`` keyword argument.
    DefaultRegionName = "us-east-1"
    DefaultRegionEndpoint = "machinelearning.us-east-1.amazonaws.com"
    ServiceName = "MachineLearning"
    # Presumably the prefix of the request target header -- TODO confirm.
    TargetPrefix = "AmazonML_20141212"
    ResponseError = JSONResponseError

    # Maps service fault names to the exception classes declared in
    # ``boto.machinelearning.exceptions``. Presumably consulted when an
    # error response is turned into a Python exception -- TODO confirm.
    _faults = {
        "InternalServerException": exceptions.InternalServerException,
        "LimitExceededException": exceptions.LimitExceededException,
        "ResourceNotFoundException": exceptions.ResourceNotFoundException,
        "IdempotentParameterMismatchException": exceptions.IdempotentParameterMismatchException,
        "PredictorNotMountedException": exceptions.PredictorNotMountedException,
        "InvalidInputException": exceptions.InvalidInputException,
    }


    def __init__(self, **kwargs):
        """
        Set up the connection, defaulting the region and host if needed.

        :type region: :class:`boto.regioninfo.RegionInfo`
        :param region: Optional keyword argument. When it is missing or
            falsy, a ``RegionInfo`` built from ``DefaultRegionName`` and
            ``DefaultRegionEndpoint`` is used instead.

        :type host: string
        :param host: Optional keyword argument. When it is missing or
            ``None``, the endpoint of the selected region is used.

        All remaining keyword arguments are forwarded unchanged to the
        parent class constructor.
        """
        # ``region`` is consumed here and never reaches the parent class.
        region = kwargs.pop('region', None) or RegionInfo(
            self, self.DefaultRegionName, self.DefaultRegionEndpoint)

        # An absent host and an explicit ``host=None`` are treated alike.
        if kwargs.get('host') is None:
            kwargs['host'] = region.endpoint

        super(MachineLearningConnection, self).__init__(**kwargs)
        self.region = region
        self.auth_region_name = self.region.name

    def _required_auth_capability(self):
        return ['hmac-v4']

    def create_batch_prediction(self, batch_prediction_id, ml_model_id,
                                batch_prediction_data_source_id, output_uri,
                                batch_prediction_name=None):
        """
        Request predictions for a group of observations.

        The observations live in one or more data files referenced by a
        `DataSource`. The operation creates a new `BatchPrediction` that
        uses an `MLModel` and those data files as information sources.

        `CreateBatchPrediction` is asynchronous: Amazon Machine Learning
        (Amazon ML) returns immediately and sets the `BatchPrediction`
        status to `PENDING`. Once the `BatchPrediction` completes, the
        status becomes `COMPLETED`.

        Poll for status updates with the GetBatchPrediction operation and
        inspect the `Status` parameter of its result. When `COMPLETED`
        appears, the results are available at the location given by the
        `OutputUri` parameter.

        :type batch_prediction_id: string
        :param batch_prediction_id: A user-supplied ID that uniquely
            identifies the `BatchPrediction`.

        :type ml_model_id: string
        :param ml_model_id: The ID of the `MLModel` that will generate
            predictions for the group of observations.

        :type batch_prediction_data_source_id: string
        :param batch_prediction_data_source_id: The ID of the `DataSource`
            that points to the group of observations to predict.

        :type output_uri: string
        :param output_uri: The location of an Amazon Simple Storage
            Service (Amazon S3) bucket or directory in which to store the
            batch prediction results. The substrings ':', '//', '/./' and
            '/../' are not allowed in the S3 key portion of this value.
            Amazon ML needs permissions to store and retrieve the logs on
            your behalf; see the Amazon Machine Learning Developer Guide
            for how to set permissions.

        :type batch_prediction_name: string
        :param batch_prediction_name: A user-supplied name or description
            of the `BatchPrediction`. It can only use the UTF-8 character
            set. Omitted from the request when ``None``.

        :return: The result of ``make_request`` for the
            `CreateBatchPrediction` action.
        """
        request = {
            'BatchPredictionId': batch_prediction_id,
            'MLModelId': ml_model_id,
            'BatchPredictionDataSourceId': batch_prediction_data_source_id,
            'OutputUri': output_uri,
        }
        # The name is optional and only sent when the caller supplied one.
        optional_fields = (
            ('BatchPredictionName', batch_prediction_name),
        )
        for field, value in optional_fields:
            if value is not None:
                request[field] = value
        payload = json.dumps(request)
        return self.make_request(action='CreateBatchPrediction',
                                 body=payload)

    def create_data_source_from_rds(self, data_source_id, rds_data, role_arn,
                                    data_source_name=None,
                                    compute_statistics=None):
        """
        Create a `DataSource` from an Amazon Relational Database Service
        (Amazon RDS) database.

        A `DataSource` references data that can be used to perform
        CreateMLModel, CreateEvaluation, or CreateBatchPrediction
        operations.

        `CreateDataSourceFromRDS` is asynchronous: Amazon Machine Learning
        (Amazon ML) returns immediately and sets the `DataSource` status
        to `PENDING`. When the `DataSource` is created and ready for use,
        the `Status` parameter becomes `COMPLETED`. A `DataSource` in
        `COMPLETED` or `PENDING` status can only be used to perform
        CreateMLModel, CreateEvaluation, or CreateBatchPrediction
        operations.

        If Amazon ML cannot accept the input source, it sets `Status` to
        `FAILED` and puts an error message in the `Message` attribute of
        the GetDataSource operation response.

        :type data_source_id: string
        :param data_source_id: A user-supplied ID that uniquely identifies
            the `DataSource`. Typically, an Amazon Resource Name (ARN)
            becomes the ID for a `DataSource`.

        :type rds_data: dict
        :param rds_data: The data specification of an Amazon RDS
            `DataSource`:

            + DatabaseInformation -

                + `DatabaseName` - Name of the Amazon RDS database.
                + `InstanceIdentifier` - Unique identifier for the Amazon
                  RDS database instance.

            + DatabaseCredentials - AWS Identity and Access Management
              (IAM) credentials that are used to connect to the Amazon RDS
              database.
            + ResourceRole - Role (DataPipelineDefaultResourceRole)
              assumed by an Amazon Elastic Compute Cloud (EC2) instance to
              carry out the copy task from Amazon RDS to Amazon S3. See
              the role templates for data pipelines.
            + ServiceRole - Role (DataPipelineDefaultRole) assumed by the
              AWS Data Pipeline service to monitor the progress of the
              copy task from Amazon RDS to Amazon Simple Storage Service
              (S3). See the role templates for data pipelines.
            + SecurityInfo - Security information to use to access an
              Amazon RDS instance. Appropriate ingress rules must be set
              up for the security entity IDs provided to allow access to
              the instance. Specify a [`SubnetId`, `SecurityGroupIds`]
              pair for a VPC-based Amazon RDS instance.
            + SelectSqlQuery - Query that is used to retrieve the
              observation data for the `DataSource`.
            + S3StagingLocation - Amazon S3 location for staging RDS data.
              The data retrieved from Amazon RDS using `SelectSqlQuery` is
              stored in this location.
            + DataSchemaUri - Amazon S3 location of the `DataSchema`.
            + DataSchema - A JSON string representing the schema. Not
              required if `DataSchemaUri` is specified.
            + DataRearrangement - A JSON string representing the splitting
              requirement of a `DataSource`. Sample:
              ``{"randomSeed": "some-random-seed", "splitting":
              {"percentBegin": 10, "percentEnd": 60}}``

        :type role_arn: string
        :param role_arn: The role that Amazon ML assumes on behalf of the
            user to create and activate a data pipeline in the user's
            account and copy data (using the `SelectSqlQuery` query) from
            Amazon RDS to Amazon S3.

        :type data_source_name: string
        :param data_source_name: A user-supplied name or description of
            the `DataSource`. Omitted from the request when ``None``.

        :type compute_statistics: boolean
        :param compute_statistics: The compute statistics for a
            `DataSource`. The statistics are generated from the
            observation data referenced by the `DataSource`, and Amazon ML
            uses them internally during `MLModel` training. Must be
            ``True`` if the `DataSource` is to be used for `MLModel`
            training. Omitted from the request when ``None``.

        :return: The result of ``make_request`` for the
            `CreateDataSourceFromRDS` action.
        """
        request = {
            'DataSourceId': data_source_id,
            'RDSData': rds_data,
            'RoleARN': role_arn,
        }
        # Optional fields are appended in a fixed order, skipping any the
        # caller left as None.
        optional_fields = (
            ('DataSourceName', data_source_name),
            ('ComputeStatistics', compute_statistics),
        )
        for field, value in optional_fields:
            if value is not None:
                request[field] = value
        payload = json.dumps(request)
        return self.make_request(action='CreateDataSourceFromRDS',
                                 body=payload)

    def create_data_source_from_redshift(self, data_source_id, data_spec,
                                         role_arn, data_source_name=None,
                                         compute_statistics=None):
        """
        Create a `DataSource` from Amazon Redshift.

        A `DataSource` references data that can be used to perform
        CreateMLModel, CreateEvaluation or CreateBatchPrediction
        operations.

        `CreateDataSourceFromRedshift` is asynchronous: Amazon Machine
        Learning (Amazon ML) returns immediately and sets the `DataSource`
        status to `PENDING`. When the `DataSource` is created and ready
        for use, the `Status` parameter becomes `COMPLETED`. A
        `DataSource` in `COMPLETED` or `PENDING` status can only be used
        to perform CreateMLModel, CreateEvaluation, or
        CreateBatchPrediction operations.

        If Amazon ML cannot accept the input source, it sets `Status` to
        `FAILED` and puts an error message in the `Message` attribute of
        the GetDataSource operation response.

        The observations should exist in the database hosted on an Amazon
        Redshift cluster and should be specified by a `SelectSqlQuery`.
        Amazon ML executes the Unload command in Amazon Redshift to
        transfer the result set of `SelectSqlQuery` to
        `S3StagingLocation`.

        After creation the `DataSource` is ready for use in evaluations
        and batch predictions. Training an `MLModel` with it additionally
        requires a recipe, which describes the observation variables that
        participate in training and how each input variable is used:
        whether it is included or excluded, and whether it is manipulated
        (for example combined with another variable or split apart into
        word combinations). See the Amazon Machine Learning Developer
        Guide for more information.

        :type data_source_id: string
        :param data_source_id: A user-supplied ID that uniquely identifies
            the `DataSource`.

        :type data_spec: dict
        :param data_spec: The data specification of an Amazon Redshift
            `DataSource`:

            + DatabaseInformation -

                + `DatabaseName` - Name of the Amazon Redshift database.
                + `ClusterIdentifier` - Unique ID for the Amazon Redshift
                  cluster.

            + DatabaseCredentials - AWS Identity and Access Management
              (IAM) credentials that are used to connect to the Amazon
              Redshift database.
            + SelectSqlQuery - Query that is used to retrieve the
              observation data for the `DataSource`.
            + S3StagingLocation - Amazon Simple Storage Service (Amazon
              S3) location for staging Amazon Redshift data. The data
              retrieved using `SelectSqlQuery` is stored in this location.
            + DataSchemaUri - Amazon S3 location of the `DataSchema`.
            + DataSchema - A JSON string representing the schema. Not
              required if `DataSchemaUri` is specified.
            + DataRearrangement - A JSON string representing the splitting
              requirement of a `DataSource`. Sample:
              ``{"randomSeed": "some-random-seed", "splitting":
              {"percentBegin": 10, "percentEnd": 60}}``

        :type role_arn: string
        :param role_arn: A fully specified role Amazon Resource Name
            (ARN). Amazon ML assumes the role on behalf of the user to
            create the following:

            + A security group to allow Amazon ML to execute the
              `SelectSqlQuery` query on an Amazon Redshift cluster
            + An Amazon S3 bucket policy to grant Amazon ML read/write
              permissions on the `S3StagingLocation`

        :type data_source_name: string
        :param data_source_name: A user-supplied name or description of
            the `DataSource`. Omitted from the request when ``None``.

        :type compute_statistics: boolean
        :param compute_statistics: The compute statistics for a
            `DataSource`. The statistics are generated from the
            observation data referenced by the `DataSource`, and Amazon ML
            uses them internally during `MLModel` training. Must be
            ``True`` if the `DataSource` is to be used for `MLModel`
            training. Omitted from the request when ``None``.

        :return: The result of ``make_request`` for the
            `CreateDataSourceFromRedshift` action.
        """
        request = {
            'DataSourceId': data_source_id,
            'DataSpec': data_spec,
            'RoleARN': role_arn,
        }
        optional_fields = [
            ('DataSourceName', data_source_name),
            ('ComputeStatistics', compute_statistics),
        ]
        # Only caller-supplied optional values are sent; the pair order
        # above fixes the order in which they are added.
        request.update(
            (field, value) for field, value in optional_fields
            if value is not None)
        return self.make_request(action='CreateDataSourceFromRedshift',
                                 body=json.dumps(request))

    def create_data_source_from_s3(self, data_source_id, data_spec,
                                   data_source_name=None,
                                   compute_statistics=None):
        """
        Create a `DataSource` from data stored in Amazon S3.

        A `DataSource` references data that can be used to perform
        CreateMLModel, CreateEvaluation, or CreateBatchPrediction
        operations.

        `CreateDataSourceFromS3` is asynchronous: Amazon Machine Learning
        (Amazon ML) returns immediately and sets the `DataSource` status
        to `PENDING`. When the `DataSource` is created and ready for use,
        the `Status` parameter becomes `COMPLETED`. A `DataSource` in
        `COMPLETED` or `PENDING` status can only be used to perform
        CreateMLModel, CreateEvaluation or CreateBatchPrediction
        operations.

        If Amazon ML cannot accept the input source, it sets `Status` to
        `FAILED` and puts an error message in the `Message` attribute of
        the GetDataSource operation response.

        The observation data should be ready to use: it should have a
        consistent structure and as few missing values as possible. It
        must reside in one or more CSV files in an Amazon Simple Storage
        Service (Amazon S3) bucket, along with a schema that describes the
        data items by name and type. The same schema must be used for all
        of the data files referenced by the `DataSource`.

        After creation the `DataSource` is ready to use in evaluations and
        batch predictions. Training an `MLModel` with it additionally
        requires a recipe, which describes the observation variables that
        participate in training and how each input variable is used:
        whether it is included or excluded, and whether it is manipulated
        (for example combined with another variable, or split apart into
        word combinations). See the Amazon Machine Learning Developer
        Guide for more information.

        :type data_source_id: string
        :param data_source_id: A user-supplied identifier that uniquely
            identifies the `DataSource`.

        :type data_spec: dict
        :param data_spec: The data specification of a `DataSource`:

            + DataLocationS3 - Amazon Simple Storage Service (Amazon S3)
              location of the observation data.
            + DataSchemaLocationS3 - Amazon S3 location of the
              `DataSchema`.
            + DataSchema - A JSON string representing the schema. Not
              required if `DataSchemaUri` is specified.
            + DataRearrangement - A JSON string representing the splitting
              requirement of a `DataSource`. Sample:
              ``{"randomSeed": "some-random-seed", "splitting":
              {"percentBegin": 10, "percentEnd": 60}}``

        :type data_source_name: string
        :param data_source_name: A user-supplied name or description of
            the `DataSource`. Omitted from the request when ``None``.

        :type compute_statistics: boolean
        :param compute_statistics: The compute statistics for a
            `DataSource`. The statistics are generated from the
            observation data referenced by the `DataSource`, and Amazon ML
            uses them internally during `MLModel` training. Must be
            ``True`` if the `DataSource` is to be used for `MLModel`
            training. Omitted from the request when ``None``.

        :return: The result of ``make_request`` for the
            `CreateDataSourceFromS3` action.
        """
        request = {
            'DataSourceId': data_source_id,
            'DataSpec': data_spec,
        }
        # Add the optional fields, in this order, when they were supplied.
        for field, value in (('DataSourceName', data_source_name),
                             ('ComputeStatistics', compute_statistics)):
            if value is not None:
                request[field] = value
        payload = json.dumps(request)
        return self.make_request(action='CreateDataSourceFromS3',
                                 body=payload)

    def create_evaluation(self, evaluation_id, ml_model_id,
                          evaluation_data_source_id, evaluation_name=None):
        """
        Create a new `Evaluation` of an `MLModel`.

        An `MLModel` is evaluated on a set of observations associated with
        a `DataSource`. Like a `DataSource` for an `MLModel`, the
        `DataSource` for an `Evaluation` contains values for the Target
        Variable. The `Evaluation` compares the predicted result for each
        observation to the actual outcome and provides a summary showing
        how effectively the `MLModel` functions on the test data. It
        generates a relevant performance metric such as BinaryAUC,
        RegressionRMSE or MulticlassAvgFScore based on the corresponding
        `MLModelType`: `BINARY`, `REGRESSION` or `MULTICLASS`.

        `CreateEvaluation` is asynchronous: Amazon Machine Learning
        (Amazon ML) returns immediately and sets the evaluation status to
        `PENDING`. When the `Evaluation` is created and ready for use, the
        status becomes `COMPLETED`.

        Use the GetEvaluation operation to check progress of the
        evaluation during the creation operation.

        :type evaluation_id: string
        :param evaluation_id: A user-supplied ID that uniquely identifies
            the `Evaluation`.

        :type ml_model_id: string
        :param ml_model_id: The ID of the `MLModel` to evaluate. The
            schema used in creating the `MLModel` must match the schema of
            the `DataSource` used in the `Evaluation`.

        :type evaluation_data_source_id: string
        :param evaluation_data_source_id: The ID of the `DataSource` for
            the evaluation. The schema of the `DataSource` must match the
            schema used to create the `MLModel`.

        :type evaluation_name: string
        :param evaluation_name: A user-supplied name or description of the
            `Evaluation`. Omitted from the request when ``None``.

        :return: The result of ``make_request`` for the `CreateEvaluation`
            action.
        """
        request = {
            'EvaluationId': evaluation_id,
            'MLModelId': ml_model_id,
            'EvaluationDataSourceId': evaluation_data_source_id,
        }
        # The name is the only optional field of this request.
        request.update(
            (field, value)
            for field, value in [('EvaluationName', evaluation_name)]
            if value is not None)
        return self.make_request(action='CreateEvaluation',
                                 body=json.dumps(request))

    def create_ml_model(self, ml_model_id, ml_model_type,
                        training_data_source_id, ml_model_name=None,
                        parameters=None, recipe=None, recipe_uri=None):
        """
        Create a new `MLModel` using the data files and the recipe as
        information sources.

        An `MLModel` is nearly immutable: only the `MLModelName` and the
        `ScoreThreshold` can be updated without creating a new `MLModel`.

        `CreateMLModel` is asynchronous: Amazon Machine Learning
        (Amazon ML) returns immediately and sets the `MLModel` status to
        `PENDING`. When the `MLModel` is created and ready for use, the
        status becomes `COMPLETED`.

        Use the GetMLModel operation to check progress of the `MLModel`
        during the creation operation.

        CreateMLModel requires a `DataSource` with computed statistics,
        which can be created by setting `ComputeStatistics` to `True` in
        the CreateDataSourceFromRDS, CreateDataSourceFromS3, or
        CreateDataSourceFromRedshift operations.

        :type ml_model_id: string
        :param ml_model_id: A user-supplied ID that uniquely identifies
            the `MLModel`.

        :type ml_model_type: string
        :param ml_model_type: The category of supervised learning that
            this `MLModel` will address:

            + `REGRESSION` if the `MLModel` will be used to predict a
              numeric value.
            + `BINARY` if the `MLModel` result has two possible values.
            + `MULTICLASS` if the `MLModel` result has a limited number of
              values.

            See the Amazon Machine Learning Developer Guide for more
            information.

        :type training_data_source_id: string
        :param training_data_source_id: The `DataSource` that points to
            the training data.

        :type ml_model_name: string
        :param ml_model_name: A user-supplied name or description of the
            `MLModel`. Omitted from the request when ``None``.

        :type parameters: map
        :param parameters: The training parameters of the `MLModel`, as a
            map of key/value pairs. Omitted from the request when
            ``None``. The current set of training parameters is:

            + `sgd.l1RegularizationAmount` - Coefficient regularization L1
              norm. It controls overfitting the data by penalizing large
              coefficients, which tends to drive coefficients to zero and
              results in a sparse feature set. If you use it, start with a
              small value such as 1.0E-08. The value is a double that
              ranges from 0 to MAX_DOUBLE. The default is not to use L1
              normalization. Cannot be used when `L2` is specified. Use
              this parameter sparingly.
            + `sgd.l2RegularizationAmount` - Coefficient regularization L2
              norm. It controls overfitting the data by penalizing large
              coefficients, which tends to drive coefficients to small,
              nonzero values. If you use it, start with a small value such
              as 1.0E-08. The value is a double that ranges from 0 to
              MAX_DOUBLE. The default is not to use L2 normalization.
              Cannot be used when `L1` is specified. Use this parameter
              sparingly.
            + `sgd.maxPasses` - Number of times that the training process
              traverses the observations to build the `MLModel`. The value
              is an integer that ranges from 1 to 10000. The default value
              is 10.
            + `sgd.maxMLModelSizeInBytes` - Maximum allowed size of the
              model. Depending on the input data, the size of the model
              might affect its performance. The value is an integer that
              ranges from 100000 to 2147483648. The default value is
              33554432.

        :type recipe: string
        :param recipe: The data recipe for creating the `MLModel`. You
            must specify either the recipe or its URI. If you don't
            specify a recipe or its URI, Amazon ML creates a default.
            Omitted from the request when ``None``.

        :type recipe_uri: string
        :param recipe_uri: The Amazon Simple Storage Service (Amazon S3)
            location and file name that contains the `MLModel` recipe. You
            must specify either the recipe or its URI. If you don't
            specify a recipe or its URI, Amazon ML creates a default.
            Omitted from the request when ``None``.

        :return: The result of ``make_request`` for the `CreateMLModel`
            action.
        """
        request = {
            'MLModelId': ml_model_id,
            'MLModelType': ml_model_type,
            'TrainingDataSourceId': training_data_source_id,
        }
        # Optional fields, listed in the order they are added to the
        # request; entries left as None are skipped.
        optional_fields = (
            ('MLModelName', ml_model_name),
            ('Parameters', parameters),
            ('Recipe', recipe),
            ('RecipeUri', recipe_uri),
        )
        for field, value in optional_fields:
            if value is not None:
                request[field] = value
        payload = json.dumps(request)
        return self.make_request(action='CreateMLModel', body=payload)

    def create_realtime_endpoint(self, ml_model_id):
        """
        Create a real-time endpoint for the `MLModel`.

        The endpoint contains the URI of the `MLModel`, that is, the
        location to which real-time prediction requests for the specified
        `MLModel` are sent.

        :type ml_model_id: string
        :param ml_model_id: The ID assigned to the `MLModel` during
            creation.

        :return: The result of ``make_request`` for the
            `CreateRealtimeEndpoint` action.
        """
        payload = json.dumps({'MLModelId': ml_model_id})
        return self.make_request(action='CreateRealtimeEndpoint',
                                 body=payload)

    def delete_batch_prediction(self, batch_prediction_id):
        """
        Assign the DELETED status to a `BatchPrediction`, rendering it
        unusable.

        Afterwards, the GetBatchPrediction operation can be used to verify
        that the status of the `BatchPrediction` changed to DELETED.

        The result of the `DeleteBatchPrediction` operation is
        irreversible.

        :type batch_prediction_id: string
        :param batch_prediction_id: A user-supplied ID that uniquely
            identifies the `BatchPrediction`.

        :return: The result of ``make_request`` for the
            `DeleteBatchPrediction` action.
        """
        payload = json.dumps({'BatchPredictionId': batch_prediction_id})
        return self.make_request(action='DeleteBatchPrediction',
                                 body=payload)

    def delete_data_source(self, data_source_id):
        """
        Assign the DELETED status to a `DataSource`, rendering it
        unusable.

        Afterwards, the GetDataSource operation can be used to verify that
        the status of the `DataSource` changed to DELETED.

        The results of the `DeleteDataSource` operation are irreversible.

        :type data_source_id: string
        :param data_source_id: A user-supplied ID that uniquely identifies
            the `DataSource`.

        :return: The result of ``make_request`` for the `DeleteDataSource`
            action.
        """
        payload = json.dumps({'DataSourceId': data_source_id})
        return self.make_request(action='DeleteDataSource', body=payload)

    def delete_evaluation(self, evaluation_id):
        """
        Assign the `DELETED` status to an `Evaluation`, rendering it
        unusable.

        Afterwards, the GetEvaluation operation can be used to verify that
        the status of the `Evaluation` changed to `DELETED`.

        The results of the `DeleteEvaluation` operation are irreversible.

        :type evaluation_id: string
        :param evaluation_id: A user-supplied ID that uniquely identifies
            the `Evaluation` to delete.

        :return: The result of ``make_request`` for the `DeleteEvaluation`
            action.
        """
        payload = json.dumps({'EvaluationId': evaluation_id})
        return self.make_request(action='DeleteEvaluation', body=payload)

    def delete_ml_model(self, ml_model_id):
        """
        Assign the DELETED status to an `MLModel`, rendering it unusable.

        Afterwards, the GetMLModel operation can be used to verify that
        the status of the `MLModel` changed to DELETED.

        The result of the `DeleteMLModel` operation is irreversible.

        :type ml_model_id: string
        :param ml_model_id: A user-supplied ID that uniquely identifies
            the `MLModel`.

        :return: The result of ``make_request`` for the `DeleteMLModel`
            action.
        """
        payload = json.dumps({'MLModelId': ml_model_id})
        return self.make_request(action='DeleteMLModel', body=payload)

    def delete_realtime_endpoint(self, ml_model_id):
        """
        Delete a real-time endpoint of an `MLModel`.

        :type ml_model_id: string
        :param ml_model_id: The ID assigned to the `MLModel` during
            creation.

        :return: The result of ``make_request`` for the
            `DeleteRealtimeEndpoint` action.
        """
        payload = json.dumps({'MLModelId': ml_model_id})
        return self.make_request(action='DeleteRealtimeEndpoint',
                                 body=payload)

    def describe_batch_predictions(self, filter_variable=None, eq=None,
                                   gt=None, lt=None, ge=None, le=None,
                                   ne=None, prefix=None, sort_order=None,
                                   next_token=None, limit=None):
        """
        Lists the `BatchPrediction` operations that match the search
        criteria given in the request.

        Every argument is optional; only arguments that are not `None`
        are placed in the request body.

        :type filter_variable: string
        :param filter_variable: The variable used to filter the list of
            `BatchPrediction`. Use one of the following:

        + `CreatedAt` - Searches on the `BatchPrediction` creation date.
        + `Status` - Searches on the `BatchPrediction` status.
        + `Name` - Searches on the contents of the `BatchPrediction`
              `Name`.
        + `IAMUser` - Searches on the user account that invoked the
              `BatchPrediction` creation.
        + `MLModelId` - Searches on the `MLModel` used in the
              `BatchPrediction`.
        + `DataSourceId` - Searches on the `DataSource` used in the
              `BatchPrediction`.
        + `DataURI` - Searches on the data file(s) used in the
              `BatchPrediction`. The URL can identify either a file or an
              Amazon Simple Storage Service (Amazon S3) bucket or directory.

        :type eq: string
        :param eq: The equal to operator. Results have `FilterVariable`
            values that exactly match the value given with `EQ`.

        :type gt: string
        :param gt: The greater than operator. Results have `FilterVariable`
            values greater than the value given with `GT`.

        :type lt: string
        :param lt: The less than operator. Results have `FilterVariable`
            values less than the value given with `LT`.

        :type ge: string
        :param ge: The greater than or equal to operator. Results have
            `FilterVariable` values greater than or equal to the value
            given with `GE`.

        :type le: string
        :param le: The less than or equal to operator. Results have
            `FilterVariable` values less than or equal to the value given
            with `LE`.

        :type ne: string
        :param ne: The not equal to operator. Results have `FilterVariable`
            values not equal to the value given with `NE`.

        :type prefix: string
        :param prefix: A string found at the beginning of a variable, such
            as `Name` or `Id`.

        For example, a `BatchPrediction` operation could have the `Name`
            `2014-09-09-HolidayGiftMailer`. To search for it, select `Name`
            for the `FilterVariable` and any of the following strings for
            the `Prefix`:

        + 2014-09
        + 2014-09-09
        + 2014-09-09-Holiday

        :type sort_order: string
        :param sort_order: A two-value parameter that determines the
            sequence of the resulting list of `BatchPrediction`.

        + `asc` - Arranges the list in ascending order (A-Z, 0-9).
        + `dsc` - Arranges the list in descending order (Z-A, 9-0).

        Results are sorted by `FilterVariable`.

        :type next_token: string
        :param next_token: An ID of the page in the paginated results.

        :type limit: integer
        :param limit: The number of pages of information to include in the
            result. The range of acceptable values is 1 through 100. The
            default value is 100.

        """
        # Wire name / argument pairs, in the order they enter the request.
        optional_fields = (
            ('FilterVariable', filter_variable),
            ('EQ', eq),
            ('GT', gt),
            ('LT', lt),
            ('GE', ge),
            ('LE', le),
            ('NE', ne),
            ('Prefix', prefix),
            ('SortOrder', sort_order),
            ('NextToken', next_token),
            ('Limit', limit),
        )
        # Falsy values such as '' or 0 are still sent; only None is omitted.
        request_fields = dict(
            (wire_name, value) for wire_name, value in optional_fields
            if value is not None)
        return self.make_request(action='DescribeBatchPredictions',
                                 body=json.dumps(request_fields))

    def describe_data_sources(self, filter_variable=None, eq=None, gt=None,
                              lt=None, ge=None, le=None, ne=None,
                              prefix=None, sort_order=None, next_token=None,
                              limit=None):
        """
        Lists the `DataSource` objects that match the search criteria
        given in the request.

        Every argument is optional; only arguments that are not `None`
        are placed in the request body.

        :type filter_variable: string
        :param filter_variable: The variable used to filter the list of
            `DataSource`. Use one of the following:

        + `CreatedAt` - Searches on `DataSource` creation dates.
        + `Status` - Searches on `DataSource` statuses.
        + `Name` - Searches on the contents of `DataSource` `Name`.
        + `DataUri` - Searches on the URI of data files used to create the
              `DataSource`. The URI can identify either a file or an Amazon
              Simple Storage Service (Amazon S3) bucket or directory.
        + `IAMUser` - Searches on the user account that invoked the
              `DataSource` creation.

        :type eq: string
        :param eq: The equal to operator. Results have `FilterVariable`
            values that exactly match the value given with `EQ`.

        :type gt: string
        :param gt: The greater than operator. Results have `FilterVariable`
            values greater than the value given with `GT`.

        :type lt: string
        :param lt: The less than operator. Results have `FilterVariable`
            values less than the value given with `LT`.

        :type ge: string
        :param ge: The greater than or equal to operator. Results have
            `FilterVariable` values greater than or equal to the value
            given with `GE`.

        :type le: string
        :param le: The less than or equal to operator. Results have
            `FilterVariable` values less than or equal to the value given
            with `LE`.

        :type ne: string
        :param ne: The not equal to operator. Results have `FilterVariable`
            values not equal to the value given with `NE`.

        :type prefix: string
        :param prefix: A string found at the beginning of a variable, such
            as `Name` or `Id`.

        For example, a `DataSource` could have the `Name`
            `2014-09-09-HolidayGiftMailer`. To search for it, select `Name`
            for the `FilterVariable` and any of the following strings for
            the `Prefix`:

        + 2014-09
        + 2014-09-09
        + 2014-09-09-Holiday

        :type sort_order: string
        :param sort_order: A two-value parameter that determines the
            sequence of the resulting list of `DataSource`.

        + `asc` - Arranges the list in ascending order (A-Z, 0-9).
        + `dsc` - Arranges the list in descending order (Z-A, 9-0).

        Results are sorted by `FilterVariable`.

        :type next_token: string
        :param next_token: The ID of the page in the paginated results.

        :type limit: integer
        :param limit: The maximum number of `DataSource` to include in the
            result.

        """
        # Wire name / argument pairs, in the order they enter the request.
        optional_fields = (
            ('FilterVariable', filter_variable),
            ('EQ', eq),
            ('GT', gt),
            ('LT', lt),
            ('GE', ge),
            ('LE', le),
            ('NE', ne),
            ('Prefix', prefix),
            ('SortOrder', sort_order),
            ('NextToken', next_token),
            ('Limit', limit),
        )
        # Falsy values such as '' or 0 are still sent; only None is omitted.
        request_fields = dict(
            (wire_name, value) for wire_name, value in optional_fields
            if value is not None)
        return self.make_request(action='DescribeDataSources',
                                 body=json.dumps(request_fields))

    def describe_evaluations(self, filter_variable=None, eq=None, gt=None,
                             lt=None, ge=None, le=None, ne=None, prefix=None,
                             sort_order=None, next_token=None, limit=None):
        """
        Lists the `Evaluation` objects that match the search criteria
        given in the request.

        Every argument is optional; only arguments that are not `None`
        are placed in the request body.

        :type filter_variable: string
        :param filter_variable: The variable used to filter the list of
            `Evaluation` objects. Use one of the following:

        + `CreatedAt` - Searches on the `Evaluation` creation date.
        + `Status` - Searches on the `Evaluation` status.
        + `Name` - Searches on the contents of `Evaluation` `Name`.
        + `IAMUser` - Searches on the user account that invoked an
              `Evaluation`.
        + `MLModelId` - Searches on the `MLModel` that was evaluated.
        + `DataSourceId` - Searches on the `DataSource` used in
              `Evaluation`.
        + `DataUri` - Searches on the data file(s) used in `Evaluation`.
              The URL can identify either a file or an Amazon Simple
              Storage Service (Amazon S3) bucket or directory.

        :type eq: string
        :param eq: The equal to operator. Results have `FilterVariable`
            values that exactly match the value given with `EQ`.

        :type gt: string
        :param gt: The greater than operator. Results have `FilterVariable`
            values greater than the value given with `GT`.

        :type lt: string
        :param lt: The less than operator. Results have `FilterVariable`
            values less than the value given with `LT`.

        :type ge: string
        :param ge: The greater than or equal to operator. Results have
            `FilterVariable` values greater than or equal to the value
            given with `GE`.

        :type le: string
        :param le: The less than or equal to operator. Results have
            `FilterVariable` values less than or equal to the value given
            with `LE`.

        :type ne: string
        :param ne: The not equal to operator. Results have `FilterVariable`
            values not equal to the value given with `NE`.

        :type prefix: string
        :param prefix: A string found at the beginning of a variable, such
            as `Name` or `Id`.

        For example, an `Evaluation` could have the `Name`
            `2014-09-09-HolidayGiftMailer`. To search for it, select `Name`
            for the `FilterVariable` and any of the following strings for
            the `Prefix`:

        + 2014-09
        + 2014-09-09
        + 2014-09-09-Holiday

        :type sort_order: string
        :param sort_order: A two-value parameter that determines the
            sequence of the resulting list of `Evaluation`.

        + `asc` - Arranges the list in ascending order (A-Z, 0-9).
        + `dsc` - Arranges the list in descending order (Z-A, 9-0).

        Results are sorted by `FilterVariable`.

        :type next_token: string
        :param next_token: The ID of the page in the paginated results.

        :type limit: integer
        :param limit: The maximum number of `Evaluation` to include in the
            result.

        """
        # Wire name / argument pairs, in the order they enter the request.
        optional_fields = (
            ('FilterVariable', filter_variable),
            ('EQ', eq),
            ('GT', gt),
            ('LT', lt),
            ('GE', ge),
            ('LE', le),
            ('NE', ne),
            ('Prefix', prefix),
            ('SortOrder', sort_order),
            ('NextToken', next_token),
            ('Limit', limit),
        )
        # Falsy values such as '' or 0 are still sent; only None is omitted.
        request_fields = dict(
            (wire_name, value) for wire_name, value in optional_fields
            if value is not None)
        return self.make_request(action='DescribeEvaluations',
                                 body=json.dumps(request_fields))

    def describe_ml_models(self, filter_variable=None, eq=None, gt=None,
                           lt=None, ge=None, le=None, ne=None, prefix=None,
                           sort_order=None, next_token=None, limit=None):
        """
        Lists the `MLModel` objects that match the search criteria
        given in the request.

        Every argument is optional; only arguments that are not `None`
        are placed in the request body.

        :type filter_variable: string
        :param filter_variable: The variable used to filter the list of
            `MLModel`. Use one of the following:

        + `CreatedAt` - Searches on `MLModel` creation date.
        + `Status` - Searches on `MLModel` status.
        + `Name` - Searches on the contents of `MLModel` `Name`.
        + `IAMUser` - Searches on the user account that invoked the
              `MLModel` creation.
        + `TrainingDataSourceId` - Searches on the `DataSource` used to
              train one or more `MLModel`.
        + `RealtimeEndpointStatus` - Searches on the `MLModel` real-time
              endpoint status.
        + `MLModelType` - Searches on `MLModel` type: binary, regression,
              or multi-class.
        + `Algorithm` - Searches on the algorithm that the `MLModel` uses.
        + `TrainingDataURI` - Searches on the data file(s) used in training
              a `MLModel`. The URL can identify either a file or an Amazon
              Simple Storage Service (Amazon S3) bucket or directory.

        :type eq: string
        :param eq: The equal to operator. Results have `FilterVariable`
            values that exactly match the value given with `EQ`.

        :type gt: string
        :param gt: The greater than operator. Results have `FilterVariable`
            values greater than the value given with `GT`.

        :type lt: string
        :param lt: The less than operator. Results have `FilterVariable`
            values less than the value given with `LT`.

        :type ge: string
        :param ge: The greater than or equal to operator. Results have
            `FilterVariable` values greater than or equal to the value
            given with `GE`.

        :type le: string
        :param le: The less than or equal to operator. Results have
            `FilterVariable` values less than or equal to the value given
            with `LE`.

        :type ne: string
        :param ne: The not equal to operator. Results have `FilterVariable`
            values not equal to the value given with `NE`.

        :type prefix: string
        :param prefix: A string found at the beginning of a variable, such
            as `Name` or `Id`.

        For example, an `MLModel` could have the `Name`
            `2014-09-09-HolidayGiftMailer`. To search for it, select `Name`
            for the `FilterVariable` and any of the following strings for
            the `Prefix`:

        + 2014-09
        + 2014-09-09
        + 2014-09-09-Holiday

        :type sort_order: string
        :param sort_order: A two-value parameter that determines the
            sequence of the resulting list of `MLModel`.

        + `asc` - Arranges the list in ascending order (A-Z, 0-9).
        + `dsc` - Arranges the list in descending order (Z-A, 9-0).

        Results are sorted by `FilterVariable`.

        :type next_token: string
        :param next_token: The ID of the page in the paginated results.

        :type limit: integer
        :param limit: The number of pages of information to include in the
            result. The range of acceptable values is 1 through 100. The
            default value is 100.

        """
        # Wire name / argument pairs, in the order they enter the request.
        optional_fields = (
            ('FilterVariable', filter_variable),
            ('EQ', eq),
            ('GT', gt),
            ('LT', lt),
            ('GE', ge),
            ('LE', le),
            ('NE', ne),
            ('Prefix', prefix),
            ('SortOrder', sort_order),
            ('NextToken', next_token),
            ('Limit', limit),
        )
        # Falsy values such as '' or 0 are still sent; only None is omitted.
        request_fields = dict(
            (wire_name, value) for wire_name, value in optional_fields
            if value is not None)
        return self.make_request(action='DescribeMLModels',
                                 body=json.dumps(request_fields))

    def get_batch_prediction(self, batch_prediction_id):
        """
        Fetches a `BatchPrediction`, including detailed metadata,
        status, and data file information for a `Batch Prediction`
        request.

        :type batch_prediction_id: string
        :param batch_prediction_id: The ID that was assigned to the
            `BatchPrediction` at creation.

        """
        request_body = json.dumps({'BatchPredictionId': batch_prediction_id})
        return self.make_request(action='GetBatchPrediction',
                                 body=request_body)

    def get_data_source(self, data_source_id, verbose=None):
        """
        Fetches a `DataSource`, including metadata and data file
        information as well as the current status of the `DataSource`.

        `GetDataSource` provides results in normal or verbose format.
        Compared with the normal format, the verbose format also
        carries the schema description and the list of files pointed
        to by the DataSource.

        :type data_source_id: string
        :param data_source_id: The ID that was assigned to the `DataSource`
            at creation.

        :type verbose: boolean
        :param verbose: Specifies whether the `GetDataSource` operation
            should return `DataSourceSchema`. If true, `DataSourceSchema`
            is returned; if false, it is not. When left as `None` the
            `Verbose` field is omitted from the request altogether.

        """
        request_fields = {'DataSourceId': data_source_id}
        # An explicit False is still sent; only None leaves the field out.
        if verbose is not None:
            request_fields['Verbose'] = verbose
        request_body = json.dumps(request_fields)
        return self.make_request(action='GetDataSource', body=request_body)

    def get_evaluation(self, evaluation_id):
        """
        Fetches an `Evaluation`, including metadata as well as the
        current status of the `Evaluation`.

        :type evaluation_id: string
        :param evaluation_id: The ID of the `Evaluation` to retrieve. The
            evaluation of each `MLModel` is recorded and cataloged, and
            the ID provides the means to access that information.

        """
        request_body = json.dumps({'EvaluationId': evaluation_id})
        return self.make_request(action='GetEvaluation',
                                 body=request_body)

    def get_ml_model(self, ml_model_id, verbose=None):
        """
        Fetches an `MLModel`, including detailed metadata and data
        source information as well as the current status of the
        `MLModel`.

        `GetMLModel` provides results in normal or verbose format.

        :type ml_model_id: string
        :param ml_model_id: The ID that was assigned to the `MLModel` at
            creation.

        :type verbose: boolean
        :param verbose: Specifies whether the `GetMLModel` operation should
            return `Recipe`. If true, `Recipe` is returned; if false, it
            is not. When left as `None` the `Verbose` field is omitted
            from the request altogether.

        """
        request_fields = {'MLModelId': ml_model_id}
        # An explicit False is still sent; only None leaves the field out.
        if verbose is not None:
            request_fields['Verbose'] = verbose
        request_body = json.dumps(request_fields)
        return self.make_request(action='GetMLModel', body=request_body)

    def predict(self, ml_model_id, record, predict_endpoint):
        """
        Produces a prediction for the observation using the specified
        `MLModel`.

        Which response parameters are populated depends on the type of
        the requested model, so not all of them will be present.

        :type ml_model_id: string
        :param ml_model_id: A unique identifier of the `MLModel`.

        :type record: map
        :param record: A map of variable name-value pairs that represent an
            observation.

        :type predict_endpoint: string
        :param predict_endpoint: The endpoint to send the predict request
            to. When it parses as a URL with a host component, only that
            host name is used; otherwise the value is used unchanged. The
            resulting host is both sent as `PredictEndpoint` and used as
            the request host.

        """
        parsed_host = urlsplit(predict_endpoint).hostname
        # A value without a URL host component yields no hostname; treat
        # the argument itself as the host in that case.
        predict_host = (predict_endpoint if parsed_host is None
                        else parsed_host)

        request_body = json.dumps({
            'MLModelId': ml_model_id,
            'Record': record,
            'PredictEndpoint': predict_host,
        })
        return self.make_request(action='Predict',
                                 body=request_body,
                                 host=predict_host)

    def update_batch_prediction(self, batch_prediction_id,
                                batch_prediction_name):
        """
        Changes the `BatchPredictionName` of a `BatchPrediction`.

        The GetBatchPrediction operation can be used to view the
        contents of the updated data element.

        :type batch_prediction_id: string
        :param batch_prediction_id: The ID that was assigned to the
            `BatchPrediction` during creation.

        :type batch_prediction_name: string
        :param batch_prediction_name: A new user-supplied name or
            description of the `BatchPrediction`.

        """
        request_body = json.dumps({
            'BatchPredictionId': batch_prediction_id,
            'BatchPredictionName': batch_prediction_name,
        })
        return self.make_request(action='UpdateBatchPrediction',
                                 body=request_body)

    def update_data_source(self, data_source_id, data_source_name):
        """
        Changes the `DataSourceName` of a `DataSource`.

        The GetDataSource operation can be used to view the contents
        of the updated data element.

        :type data_source_id: string
        :param data_source_id: The ID that was assigned to the `DataSource`
            during creation.

        :type data_source_name: string
        :param data_source_name: A new user-supplied name or description of
            the `DataSource` that will replace the current description.

        """
        request_body = json.dumps({
            'DataSourceId': data_source_id,
            'DataSourceName': data_source_name,
        })
        return self.make_request(action='UpdateDataSource',
                                 body=request_body)

    def update_evaluation(self, evaluation_id, evaluation_name):
        """
        Changes the `EvaluationName` of an `Evaluation`.

        The GetEvaluation operation can be used to view the contents
        of the updated data element.

        :type evaluation_id: string
        :param evaluation_id: The ID that was assigned to the `Evaluation`
            during creation.

        :type evaluation_name: string
        :param evaluation_name: A new user-supplied name or description of
            the `Evaluation` that will replace the current content.

        """
        request_body = json.dumps({
            'EvaluationId': evaluation_id,
            'EvaluationName': evaluation_name,
        })
        return self.make_request(action='UpdateEvaluation',
                                 body=request_body)

    def update_ml_model(self, ml_model_id, ml_model_name=None,
                        score_threshold=None):
        """
        Changes the `MLModelName` and the `ScoreThreshold` of an
        `MLModel`.

        The GetMLModel operation can be used to view the contents of
        the updated data element. Optional arguments left as `None`
        are omitted from the request.

        :type ml_model_id: string
        :param ml_model_id: The ID that was assigned to the `MLModel`
            during creation.

        :type ml_model_name: string
        :param ml_model_name: A user-supplied name or description of the
            `MLModel`.

        :type score_threshold: float
        :param score_threshold: The `ScoreThreshold` used in binary
            classification `MLModel` that marks the boundary between a
            positive prediction and a negative prediction.
        Output values greater than or equal to the `ScoreThreshold` receive
            a positive result from the `MLModel`, such as `True`. Output
            values less than the `ScoreThreshold` receive a negative
            response from the `MLModel`, such as `False`.

        """
        request_fields = {'MLModelId': ml_model_id}
        optional_fields = (
            ('MLModelName', ml_model_name),
            ('ScoreThreshold', score_threshold),
        )
        # Falsy values such as '' or 0 are still sent; only None is omitted.
        for wire_name, value in optional_fields:
            if value is not None:
                request_fields[wire_name] = value
        return self.make_request(action='UpdateMLModel',
                                 body=json.dumps(request_fields))

    def make_request(self, action, body, host=None):
        """
        Sends one JSON request for `action` and returns the decoded
        response.

        The request is a `POST` to `/` whose `X-Amz-Target` header is
        built from `TargetPrefix` and `action`. The response body is
        decoded as UTF-8 and logged at debug level before its status is
        inspected.

        :type action: string
        :param action: The operation name appended to `TargetPrefix` in
            the `X-Amz-Target` header.

        :type body: string
        :param body: The serialized request payload; its length is sent as
            `Content-Length`.

        :type host: string
        :param host: When not `None`, replaces the region endpoint in the
            `Host` header and is also passed as `host` when building the
            request.

        :return: The parsed JSON response for a 200 status with a
            non-empty body, or `None` for a 200 status with an empty body.

        :raises: For any other status, the class registered in `_faults`
            under the error body's `__type`, falling back to
            `ResponseError`; it is constructed with the status, the
            reason and the parsed error body.

        """
        request_headers = {
            'X-Amz-Target': '%s.%s' % (self.TargetPrefix, action),
            'Host': self.region.endpoint,
            'Content-Type': 'application/x-amz-json-1.1',
            'Content-Length': str(len(body)),
        }
        request_kwargs = {
            'method': 'POST',
            'path': '/',
            'auth_path': '/',
            'params': {},
            'headers': request_headers,
            'data': body,
        }
        if host is not None:
            # Caller-selected host overrides the region endpoint both in
            # the header and in the request itself.
            request_headers['Host'] = host
            request_kwargs['host'] = host

        http_request = self.build_base_http_request(**request_kwargs)
        response = self._mexe(http_request, sender=None,
                              override_num_retries=10)
        response_body = response.read().decode('utf-8')
        boto.log.debug(response_body)

        if response.status != 200:
            error_body = json.loads(response_body)
            fault_class = self._faults.get(error_body.get('__type', None),
                                           self.ResponseError)
            raise fault_class(response.status, response.reason,
                              body=error_body)

        if response_body:
            return json.loads(response_body)
        return None
hemamaps / boto python

Version: 2.42.0

/ machinelearning / layer1.py

Products

About

Resources

Contact Gemfury