# Copyright (c) 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
import boto
from boto.compat import json, urlsplit
from boto.connection import AWSQueryConnection
from boto.regioninfo import RegionInfo
from boto.exception import JSONResponseError
from boto.machinelearning import exceptions
class MachineLearningConnection(AWSQueryConnection):
"""
Definition of the public APIs exposed by Amazon Machine Learning
"""
APIVersion = "2014-12-12"
AuthServiceName = 'machinelearning'
DefaultRegionName = "us-east-1"
DefaultRegionEndpoint = "machinelearning.us-east-1.amazonaws.com"
ServiceName = "MachineLearning"
TargetPrefix = "AmazonML_20141212"
ResponseError = JSONResponseError
_faults = {
"InternalServerException": exceptions.InternalServerException,
"LimitExceededException": exceptions.LimitExceededException,
"ResourceNotFoundException": exceptions.ResourceNotFoundException,
"IdempotentParameterMismatchException": exceptions.IdempotentParameterMismatchException,
"PredictorNotMountedException": exceptions.PredictorNotMountedException,
"InvalidInputException": exceptions.InvalidInputException,
}
def __init__(self, **kwargs):
region = kwargs.pop('region', None)
if not region:
region = RegionInfo(self, self.DefaultRegionName,
self.DefaultRegionEndpoint)
if 'host' not in kwargs or kwargs['host'] is None:
kwargs['host'] = region.endpoint
super(MachineLearningConnection, self).__init__(**kwargs)
self.region = region
self.auth_region_name = self.region.name
def _required_auth_capability(self):
return ['hmac-v4']
def create_batch_prediction(self, batch_prediction_id, ml_model_id,
batch_prediction_data_source_id, output_uri,
batch_prediction_name=None):
"""
Generates predictions for a group of observations. The
observations to process exist in one or more data files
referenced by a `DataSource`. This operation creates a new
`BatchPrediction`, and uses an `MLModel` and the data files
referenced by the `DataSource` as information sources.
`CreateBatchPrediction` is an asynchronous operation. In
response to `CreateBatchPrediction`, Amazon Machine Learning
(Amazon ML) immediately returns and sets the `BatchPrediction`
status to `PENDING`. After the `BatchPrediction` completes,
Amazon ML sets the status to `COMPLETED`.
You can poll for status updates by using the
GetBatchPrediction operation and checking the `Status`
parameter of the result. After the `COMPLETED` status appears,
the results are available in the location specified by the
`OutputUri` parameter.
:type batch_prediction_id: string
:param batch_prediction_id: A user-supplied ID that uniquely identifies
the `BatchPrediction`.
:type batch_prediction_name: string
:param batch_prediction_name: A user-supplied name or description of
the `BatchPrediction`. `BatchPredictionName` can only use the UTF-8
character set.
:type ml_model_id: string
:param ml_model_id: The ID of the `MLModel` that will generate
predictions for the group of observations.
:type batch_prediction_data_source_id: string
:param batch_prediction_data_source_id: The ID of the `DataSource` that
points to the group of observations to predict.
:type output_uri: string
:param output_uri: The location of an Amazon Simple Storage Service
(Amazon S3) bucket or directory to store the batch prediction
results. The following substrings are not allowed in the s3 key
portion of the "outputURI" field: ':', '//', '/./', '/../'.
Amazon ML needs permissions to store and retrieve the logs on your
behalf. For information about how to set permissions, see the
`Amazon Machine Learning Developer Guide`_.
"""
params = {
'BatchPredictionId': batch_prediction_id,
'MLModelId': ml_model_id,
'BatchPredictionDataSourceId': batch_prediction_data_source_id,
'OutputUri': output_uri,
}
if batch_prediction_name is not None:
params['BatchPredictionName'] = batch_prediction_name
return self.make_request(action='CreateBatchPrediction',
body=json.dumps(params))
def create_data_source_from_rds(self, data_source_id, rds_data, role_arn,
data_source_name=None,
compute_statistics=None):
"""
Creates a `DataSource` object from an ` Amazon Relational
Database Service`_ (Amazon RDS). A `DataSource` references
data that can be used to perform CreateMLModel,
CreateEvaluation, or CreateBatchPrediction operations.
`CreateDataSourceFromRDS` is an asynchronous operation. In
response to `CreateDataSourceFromRDS`, Amazon Machine Learning
(Amazon ML) immediately returns and sets the `DataSource`
status to `PENDING`. After the `DataSource` is created and
ready for use, Amazon ML sets the `Status` parameter to
`COMPLETED`. `DataSource` in `COMPLETED` or `PENDING` status
can only be used to perform CreateMLModel, CreateEvaluation,
or CreateBatchPrediction operations.
If Amazon ML cannot accept the input source, it sets the
`Status` parameter to `FAILED` and includes an error message
in the `Message` attribute of the GetDataSource operation
response.
:type data_source_id: string
:param data_source_id: A user-supplied ID that uniquely identifies the
`DataSource`. Typically, an Amazon Resource Number (ARN) becomes
the ID for a `DataSource`.
:type data_source_name: string
:param data_source_name: A user-supplied name or description of the
`DataSource`.
:type rds_data: dict
:param rds_data:
The data specification of an Amazon RDS `DataSource`:
+ DatabaseInformation -
+ `DatabaseName ` - Name of the Amazon RDS database.
+ ` InstanceIdentifier ` - Unique identifier for the Amazon RDS
database instance.
+ DatabaseCredentials - AWS Identity and Access Management (IAM)
credentials that are used to connect to the Amazon RDS database.
+ ResourceRole - Role (DataPipelineDefaultResourceRole) assumed by an
Amazon Elastic Compute Cloud (EC2) instance to carry out the copy
task from Amazon RDS to Amazon S3. For more information, see `Role
templates`_ for data pipelines.
+ ServiceRole - Role (DataPipelineDefaultRole) assumed by the AWS Data
Pipeline service to monitor the progress of the copy task from
Amazon RDS to Amazon Simple Storage Service (S3). For more
information, see `Role templates`_ for data pipelines.
+ SecurityInfo - Security information to use to access an Amazon RDS
instance. You need to set up appropriate ingress rules for the
security entity IDs provided to allow access to the Amazon RDS
instance. Specify a [ `SubnetId`, `SecurityGroupIds`] pair for a
VPC-based Amazon RDS instance.
+ SelectSqlQuery - Query that is used to retrieve the observation data
for the `Datasource`.
+ S3StagingLocation - Amazon S3 location for staging RDS data. The data
retrieved from Amazon RDS using `SelectSqlQuery` is stored in this
location.
+ DataSchemaUri - Amazon S3 location of the `DataSchema`.
+ DataSchema - A JSON string representing the schema. This is not
required if `DataSchemaUri` is specified.
+ DataRearrangement - A JSON string representing the splitting
requirement of a `Datasource`. Sample - ` "{\"randomSeed\":\"some-
random-seed\",
\"splitting\":{\"percentBegin\":10,\"percentEnd\":60}}"`
:type role_arn: string
:param role_arn: The role that Amazon ML assumes on behalf of the user
to create and activate a data pipeline in the users account and
copy data (using the `SelectSqlQuery`) query from Amazon RDS to
Amazon S3.
:type compute_statistics: boolean
:param compute_statistics: The compute statistics for a `DataSource`.
The statistics are generated from the observation data referenced
by a `DataSource`. Amazon ML uses the statistics internally during
an `MLModel` training. This parameter must be set to `True` if the
``DataSource `` needs to be used for `MLModel` training.
"""
params = {
'DataSourceId': data_source_id,
'RDSData': rds_data,
'RoleARN': role_arn,
}
if data_source_name is not None:
params['DataSourceName'] = data_source_name
if compute_statistics is not None:
params['ComputeStatistics'] = compute_statistics
return self.make_request(action='CreateDataSourceFromRDS',
body=json.dumps(params))
def create_data_source_from_redshift(self, data_source_id, data_spec,
role_arn, data_source_name=None,
compute_statistics=None):
"""
Creates a `DataSource` from `Amazon Redshift`_. A `DataSource`
references data that can be used to perform either
CreateMLModel, CreateEvaluation or CreateBatchPrediction
operations.
`CreateDataSourceFromRedshift` is an asynchronous operation.
In response to `CreateDataSourceFromRedshift`, Amazon Machine
Learning (Amazon ML) immediately returns and sets the
`DataSource` status to `PENDING`. After the `DataSource` is
created and ready for use, Amazon ML sets the `Status`
parameter to `COMPLETED`. `DataSource` in `COMPLETED` or
`PENDING` status can only be used to perform CreateMLModel,
CreateEvaluation, or CreateBatchPrediction operations.
If Amazon ML cannot accept the input source, it sets the
`Status` parameter to `FAILED` and includes an error message
in the `Message` attribute of the GetDataSource operation
response.
The observations should exist in the database hosted on an
Amazon Redshift cluster and should be specified by a
`SelectSqlQuery`. Amazon ML executes ` Unload`_ command in
Amazon Redshift to transfer the result set of `SelectSqlQuery`
to `S3StagingLocation.`
After the `DataSource` is created, it's ready for use in
evaluations and batch predictions. If you plan to use the
`DataSource` to train an `MLModel`, the `DataSource` requires
another item -- a recipe. A recipe describes the observation
variables that participate in training an `MLModel`. A recipe
describes how each input variable will be used in training.
Will the variable be included or excluded from training? Will
the variable be manipulated, for example, combined with
another variable or split apart into word combinations? The
recipe provides answers to these questions. For more
information, see the Amazon Machine Learning Developer Guide.
:type data_source_id: string
:param data_source_id: A user-supplied ID that uniquely identifies the
`DataSource`.
:type data_source_name: string
:param data_source_name: A user-supplied name or description of the
`DataSource`.
:type data_spec: dict
:param data_spec:
The data specification of an Amazon Redshift `DataSource`:
+ DatabaseInformation -
+ `DatabaseName ` - Name of the Amazon Redshift database.
+ ` ClusterIdentifier ` - Unique ID for the Amazon Redshift cluster.
+ DatabaseCredentials - AWS Identity abd Access Management (IAM)
credentials that are used to connect to the Amazon Redshift
database.
+ SelectSqlQuery - Query that is used to retrieve the observation data
for the `Datasource`.
+ S3StagingLocation - Amazon Simple Storage Service (Amazon S3)
location for staging Amazon Redshift data. The data retrieved from
Amazon Relational Database Service (Amazon RDS) using
`SelectSqlQuery` is stored in this location.
+ DataSchemaUri - Amazon S3 location of the `DataSchema`.
+ DataSchema - A JSON string representing the schema. This is not
required if `DataSchemaUri` is specified.
+ DataRearrangement - A JSON string representing the splitting
requirement of a `Datasource`. Sample - ` "{\"randomSeed\":\"some-
random-seed\",
\"splitting\":{\"percentBegin\":10,\"percentEnd\":60}}"`
:type role_arn: string
:param role_arn: A fully specified role Amazon Resource Name (ARN).
Amazon ML assumes the role on behalf of the user to create the
following:
+ A security group to allow Amazon ML to execute the `SelectSqlQuery`
query on an Amazon Redshift cluster
+ An Amazon S3 bucket policy to grant Amazon ML read/write permissions
on the `S3StagingLocation`
:type compute_statistics: boolean
:param compute_statistics: The compute statistics for a `DataSource`.
The statistics are generated from the observation data referenced
by a `DataSource`. Amazon ML uses the statistics internally during
`MLModel` training. This parameter must be set to `True` if the
``DataSource `` needs to be used for `MLModel` training
"""
params = {
'DataSourceId': data_source_id,
'DataSpec': data_spec,
'RoleARN': role_arn,
}
if data_source_name is not None:
params['DataSourceName'] = data_source_name
if compute_statistics is not None:
params['ComputeStatistics'] = compute_statistics
return self.make_request(action='CreateDataSourceFromRedshift',
body=json.dumps(params))
def create_data_source_from_s3(self, data_source_id, data_spec,
data_source_name=None,
compute_statistics=None):
"""
Creates a `DataSource` object. A `DataSource` references data
that can be used to perform CreateMLModel, CreateEvaluation,
or CreateBatchPrediction operations.
`CreateDataSourceFromS3` is an asynchronous operation. In
response to `CreateDataSourceFromS3`, Amazon Machine Learning
(Amazon ML) immediately returns and sets the `DataSource`
status to `PENDING`. After the `DataSource` is created and
ready for use, Amazon ML sets the `Status` parameter to
`COMPLETED`. `DataSource` in `COMPLETED` or `PENDING` status
can only be used to perform CreateMLModel, CreateEvaluation or
CreateBatchPrediction operations.
If Amazon ML cannot accept the input source, it sets the
`Status` parameter to `FAILED` and includes an error message
in the `Message` attribute of the GetDataSource operation
response.
The observation data used in a `DataSource` should be ready to
use; that is, it should have a consistent structure, and
missing data values should be kept to a minimum. The
observation data must reside in one or more CSV files in an
Amazon Simple Storage Service (Amazon S3) bucket, along with a
schema that describes the data items by name and type. The
same schema must be used for all of the data files referenced
by the `DataSource`.
After the `DataSource` has been created, it's ready to use in
evaluations and batch predictions. If you plan to use the
`DataSource` to train an `MLModel`, the `DataSource` requires
another item: a recipe. A recipe describes the observation
variables that participate in training an `MLModel`. A recipe
describes how each input variable will be used in training.
Will the variable be included or excluded from training? Will
the variable be manipulated, for example, combined with
another variable, or split apart into word combinations? The
recipe provides answers to these questions. For more
information, see the `Amazon Machine Learning Developer
Guide`_.
:type data_source_id: string
:param data_source_id: A user-supplied identifier that uniquely
identifies the `DataSource`.
:type data_source_name: string
:param data_source_name: A user-supplied name or description of the
`DataSource`.
:type data_spec: dict
:param data_spec:
The data specification of a `DataSource`:
+ DataLocationS3 - Amazon Simple Storage Service (Amazon S3) location
of the observation data.
+ DataSchemaLocationS3 - Amazon S3 location of the `DataSchema`.
+ DataSchema - A JSON string representing the schema. This is not
required if `DataSchemaUri` is specified.
+ DataRearrangement - A JSON string representing the splitting
requirement of a `Datasource`. Sample - ` "{\"randomSeed\":\"some-
random-seed\",
\"splitting\":{\"percentBegin\":10,\"percentEnd\":60}}"`
:type compute_statistics: boolean
:param compute_statistics: The compute statistics for a `DataSource`.
The statistics are generated from the observation data referenced
by a `DataSource`. Amazon ML uses the statistics internally during
an `MLModel` training. This parameter must be set to `True` if the
``DataSource `` needs to be used for `MLModel` training
"""
params = {
'DataSourceId': data_source_id,
'DataSpec': data_spec,
}
if data_source_name is not None:
params['DataSourceName'] = data_source_name
if compute_statistics is not None:
params['ComputeStatistics'] = compute_statistics
return self.make_request(action='CreateDataSourceFromS3',
body=json.dumps(params))
def create_evaluation(self, evaluation_id, ml_model_id,
evaluation_data_source_id, evaluation_name=None):
"""
Creates a new `Evaluation` of an `MLModel`. An `MLModel` is
evaluated on a set of observations associated to a
`DataSource`. Like a `DataSource` for an `MLModel`, the
`DataSource` for an `Evaluation` contains values for the
Target Variable. The `Evaluation` compares the predicted
result for each observation to the actual outcome and provides
a summary so that you know how effective the `MLModel`
functions on the test data. Evaluation generates a relevant
performance metric such as BinaryAUC, RegressionRMSE or
MulticlassAvgFScore based on the corresponding `MLModelType`:
`BINARY`, `REGRESSION` or `MULTICLASS`.
`CreateEvaluation` is an asynchronous operation. In response
to `CreateEvaluation`, Amazon Machine Learning (Amazon ML)
immediately returns and sets the evaluation status to
`PENDING`. After the `Evaluation` is created and ready for
use, Amazon ML sets the status to `COMPLETED`.
You can use the GetEvaluation operation to check progress of
the evaluation during the creation operation.
:type evaluation_id: string
:param evaluation_id: A user-supplied ID that uniquely identifies the
`Evaluation`.
:type evaluation_name: string
:param evaluation_name: A user-supplied name or description of the
`Evaluation`.
:type ml_model_id: string
:param ml_model_id: The ID of the `MLModel` to evaluate.
The schema used in creating the `MLModel` must match the schema of the
`DataSource` used in the `Evaluation`.
:type evaluation_data_source_id: string
:param evaluation_data_source_id: The ID of the `DataSource` for the
evaluation. The schema of the `DataSource` must match the schema
used to create the `MLModel`.
"""
params = {
'EvaluationId': evaluation_id,
'MLModelId': ml_model_id,
'EvaluationDataSourceId': evaluation_data_source_id,
}
if evaluation_name is not None:
params['EvaluationName'] = evaluation_name
return self.make_request(action='CreateEvaluation',
body=json.dumps(params))
def create_ml_model(self, ml_model_id, ml_model_type,
training_data_source_id, ml_model_name=None,
parameters=None, recipe=None, recipe_uri=None):
"""
Creates a new `MLModel` using the data files and the recipe as
information sources.
An `MLModel` is nearly immutable. Users can only update the
`MLModelName` and the `ScoreThreshold` in an `MLModel` without
creating a new `MLModel`.
`CreateMLModel` is an asynchronous operation. In response to
`CreateMLModel`, Amazon Machine Learning (Amazon ML)
immediately returns and sets the `MLModel` status to
`PENDING`. After the `MLModel` is created and ready for use,
Amazon ML sets the status to `COMPLETED`.
You can use the GetMLModel operation to check progress of the
`MLModel` during the creation operation.
CreateMLModel requires a `DataSource` with computed
statistics, which can be created by setting
`ComputeStatistics` to `True` in CreateDataSourceFromRDS,
CreateDataSourceFromS3, or CreateDataSourceFromRedshift
operations.
:type ml_model_id: string
:param ml_model_id: A user-supplied ID that uniquely identifies the
`MLModel`.
:type ml_model_name: string
:param ml_model_name: A user-supplied name or description of the
`MLModel`.
:type ml_model_type: string
:param ml_model_type: The category of supervised learning that this
`MLModel` will address. Choose from the following types:
+ Choose `REGRESSION` if the `MLModel` will be used to predict a
numeric value.
+ Choose `BINARY` if the `MLModel` result has two possible values.
+ Choose `MULTICLASS` if the `MLModel` result has a limited number of
values.
For more information, see the `Amazon Machine Learning Developer
Guide`_.
:type parameters: map
:param parameters:
A list of the training parameters in the `MLModel`. The list is
implemented as a map of key/value pairs.
The following is the current set of training parameters:
+ `sgd.l1RegularizationAmount` - Coefficient regularization L1 norm. It
controls overfitting the data by penalizing large coefficients.
This tends to drive coefficients to zero, resulting in sparse
feature set. If you use this parameter, start by specifying a small
value such as 1.0E-08. The value is a double that ranges from 0 to
MAX_DOUBLE. The default is not to use L1 normalization. The
parameter cannot be used when `L2` is specified. Use this parameter
sparingly.
+ `sgd.l2RegularizationAmount` - Coefficient regularization L2 norm. It
controls overfitting the data by penalizing large coefficients.
This tends to drive coefficients to small, nonzero values. If you
use this parameter, start by specifying a small value such as
1.0E-08. The valuseis a double that ranges from 0 to MAX_DOUBLE.
The default is not to use L2 normalization. This cannot be used
when `L1` is specified. Use this parameter sparingly.
+ `sgd.maxPasses` - Number of times that the training process traverses
the observations to build the `MLModel`. The value is an integer
that ranges from 1 to 10000. The default value is 10.
+ `sgd.maxMLModelSizeInBytes` - Maximum allowed size of the model.
Depending on the input data, the size of the model might affect its
performance. The value is an integer that ranges from 100000 to
2147483648. The default value is 33554432.
:type training_data_source_id: string
:param training_data_source_id: The `DataSource` that points to the
training data.
:type recipe: string
:param recipe: The data recipe for creating `MLModel`. You must specify
either the recipe or its URI. If you dont specify a recipe or its
URI, Amazon ML creates a default.
:type recipe_uri: string
:param recipe_uri: The Amazon Simple Storage Service (Amazon S3)
location and file name that contains the `MLModel` recipe. You must
specify either the recipe or its URI. If you dont specify a recipe
or its URI, Amazon ML creates a default.
"""
params = {
'MLModelId': ml_model_id,
'MLModelType': ml_model_type,
'TrainingDataSourceId': training_data_source_id,
}
if ml_model_name is not None:
params['MLModelName'] = ml_model_name
if parameters is not None:
params['Parameters'] = parameters
if recipe is not None:
params['Recipe'] = recipe
if recipe_uri is not None:
params['RecipeUri'] = recipe_uri
return self.make_request(action='CreateMLModel',
body=json.dumps(params))
def create_realtime_endpoint(self, ml_model_id):
"""
Creates a real-time endpoint for the `MLModel`. The endpoint
contains the URI of the `MLModel`; that is, the location to
send real-time prediction requests for the specified
`MLModel`.
:type ml_model_id: string
:param ml_model_id: The ID assigned to the `MLModel` during creation.
"""
params = {'MLModelId': ml_model_id, }
return self.make_request(action='CreateRealtimeEndpoint',
body=json.dumps(params))
def delete_batch_prediction(self, batch_prediction_id):
"""
Assigns the DELETED status to a `BatchPrediction`, rendering
it unusable.
After using the `DeleteBatchPrediction` operation, you can use
the GetBatchPrediction operation to verify that the status of
the `BatchPrediction` changed to DELETED.
The result of the `DeleteBatchPrediction` operation is
irreversible.
:type batch_prediction_id: string
:param batch_prediction_id: A user-supplied ID that uniquely identifies
the `BatchPrediction`.
"""
params = {'BatchPredictionId': batch_prediction_id, }
return self.make_request(action='DeleteBatchPrediction',
body=json.dumps(params))
def delete_data_source(self, data_source_id):
"""
Assigns the DELETED status to a `DataSource`, rendering it
unusable.
After using the `DeleteDataSource` operation, you can use the
GetDataSource operation to verify that the status of the
`DataSource` changed to DELETED.
The results of the `DeleteDataSource` operation are
irreversible.
:type data_source_id: string
:param data_source_id: A user-supplied ID that uniquely identifies the
`DataSource`.
"""
params = {'DataSourceId': data_source_id, }
return self.make_request(action='DeleteDataSource',
body=json.dumps(params))
def delete_evaluation(self, evaluation_id):
"""
Assigns the `DELETED` status to an `Evaluation`, rendering it
unusable.
After invoking the `DeleteEvaluation` operation, you can use
the GetEvaluation operation to verify that the status of the
`Evaluation` changed to `DELETED`.
The results of the `DeleteEvaluation` operation are
irreversible.
:type evaluation_id: string
:param evaluation_id: A user-supplied ID that uniquely identifies the
`Evaluation` to delete.
"""
params = {'EvaluationId': evaluation_id, }
return self.make_request(action='DeleteEvaluation',
body=json.dumps(params))
def delete_ml_model(self, ml_model_id):
"""
Assigns the DELETED status to an `MLModel`, rendering it
unusable.
After using the `DeleteMLModel` operation, you can use the
GetMLModel operation to verify that the status of the
`MLModel` changed to DELETED.
The result of the `DeleteMLModel` operation is irreversible.
:type ml_model_id: string
:param ml_model_id: A user-supplied ID that uniquely identifies the
`MLModel`.
"""
params = {'MLModelId': ml_model_id, }
return self.make_request(action='DeleteMLModel',
body=json.dumps(params))
def delete_realtime_endpoint(self, ml_model_id):
"""
Deletes a real time endpoint of an `MLModel`.
:type ml_model_id: string
:param ml_model_id: The ID assigned to the `MLModel` during creation.
"""
params = {'MLModelId': ml_model_id, }
return self.make_request(action='DeleteRealtimeEndpoint',
body=json.dumps(params))
def describe_batch_predictions(self, filter_variable=None, eq=None,
gt=None, lt=None, ge=None, le=None,
ne=None, prefix=None, sort_order=None,
next_token=None, limit=None):
"""
Returns a list of `BatchPrediction` operations that match the
search criteria in the request.
:type filter_variable: string
:param filter_variable:
Use one of the following variables to filter a list of
`BatchPrediction`:
+ `CreatedAt` - Sets the search criteria to the `BatchPrediction`
creation date.
+ `Status` - Sets the search criteria to the `BatchPrediction` status.
+ `Name` - Sets the search criteria to the contents of the
`BatchPrediction` ** ** `Name`.
+ `IAMUser` - Sets the search criteria to the user account that invoked
the `BatchPrediction` creation.
+ `MLModelId` - Sets the search criteria to the `MLModel` used in the
`BatchPrediction`.
+ `DataSourceId` - Sets the search criteria to the `DataSource` used in
the `BatchPrediction`.
+ `DataURI` - Sets the search criteria to the data file(s) used in the
`BatchPrediction`. The URL can identify either a file or an Amazon
Simple Storage Solution (Amazon S3) bucket or directory.
:type eq: string
:param eq: The equal to operator. The `BatchPrediction` results will
have `FilterVariable` values that exactly match the value specified
with `EQ`.
:type gt: string
:param gt: The greater than operator. The `BatchPrediction` results
will have `FilterVariable` values that are greater than the value
specified with `GT`.
:type lt: string
:param lt: The less than operator. The `BatchPrediction` results will
have `FilterVariable` values that are less than the value specified
with `LT`.
:type ge: string
:param ge: The greater than or equal to operator. The `BatchPrediction`
results will have `FilterVariable` values that are greater than or
equal to the value specified with `GE`.
:type le: string
:param le: The less than or equal to operator. The `BatchPrediction`
results will have `FilterVariable` values that are less than or
equal to the value specified with `LE`.
:type ne: string
:param ne: The not equal to operator. The `BatchPrediction` results
will have `FilterVariable` values not equal to the value specified
with `NE`.
:type prefix: string
:param prefix:
A string that is found at the beginning of a variable, such as `Name`
or `Id`.
For example, a `Batch Prediction` operation could have the `Name`
`2014-09-09-HolidayGiftMailer`. To search for this
`BatchPrediction`, select `Name` for the `FilterVariable` and any
of the following strings for the `Prefix`:
+ 2014-09
+ 2014-09-09
+ 2014-09-09-Holiday
:type sort_order: string
:param sort_order: A two-value parameter that determines the sequence
of the resulting list of `MLModel`s.
+ `asc` - Arranges the list in ascending order (A-Z, 0-9).
+ `dsc` - Arranges the list in descending order (Z-A, 9-0).
Results are sorted by `FilterVariable`.
:type next_token: string
:param next_token: An ID of the page in the paginated results.
:type limit: integer
:param limit: The number of pages of information to include in the
result. The range of acceptable values is 1 through 100. The
default value is 100.
"""
params = {}
if filter_variable is not None:
params['FilterVariable'] = filter_variable
if eq is not None:
params['EQ'] = eq
if gt is not None:
params['GT'] = gt
if lt is not None:
params['LT'] = lt
if ge is not None:
params['GE'] = ge
if le is not None:
params['LE'] = le
if ne is not None:
params['NE'] = ne
if prefix is not None:
params['Prefix'] = prefix
if sort_order is not None:
params['SortOrder'] = sort_order
if next_token is not None:
params['NextToken'] = next_token
if limit is not None:
params['Limit'] = limit
return self.make_request(action='DescribeBatchPredictions',
body=json.dumps(params))
def describe_data_sources(self, filter_variable=None, eq=None, gt=None,
lt=None, ge=None, le=None, ne=None,
prefix=None, sort_order=None, next_token=None,
limit=None):
"""
Returns a list of `DataSource` that match the search criteria
in the request.
:type filter_variable: string
:param filter_variable:
Use one of the following variables to filter a list of `DataSource`:
+ `CreatedAt` - Sets the search criteria to `DataSource` creation
dates.
+ `Status` - Sets the search criteria to `DataSource` statuses.
+ `Name` - Sets the search criteria to the contents of `DataSource` **
** `Name`.
+ `DataUri` - Sets the search criteria to the URI of data files used to
create the `DataSource`. The URI can identify either a file or an
Amazon Simple Storage Service (Amazon S3) bucket or directory.
+ `IAMUser` - Sets the search criteria to the user account that invoked
the `DataSource` creation.
:type eq: string
:param eq: The equal to operator. The `DataSource` results will have
`FilterVariable` values that exactly match the value specified with
`EQ`.
:type gt: string
:param gt: The greater than operator. The `DataSource` results will
have `FilterVariable` values that are greater than the value
specified with `GT`.
:type lt: string
:param lt: The less than operator. The `DataSource` results will have
`FilterVariable` values that are less than the value specified with
`LT`.
:type ge: string
:param ge: The greater than or equal to operator. The `DataSource`
results will have `FilterVariable` values that are greater than or
equal to the value specified with `GE`.
:type le: string
:param le: The less than or equal to operator. The `DataSource` results
will have `FilterVariable` values that are less than or equal to
the value specified with `LE`.
:type ne: string
:param ne: The not equal to operator. The `DataSource` results will
have `FilterVariable` values not equal to the value specified with
`NE`.
:type prefix: string
:param prefix:
A string that is found at the beginning of a variable, such as `Name`
or `Id`.
For example, a `DataSource` could have the `Name`
`2014-09-09-HolidayGiftMailer`. To search for this `DataSource`,
select `Name` for the `FilterVariable` and any of the following
strings for the `Prefix`:
+ 2014-09
+ 2014-09-09
+ 2014-09-09-Holiday
:type sort_order: string
:param sort_order: A two-value parameter that determines the sequence
of the resulting list of `DataSource`.
+ `asc` - Arranges the list in ascending order (A-Z, 0-9).
+ `dsc` - Arranges the list in descending order (Z-A, 9-0).
Results are sorted by `FilterVariable`.
:type next_token: string
:param next_token: The ID of the page in the paginated results.
:type limit: integer
:param limit: The maximum number of `DataSource` to include in the
result.
"""
params = {}
if filter_variable is not None:
params['FilterVariable'] = filter_variable
if eq is not None:
params['EQ'] = eq
if gt is not None:
params['GT'] = gt
if lt is not None:
params['LT'] = lt
if ge is not None:
params['GE'] = ge
if le is not None:
params['LE'] = le
if ne is not None:
params['NE'] = ne
if prefix is not None:
params['Prefix'] = prefix
if sort_order is not None:
params['SortOrder'] = sort_order
if next_token is not None:
params['NextToken'] = next_token
if limit is not None:
params['Limit'] = limit
return self.make_request(action='DescribeDataSources',
body=json.dumps(params))
def describe_evaluations(self, filter_variable=None, eq=None, gt=None,
lt=None, ge=None, le=None, ne=None, prefix=None,
sort_order=None, next_token=None, limit=None):
"""
Returns a list of `DescribeEvaluations` that match the search
criteria in the request.
:type filter_variable: string
:param filter_variable:
Use one of the following variable to filter a list of `Evaluation`
objects:
+ `CreatedAt` - Sets the search criteria to the `Evaluation` creation
date.
+ `Status` - Sets the search criteria to the `Evaluation` status.
+ `Name` - Sets the search criteria to the contents of `Evaluation` **
** `Name`.
+ `IAMUser` - Sets the search criteria to the user account that invoked
an `Evaluation`.
+ `MLModelId` - Sets the search criteria to the `MLModel` that was
evaluated.
+ `DataSourceId` - Sets the search criteria to the `DataSource` used in
`Evaluation`.
+ `DataUri` - Sets the search criteria to the data file(s) used in
`Evaluation`. The URL can identify either a file or an Amazon
Simple Storage Solution (Amazon S3) bucket or directory.
:type eq: string
:param eq: The equal to operator. The `Evaluation` results will have
`FilterVariable` values that exactly match the value specified with
`EQ`.
:type gt: string
:param gt: The greater than operator. The `Evaluation` results will
have `FilterVariable` values that are greater than the value
specified with `GT`.
:type lt: string
:param lt: The less than operator. The `Evaluation` results will have
`FilterVariable` values that are less than the value specified with
`LT`.
:type ge: string
:param ge: The greater than or equal to operator. The `Evaluation`
results will have `FilterVariable` values that are greater than or
equal to the value specified with `GE`.
:type le: string
:param le: The less than or equal to operator. The `Evaluation` results
will have `FilterVariable` values that are less than or equal to
the value specified with `LE`.
:type ne: string
:param ne: The not equal to operator. The `Evaluation` results will
have `FilterVariable` values not equal to the value specified with
`NE`.
:type prefix: string
:param prefix:
A string that is found at the beginning of a variable, such as `Name`
or `Id`.
For example, an `Evaluation` could have the `Name`
`2014-09-09-HolidayGiftMailer`. To search for this `Evaluation`,
select `Name` for the `FilterVariable` and any of the following
strings for the `Prefix`:
+ 2014-09
+ 2014-09-09
+ 2014-09-09-Holiday
:type sort_order: string
:param sort_order: A two-value parameter that determines the sequence
of the resulting list of `Evaluation`.
+ `asc` - Arranges the list in ascending order (A-Z, 0-9).
+ `dsc` - Arranges the list in descending order (Z-A, 9-0).
Results are sorted by `FilterVariable`.
:type next_token: string
:param next_token: The ID of the page in the paginated results.
:type limit: integer
:param limit: The maximum number of `Evaluation` to include in the
result.
"""
params = {}
if filter_variable is not None:
params['FilterVariable'] = filter_variable
if eq is not None:
params['EQ'] = eq
if gt is not None:
params['GT'] = gt
if lt is not None:
params['LT'] = lt
if ge is not None:
params['GE'] = ge
if le is not None:
params['LE'] = le
if ne is not None:
params['NE'] = ne
if prefix is not None:
params['Prefix'] = prefix
if sort_order is not None:
params['SortOrder'] = sort_order
if next_token is not None:
params['NextToken'] = next_token
if limit is not None:
params['Limit'] = limit
return self.make_request(action='DescribeEvaluations',
body=json.dumps(params))
def describe_ml_models(self, filter_variable=None, eq=None, gt=None,
lt=None, ge=None, le=None, ne=None, prefix=None,
sort_order=None, next_token=None, limit=None):
"""
Returns a list of `MLModel` that match the search criteria in
the request.
:type filter_variable: string
:param filter_variable:
Use one of the following variables to filter a list of `MLModel`:
+ `CreatedAt` - Sets the search criteria to `MLModel` creation date.
+ `Status` - Sets the search criteria to `MLModel` status.
+ `Name` - Sets the search criteria to the contents of `MLModel` ** **
`Name`.
+ `IAMUser` - Sets the search criteria to the user account that invoked
the `MLModel` creation.
+ `TrainingDataSourceId` - Sets the search criteria to the `DataSource`
used to train one or more `MLModel`.
+ `RealtimeEndpointStatus` - Sets the search criteria to the `MLModel`
real-time endpoint status.
+ `MLModelType` - Sets the search criteria to `MLModel` type: binary,
regression, or multi-class.
+ `Algorithm` - Sets the search criteria to the algorithm that the
`MLModel` uses.
+ `TrainingDataURI` - Sets the search criteria to the data file(s) used
in training a `MLModel`. The URL can identify either a file or an
Amazon Simple Storage Service (Amazon S3) bucket or directory.
:type eq: string
:param eq: The equal to operator. The `MLModel` results will have
`FilterVariable` values that exactly match the value specified with
`EQ`.
:type gt: string
:param gt: The greater than operator. The `MLModel` results will have
`FilterVariable` values that are greater than the value specified
with `GT`.
:type lt: string
:param lt: The less than operator. The `MLModel` results will have
`FilterVariable` values that are less than the value specified with
`LT`.
:type ge: string
:param ge: The greater than or equal to operator. The `MLModel` results
will have `FilterVariable` values that are greater than or equal to
the value specified with `GE`.
:type le: string
:param le: The less than or equal to operator. The `MLModel` results
will have `FilterVariable` values that are less than or equal to
the value specified with `LE`.
:type ne: string
:param ne: The not equal to operator. The `MLModel` results will have
`FilterVariable` values not equal to the value specified with `NE`.
:type prefix: string
:param prefix:
A string that is found at the beginning of a variable, such as `Name`
or `Id`.
For example, an `MLModel` could have the `Name`
`2014-09-09-HolidayGiftMailer`. To search for this `MLModel`,
select `Name` for the `FilterVariable` and any of the following
strings for the `Prefix`:
+ 2014-09
+ 2014-09-09
+ 2014-09-09-Holiday
:type sort_order: string
:param sort_order: A two-value parameter that determines the sequence
of the resulting list of `MLModel`.
+ `asc` - Arranges the list in ascending order (A-Z, 0-9).
+ `dsc` - Arranges the list in descending order (Z-A, 9-0).
Results are sorted by `FilterVariable`.
:type next_token: string
:param next_token: The ID of the page in the paginated results.
:type limit: integer
:param limit: The number of pages of information to include in the
result. The range of acceptable values is 1 through 100. The
default value is 100.
"""
params = {}
if filter_variable is not None:
params['FilterVariable'] = filter_variable
if eq is not None:
params['EQ'] = eq
if gt is not None:
params['GT'] = gt
if lt is not None:
params['LT'] = lt
if ge is not None:
params['GE'] = ge
if le is not None:
params['LE'] = le
if ne is not None:
params['NE'] = ne
if prefix is not None:
params['Prefix'] = prefix
if sort_order is not None:
params['SortOrder'] = sort_order
if next_token is not None:
params['NextToken'] = next_token
if limit is not None:
params['Limit'] = limit
return self.make_request(action='DescribeMLModels',
body=json.dumps(params))
def get_batch_prediction(self, batch_prediction_id):
"""
Returns a `BatchPrediction` that includes detailed metadata,
status, and data file information for a `Batch Prediction`
request.
:type batch_prediction_id: string
:param batch_prediction_id: An ID assigned to the `BatchPrediction` at
creation.
"""
params = {'BatchPredictionId': batch_prediction_id, }
return self.make_request(action='GetBatchPrediction',
body=json.dumps(params))
def get_data_source(self, data_source_id, verbose=None):
"""
Returns a `DataSource` that includes metadata and data file
information, as well as the current status of the
`DataSource`.
`GetDataSource` provides results in normal or verbose format.
The verbose format adds the schema description and the list of
files pointed to by the DataSource to the normal format.
:type data_source_id: string
:param data_source_id: The ID assigned to the `DataSource` at creation.
:type verbose: boolean
:param verbose: Specifies whether the `GetDataSource` operation should
return `DataSourceSchema`.
If true, `DataSourceSchema` is returned.
If false, `DataSourceSchema` is not returned.
"""
params = {'DataSourceId': data_source_id, }
if verbose is not None:
params['Verbose'] = verbose
return self.make_request(action='GetDataSource',
body=json.dumps(params))
def get_evaluation(self, evaluation_id):
"""
Returns an `Evaluation` that includes metadata as well as the
current status of the `Evaluation`.
:type evaluation_id: string
:param evaluation_id: The ID of the `Evaluation` to retrieve. The
evaluation of each `MLModel` is recorded and cataloged. The ID
provides the means to access the information.
"""
params = {'EvaluationId': evaluation_id, }
return self.make_request(action='GetEvaluation',
body=json.dumps(params))
def get_ml_model(self, ml_model_id, verbose=None):
"""
Returns an `MLModel` that includes detailed metadata, and data
source information as well as the current status of the
`MLModel`.
`GetMLModel` provides results in normal or verbose format.
:type ml_model_id: string
:param ml_model_id: The ID assigned to the `MLModel` at creation.
:type verbose: boolean
:param verbose: Specifies whether the `GetMLModel` operation should
return `Recipe`.
If true, `Recipe` is returned.
If false, `Recipe` is not returned.
"""
params = {'MLModelId': ml_model_id, }
if verbose is not None:
params['Verbose'] = verbose
return self.make_request(action='GetMLModel',
body=json.dumps(params))
def predict(self, ml_model_id, record, predict_endpoint):
"""
Generates a prediction for the observation using the specified
`MLModel`.
Not all response parameters will be populated because this is
dependent on the type of requested model.
:type ml_model_id: string
:param ml_model_id: A unique identifier of the `MLModel`.
:type record: map
:param record: A map of variable name-value pairs that represent an
observation.
:type predict_endpoint: string
:param predict_endpoint: The endpoint to send the predict request to.
"""
predict_host = urlsplit(predict_endpoint).hostname
if predict_host is None:
predict_host = predict_endpoint
params = {
'MLModelId': ml_model_id,
'Record': record,
'PredictEndpoint': predict_host,
}
return self.make_request(action='Predict',
body=json.dumps(params),
host=predict_host)
def update_batch_prediction(self, batch_prediction_id,
batch_prediction_name):
"""
Updates the `BatchPredictionName` of a `BatchPrediction`.
You can use the GetBatchPrediction operation to view the
contents of the updated data element.
:type batch_prediction_id: string
:param batch_prediction_id: The ID assigned to the `BatchPrediction`
during creation.
:type batch_prediction_name: string
:param batch_prediction_name: A new user-supplied name or description
of the `BatchPrediction`.
"""
params = {
'BatchPredictionId': batch_prediction_id,
'BatchPredictionName': batch_prediction_name,
}
return self.make_request(action='UpdateBatchPrediction',
body=json.dumps(params))
def update_data_source(self, data_source_id, data_source_name):
"""
Updates the `DataSourceName` of a `DataSource`.
You can use the GetDataSource operation to view the contents
of the updated data element.
:type data_source_id: string
:param data_source_id: The ID assigned to the `DataSource` during
creation.
:type data_source_name: string
:param data_source_name: A new user-supplied name or description of the
`DataSource` that will replace the current description.
"""
params = {
'DataSourceId': data_source_id,
'DataSourceName': data_source_name,
}
return self.make_request(action='UpdateDataSource',
body=json.dumps(params))
def update_evaluation(self, evaluation_id, evaluation_name):
"""
Updates the `EvaluationName` of an `Evaluation`.
You can use the GetEvaluation operation to view the contents
of the updated data element.
:type evaluation_id: string
:param evaluation_id: The ID assigned to the `Evaluation` during
creation.
:type evaluation_name: string
:param evaluation_name: A new user-supplied name or description of the
`Evaluation` that will replace the current content.
"""
params = {
'EvaluationId': evaluation_id,
'EvaluationName': evaluation_name,
}
return self.make_request(action='UpdateEvaluation',
body=json.dumps(params))
def update_ml_model(self, ml_model_id, ml_model_name=None,
score_threshold=None):
"""
Updates the `MLModelName` and the `ScoreThreshold` of an
`MLModel`.
You can use the GetMLModel operation to view the contents of
the updated data element.
:type ml_model_id: string
:param ml_model_id: The ID assigned to the `MLModel` during creation.
:type ml_model_name: string
:param ml_model_name: A user-supplied name or description of the
`MLModel`.
:type score_threshold: float
:param score_threshold: The `ScoreThreshold` used in binary
classification `MLModel` that marks the boundary between a positive
prediction and a negative prediction.
Output values greater than or equal to the `ScoreThreshold` receive a
positive result from the `MLModel`, such as `True`. Output values
less than the `ScoreThreshold` receive a negative response from the
`MLModel`, such as `False`.
"""
params = {'MLModelId': ml_model_id, }
if ml_model_name is not None:
params['MLModelName'] = ml_model_name
if score_threshold is not None:
params['ScoreThreshold'] = score_threshold
return self.make_request(action='UpdateMLModel',
body=json.dumps(params))
def make_request(self, action, body, host=None):
headers = {
'X-Amz-Target': '%s.%s' % (self.TargetPrefix, action),
'Host': self.region.endpoint,
'Content-Type': 'application/x-amz-json-1.1',
'Content-Length': str(len(body)),
}
http_request_kwargs = {
'method':'POST', 'path':'/', 'auth_path':'/', 'params':{},
'headers': headers, 'data':body
}
if host is not None:
headers['Host'] = host
http_request_kwargs['host'] = host
http_request = self.build_base_http_request(**http_request_kwargs)
response = self._mexe(http_request, sender=None,
override_num_retries=10)
response_body = response.read().decode('utf-8')
boto.log.debug(response_body)
if response.status == 200:
if response_body:
return json.loads(response_body)
else:
json_body = json.loads(response_body)
fault_name = json_body.get('__type', None)
exception_class = self._faults.get(fault_name, self.ResponseError)
raise exception_class(response.status, response.reason,
body=json_body)