# -*- coding: utf-8 -*-
# Copyright (c) 2012 Mitch Garnaat http://garnaat.org/
# Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
import os
import boto.glacier
from boto.compat import json
from boto.connection import AWSAuthConnection
from boto.glacier.exceptions import UnexpectedHTTPResponseError
from boto.glacier.response import GlacierResponse
from boto.glacier.utils import ResettingFileSender
class Layer1(AWSAuthConnection):
"""
Amazon Glacier is a storage solution for "cold data."
Amazon Glacier is an extremely low-cost storage service that
provides secure, durable and easy-to-use storage for data backup
and archival. With Amazon Glacier, customers can store their data
cost effectively for months, years, or decades. Amazon Glacier
also enables customers to offload the administrative burdens of
operating and scaling storage to AWS, so they don't have to worry
about capacity planning, hardware provisioning, data replication,
hardware failure and recovery, or time-consuming hardware
migrations.
Amazon Glacier is a great storage choice when low storage cost is
paramount, your data is rarely retrieved, and retrieval latency of
several hours is acceptable. If your application requires fast or
frequent access to your data, consider using Amazon S3. For more
information, go to `Amazon Simple Storage Service (Amazon S3)`_.
You can store any kind of data in any format. There is no maximum
limit on the total amount of data you can store in Amazon Glacier.
If you are a first-time user of Amazon Glacier, we recommend that
you begin by reading the following sections in the Amazon Glacier
Developer Guide :
+ `What is Amazon Glacier`_ - This section of the Developer Guide
describes the underlying data model, the operations it supports,
and the AWS SDKs that you can use to interact with the service.
+ `Getting Started with Amazon Glacier`_ - The Getting Started
section walks you through the process of creating a vault,
uploading archives, creating jobs to download archives, retrieving
the job output, and deleting archives.
"""
Version = '2012-06-01'
def __init__(self, aws_access_key_id=None, aws_secret_access_key=None,
account_id='-', is_secure=True, port=None,
proxy=None, proxy_port=None,
proxy_user=None, proxy_pass=None, debug=0,
https_connection_factory=None, path='/',
provider='aws', security_token=None,
suppress_consec_slashes=True,
region=None, region_name='us-east-1',
profile_name=None):
if not region:
for reg in boto.glacier.regions():
if reg.name == region_name:
region = reg
break
self.region = region
self.account_id = account_id
super(Layer1, self).__init__(region.endpoint,
aws_access_key_id, aws_secret_access_key,
is_secure, port, proxy, proxy_port,
proxy_user, proxy_pass, debug,
https_connection_factory,
path, provider, security_token,
suppress_consec_slashes,
profile_name=profile_name)
def _required_auth_capability(self):
return ['hmac-v4']
def make_request(self, verb, resource, headers=None,
data='', ok_responses=(200,), params=None,
sender=None, response_headers=None):
if headers is None:
headers = {}
headers['x-amz-glacier-version'] = self.Version
uri = '/%s/%s' % (self.account_id, resource)
response = super(Layer1, self).make_request(verb, uri,
params=params,
headers=headers,
sender=sender,
data=data)
if response.status in ok_responses:
return GlacierResponse(response, response_headers)
else:
# create glacier-specific exceptions
raise UnexpectedHTTPResponseError(ok_responses, response)
# Vaults
def list_vaults(self, limit=None, marker=None):
"""
This operation lists all vaults owned by the calling user's
account. The list returned in the response is ASCII-sorted by
vault name.
By default, this operation returns up to 1,000 items. If there
are more vaults to list, the response `marker` field contains
the vault Amazon Resource Name (ARN) at which to continue the
list with a new List Vaults request; otherwise, the `marker`
field is `null`. To return a list of vaults that begins at a
specific vault, set the `marker` request parameter to the
vault ARN you obtained from a previous List Vaults request.
You can also limit the number of vaults returned in the
response by specifying the `limit` parameter in the request.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
For conceptual information and underlying REST API, go to
`Retrieving Vault Metadata in Amazon Glacier`_ and `List
Vaults `_ in the Amazon Glacier Developer Guide .
:type marker: string
:param marker: A string used for pagination. The marker specifies the
vault ARN after which the listing of vaults should begin.
:type limit: string
:param limit: The maximum number of items returned in the response. If
you don't specify a value, the List Vaults operation returns up to
1,000 items.
"""
params = {}
if limit:
params['limit'] = limit
if marker:
params['marker'] = marker
return self.make_request('GET', 'vaults', params=params)
def describe_vault(self, vault_name):
"""
This operation returns information about a vault, including
the vault's Amazon Resource Name (ARN), the date the vault was
created, the number of archives it contains, and the total
size of all the archives in the vault. The number of archives
and their total size are as of the last inventory generation.
This means that if you add or remove an archive from a vault,
and then immediately use Describe Vault, the change in
contents will not be immediately reflected. If you want to
retrieve the latest inventory of the vault, use InitiateJob.
Amazon Glacier generates vault inventories approximately
daily. For more information, see `Downloading a Vault
Inventory in Amazon Glacier`_.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
For conceptual information and underlying REST API, go to
`Retrieving Vault Metadata in Amazon Glacier`_ and `Describe
Vault `_ in the Amazon Glacier Developer Guide .
:type vault_name: string
:param vault_name: The name of the vault.
"""
uri = 'vaults/%s' % vault_name
return self.make_request('GET', uri)
def create_vault(self, vault_name):
"""
This operation creates a new vault with the specified name.
The name of the vault must be unique within a region for an
AWS account. You can create up to 1,000 vaults per account. If
you need to create more vaults, contact Amazon Glacier.
You must use the following guidelines when naming a vault.
+ Names can be between 1 and 255 characters long.
+ Allowed characters are a-z, A-Z, 0-9, '_' (underscore), '-'
(hyphen), and '.' (period).
This operation is idempotent.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
For conceptual information and underlying REST API, go to
`Creating a Vault in Amazon Glacier`_ and `Create Vault `_ in
the Amazon Glacier Developer Guide .
:type vault_name: string
:param vault_name: The name of the vault.
"""
uri = 'vaults/%s' % vault_name
return self.make_request('PUT', uri, ok_responses=(201,),
response_headers=[('Location', 'Location')])
def delete_vault(self, vault_name):
"""
This operation deletes a vault. Amazon Glacier will delete a
vault only if there are no archives in the vault as of the
last inventory and there have been no writes to the vault
since the last inventory. If either of these conditions is not
satisfied, the vault deletion fails (that is, the vault is not
removed) and Amazon Glacier returns an error. You can use
DescribeVault to return the number of archives in a vault, and
you can use `Initiate a Job (POST jobs)`_ to initiate a new
inventory retrieval for a vault. The inventory contains the
archive IDs you use to delete archives using `Delete Archive
(DELETE archive)`_.
This operation is idempotent.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
For conceptual information and underlying REST API, go to
`Deleting a Vault in Amazon Glacier`_ and `Delete Vault `_ in
the Amazon Glacier Developer Guide .
:type vault_name: string
:param vault_name: The name of the vault.
"""
uri = 'vaults/%s' % vault_name
return self.make_request('DELETE', uri, ok_responses=(204,))
def get_vault_notifications(self, vault_name):
"""
This operation retrieves the `notification-configuration`
subresource of the specified vault.
For information about setting a notification configuration on
a vault, see SetVaultNotifications. If a notification
configuration for a vault is not set, the operation returns a
`404 Not Found` error. For more information about vault
notifications, see `Configuring Vault Notifications in Amazon
Glacier`_.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
For conceptual information and underlying REST API, go to
`Configuring Vault Notifications in Amazon Glacier`_ and `Get
Vault Notification Configuration `_ in the Amazon Glacier
Developer Guide .
:type vault_name: string
:param vault_name: The name of the vault.
"""
uri = 'vaults/%s/notification-configuration' % vault_name
return self.make_request('GET', uri)
def set_vault_notifications(self, vault_name, notification_config):
"""
This operation configures notifications that will be sent when
specific events happen to a vault. By default, you don't get
any notifications.
To configure vault notifications, send a PUT request to the
`notification-configuration` subresource of the vault. The
request should include a JSON document that provides an Amazon
SNS topic and specific events for which you want Amazon
Glacier to send notifications to the topic.
Amazon SNS topics must grant permission to the vault to be
allowed to publish notifications to the topic. You can
configure a vault to publish a notification for the following
vault events:
+ **ArchiveRetrievalCompleted** This event occurs when a job
that was initiated for an archive retrieval is completed
(InitiateJob). The status of the completed job can be
"Succeeded" or "Failed". The notification sent to the SNS
topic is the same output as returned from DescribeJob.
+ **InventoryRetrievalCompleted** This event occurs when a job
that was initiated for an inventory retrieval is completed
(InitiateJob). The status of the completed job can be
"Succeeded" or "Failed". The notification sent to the SNS
topic is the same output as returned from DescribeJob.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
For conceptual information and underlying REST API, go to
`Configuring Vault Notifications in Amazon Glacier`_ and `Set
Vault Notification Configuration `_ in the Amazon Glacier
Developer Guide .
:type vault_name: string
:param vault_name: The name of the vault.
:type vault_notification_config: dict
:param vault_notification_config: Provides options for specifying
notification configuration.
The format of the dictionary is:
{'SNSTopic': 'mytopic',
'Events': [event1,...]}
"""
uri = 'vaults/%s/notification-configuration' % vault_name
json_config = json.dumps(notification_config)
return self.make_request('PUT', uri, data=json_config,
ok_responses=(204,))
def delete_vault_notifications(self, vault_name):
"""
This operation deletes the notification configuration set for
a vault. The operation is eventually consistent;that is, it
might take some time for Amazon Glacier to completely disable
the notifications and you might still receive some
notifications for a short time after you send the delete
request.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
For conceptual information and underlying REST API, go to
`Configuring Vault Notifications in Amazon Glacier`_ and
`Delete Vault Notification Configuration `_ in the Amazon
Glacier Developer Guide.
:type vault_name: string
:param vault_name: The name of the vault.
"""
uri = 'vaults/%s/notification-configuration' % vault_name
return self.make_request('DELETE', uri, ok_responses=(204,))
# Jobs
def list_jobs(self, vault_name, completed=None, status_code=None,
limit=None, marker=None):
"""
This operation lists jobs for a vault, including jobs that are
in-progress and jobs that have recently finished.
Amazon Glacier retains recently completed jobs for a period
before deleting them; however, it eventually removes completed
jobs. The output of completed jobs can be retrieved. Retaining
completed jobs for a period of time after they have completed
enables you to get a job output in the event you miss the job
completion notification or your first attempt to download it
fails. For example, suppose you start an archive retrieval job
to download an archive. After the job completes, you start to
download the archive but encounter a network error. In this
scenario, you can retry and download the archive while the job
exists.
To retrieve an archive or retrieve a vault inventory from
Amazon Glacier, you first initiate a job, and after the job
completes, you download the data. For an archive retrieval,
the output is the archive data, and for an inventory
retrieval, it is the inventory list. The List Job operation
returns a list of these jobs sorted by job initiation time.
This List Jobs operation supports pagination. By default, this
operation returns up to 1,000 jobs in the response. You should
always check the response for a `marker` at which to continue
the list; if there are no more items the `marker` is `null`.
To return a list of jobs that begins at a specific job, set
the `marker` request parameter to the value you obtained from
a previous List Jobs request. You can also limit the number of
jobs returned in the response by specifying the `limit`
parameter in the request.
Additionally, you can filter the jobs list returned by
specifying an optional `statuscode` (InProgress, Succeeded, or
Failed) and `completed` (true, false) parameter. The
`statuscode` allows you to specify that only jobs that match a
specified status are returned. The `completed` parameter
allows you to specify that only jobs in a specific completion
state are returned.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
For the underlying REST API, go to `List Jobs `_
:type vault_name: string
:param vault_name: The name of the vault.
:type limit: string
:param limit: Specifies that the response be limited to the specified
number of items or fewer. If not specified, the List Jobs operation
returns up to 1,000 jobs.
:type marker: string
:param marker: An opaque string used for pagination. This value
specifies the job at which the listing of jobs should begin. Get
the marker value from a previous List Jobs response. You need only
include the marker if you are continuing the pagination of results
started in a previous List Jobs request.
:type statuscode: string
:param statuscode: Specifies the type of job status to return. You can
specify the following values: "InProgress", "Succeeded", or
"Failed".
:type completed: string
:param completed: Specifies the state of the jobs to return. You can
specify `True` or `False`.
"""
params = {}
if limit:
params['limit'] = limit
if marker:
params['marker'] = marker
if status_code:
params['statuscode'] = status_code
if completed is not None:
params['completed'] = 'true' if completed else 'false'
uri = 'vaults/%s/jobs' % vault_name
return self.make_request('GET', uri, params=params)
def describe_job(self, vault_name, job_id):
"""
This operation returns information about a job you previously
initiated, including the job initiation date, the user who
initiated the job, the job status code/message and the Amazon
SNS topic to notify after Amazon Glacier completes the job.
For more information about initiating a job, see InitiateJob.
This operation enables you to check the status of your job.
However, it is strongly recommended that you set up an Amazon
SNS topic and specify it in your initiate job request so that
Amazon Glacier can notify the topic after it completes the
job.
A job ID will not expire for at least 24 hours after Amazon
Glacier completes the job.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
For information about the underlying REST API, go to `Working
with Archives in Amazon Glacier`_ in the Amazon Glacier
Developer Guide .
:type vault_name: string
:param vault_name: The name of the vault.
:type job_id: string
:param job_id: The ID of the job to describe.
"""
uri = 'vaults/%s/jobs/%s' % (vault_name, job_id)
return self.make_request('GET', uri, ok_responses=(200,))
def initiate_job(self, vault_name, job_data):
"""
This operation initiates a job of the specified type. In this
release, you can initiate a job to retrieve either an archive
or a vault inventory (a list of archives in a vault).
Retrieving data from Amazon Glacier is a two-step process:
#. Initiate a retrieval job.
#. After the job completes, download the bytes.
The retrieval request is executed asynchronously. When you
initiate a retrieval job, Amazon Glacier creates a job and
returns a job ID in the response. When Amazon Glacier
completes the job, you can get the job output (archive or
inventory data). For information about getting job output, see
GetJobOutput operation.
The job must complete before you can get its output. To
determine when a job is complete, you have the following
options:
+ **Use Amazon SNS Notification** You can specify an Amazon
Simple Notification Service (Amazon SNS) topic to which Amazon
Glacier can post a notification after the job is completed.
You can specify an SNS topic per job request. The notification
is sent only after Amazon Glacier completes the job. In
addition to specifying an SNS topic per job request, you can
configure vault notifications for a vault so that job
notifications are always sent. For more information, see
SetVaultNotifications.
+ **Get job details** You can make a DescribeJob request to
obtain job status information while a job is in progress.
However, it is more efficient to use an Amazon SNS
notification to determine when a job is complete.
The information you get via notification is same that you get
by calling DescribeJob.
If for a specific event, you add both the notification
configuration on the vault and also specify an SNS topic in
your initiate job request, Amazon Glacier sends both
notifications. For more information, see
SetVaultNotifications.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
**About the Vault Inventory**
Amazon Glacier prepares an inventory for each vault
periodically, every 24 hours. When you initiate a job for a
vault inventory, Amazon Glacier returns the last inventory for
the vault. The inventory data you get might be up to a day or
two days old. Also, the initiate inventory job might take some
time to complete before you can download the vault inventory.
So you do not want to retrieve a vault inventory for each
vault operation. However, in some scenarios, you might find
the vault inventory useful. For example, when you upload an
archive, you can provide an archive description but not an
archive name. Amazon Glacier provides you a unique archive ID,
an opaque string of characters. So, you might maintain your
own database that maps archive names to their corresponding
Amazon Glacier assigned archive IDs. You might find the vault
inventory useful in the event you need to reconcile
information in your database with the actual vault inventory.
**About Ranged Archive Retrieval**
You can initiate an archive retrieval for the whole archive or
a range of the archive. In the case of ranged archive
retrieval, you specify a byte range to return or the whole
archive. The range specified must be megabyte (MB) aligned,
that is the range start value must be divisible by 1 MB and
range end value plus 1 must be divisible by 1 MB or equal the
end of the archive. If the ranged archive retrieval is not
megabyte aligned, this operation returns a 400 response.
Furthermore, to ensure you get checksum values for data you
download using Get Job Output API, the range must be tree hash
aligned.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
For conceptual information and the underlying REST API, go to
`Initiate a Job`_ and `Downloading a Vault Inventory`_
:type account_id: string
:param account_id: The `AccountId` is the AWS Account ID. You can
specify either the AWS Account ID or optionally a '-', in which
case Amazon Glacier uses the AWS Account ID associated with the
credentials used to sign the request. If you specify your Account
ID, do not include hyphens in it.
:type vault_name: string
:param vault_name: The name of the vault.
:type job_parameters: dict
:param job_parameters: Provides options for specifying job information.
The dictionary can contain the following attributes:
* ArchiveId - The ID of the archive you want to retrieve.
This field is required only if the Type is set to
archive-retrieval.
* Description - The optional description for the job.
* Format - When initiating a job to retrieve a vault
inventory, you can optionally add this parameter to
specify the output format. Valid values are: CSV|JSON.
* SNSTopic - The Amazon SNS topic ARN where Amazon Glacier
sends a notification when the job is completed and the
output is ready for you to download.
* Type - The job type. Valid values are:
archive-retrieval|inventory-retrieval
* RetrievalByteRange - Optionally specify the range of
bytes to retrieve.
* InventoryRetrievalParameters: Optional job parameters
* Format - The output format, like "JSON"
* StartDate - ISO8601 starting date string
* EndDate - ISO8601 ending date string
* Limit - Maximum number of entries
* Marker - A unique string used for pagination
"""
uri = 'vaults/%s/jobs' % vault_name
response_headers = [('x-amz-job-id', u'JobId'),
('Location', u'Location')]
json_job_data = json.dumps(job_data)
return self.make_request('POST', uri, data=json_job_data,
ok_responses=(202,),
response_headers=response_headers)
def get_job_output(self, vault_name, job_id, byte_range=None):
"""
This operation downloads the output of the job you initiated
using InitiateJob. Depending on the job type you specified
when you initiated the job, the output will be either the
content of an archive or a vault inventory.
A job ID will not expire for at least 24 hours after Amazon
Glacier completes the job. That is, you can download the job
output within the 24 hours period after Amazon Glacier
completes the job.
If the job output is large, then you can use the `Range`
request header to retrieve a portion of the output. This
allows you to download the entire output in smaller chunks of
bytes. For example, suppose you have 1 GB of job output you
want to download and you decide to download 128 MB chunks of
data at a time, which is a total of eight Get Job Output
requests. You use the following process to download the job
output:
#. Download a 128 MB chunk of output by specifying the
appropriate byte range using the `Range` header.
#. Along with the data, the response includes a checksum of
the payload. You compute the checksum of the payload on the
client and compare it with the checksum you received in the
response to ensure you received all the expected data.
#. Repeat steps 1 and 2 for all the eight 128 MB chunks of
output data, each time specifying the appropriate byte range.
#. After downloading all the parts of the job output, you have
a list of eight checksum values. Compute the tree hash of
these values to find the checksum of the entire output. Using
the Describe Job API, obtain job information of the job that
provided you the output. The response includes the checksum of
the entire archive stored in Amazon Glacier. You compare this
value with the checksum you computed to ensure you have
downloaded the entire archive content with no errors.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
For conceptual information and the underlying REST API, go to
`Downloading a Vault Inventory`_, `Downloading an Archive`_,
and `Get Job Output `_
:type account_id: string
:param account_id: The `AccountId` is the AWS Account ID. You can
specify either the AWS Account ID or optionally a '-', in which
case Amazon Glacier uses the AWS Account ID associated with the
credentials used to sign the request. If you specify your Account
ID, do not include hyphens in it.
:type vault_name: string
:param vault_name: The name of the vault.
:type job_id: string
:param job_id: The job ID whose data is downloaded.
:type byte_range: string
:param byte_range: The range of bytes to retrieve from the output. For
example, if you want to download the first 1,048,576 bytes, specify
"Range: bytes=0-1048575". By default, this operation downloads the
entire output.
"""
response_headers = [('x-amz-sha256-tree-hash', u'TreeHash'),
('Content-Range', u'ContentRange'),
('Content-Type', u'ContentType')]
headers = None
if byte_range:
headers = {'Range': 'bytes=%d-%d' % byte_range}
uri = 'vaults/%s/jobs/%s/output' % (vault_name, job_id)
response = self.make_request('GET', uri, headers=headers,
ok_responses=(200, 206),
response_headers=response_headers)
return response
# Archives
def upload_archive(self, vault_name, archive,
linear_hash, tree_hash, description=None):
"""
This operation adds an archive to a vault. This is a
synchronous operation, and for a successful upload, your data
is durably persisted. Amazon Glacier returns the archive ID in
the `x-amz-archive-id` header of the response.
You must use the archive ID to access your data in Amazon
Glacier. After you upload an archive, you should save the
archive ID returned so that you can retrieve or delete the
archive later. Besides saving the archive ID, you can also
index it and give it a friendly name to allow for better
searching. You can also use the optional archive description
field to specify how the archive is referred to in an external
index of archives, such as you might create in Amazon
DynamoDB. You can also get the vault inventory to obtain a
list of archive IDs in a vault. For more information, see
InitiateJob.
You must provide a SHA256 tree hash of the data you are
uploading. For information about computing a SHA256 tree hash,
see `Computing Checksums`_.
You can optionally specify an archive description of up to
1,024 printable ASCII characters. You can get the archive
description when you either retrieve the archive or get the
vault inventory. For more information, see InitiateJob. Amazon
Glacier does not interpret the description in any way. An
archive description does not need to be unique. You cannot use
the description to retrieve or sort the archive list.
Archives are immutable. After you upload an archive, you
cannot edit the archive or its description.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
For conceptual information and underlying REST API, go to
`Uploading an Archive in Amazon Glacier`_ and `Upload
Archive`_ in the Amazon Glacier Developer Guide .
:type vault_name: str
:param vault_name: The name of the vault
:type archive: bytes
:param archive: The data to upload.
:type linear_hash: str
:param linear_hash: The SHA256 checksum (a linear hash) of the
payload.
:type tree_hash: str
:param tree_hash: The user-computed SHA256 tree hash of the
payload. For more information on computing the
tree hash, see http://goo.gl/u7chF.
:type description: str
:param description: The optional description of the archive you
are uploading.
"""
response_headers = [('x-amz-archive-id', u'ArchiveId'),
('Location', u'Location'),
('x-amz-sha256-tree-hash', u'TreeHash')]
uri = 'vaults/%s/archives' % vault_name
try:
content_length = str(len(archive))
except (TypeError, AttributeError):
# If a file like object is provided, try to retrieve
# the file size via fstat.
content_length = str(os.fstat(archive.fileno()).st_size)
headers = {'x-amz-content-sha256': linear_hash,
'x-amz-sha256-tree-hash': tree_hash,
'Content-Length': content_length}
if description:
headers['x-amz-archive-description'] = description
if self._is_file_like(archive):
sender = ResettingFileSender(archive)
else:
sender = None
return self.make_request('POST', uri, headers=headers,
sender=sender,
data=archive, ok_responses=(201,),
response_headers=response_headers)
def _is_file_like(self, archive):
return hasattr(archive, 'seek') and hasattr(archive, 'tell')
def delete_archive(self, vault_name, archive_id):
"""
This operation deletes an archive from a vault. Subsequent
requests to initiate a retrieval of this archive will fail.
Archive retrievals that are in progress for this archive ID
may or may not succeed according to the following scenarios:
+ If the archive retrieval job is actively preparing the data
for download when Amazon Glacier receives the delete archive
request, the archival retrieval operation might fail.
+ If the archive retrieval job has successfully prepared the
archive for download when Amazon Glacier receives the delete
archive request, you will be able to download the output.
This operation is idempotent. Attempting to delete an already-
deleted archive does not result in an error.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
For conceptual information and underlying REST API, go to
`Deleting an Archive in Amazon Glacier`_ and `Delete Archive`_
in the Amazon Glacier Developer Guide .
:type vault_name: string
:param vault_name: The name of the vault.
:type archive_id: string
:param archive_id: The ID of the archive to delete.
"""
uri = 'vaults/%s/archives/%s' % (vault_name, archive_id)
return self.make_request('DELETE', uri, ok_responses=(204,))
# Multipart
def initiate_multipart_upload(self, vault_name, part_size,
description=None):
"""
This operation initiates a multipart upload. Amazon Glacier
creates a multipart upload resource and returns its ID in the
response. The multipart upload ID is used in subsequent
requests to upload parts of an archive (see
UploadMultipartPart).
When you initiate a multipart upload, you specify the part
size in number of bytes. The part size must be a megabyte
(1024 KB) multiplied by a power of 2-for example, 1048576 (1
MB), 2097152 (2 MB), 4194304 (4 MB), 8388608 (8 MB), and so
on. The minimum allowable part size is 1 MB, and the maximum
is 4 GB.
Every part you upload to this resource (see
UploadMultipartPart), except the last one, must have the same
size. The last one can be the same size or smaller. For
example, suppose you want to upload a 16.2 MB file. If you
initiate the multipart upload with a part size of 4 MB, you
will upload four parts of 4 MB each and one part of 0.2 MB.
You don't need to know the size of the archive when you start
a multipart upload because Amazon Glacier does not require you
to specify the overall archive size.
After you complete the multipart upload, Amazon Glacier
removes the multipart upload resource referenced by the ID.
Amazon Glacier also removes the multipart upload resource if
you cancel the multipart upload or it may be removed if there
is no activity for a period of 24 hours.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
For conceptual information and underlying REST API, go to
`Uploading Large Archives in Parts (Multipart Upload)`_ and
`Initiate Multipart Upload`_ in the Amazon Glacier Developer
Guide .
The part size must be a megabyte (1024 KB) multiplied by a power of
2, for example, 1048576 (1 MB), 2097152 (2 MB), 4194304 (4 MB),
8388608 (8 MB), and so on. The minimum allowable part size is 1 MB,
and the maximum is 4 GB (4096 MB).
:type vault_name: str
:param vault_name: The name of the vault.
:type description: str
:param description: The archive description that you are uploading in
parts.
:type part_size: int
:param part_size: The size of each part except the last, in bytes. The
last part can be smaller than this part size.
"""
response_headers = [('x-amz-multipart-upload-id', u'UploadId'),
('Location', u'Location')]
headers = {'x-amz-part-size': str(part_size)}
if description:
headers['x-amz-archive-description'] = description
uri = 'vaults/%s/multipart-uploads' % vault_name
response = self.make_request('POST', uri, headers=headers,
ok_responses=(201,),
response_headers=response_headers)
return response
def complete_multipart_upload(self, vault_name, upload_id,
sha256_treehash, archive_size):
"""
You call this operation to inform Amazon Glacier that all the
archive parts have been uploaded and that Amazon Glacier can
now assemble the archive from the uploaded parts. After
assembling and saving the archive to the vault, Amazon Glacier
returns the URI path of the newly created archive resource.
Using the URI path, you can then access the archive. After you
upload an archive, you should save the archive ID returned to
retrieve the archive at a later point. You can also get the
vault inventory to obtain a list of archive IDs in a vault.
For more information, see InitiateJob.
In the request, you must include the computed SHA256 tree hash
of the entire archive you have uploaded. For information about
computing a SHA256 tree hash, see `Computing Checksums`_. On
the server side, Amazon Glacier also constructs the SHA256
tree hash of the assembled archive. If the values match,
Amazon Glacier saves the archive to the vault; otherwise, it
returns an error, and the operation fails. The ListParts
operation returns a list of parts uploaded for a specific
multipart upload. It includes checksum information for each
uploaded part that can be used to debug a bad checksum issue.
Additionally, Amazon Glacier also checks for any missing
content ranges when assembling the archive, if missing content
ranges are found, Amazon Glacier returns an error and the
operation fails.
Complete Multipart Upload is an idempotent operation. After
your first successful complete multipart upload, if you call
the operation again within a short period, the operation will
succeed and return the same archive ID. This is useful in the
event you experience a network issue that causes an aborted
connection or receive a 500 server error, in which case you
can repeat your Complete Multipart Upload request and get the
same archive ID without creating duplicate archives. Note,
however, that after the multipart upload completes, you cannot
call the List Parts operation and the multipart upload will
not appear in List Multipart Uploads response, even if
idempotent complete is possible.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
For conceptual information and underlying REST API, go to
`Uploading Large Archives in Parts (Multipart Upload)`_ and
`Complete Multipart Upload`_ in the Amazon Glacier Developer
Guide .
:type checksum: string
:param checksum: The SHA256 tree hash of the entire archive. It is the
tree hash of SHA256 tree hash of the individual parts. If the value
you specify in the request does not match the SHA256 tree hash of
the final assembled archive as computed by Amazon Glacier, Amazon
Glacier returns an error and the request fails.
:type vault_name: str
:param vault_name: The name of the vault.
:type upload_id: str
:param upload_id: The upload ID of the multipart upload.
:type sha256_treehash: str
:param sha256_treehash: The SHA256 tree hash of the entire archive.
It is the tree hash of SHA256 tree hash of the individual parts.
If the value you specify in the request does not match the SHA256
tree hash of the final assembled archive as computed by Amazon
Glacier, Amazon Glacier returns an error and the request fails.
:type archive_size: int
:param archive_size: The total size, in bytes, of the entire
archive. This value should be the sum of all the sizes of
the individual parts that you uploaded.
"""
response_headers = [('x-amz-archive-id', u'ArchiveId'),
('Location', u'Location')]
headers = {'x-amz-sha256-tree-hash': sha256_treehash,
'x-amz-archive-size': str(archive_size)}
uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id)
response = self.make_request('POST', uri, headers=headers,
ok_responses=(201,),
response_headers=response_headers)
return response
def abort_multipart_upload(self, vault_name, upload_id):
"""
This operation aborts a multipart upload identified by the
upload ID.
After the Abort Multipart Upload request succeeds, you cannot
upload any more parts to the multipart upload or complete the
multipart upload. Aborting a completed upload fails. However,
aborting an already-aborted upload will succeed, for a short
time. For more information about uploading a part and
completing a multipart upload, see UploadMultipartPart and
CompleteMultipartUpload.
This operation is idempotent.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
For conceptual information and underlying REST API, go to
`Working with Archives in Amazon Glacier`_ and `Abort
Multipart Upload`_ in the Amazon Glacier Developer Guide .
:type vault_name: string
:param vault_name: The name of the vault.
:type upload_id: string
:param upload_id: The upload ID of the multipart upload to delete.
"""
uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id)
return self.make_request('DELETE', uri, ok_responses=(204,))
def list_multipart_uploads(self, vault_name, limit=None, marker=None):
"""
This operation lists in-progress multipart uploads for the
specified vault. An in-progress multipart upload is a
multipart upload that has been initiated by an
InitiateMultipartUpload request, but has not yet been
completed or aborted. The list returned in the List Multipart
Upload response has no guaranteed order.
The List Multipart Uploads operation supports pagination. By
default, this operation returns up to 1,000 multipart uploads
in the response. You should always check the response for a
`marker` at which to continue the list; if there are no more
items the `marker` is `null`. To return a list of multipart
uploads that begins at a specific upload, set the `marker`
request parameter to the value you obtained from a previous
List Multipart Upload request. You can also limit the number
of uploads returned in the response by specifying the `limit`
parameter in the request.
Note the difference between this operation and listing parts
(ListParts). The List Multipart Uploads operation lists all
multipart uploads for a vault and does not require a multipart
upload ID. The List Parts operation requires a multipart
upload ID since parts are associated with a single upload.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
For conceptual information and the underlying REST API, go to
`Working with Archives in Amazon Glacier`_ and `List Multipart
Uploads `_ in the Amazon Glacier Developer Guide .
:type vault_name: string
:param vault_name: The name of the vault.
:type limit: string
:param limit: Specifies the maximum number of uploads returned in the
response body. If this value is not specified, the List Uploads
operation returns up to 1,000 uploads.
:type marker: string
:param marker: An opaque string used for pagination. This value
specifies the upload at which the listing of uploads should begin.
Get the marker value from a previous List Uploads response. You
need only include the marker if you are continuing the pagination
of results started in a previous List Uploads request.
"""
params = {}
if limit:
params['limit'] = limit
if marker:
params['marker'] = marker
uri = 'vaults/%s/multipart-uploads' % vault_name
return self.make_request('GET', uri, params=params)
def list_parts(self, vault_name, upload_id, limit=None, marker=None):
"""
This operation lists the parts of an archive that have been
uploaded in a specific multipart upload. You can make this
request at any time during an in-progress multipart upload
before you complete the upload (see CompleteMultipartUpload.
List Parts returns an error for completed uploads. The list
returned in the List Parts response is sorted by part range.
The List Parts operation supports pagination. By default, this
operation returns up to 1,000 uploaded parts in the response.
You should always check the response for a `marker` at which
to continue the list; if there are no more items the `marker`
is `null`. To return a list of parts that begins at a specific
part, set the `marker` request parameter to the value you
obtained from a previous List Parts request. You can also
limit the number of parts returned in the response by
specifying the `limit` parameter in the request.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
For conceptual information and the underlying REST API, go to
`Working with Archives in Amazon Glacier`_ and `List Parts`_
in the Amazon Glacier Developer Guide .
:type vault_name: string
:param vault_name: The name of the vault.
:type upload_id: string
:param upload_id: The upload ID of the multipart upload.
:type marker: string
:param marker: An opaque string used for pagination. This value
specifies the part at which the listing of parts should begin. Get
the marker value from the response of a previous List Parts
response. You need only include the marker if you are continuing
the pagination of results started in a previous List Parts request.
:type limit: string
:param limit: Specifies the maximum number of parts returned in the
response body. If this value is not specified, the List Parts
operation returns up to 1,000 uploads.
"""
params = {}
if limit:
params['limit'] = limit
if marker:
params['marker'] = marker
uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id)
return self.make_request('GET', uri, params=params)
def upload_part(self, vault_name, upload_id, linear_hash,
tree_hash, byte_range, part_data):
"""
This operation uploads a part of an archive. You can upload
archive parts in any order. You can also upload them in
parallel. You can upload up to 10,000 parts for a multipart
upload.
Amazon Glacier rejects your upload part request if any of the
following conditions is true:
+ **SHA256 tree hash does not match**To ensure that part data
is not corrupted in transmission, you compute a SHA256 tree
hash of the part and include it in your request. Upon
receiving the part data, Amazon Glacier also computes a SHA256
tree hash. If these hash values don't match, the operation
fails. For information about computing a SHA256 tree hash, see
`Computing Checksums`_.
+ **Part size does not match**The size of each part except the
last must match the size specified in the corresponding
InitiateMultipartUpload request. The size of the last part
must be the same size as, or smaller than, the specified size.
If you upload a part whose size is smaller than the part size
you specified in your initiate multipart upload request and
that part is not the last part, then the upload part request
will succeed. However, the subsequent Complete Multipart
Upload request will fail.
+ **Range does not align**The byte range value in the request
does not align with the part size specified in the
corresponding initiate request. For example, if you specify a
part size of 4194304 bytes (4 MB), then 0 to 4194303 bytes (4
MB - 1) and 4194304 (4 MB) to 8388607 (8 MB - 1) are valid
part ranges. However, if you set a range value of 2 MB to 6
MB, the range does not align with the part size and the upload
will fail.
This operation is idempotent. If you upload the same part
multiple times, the data included in the most recent request
overwrites the previously uploaded data.
An AWS account has full permission to perform all operations
(actions). However, AWS Identity and Access Management (IAM)
users don't have any permissions by default. You must grant
them explicit permission to perform specific actions. For more
information, see `Access Control Using AWS Identity and Access
Management (IAM)`_.
For conceptual information and underlying REST API, go to
`Uploading Large Archives in Parts (Multipart Upload)`_ and
`Upload Part `_ in the Amazon Glacier Developer Guide .
:type vault_name: str
:param vault_name: The name of the vault.
:type linear_hash: str
:param linear_hash: The SHA256 checksum (a linear hash) of the
payload.
:type tree_hash: str
:param tree_hash: The user-computed SHA256 tree hash of the
payload. For more information on computing the
tree hash, see http://goo.gl/u7chF.
:type upload_id: str
:param upload_id: The unique ID associated with this upload
operation.
:type byte_range: tuple of ints
:param byte_range: Identifies the range of bytes in the assembled
archive that will be uploaded in this part. Amazon Glacier uses
this information to assemble the archive in the proper sequence.
The format of this header follows RFC 2616. An example header is
Content-Range:bytes 0-4194303/*.
:type part_data: bytes
:param part_data: The data to be uploaded for the part
"""
headers = {'x-amz-content-sha256': linear_hash,
'x-amz-sha256-tree-hash': tree_hash,
'Content-Range': 'bytes %d-%d/*' % byte_range}
response_headers = [('x-amz-sha256-tree-hash', u'TreeHash')]
uri = 'vaults/%s/multipart-uploads/%s' % (str(vault_name), upload_id)
return self.make_request('PUT', uri, headers=headers,
data=part_data, ok_responses=(204,),
response_headers=response_headers)