# -*- coding: utf-8 -*-
# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/
# Copyright (c) 2012 Robie Basak <robie@justgohome.co.uk>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
import codecs
from boto.glacier.exceptions import UploadArchiveError
from boto.glacier.job import Job
from boto.glacier.writer import compute_hashes_from_fileobj, \
resume_file_upload, Writer
from boto.glacier.concurrent import ConcurrentUploader
from boto.glacier.utils import minimum_part_size, DEFAULT_PART_SIZE
import os.path
_MEGABYTE = 1024 * 1024
_GIGABYTE = 1024 * _MEGABYTE
MAXIMUM_ARCHIVE_SIZE = 10000 * 4 * _GIGABYTE
MAXIMUM_NUMBER_OF_PARTS = 10000
class Vault(object):
DefaultPartSize = DEFAULT_PART_SIZE
SingleOperationThreshold = 100 * _MEGABYTE
ResponseDataElements = (('VaultName', 'name', None),
('VaultARN', 'arn', None),
('CreationDate', 'creation_date', None),
('LastInventoryDate', 'last_inventory_date', None),
('SizeInBytes', 'size', 0),
('NumberOfArchives', 'number_of_archives', 0))
def __init__(self, layer1, response_data=None):
self.layer1 = layer1
if response_data:
for response_name, attr_name, default in self.ResponseDataElements:
value = response_data[response_name]
setattr(self, attr_name, value)
else:
for response_name, attr_name, default in self.ResponseDataElements:
setattr(self, attr_name, default)
def __repr__(self):
return 'Vault("%s")' % self.arn
def delete(self):
"""
Delete's this vault. WARNING!
"""
self.layer1.delete_vault(self.name)
def upload_archive(self, filename, description=None):
"""
Adds an archive to a vault. For archives greater than 100MB the
multipart upload will be used.
:type file: str
:param file: A filename to upload
:type description: str
:param description: An optional description for the archive.
:rtype: str
:return: The archive id of the newly created archive
"""
if os.path.getsize(filename) > self.SingleOperationThreshold:
return self.create_archive_from_file(filename, description=description)
return self._upload_archive_single_operation(filename, description)
def _upload_archive_single_operation(self, filename, description):
"""
Adds an archive to a vault in a single operation. It's recommended for
archives less than 100MB
:type file: str
:param file: A filename to upload
:type description: str
:param description: A description for the archive.
:rtype: str
:return: The archive id of the newly created archive
"""
with open(filename, 'rb') as fileobj:
linear_hash, tree_hash = compute_hashes_from_fileobj(fileobj)
fileobj.seek(0)
response = self.layer1.upload_archive(self.name, fileobj,
linear_hash, tree_hash,
description)
return response['ArchiveId']
def create_archive_writer(self, part_size=DefaultPartSize,
description=None):
"""
Create a new archive and begin a multi-part upload to it.
Returns a file-like object to which the data for the archive
can be written. Once all the data is written the file-like
object should be closed, you can then call the get_archive_id
method on it to get the ID of the created archive.
:type part_size: int
:param part_size: The part size for the multipart upload.
:type description: str
:param description: An optional description for the archive.
:rtype: :class:`boto.glacier.writer.Writer`
:return: A Writer object that to which the archive data
should be written.
"""
response = self.layer1.initiate_multipart_upload(self.name,
part_size,
description)
return Writer(self, response['UploadId'], part_size=part_size)
def create_archive_from_file(self, filename=None, file_obj=None,
description=None, upload_id_callback=None):
"""
Create a new archive and upload the data from the given file
or file-like object.
:type filename: str
:param filename: A filename to upload
:type file_obj: file
:param file_obj: A file-like object to upload
:type description: str
:param description: An optional description for the archive.
:type upload_id_callback: function
:param upload_id_callback: if set, call with the upload_id as the
only parameter when it becomes known, to enable future calls
to resume_archive_from_file in case resume is needed.
:rtype: str
:return: The archive id of the newly created archive
"""
part_size = self.DefaultPartSize
if not file_obj:
file_size = os.path.getsize(filename)
try:
part_size = minimum_part_size(file_size, part_size)
except ValueError:
raise UploadArchiveError("File size of %s bytes exceeds "
"40,000 GB archive limit of Glacier.")
file_obj = open(filename, "rb")
writer = self.create_archive_writer(
description=description,
part_size=part_size)
if upload_id_callback:
upload_id_callback(writer.upload_id)
while True:
data = file_obj.read(part_size)
if not data:
break
writer.write(data)
writer.close()
return writer.get_archive_id()
@staticmethod
def _range_string_to_part_index(range_string, part_size):
start, inside_end = [int(value) for value in range_string.split('-')]
end = inside_end + 1
length = end - start
if length == part_size + 1:
# Off-by-one bug in Amazon's Glacier implementation,
# see: https://forums.aws.amazon.com/thread.jspa?threadID=106866
# Workaround: since part_size is too big by one byte, adjust it
end -= 1
inside_end -= 1
length -= 1
assert not (start % part_size), (
"upload part start byte is not on a part boundary")
assert (length <= part_size), "upload part is bigger than part size"
return start // part_size
def resume_archive_from_file(self, upload_id, filename=None,
file_obj=None):
"""Resume upload of a file already part-uploaded to Glacier.
The resumption of an upload where the part-uploaded section is empty
is a valid degenerate case that this function can handle.
One and only one of filename or file_obj must be specified.
:type upload_id: str
:param upload_id: existing Glacier upload id of upload being resumed.
:type filename: str
:param filename: file to open for resume
:type fobj: file
:param fobj: file-like object containing local data to resume. This
must read from the start of the entire upload, not just from the
point being resumed. Use fobj.seek(0) to achieve this if necessary.
:rtype: str
:return: The archive id of the newly created archive
"""
part_list_response = self.list_all_parts(upload_id)
part_size = part_list_response['PartSizeInBytes']
part_hash_map = {}
for part_desc in part_list_response['Parts']:
part_index = self._range_string_to_part_index(
part_desc['RangeInBytes'], part_size)
part_tree_hash = codecs.decode(part_desc['SHA256TreeHash'], 'hex_codec')
part_hash_map[part_index] = part_tree_hash
if not file_obj:
file_obj = open(filename, "rb")
return resume_file_upload(
self, upload_id, part_size, file_obj, part_hash_map)
def concurrent_create_archive_from_file(self, filename, description,
**kwargs):
"""
Create a new archive from a file and upload the given
file.
This is a convenience method around the
:class:`boto.glacier.concurrent.ConcurrentUploader`
class. This method will perform a multipart upload
and upload the parts of the file concurrently.
:type filename: str
:param filename: A filename to upload
:param kwargs: Additional kwargs to pass through to
:py:class:`boto.glacier.concurrent.ConcurrentUploader`.
You can pass any argument besides the ``api`` and
``vault_name`` param (these arguments are already
passed to the ``ConcurrentUploader`` for you).
:raises: `boto.glacier.exception.UploadArchiveError` is an error
occurs during the upload process.
:rtype: str
:return: The archive id of the newly created archive
"""
uploader = ConcurrentUploader(self.layer1, self.name, **kwargs)
archive_id = uploader.upload(filename, description)
return archive_id
def retrieve_archive(self, archive_id, sns_topic=None,
description=None):
"""
Initiate a archive retrieval job to download the data from an
archive. You will need to wait for the notification from
Amazon (via SNS) before you can actually download the data,
this takes around 4 hours.
:type archive_id: str
:param archive_id: The id of the archive
:type description: str
:param description: An optional description for the job.
:type sns_topic: str
:param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
sends notification when the job is completed and the output
is ready for you to download.
:rtype: :class:`boto.glacier.job.Job`
:return: A Job object representing the retrieval job.
"""
job_data = {'Type': 'archive-retrieval',
'ArchiveId': archive_id}
if sns_topic is not None:
job_data['SNSTopic'] = sns_topic
if description is not None:
job_data['Description'] = description
response = self.layer1.initiate_job(self.name, job_data)
return self.get_job(response['JobId'])
def retrieve_inventory(self, sns_topic=None,
description=None, byte_range=None,
start_date=None, end_date=None,
limit=None):
"""
Initiate a inventory retrieval job to list the items in the
vault. You will need to wait for the notification from
Amazon (via SNS) before you can actually download the data,
this takes around 4 hours.
:type description: str
:param description: An optional description for the job.
:type sns_topic: str
:param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
sends notification when the job is completed and the output
is ready for you to download.
:type byte_range: str
:param byte_range: Range of bytes to retrieve.
:type start_date: DateTime
:param start_date: Beginning of the date range to query.
:type end_date: DateTime
:param end_date: End of the date range to query.
:type limit: int
:param limit: Limits the number of results returned.
:rtype: str
:return: The ID of the job
"""
job_data = {'Type': 'inventory-retrieval'}
if sns_topic is not None:
job_data['SNSTopic'] = sns_topic
if description is not None:
job_data['Description'] = description
if byte_range is not None:
job_data['RetrievalByteRange'] = byte_range
if start_date is not None or end_date is not None or limit is not None:
rparams = {}
if start_date is not None:
rparams['StartDate'] = start_date.strftime('%Y-%m-%dT%H:%M:%S%Z')
if end_date is not None:
rparams['EndDate'] = end_date.strftime('%Y-%m-%dT%H:%M:%S%Z')
if limit is not None:
rparams['Limit'] = limit
job_data['InventoryRetrievalParameters'] = rparams
response = self.layer1.initiate_job(self.name, job_data)
return response['JobId']
def retrieve_inventory_job(self, **kwargs):
"""
Identical to ``retrieve_inventory``, but returns a ``Job`` instance
instead of just the job ID.
:type description: str
:param description: An optional description for the job.
:type sns_topic: str
:param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
sends notification when the job is completed and the output
is ready for you to download.
:type byte_range: str
:param byte_range: Range of bytes to retrieve.
:type start_date: DateTime
:param start_date: Beginning of the date range to query.
:type end_date: DateTime
:param end_date: End of the date range to query.
:type limit: int
:param limit: Limits the number of results returned.
:rtype: :class:`boto.glacier.job.Job`
:return: A Job object representing the retrieval job.
"""
job_id = self.retrieve_inventory(**kwargs)
return self.get_job(job_id)
def delete_archive(self, archive_id):
"""
This operation deletes an archive from the vault.
:type archive_id: str
:param archive_id: The ID for the archive to be deleted.
"""
return self.layer1.delete_archive(self.name, archive_id)
def get_job(self, job_id):
"""
Get an object representing a job in progress.
:type job_id: str
:param job_id: The ID of the job
:rtype: :class:`boto.glacier.job.Job`
:return: A Job object representing the job.
"""
response_data = self.layer1.describe_job(self.name, job_id)
return Job(self, response_data)
def list_jobs(self, completed=None, status_code=None):
"""
Return a list of Job objects related to this vault.
:type completed: boolean
:param completed: Specifies the state of the jobs to return.
If a value of True is passed, only completed jobs will
be returned. If a value of False is passed, only
uncompleted jobs will be returned. If no value is
passed, all jobs will be returned.
:type status_code: string
:param status_code: Specifies the type of job status to return.
Valid values are: InProgress|Succeeded|Failed. If not
specified, jobs with all status codes are returned.
:rtype: list of :class:`boto.glacier.job.Job`
:return: A list of Job objects related to this vault.
"""
response_data = self.layer1.list_jobs(self.name, completed,
status_code)
return [Job(self, jd) for jd in response_data['JobList']]
def list_all_parts(self, upload_id):
"""Automatically make and combine multiple calls to list_parts.
Call list_parts as necessary, combining the results in case multiple
calls were required to get data on all available parts.
"""
result = self.layer1.list_parts(self.name, upload_id)
marker = result['Marker']
while marker:
additional_result = self.layer1.list_parts(
self.name, upload_id, marker=marker)
result['Parts'].extend(additional_result['Parts'])
marker = additional_result['Marker']
# The marker makes no sense in an unpaginated result, and clearing it
# makes testing easier. This also has the nice property that the result
# is a normal (but expanded) response.
result['Marker'] = None
return result