# object_store.py -- Object store for git objects
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
# and others
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#
"""Git object store interfaces and implementation."""
from io import BytesIO
import errno
import os
import stat
import sys
import tempfile
from dulwich.diff_tree import (
tree_changes,
walk_trees,
)
from dulwich.errors import (
NotTreeError,
)
from dulwich.file import GitFile
from dulwich.objects import (
Commit,
ShaFile,
Tag,
Tree,
ZERO_SHA,
hex_to_sha,
sha_to_hex,
hex_to_filename,
S_ISGITLINK,
object_class,
)
from dulwich.pack import (
Pack,
PackData,
PackInflater,
PackFileDisappeared,
iter_sha1,
pack_objects_to_data,
write_pack_header,
write_pack_index_v2,
write_pack_data,
write_pack_object,
compute_file_sha,
PackIndexer,
PackStreamCopier,
)
from dulwich.refs import ANNOTATED_TAG_SUFFIX
# Directory-name constants. Presumably these are the names of the "info" and
# "pack" subdirectories inside an object store directory.
# NOTE(review): confirm against the code that uses them; no use is visible
# in this part of the file.
INFODIR = 'info'
PACKDIR = 'pack'
class BaseObjectStore(object):
    """Object store interface.

    Subclasses are expected to implement ``contains_loose``,
    ``contains_packed``, ``packs``, ``get_raw``, ``__iter__``,
    ``add_object`` and ``add_objects``; the other methods are built on top
    of those primitives. ``add_pack_data`` additionally relies on an
    ``add_pack()`` method that this class does not define itself.
    """

    def determine_wants_all(self, refs):
        """Select the SHAs from ``refs`` that should be fetched.

        A SHA is selected when it is not already present in this store, its
        ref name does not end with ``ANNOTATED_TAG_SUFFIX`` and it is not
        ``ZERO_SHA``.

        :param refs: Dictionary mapping ref names to SHAs
        :return: List of selected SHAs
        """
        return [sha for (ref, sha) in refs.items()
                if sha not in self and
                not ref.endswith(ANNOTATED_TAG_SUFFIX) and
                sha != ZERO_SHA]

    def iter_shas(self, shas):
        """Iterate over the objects for the specified shas.

        :param shas: Iterable object with SHAs
        :return: Object iterator
        """
        return ObjectStoreIterator(self, shas)

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        raise NotImplementedError(self.contains_loose)

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        raise NotImplementedError(self.contains_packed)

    def __contains__(self, sha):
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        return self.contains_packed(sha) or self.contains_loose(sha)

    @property
    def packs(self):
        """Iterable of pack objects."""
        # Raised without an argument on purpose: referring to ``self.packs``
        # here would evaluate this property again.
        raise NotImplementedError

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)

    def __getitem__(self, sha):
        """Obtain an object by SHA1.

        The object is built with ``ShaFile.from_raw_string`` from whatever
        ``get_raw`` returns for ``sha``.
        """
        type_num, uncomp = self.get_raw(sha)
        return ShaFile.from_raw_string(type_num, uncomp, sha=sha)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        raise NotImplementedError(self.__iter__)

    def add_object(self, obj):
        """Add a single object to this object store."""
        raise NotImplementedError(self.add_object)

    def add_objects(self, objects, progress=None):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of (object, path) tuples
        :param progress: Optional progress reporting function
        """
        raise NotImplementedError(self.add_objects)

    def add_pack_data(self, count, pack_data, progress=None):
        """Add pack data to this object store.

        :param count: Number of items to add
        :param pack_data: Iterator over pack data tuples
        :param progress: Optional progress reporting function, handed to
            ``write_pack_data``
        :return: Whatever the commit callable obtained from ``add_pack()``
            returns, or None when ``count`` is 0
        """
        if count == 0:
            # Don't bother writing an empty pack file
            return
        f, commit, abort = self.add_pack()
        try:
            write_pack_data(f, count, pack_data, progress)
        except BaseException:
            # Discard the partially written pack on any failure, including
            # KeyboardInterrupt/SystemExit, then let the error propagate.
            abort()
            raise
        else:
            return commit()

    def tree_changes(self, source, target, want_unchanged=False,
                     include_trees=False, change_type_same=False):
        """Find the differences between the contents of two trees.

        :param source: SHA1 of the source tree
        :param target: SHA1 of the target tree
        :param want_unchanged: Whether unchanged files should be reported
        :param include_trees: Whether to include trees
        :param change_type_same: Whether to report files changing
            type in the same entry.
        :return: Iterator over tuples with
            (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        # Inside a method body the bare name resolves to the module-level
        # tree_changes function, not to this method.
        for change in tree_changes(self, source, target,
                                   want_unchanged=want_unchanged,
                                   include_trees=include_trees,
                                   change_type_same=change_type_same):
            yield ((change.old.path, change.new.path),
                   (change.old.mode, change.new.mode),
                   (change.old.sha, change.new.sha))

    def iter_tree_contents(self, tree_id, include_trees=False):
        """Iterate the contents of a tree and all subtrees.

        Iteration is depth-first pre-order, as in e.g. os.walk.

        :param tree_id: SHA1 of the tree.
        :param include_trees: If True, include tree objects in the iteration.
        :return: Iterator over TreeEntry namedtuples for all the objects in a
            tree.
        """
        for entry, _ in walk_trees(self, tree_id, None):
            if ((entry.mode is not None and
                 not stat.S_ISDIR(entry.mode)) or include_trees):
                yield entry

    def find_missing_objects(self, haves, wants, progress=None,
                             get_tagged=None,
                             get_parents=lambda commit: commit.parents,
                             depth=None):
        """Find the missing objects required for a set of revisions.

        :param haves: Iterable over SHAs already in common.
        :param wants: Iterable over SHAs of objects to fetch.
        :param progress: Simple progress function that will be called with
            updated progress strings.
        :param get_tagged: Function that returns a dict of pointed-to sha ->
            tag sha for including tags.
        :param get_parents: Optional function for getting the parents of a
            commit.
        :param depth: Accepted for interface compatibility; this
            implementation does not use it.
        :return: Iterator over (sha, path) pairs.
        """
        finder = MissingObjectFinder(self, haves, wants, progress, get_tagged,
                                     get_parents=get_parents)
        # Two-argument iter(): keep calling finder.next until it returns None.
        return iter(finder.next, None)

    def find_common_revisions(self, graphwalker):
        """Find which revisions this store has in common using graphwalker.

        The walker is advanced with ``next()`` until it yields a false value;
        every SHA present in this store is acknowledged via
        ``graphwalker.ack``.

        :param graphwalker: A graphwalker object.
        :return: List of SHAs that are in common
        """
        haves = []
        sha = next(graphwalker)
        while sha:
            if sha in self:
                haves.append(sha)
                graphwalker.ack(sha)
            sha = next(graphwalker)
        return haves

    def generate_pack_contents(self, have, want, progress=None):
        """Iterate over the contents of a pack file.

        :param have: List of SHA1s of objects that should not be sent
        :param want: List of SHA1s of objects that should be sent
        :param progress: Optional progress reporting method
        """
        return self.iter_shas(self.find_missing_objects(have, want, progress))

    def generate_pack_data(self, have, want, progress=None, ofs_delta=True):
        """Generate pack data objects for a set of wants/haves.

        :param have: List of SHA1s of objects that should not be sent
        :param want: List of SHA1s of objects that should be sent
        :param ofs_delta: Whether OFS deltas can be included. Accepted for
            interface compatibility; this implementation does not use it.
        :param progress: Optional progress reporting method
        """
        # TODO(jelmer): More efficient implementation
        return pack_objects_to_data(
            self.generate_pack_contents(have, want, progress))

    def peel_sha(self, sha):
        """Peel all tags from a SHA.

        :param sha: The object SHA to peel.
        :return: The object reached after following all intermediate tags.
            Note that this is the object itself, not its SHA1; if ``sha``
            does not refer to a tag, the object stored under ``sha`` is
            returned.
        """
        obj = self[sha]
        obj_class = object_class(obj.type_name)
        while obj_class is Tag:
            # A tag's ``object`` is unpacked as (class, sha) of its target.
            obj_class, sha = obj.object
            obj = self[sha]
        return obj

    def _collect_ancestors(self, heads, common=frozenset(),
                           get_parents=lambda commit: commit.parents):
        """Collect all ancestors of heads up to (excluding) those in common.

        :param heads: commits to start from
        :param common: commits to end at, or empty set to walk repository
            completely
        :param get_parents: Optional function for getting the parents of a
            commit.
        :return: a tuple (A, B) where A - all commits reachable
            from heads but not present in common, B - common (shared) elements
            that are directly reachable from heads
        """
        # Imported here so the method is self-contained; deque gives O(1)
        # pops from the left, unlike list.pop(0).
        from collections import deque

        bases = set()
        commits = set()
        queue = deque(heads)
        while queue:
            e = queue.popleft()
            if e in common:
                # Stop at shared commits; do not walk past them.
                bases.add(e)
            elif e not in commits:
                commits.add(e)
                cmt = self[e]
                queue.extend(get_parents(cmt))
        return (commits, bases)

    def close(self):
        """Close any files opened by this object store."""
        # Default implementation is a NO-OP
class PackBasedObjectStore(BaseObjectStore):
def __init__(self):
    """Set up the store with an empty pack cache."""
    # Cache of pack objects, keyed by the name they were registered under.
    self._pack_cache = dict()
@property
def alternates(self):
    """Alternate stores consulted by ``__contains__``; none by default.

    :return: A new empty list
    """
    return list()
def contains_packed(self, sha):
    """Report whether ``sha`` is found in one of this store's packs.

    Alternates are not consulted.

    :param sha: SHA1 of the object to look for
    :return: True if any pack in ``self.packs`` contains ``sha``, else False
    """
    for candidate in self.packs:
        try:
            found = sha in candidate
        except PackFileDisappeared:
            # The lookup reported that the pack file is gone; skip this pack
            # and keep checking the remaining ones.
            continue
        if found:
            return True
    return False
def __contains__(self, sha):
    """Report whether an object with the given SHA1 is available.

    Packed and loose objects of this store are checked first; only when
    neither has the object are the stores in ``self.alternates`` consulted.

    :param sha: SHA1 of the object to look for
    :return: True if the object was found anywhere, else False
    """
    held_locally = self.contains_packed(sha) or self.contains_loose(sha)
    if held_locally:
        return True
    return any(sha in other_store for other_store in self.alternates)
def _add_cached_pack(self, base_name, pack):
    """Add a newly appeared pack to the cache by path.

    If a different pack object was already cached under ``base_name`` it is
    replaced by ``pack`` and then closed. Registering the object that is
    already cached does nothing.

    :param base_name: Key under which the pack is cached
    :param pack: Pack object to store in the cache
    """
    prev_pack = self._pack_cache.get(base_name)
    if prev_pack is pack:
        return
    self._pack_cache[base_name] = pack
    # Test against None explicitly rather than relying on truthiness: an
    # object that defines __len__ is falsy when empty, and such a replaced
    # pack would otherwise never be closed.
    if prev_pack is not None:
        prev_pack.close()
def _clear_cached_packs(self):
pack_cache = self._pack_cache
self._pack_cache = {}
while pack_cache:
(name, pack) = pack_cache.popitem()
Loading ...