Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / dulwich   python

Repository URL to install this package:

/ web.py

# web.py -- WSGI smart-http server
# Copyright (C) 2010 Google, Inc.
# Copyright (C) 2012 Jelmer Vernooij <jelmer@jelmer.uk>
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as public by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""HTTP server for dulwich that implements the git smart HTTP protocol."""

from io import BytesIO
import shutil
import tempfile
import gzip
import os
import re
import sys
import time
from wsgiref.simple_server import (
    WSGIRequestHandler,
    ServerHandler,
    WSGIServer,
    make_server,
    )

try:
    from urlparse import parse_qs
except ImportError:
    from urllib.parse import parse_qs


from dulwich import log_utils
from dulwich.protocol import (
    ReceivableProtocol,
    )
from dulwich.repo import (
    Repo,
    )
from dulwich.server import (
    DictBackend,
    DEFAULT_HANDLERS,
    generate_info_refs,
    generate_objects_info_packs,
    )


logger = log_utils.getLogger(__name__)


# HTTP error strings
HTTP_OK = '200 OK'
HTTP_NOT_FOUND = '404 Not Found'
HTTP_FORBIDDEN = '403 Forbidden'
HTTP_ERROR = '500 Internal Server Error'


def date_time_string(timestamp=None):
    # From BaseHTTPRequestHandler.date_time_string in BaseHTTPServer.py in the
    # Python 2.6.5 standard library, following modifications:
    #  - Made a global rather than an instance method.
    #  - weekdayname and monthname are renamed and locals rather than class
    #    variables.
    # Copyright (c) 2001-2010 Python Software Foundation; All Rights Reserved
    weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    months = [None,
              'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    if timestamp is None:
        timestamp = time.time()
    year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
    return '%s, %02d %3s %4d %02d:%02d:%02d GMD' % (
            weekdays[wd], day, months[month], year, hh, mm, ss)


def url_prefix(mat):
    """Extract the URL prefix from a regex match.

    :param mat: A regex match object.
    :returns: The URL prefix, defined as the text before the match in the
        original string. Normalized to start with one leading slash and end
        with zero.
    """
    return '/' + mat.string[:mat.start()].strip('/')


def get_repo(backend, mat):
    """Get a Repo instance for the given backend and URL regex match."""
    return backend.open_repository(url_prefix(mat))


def send_file(req, f, content_type):
    """Send a file-like object to the request output.

    :param req: The HTTPGitRequest object to send output to.
    :param f: An open file-like object to send; will be closed.
    :param content_type: The MIME type for the file.
    :return: Iterator over the contents of the file, as chunks.
    """
    if f is None:
        yield req.not_found('File not found')
        return
    try:
        req.respond(HTTP_OK, content_type)
        while True:
            data = f.read(10240)
            if not data:
                break
            yield data
    except IOError:
        yield req.error('Error reading file')
    finally:
        f.close()


def _url_to_path(url):
    return url.replace('/', os.path.sep)


def get_text_file(req, backend, mat):
    req.nocache()
    path = _url_to_path(mat.group())
    logger.info('Sending plain text file %s', path)
    return send_file(req, get_repo(backend, mat).get_named_file(path),
                     'text/plain')


def get_loose_object(req, backend, mat):
    sha = (mat.group(1) + mat.group(2)).encode('ascii')
    logger.info('Sending loose object %s', sha)
    object_store = get_repo(backend, mat).object_store
    if not object_store.contains_loose(sha):
        yield req.not_found('Object not found')
        return
    try:
        data = object_store[sha].as_legacy_object()
    except IOError:
        yield req.error('Error reading object')
        return
    req.cache_forever()
    req.respond(HTTP_OK, 'application/x-git-loose-object')
    yield data


def get_pack_file(req, backend, mat):
    req.cache_forever()
    path = _url_to_path(mat.group())
    logger.info('Sending pack file %s', path)
    return send_file(req, get_repo(backend, mat).get_named_file(path),
                     'application/x-git-packed-objects')


def get_idx_file(req, backend, mat):
    req.cache_forever()
    path = _url_to_path(mat.group())
    logger.info('Sending pack file %s', path)
    return send_file(req, get_repo(backend, mat).get_named_file(path),
                     'application/x-git-packed-objects-toc')


def get_info_refs(req, backend, mat):
    params = parse_qs(req.environ['QUERY_STRING'])
    service = params.get('service', [None])[0]
    if service and not req.dumb:
        handler_cls = req.handlers.get(service.encode('ascii'), None)
        if handler_cls is None:
            yield req.forbidden('Unsupported service')
            return
        req.nocache()
        write = req.respond(
            HTTP_OK, 'application/x-%s-advertisement' % service)
        proto = ReceivableProtocol(BytesIO().read, write)
        handler = handler_cls(backend, [url_prefix(mat)], proto,
                              http_req=req, advertise_refs=True)
        handler.proto.write_pkt_line(
            b'# service=' + service.encode('ascii') + b'\n')
        handler.proto.write_pkt_line(None)
        handler.handle()
    else:
        # non-smart fallback
        # TODO: select_getanyfile() (see http-backend.c)
        req.nocache()
        req.respond(HTTP_OK, 'text/plain')
        logger.info('Emulating dumb info/refs')
        repo = get_repo(backend, mat)
        for text in generate_info_refs(repo):
            yield text


def get_info_packs(req, backend, mat):
    req.nocache()
    req.respond(HTTP_OK, 'text/plain')
    logger.info('Emulating dumb info/packs')
    return generate_objects_info_packs(get_repo(backend, mat))


class _LengthLimitedFile(object):
    """Wrapper class to limit the length of reads from a file-like object.

    This is used to ensure EOF is read from the wsgi.input object once
    Content-Length bytes are read. This behavior is required by the WSGI spec
    but not implemented in wsgiref as of 2.5.
    """

    def __init__(self, input, max_bytes):
        self._input = input
        self._bytes_avail = max_bytes

    def read(self, size=-1):
        if self._bytes_avail <= 0:
            return b''
        if size == -1 or size > self._bytes_avail:
            size = self._bytes_avail
        self._bytes_avail -= size
        return self._input.read(size)

    # TODO: support more methods as necessary


def handle_service_request(req, backend, mat):
    service = mat.group().lstrip('/')
    logger.info('Handling service request for %s', service)
    handler_cls = req.handlers.get(service.encode('ascii'), None)
    if handler_cls is None:
        yield req.forbidden('Unsupported service')
        return
    req.nocache()
    write = req.respond(HTTP_OK, 'application/x-%s-result' % service)
    proto = ReceivableProtocol(req.environ['wsgi.input'].read, write)
    handler = handler_cls(backend, [url_prefix(mat)], proto, http_req=req)
    handler.handle()


class HTTPGitRequest(object):
    """Class encapsulating the state of a single git HTTP request.

    :ivar environ: the WSGI environment for the request.
    """

    def __init__(self, environ, start_response, dumb=False, handlers=None):
        self.environ = environ
        self.dumb = dumb
        self.handlers = handlers
        self._start_response = start_response
        self._cache_headers = []
        self._headers = []

    def add_header(self, name, value):
        """Add a header to the response."""
        self._headers.append((name, value))

    def respond(self, status=HTTP_OK, content_type=None, headers=None):
        """Begin a response with the given status and other headers."""
        if headers:
            self._headers.extend(headers)
        if content_type:
            self._headers.append(('Content-Type', content_type))
        self._headers.extend(self._cache_headers)

        return self._start_response(status, self._headers)

    def not_found(self, message):
        """Begin a HTTP 404 response and return the text of a message."""
        self._cache_headers = []
        logger.info('Not found: %s', message)
        self.respond(HTTP_NOT_FOUND, 'text/plain')
        return message.encode('ascii')

    def forbidden(self, message):
        """Begin a HTTP 403 response and return the text of a message."""
        self._cache_headers = []
        logger.info('Forbidden: %s', message)
        self.respond(HTTP_FORBIDDEN, 'text/plain')
        return message.encode('ascii')

    def error(self, message):
        """Begin a HTTP 500 response and return the text of a message."""
        self._cache_headers = []
        logger.error('Error: %s', message)
        self.respond(HTTP_ERROR, 'text/plain')
        return message.encode('ascii')

    def nocache(self):
        """Set the response to never be cached by the client."""
        self._cache_headers = [
          ('Expires', 'Fri, 01 Jan 1980 00:00:00 GMT'),
          ('Pragma', 'no-cache'),
          ('Cache-Control', 'no-cache, max-age=0, must-revalidate'),
          ]

    def cache_forever(self):
        """Set the response to be cached forever by the client."""
        now = time.time()
        self._cache_headers = [
          ('Date', date_time_string(now)),
          ('Expires', date_time_string(now + 31536000)),
          ('Cache-Control', 'public, max-age=31536000'),
          ]


class HTTPGitApplication(object):
    """Class encapsulating the state of a git WSGI application.

    :ivar backend: the Backend object backing this application
    """

    services = {
      ('GET', re.compile('/HEAD$')): get_text_file,
      ('GET', re.compile('/info/refs$')): get_info_refs,
      ('GET', re.compile('/objects/info/alternates$')): get_text_file,
      ('GET', re.compile('/objects/info/http-alternates$')): get_text_file,
      ('GET', re.compile('/objects/info/packs$')): get_info_packs,
      ('GET', re.compile('/objects/([0-9a-f]{2})/([0-9a-f]{38})$')):
      get_loose_object,
      ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.pack$')):
      get_pack_file,
      ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.idx$')):
      get_idx_file,

      ('POST', re.compile('/git-upload-pack$')): handle_service_request,
      ('POST', re.compile('/git-receive-pack$')): handle_service_request,
    }

    def __init__(self, backend, dumb=False, handlers=None, fallback_app=None):
        self.backend = backend
        self.dumb = dumb
        self.handlers = dict(DEFAULT_HANDLERS)
        self.fallback_app = fallback_app
        if handlers is not None:
            self.handlers.update(handlers)

    def __call__(self, environ, start_response):
        path = environ['PATH_INFO']
Loading ...