# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2015 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
import gzip
from io import BytesIO
import json
import logging
import os
import posixpath
import re
try:
import threading
except ImportError: # pragma: no cover
import dummy_threading as threading
import zlib
from . import DistlibException
from .compat import (urljoin, urlparse, urlunparse, url2pathname, pathname2url,
queue, quote, unescape, string_types, build_opener,
HTTPRedirectHandler as BaseRedirectHandler, text_type,
Request, HTTPError, URLError)
from .database import Distribution, DistributionPath, make_dist
from .metadata import Metadata, MetadataInvalidError
from .util import (cached_property, parse_credentials, ensure_slash,
split_filename, get_project_data, parse_requirement,
parse_name_and_version, ServerProxy, normalize_name)
from .version import get_scheme, UnsupportedVersionError
from .wheel import Wheel, is_compatible
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Matches a leading "<algorithm>=<lowercase hex digest>" pair; applied to URL
# fragments when looking for an archive digest.
HASHER_HASH = re.compile(r'^(\w+)=([a-f0-9]+)')
# Captures the value of a trailing "; charset=..." parameter, ignoring case.
# NOTE(review): presumably applied to Content-Type header values - the usage
# is not visible in this part of the file.
CHARSET = re.compile(r';\s*charset\s*=\s*(.*)\s*$', re.I)
# Matches "text/html", "application/xml" and "application/xhtml" prefixes.
# NOTE(review): presumably used to recognise HTML-like responses - confirm.
HTML_CONTENT_TYPE = re.compile('text/html|application/x(ht)?ml')
# Index URL used when a caller does not supply one.
DEFAULT_INDEX = 'https://pypi.python.org/pypi'
def get_all_distribution_names(url=None):
    """
    Ask an index for the names of every distribution it knows about.

    :param url: The URL of the index. When ``None``, ``DEFAULT_INDEX`` is
                used.
    :return: Whatever the index's ``list_packages`` call returns
             (a list of all known distribution names).
    """
    index_url = DEFAULT_INDEX if url is None else url
    client = ServerProxy(index_url, timeout=3.0)
    try:
        names = client.list_packages()
    finally:
        # Always release the connection, even if the remote call failed.
        client('close')()
    return names
class RedirectHandler(BaseRedirectHandler):
    """
    A redirect handler which works around a bug in some Python 3.2.x
    releases.
    """
    # The base implementation in some 3.2.x versions (e.g. 3.2.2 on Ubuntu
    # Oneiric) rejects a relative Location header such as /abc, complaining
    # that the scheme '' is bogus, instead of resolving it against the URL of
    # the request. See Python issue #13696.
    def http_error_302(self, req, fp, code, msg, headers):
        """
        Make a relative redirect target absolute, then defer to the base
        class.

        The first of the 'location' / 'uri' headers which is present is
        used. If its value has no scheme, it is joined with the URL of the
        request and written back into ``headers`` before the base handler
        runs. Returns ``None`` if neither header yields a value.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.
        key = next((k for k in ('location', 'uri') if k in headers), None)
        newurl = None if key is None else headers[key]
        if newurl is None:  # pragma: no cover
            return
        if urlparse(newurl).scheme == '':
            newurl = urljoin(req.get_full_url(), newurl)
            if not hasattr(headers, 'replace_header'):
                headers[key] = newurl
            else:
                headers.replace_header(key, newurl)
        return BaseRedirectHandler.http_error_302(self, req, fp, code, msg,
                                                  headers)

    # The other redirect status codes share the same handling.
    http_error_301 = http_error_303 = http_error_307 = http_error_302
class Locator(object):
    """
    A base class for locators - things that locate distributions.
    """
    # Filename suffixes of source archives.
    source_extensions = ('.tar.gz', '.tar.bz2', '.tar', '.zip', '.tgz', '.tbz')
    # Filename suffixes of binary archives.
    binary_extensions = ('.egg', '.exe', '.whl')
    # NOTE(review): presumably suffixes which should never be treated as
    # archives - not referenced in this part of the file.
    excluded_extensions = ('.pdf',)
    # A list of tags indicating which wheels you want to match. The default
    # value of None matches against the tags compatible with the running
    # Python. If you want to match other values, set wheel_tags on a locator
    # instance to a list of tuples (pyver, abi, arch) which you want to match.
    wheel_tags = None
    # Suffixes a URL path must end with to count as downloadable: every
    # source archive suffix plus wheels.
    downloadable_extensions = source_extensions + ('.whl',)
def __init__(self, scheme='default'):
    """
    Initialise an instance.

    :param scheme: Because locators look for most recent versions, they
                   need to know the version scheme to use. This specifies
                   the current PEP-recommended scheme - use ``'legacy'``
                   if you need to support existing distributions on PyPI.
    """
    # Results of _get_project(), keyed by project name (see get_project()).
    self._cache = {}
    # Assigned through the ``scheme`` property, i.e. via _set_scheme().
    self.scheme = scheme
    # Because of bugs in some of the handlers on some of the platforms,
    # we use our own opener rather than just using urlopen.
    self.opener = build_opener(RedirectHandler())
    # If get_project() is called from locate(), the matcher instance
    # is set from the requirement passed to locate(). See issue #18 for
    # why this can be useful to know.
    self.matcher = None
    # Queue of errors; drained by get_errors() / clear_errors().
    self.errors = queue.Queue()
def get_errors(self):
"""
Return any errors which have occurred.
"""
result = []
while not self.errors.empty(): # pragma: no cover
try:
e = self.errors.get(False)
result.append(e)
except self.errors.Empty:
continue
self.errors.task_done()
return result
def clear_errors(self):
"""
Clear any errors which may have been logged.
"""
# Just get the errors and throw them away
self.get_errors()
def clear_cache(self):
self._cache.clear()
def _get_scheme(self):
return self._scheme
def _set_scheme(self, value):
self._scheme = value
# Public ``scheme`` attribute; reads and writes are routed through
# _get_scheme() and _set_scheme().
scheme = property(_get_scheme, _set_scheme)
def _get_project(self, name):
"""
For a given project, get a dictionary mapping available versions to Distribution
instances.
This should be implemented in subclasses.
If called from a locate() request, self.matcher will be set to a
matcher for the requirement to satisfy, otherwise it will be None.
"""
raise NotImplementedError('Please implement in the subclass')
def get_distribution_names(self):
"""
Return all the distribution names known to this locator.
"""
raise NotImplementedError('Please implement in the subclass')
def get_project(self, name):
"""
For a given project, get a dictionary mapping available versions to Distribution
instances.
This calls _get_project to do all the work, and just implements a caching layer on top.
"""
if self._cache is None: # pragma: no cover
result = self._get_project(name)
elif name in self._cache:
result = self._cache[name]
else:
self.clear_errors()
result = self._get_project(name)
self._cache[name] = result
return result
def score_url(self, url):
"""
Give an url a score which can be used to choose preferred URLs
for a given project release.
"""
t = urlparse(url)
basename = posixpath.basename(t.path)
compatible = True
is_wheel = basename.endswith('.whl')
is_downloadable = basename.endswith(self.downloadable_extensions)
if is_wheel:
compatible = is_compatible(Wheel(basename), self.wheel_tags)
return (t.scheme == 'https', 'pypi.python.org' in t.netloc,
is_downloadable, is_wheel, compatible, basename)
def prefer_url(self, url1, url2):
"""
Choose one of two URLs where both are candidates for distribution
archives for the same version of a distribution (for example,
.tar.gz vs. zip).
The current implementation favours https:// URLs over http://, archives
from PyPI over those from other locations, wheel compatibility (if a
wheel) and then the archive name.
"""
result = url2
if url1:
s1 = self.score_url(url1)
s2 = self.score_url(url2)
if s1 > s2:
result = url1
if result != url2:
logger.debug('Not replacing %r with %r', url1, url2)
else:
logger.debug('Replacing %r with %r', url1, url2)
return result
def split_filename(self, filename, project_name):
    """
    Try to break a filename up into project name, version and Python
    version.

    This simply delegates to the module-level split_filename() helper,
    which gives subclasses a place to hook in different behaviour.
    """
    return split_filename(filename, project_name)
def convert_url_to_download_info(self, url, project_name):
    """
    See if a URL is a candidate for a download URL for a project (the URL
    has typically been scraped from an HTML page).

    If it is, a dictionary is returned with keys "name", "version",
    "filename" and "url"; otherwise, None is returned. The returned "url"
    has its fragment removed. A "python-version" key is added when that
    information is available, and an "<algo>_digest" key is added when the
    URL fragment starts with "<algo>=<hex digest>".

    :param url: The URL to examine.
    :param project_name: The project the URL must belong to. For wheels,
                         ``None`` disables the name check; for other
                         archives, any false value does.
    :return: The download information dictionary, or ``None``.
    """
    def same_project(name1, name2):
        return normalize_name(name1) == normalize_name(name2)

    def dotted_version(pyver_tag):
        # Turn a Python tag such as 'py27', 'cp310' or 'py3' into '2.7',
        # '3.10' or '3'. Only the first digit is the major version, so a
        # two-digit minor version must not be split up ('3.10', not
        # '3.1.0').
        digits = pyver_tag[2:]
        return '.'.join(part for part in (digits[:1], digits[1:]) if part)

    result = None
    scheme, netloc, path, params, query, frag = urlparse(url)
    if frag.lower().startswith('egg='):  # pragma: no cover
        logger.debug('%s: version hint in fragment: %r',
                     project_name, frag)
    m = HASHER_HASH.match(frag)
    if m:
        algo, digest = m.groups()
    else:
        algo, digest = None, None
    # The untouched path goes into the result URL; the working copy loses
    # any trailing slash so that the extension checks below can succeed.
    origpath = path
    if path and path[-1] == '/':  # pragma: no cover
        path = path[:-1]
    if path.endswith('.whl'):
        try:
            wheel = Wheel(path)
            if is_compatible(wheel, self.wheel_tags):
                if project_name is None:
                    include = True
                else:
                    include = same_project(wheel.name, project_name)
                if include:
                    result = {
                        'name': wheel.name,
                        'version': wheel.version,
                        'filename': wheel.filename,
                        'url': urlunparse((scheme, netloc, origpath,
                                           params, query, '')),
                        'python-version': ', '.join(
                            [dotted_version(v) for v in wheel.pyver]),
                    }
        except Exception:  # pragma: no cover
            # Deliberately best-effort: a bad wheel name just means that
            # this URL is not a candidate.
            logger.warning('invalid path for wheel: %s', path)
    elif not path.endswith(self.downloadable_extensions):  # pragma: no cover
        logger.debug('Not downloadable: %s', path)
    else:  # downloadable extension
        path = filename = posixpath.basename(path)
        for ext in self.downloadable_extensions:
            if path.endswith(ext):
                # Strip the extension, then try to split what remains into
                # name and version.
                path = path[:-len(ext)]
                t = self.split_filename(path, project_name)
                if not t:  # pragma: no cover
                    logger.debug('No match for project/version: %s', path)
                else:
                    name, version, pyver = t
                    if not project_name or same_project(project_name, name):
                        result = {
                            'name': name,
                            'version': version,
                            'filename': filename,
                            'url': urlunparse((scheme, netloc, origpath,
                                               params, query, '')),
                        }
                        if pyver:  # pragma: no cover
                            result['python-version'] = pyver
                # Only the first matching extension is considered.
                break
    if result and algo:
        result['%s_digest' % algo] = digest
    return result
def _get_digest(self, info):
"""
Get a digest from a dictionary by looking at keys of the form
'algo_digest'.
Returns a 2-tuple (algo, digest) if found, else None. Currently
looks only for SHA256, then MD5.
"""
result = None
for algo in ('sha256', 'md5'):
key = '%s_digest' % algo
if key in info:
result = (algo, info[key])
break
return result
def _update_version_data(self, result, info):
"""
Update a result dictionary (the final result from _get_project) with a
dictionary for a specific version, which typically holds information
gleaned from a filename or URL for an archive for the distribution.
"""
name = info.pop('name')
version = info.pop('version')
if version in result:
dist = result[version]
md = dist.metadata
else:
dist = make_dist(name, version, scheme=self.scheme)
md = dist.metadata
dist.digest = digest = self._get_digest(info)
url = info['url']
result['digests'][url] = digest
if md.source_url != info['url']:
md.source_url = self.prefer_url(md.source_url, url)
result['urls'].setdefault(version, set()).add(url)
dist.locator = self
result[version] = dist
def locate(self, requirement, prereleases=False):
"""
Find the most recent distribution which matches the given
requirement.
Loading ...