Repository URL to install this package:
Version:
1.10.1 ▾
|
# -*- coding: utf-8 -*-
#
# Copyright (c) the purl authors
# SPDX-License-Identifier: MIT
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Visit https://github.com/package-url/packageurl-python for support and
# download.
import os
import re
from urllib.parse import unquote_plus
from urllib.parse import urlparse
from packageurl import PackageURL
from packageurl.contrib.route import NoRouteAvailable
from packageurl.contrib.route import Router
"""
This module helps build a PackageURL from an arbitrary URL.
This uses the routing mechanism available in the route.py module.
In order to make it easy to use, it contains all the conversion functions
in this single Python script.
"""
# Shared Router instance for this module: the `route` decorators and
# `register_pattern()` below attach URL patterns to it, and `url2purl()`
# dispatches URLs through its `process()` method.
purl_router = Router()
def url2purl(url):
    """
    Return a PackageURL inferred from the `url` string, or None.

    An empty or None `url` yields None. A `url` that no registered route
    handles is passed to `build_generic_purl` as a fallback.
    """
    if not url:
        return None

    try:
        purl = purl_router.process(url)
    except NoRouteAvailable:
        # No existing route fits `url`: attempt to create a generic
        # PackageURL for it instead.
        return build_generic_purl(url)
    else:
        return purl
# Alias: `get_purl` is bound to the same function object as `url2purl`.
get_purl = url2purl
def purl_from_pattern(type_, pattern, url, qualifiers=None):
    """
    Return a PackageURL of type `type_` built from the named groups captured
    when the `pattern` regex matches the decoded `url`, or None when the
    pattern does not match.

    Only the named groups whose name is a PackageURL field are used. A
    non-empty `version_prefix` group is recorded as a qualifier. The optional
    `qualifiers` mapping is merged into the result; it is copied first and is
    never modified in place.
    """
    url = unquote_plus(url)
    # The pattern is compiled in verbose mode and matched from the start of `url`.
    match = re.compile(pattern, re.VERBOSE).match(url)
    if not match:
        return None

    groups = match.groupdict()
    purl_data = {field: value for field, value in groups.items() if field in PackageURL._fields}

    # Work on a copy so that the update below cannot mutate the caller's mapping.
    qualifiers = dict(qualifiers or {})

    # Include the `version_prefix` as a qualifier to infer valid URLs in purl2url
    version_prefix = groups.get("version_prefix")
    if version_prefix:
        qualifiers["version_prefix"] = version_prefix

    if qualifiers:
        if "qualifiers" in purl_data:
            # NOTE(review): a captured `qualifiers` group would be a str, which
            # has no update(); none of the patterns visible in this module
            # define such a group, so this branch looks unreachable today.
            purl_data["qualifiers"].update(qualifiers)
        else:
            purl_data["qualifiers"] = qualifiers

    return PackageURL(type_, **purl_data)
def register_pattern(type_, pattern, router=purl_router):
    """
    Append to `router` a route for the `pattern` regex whose endpoint builds a
    PackageURL of type `type_` from a URL using that same `pattern`.
    """

    def pattern_endpoint(url):
        # `type_` and `pattern` are captured from the enclosing call.
        return purl_from_pattern(type_, pattern, url)

    router.append(pattern, pattern_endpoint)
def get_path_segments(url):
    """
    Return the list of non-empty, URL-decoded segments of the path of the
    `url` string, in order.
    """
    decoded_path = unquote_plus(urlparse(url).path)
    # filter(None, ...) drops the empty strings produced by leading, trailing
    # or repeated slashes.
    return list(filter(None, decoded_path.split("/")))
def build_generic_purl(uri):
    """
    Return a PackageURL of type generic for `uri`, or None when `uri` lacks a
    scheme, a network location or a path, or when its path has no segment.

    `uri` is assumed to be a download URL, e.g. https://example.com/example.tar.gz
    """
    parts = urlparse(uri)
    if not (parts.scheme and parts.netloc and parts.path):
        return None

    segments = get_path_segments(uri)
    if not segments:
        return None

    # The last path segment is used as the file name, and so as the package name.
    return PackageURL(type="generic", name=segments[-1], qualifiers={"download_url": uri})
@purl_router.route(
    "https?://registry.npmjs.*/.*",
    "https?://registry.yarnpkg.com/.*",
    "https?://(www\\.)?npmjs.*/package.*",
    "https?://(www\\.)?yarnpkg.com/package.*",
)
def build_npm_purl(uri):
    """
    Return the result of the npm builder selected from markers found in `uri`:
    "/package/" selects the web builder, "/-/" the download builder, and
    anything else the API builder.
    """
    # npm URLs are difficult to disambiguate with regex, so plain substring
    # checks pick the builder instead.
    if "/package/" in uri:
        builder = build_npm_web_purl
    elif "/-/" in uri:
        builder = build_npm_download_purl
    else:
        builder = build_npm_api_purl
    return builder(uri)
def build_npm_api_purl(uri):
    """
    Return a PackageURL of type npm for a `uri` whose decoded path has exactly
    two non-empty segments, or None for any other number of segments.
    """
    decoded_path = unquote_plus(urlparse(uri).path)
    parts = list(filter(None, decoded_path.split("/")))
    if len(parts) != 2:
        return None

    first, second = parts
    if first.startswith("@"):
        # /@invisionag/eslint-config-ivx: namespace and name, no version
        return PackageURL("npm", first, second)

    # /angular/1.6.6: name and version, no namespace
    return PackageURL("npm", name=first, version=second)
def build_npm_download_purl(uri):
    """
    Return a PackageURL of type npm for an npm tarball download `uri`, or None
    when the decoded path (ignoring "-" segments) does not have two or three
    segments.

    The version is the file name without its extension and without the leading
    "{name}-". Only that leading prefix is removed, so a package name that also
    occurs inside the version (e.g. "rc-1.2.8-rc.1.tgz") leaves the version intact.
    """
    path = unquote_plus(urlparse(uri).path)
    segments = [seg for seg in path.split("/") if seg and seg != "-"]

    if len(segments) == 3:
        # /@invisionag/eslint-config-ivx/-/eslint-config-ivx-0.0.2.tgz
        namespace, name, filename = segments
    elif len(segments) == 2:
        # /automatta/-/automatta-0.0.1.tgz
        namespace = None
        name, filename = segments
    else:
        return None

    base_filename, _ext = os.path.splitext(filename)

    prefix = f"{name}-"
    if base_filename.startswith(prefix):
        version = base_filename[len(prefix):]
    else:
        # The file name does not start with "{name}-": keep the previous
        # best-effort behavior of dropping the name wherever it occurs.
        version = base_filename.replace(name, "")
        if version.startswith("-"):
            version = version[1:]  # Removes the "-" prefix

    return PackageURL("npm", namespace, name, version)
def build_npm_web_purl(uri):
    """
    Return a PackageURL of type npm for an npm web page `uri`, or None when the
    path after "/package/" has no segment, more than four segments, or a
    single segment starting with "@".
    """
    path = unquote_plus(urlparse(uri).path)
    web_prefix = "/package/"
    if path.startswith(web_prefix):
        path = path[len(web_prefix):]

    parts = [part for part in path.split("/") if part]
    count = len(parts)

    namespace = None
    version = None
    if count == 4:
        # @angular/cli/v/10.1.2
        namespace, name, _, version = parts
    elif count == 3:
        # express/v/4.17.1
        name, _, version = parts
    elif count == 2:
        # @angular/cli
        namespace, name = parts
    elif count == 1 and parts[0][0] != "@":
        # express
        name = parts[0]
    else:
        return None

    return PackageURL("npm", namespace, name, version)
@purl_router.route(
    "https?://repo1.maven.org/maven2/.*",
    "https?://central.maven.org/maven2/.*",
    "maven-index://repo1.maven.org/.*",
)
def build_maven_purl(uri):
    """
    Return a PackageURL of type maven for a Maven repository `uri`, or None
    when the path has fewer than three segments (ignoring "maven2") or when a
    trailing file name has no extension after "{name}-{version}".
    """
    decoded_path = unquote_plus(urlparse(uri).path)
    parts = [part for part in decoded_path.split("/") if part and part != "maven2"]
    if len(parts) < 3:
        return None

    # The last segment is treated as a file name when it contains the segment
    # that precedes it; it is then removed from the coordinate segments.
    filename = parts.pop() if parts[-2] in parts[-1] else None

    # Remaining layout: <namespace segments...>/<name>/<version>
    *group_parts, name, version = parts
    namespace = ".".join(group_parts)
    qualifiers = {}

    if filename:
        # What follows the last "{name}-{version}" in the file name is split at
        # the first "." into a classifier part and an extension.
        tail = filename.rpartition(f"{name}-{version}")[2]
        classifier, _, extension = tail.partition(".")
        if not extension:
            return None

        qualifiers["classifier"] = classifier.strip("-")
        if extension in ("aar", "ear", "mar", "pom", "rar", "rpm", "sar", "tar.gz", "war", "zip"):
            qualifiers["type"] = extension

    return PackageURL("maven", namespace, name, version, qualifiers)
# https://rubygems.org/gems/i18n-js-3.0.11.gem
@purl_router.route("https?://rubygems.org/(downloads|gems)/.*")
def build_rubygems_purl(uri):
    """
    Return a PackageURL of type rubygems for a rubygems.org download `uri`,
    or None when `uri` is not of the form ".../<name>-<version>.gem".

    Examples:
        https://rubygems.org/downloads/jwt-0.1.8.gem
        https://rubygems.org/gems/i18n-js-3.0.11.gem
    """
    # The route above is deliberately broader than the pattern below so that
    # every rubygems download URL lands here, including incomplete ones. This
    # keeps url2purl from building a generic PackageURL out of an invalid
    # rubygems download URL.
    return purl_from_pattern(
        "rubygems",
        r"^https?://rubygems.org/(downloads|gems)/(?P<name>.+)-(?P<version>.+)(\.gem)$",
        uri,
    )
# https://pypi.python.org/packages/source/a/anyjson/anyjson-0.3.3.tar.gz
# https://pypi.python.org/packages/2.6/t/threadpool/threadpool-1.2.7-py2.6.egg
# https://pypi.python.org/packages/any/s/setuptools/setuptools-0.6c11-1.src.rpm
# https://files.pythonhosted.org/packages/84/d8/451842a5496844bb5c7634b231a2e4caf0d867d2e25f09b840d3b07f3d4b/multi_key_dict-2.0.win32.exe
# Regex for a distribution file name: captures `name` and `version` and
# requires one of the listed archive extensions at the end.
pypi_pattern = r"(?P<name>(\w\.?)+(-\w+)*)-(?P<version>.+)\.(zip|tar.gz|tar.bz2|tgz|egg|rpm|exe)$"
# Regex for a wheel file name. Only its `name` and `version` groups are read by
# `build_pypi_purl`.
# This pattern can be found in the following locations:
# - wheel.wheelfile.WHEEL_INFO_RE
# - distlib.wheel.FILENAME_RE
# - setuptools.wheel.WHEEL_NAME
# - pip._internal.wheel.Wheel.wheel_file_re
wheel_file_re = re.compile(
    r"^(?P<namever>(?P<name>.+?)-(?P<version>.*?))"
    r"((-(?P<build>\d[^-]*?))?-(?P<pyver>.+?)-(?P<abi>.+?)-(?P<plat>.+?)"
    r"\.whl)$",
    re.VERBOSE,
)
@purl_router.route("https?://.+python.+org/packages/.*")
def build_pypi_purl(uri):
    """
    Return a PackageURL of type pypi from the file name at the end of `uri`.

    A ".whl" file name that matches `wheel_file_re` supplies the name and
    version directly. Any other file name is delegated to `purl_from_pattern`
    with `pypi_pattern`.
    """
    filename = unquote_plus(urlparse(uri).path).rpartition("/")[2]

    # /wheel-0.29.0-py2.py3-none-any.whl
    wheel_match = wheel_file_re.match(filename) if filename.endswith(".whl") else None
    if wheel_match:
        return PackageURL(
            "pypi",
            name=wheel_match.group("name"),
            version=wheel_match.group("version"),
        )

    return purl_from_pattern("pypi", pypi_pattern, filename)
# Regex for nuget.org package page and v2 API URLs; captures `name` and `version`.
# http://nuget.org/packages/EntityFramework/4.2.0.0
# https://www.nuget.org/api/v2/package/Newtonsoft.Json/11.0.1
nuget_www_pattern = r"^https?://.*nuget.org/(api/v2/)?packages?/(?P<name>.+)/(?P<version>.+)$"
register_pattern("nuget", nuget_www_pattern)
# Regex for api.nuget.org v3 flat-container download URLs; captures `name` and
# `version` from the two path segments that precede the file name.
# https://api.nuget.org/v3-flatcontainer/newtonsoft.json/10.0.1/newtonsoft.json.10.0.1.nupkg
nuget_api_pattern = (
    r"^https?://api.nuget.org/v3-flatcontainer/"
    r"(?P<name>.+)/"
    r"(?P<version>.+)/"
    r".*(nupkg)$"  # ends with "nupkg"
)
register_pattern("nuget", nuget_api_pattern)
@purl_router.route("https?://.*sourceforge.net/project/.*")
def build_sourceforge_purl(uri):
    """
    Return a PackageURL of type sourceforge for a sourceforge download `uri`.

    When `uri` fits the ".../project/<namespace>/<name>/<version>/<name>-<version>..."
    layout, namespace, name and version are taken from it. Otherwise the first
    path segment after "/project/" is used as the package name and `uri` is
    kept as the `download_url` qualifier. Return None when no project name can
    be found after "/project/".
    """
    # We use a more general route pattern instead of using `sourceforge_pattern`
    # below by itself because we want to capture all sourceforge download URLs,
    # even the ones that do not fit `sourceforge_pattern`. This helps prevent
    # url2purl from attempting to create a generic PackageURL from a sourceforge
    # URL that we can't handle.
    # http://master.dl.sourceforge.net/project/libpng/zlib/1.2.3/zlib-1.2.3.tar.bz2
    sourceforge_pattern = (
        r"^https?://.*sourceforge.net/project/"
        r"(?P<namespace>([^/]+))/"  # do not allow more "/" segments
        r"(?P<name>.+)/"
        r"(?P<version>[0-9\.]+)/"  # version restricted to digits and dots
        r"(?P=name)-(?P=version).*"  # {name}-{version} repeated in the filename
        r"[^/]$"  # not ending with "/"
    )

    sourceforge_purl = purl_from_pattern("sourceforge", sourceforge_pattern, uri)
    if sourceforge_purl:
        return sourceforge_purl

    # Get the project name from `uri` and use that as the Package name
    # http://master.dl.sourceforge.net/project/aloyscore/aloyscore/0.1a1%2520stable/0.1a1_stable_AloysCore.zip
    _, separator, remaining_uri_path = uri.partition("/project/")
    if not separator:
        return None

    # aloyscore/aloyscore/0.1a1%2520stable/0.1a1_stable_AloysCore.zip -> aloyscore
    project_name = remaining_uri_path.split("/")[0]
    if not project_name:
        # e.g. a URI ending in "/project/": there is no name to build a
        # PackageURL from, so give up instead of passing an empty name on.
        return None

    return PackageURL(type="sourceforge", name=project_name, qualifiers={"download_url": uri})
# Regex for crates.io API download URLs; captures `name` and `version`.
# https://crates.io/api/v1/crates/rand/0.7.2/download
cargo_pattern = r"^https?://crates.io/api/v1/crates/(?P<name>.+)/(?P<version>.+)(\/download)$"
register_pattern("cargo", cargo_pattern)
# Regex for raw.githubusercontent.com file URLs; the first three path segments
# are captured as `namespace`, `name` and `version`, and the rest as `subpath`.
# https://raw.githubusercontent.com/volatilityfoundation/dwarf2json/master/LICENSE.txt
github_raw_content_pattern = (
    r"https?://raw.githubusercontent.com/(?P<namespace>[^/]+)/(?P<name>[^/]+)/"
    r"(?P<version>[^/]+)/(?P<subpath>.*)$"
)
register_pattern("github", github_raw_content_pattern)
@purl_router.route("https?://api.github\\.com/repos/.*")
def build_github_api_purl(url):
    """
    Return a PackageURL of type github from a GitHub API `url`, or None when
    the path of `url` has fewer than three segments.

    For example:
    https://api.github.com/repos/nexB/scancode-toolkit/commits/40593af0df6c8378d2b180324b97cb439fa11d66
    https://api.github.com/repos/nexB/scancode-toolkit/
    """
    parts = get_path_segments(url)
    count = len(parts)
    if count < 3:
        return None

    # The first segment is skipped; namespace and name are the next two.
    namespace, name = parts[1], parts[2]

    version = None
    if count == 4 and parts[3] != "commits":
        # A single extra segment other than "commits" is used as the version.
        version = parts[3]
    elif count == 5 and parts[3] == "commits":
        # .../commits/40593af0df6c8378d2b180324b97cb439fa11d66
        version = parts[4]

    return PackageURL(type="github", namespace=namespace, name=name, version=version)
# Regex for codeload.github.com archive URLs; captures `namespace`, `name`,
# an optional "v"/"V" `version_prefix` and `version`. `purl_from_pattern`
# stores a non-empty `version_prefix` as a qualifier.
# https://codeload.github.com/nexB/scancode-toolkit/tar.gz/v3.1.1
# https://codeload.github.com/berngp/grails-rest/zip/release/0.7
github_codeload_pattern = (
    r"https?://codeload.github.com/(?P<namespace>.+)/(?P<name>.+)/"
    r"(zip|tar.gz|tar.bz2|tgz)/(.*/)*"
    r"(?P<version_prefix>v|V?)(?P<version>.+)$"
)
register_pattern("github", github_codeload_pattern)
@purl_router.route("https?://github\\.com/.*")
def build_github_purl(url):
    """
    Return a PackageURL of type github from a GitHub `url`, or None when no
    pattern applies and the path of `url` has fewer than two segments.

    The known URL layouts below are tried in a fixed order and the first one
    found in `url` is handed to `purl_from_pattern`. When none applies, the
    purl is assembled from the path segments of `url`.
    """
    # https://github.com/nexB/scancode-toolkit/archive/v3.1.1.zip
    archive_pattern = (
        r"https?://github.com/(?P<namespace>.+)/(?P<name>.+)"
        r"/archive/(.*/)*"
        r"((?P=name)(-|_|@))?"
        r"(?P<version_prefix>v|V?)(?P<version>.+).(zip|tar.gz|tar.bz2|.tgz)"
    )

    # https://github.com/downloads/mozilla/rhino/rhino1_7R4.zip
    download_pattern = (
        r"https?://github.com/downloads/(?P<namespace>.+)/(?P<name>.+)/"
        r"((?P=name)(-|@)?)?"
        r"(?P<version_prefix>v|V?)(?P<version>.+).(zip|tar.gz|tar.bz2|.tgz)"
    )

    # https://github.com/pypa/get-virtualenv/raw/20.0.31/public/virtualenv.pyz
    raw_pattern = (
        r"https?://github.com/(?P<namespace>.+)/(?P<name>.+)"
        r"/raw/(?P<version_prefix>v|V?)(?P<version>[^/]+)/(?P<subpath>.*)$"
    )

    # https://github.com/fanf2/unifdef/blob/master/unifdef.c
    blob_pattern = (
        r"https?://github.com/(?P<namespace>.+)/(?P<name>.+)"
        r"/blob/(?P<version>[^/]+)/(?P<subpath>.*)$"
    )

    releases_download_pattern = (
        r"https?://github.com/(?P<namespace>.+)/(?P<name>.+)"
        r"/releases/download/(?P<version_prefix>v|V?)(?P<version>[^/]+)/.*$"
    )

    # https://github.com/pombredanne/schematics.git
    git_pattern = r"https?://github.com/(?P<namespace>.+)/(?P<name>.+).(git)"

    # (pattern, keep the URL as a `download_url` qualifier), in lookup order.
    ordered_patterns = (
        (archive_pattern, False),
        (raw_pattern, False),
        (blob_pattern, False),
        (releases_download_pattern, True),
        (download_pattern, False),
        (git_pattern, False),
    )
    for pattern, keep_download_url in ordered_patterns:
        if re.search(pattern, url):
            qualifiers = {"download_url": url} if keep_download_url else {}
            return purl_from_pattern(
                type_="github", pattern=pattern, url=url, qualifiers=qualifiers
            )

    parts = get_path_segments(url)
    if len(parts) < 2:
        return None

    namespace, name, *rest = parts
    version = None
    subpath = None
    if rest and rest[0] != "tree":
        # https://github.com/TG1999/fetchcode/master
        version = rest[0]
        subpath = "/".join(rest[1:])
    elif len(rest) >= 2:
        # https://github.com/TG1999/fetchcode/tree/master
        version = rest[1]
        subpath = "/".join(rest[2:])

    return PackageURL(
        type="github",
        namespace=namespace,
        name=name,
        version=version,
        subpath=subpath,
    )
@purl_router.route("https?://bitbucket\\.org/.*")
def build_bitbucket_purl(url):
    """
    Return a PackageURL of type bitbucket from a BitBucket `url`, or None when
    the path of `url` has fewer than two segments.

    For example:
    https://bitbucket.org/TG1999/first_repo/src/master or
    https://bitbucket.org/TG1999/first_repo/src or
    https://bitbucket.org/TG1999/first_repo/src/master/new_folder
    """
    parts = get_path_segments(url)
    if len(parts) < 2:
        return None

    namespace, name, *rest = parts

    bitbucket_download_pattern = (
        r"https?://bitbucket.org/"
        r"(?P<namespace>.+)/(?P<name>.+)/downloads/"
        r"(?P<version>.+).(zip|tar.gz|tar.bz2|.tgz|exe|msi)"
    )
    if re.search(bitbucket_download_pattern, url):
        # Download URLs keep the full URL as a qualifier and carry no version.
        return PackageURL(
            type="bitbucket",
            namespace=namespace,
            name=name,
            qualifiers={"download_url": url},
        )

    version = None
    subpath = None
    if rest and rest[0] != "src":
        # https://bitbucket.org/TG1999/first_repo/new_folder/
        version = rest[0]
        subpath = "/".join(rest[1:])
    elif len(rest) >= 2:
        # https://bitbucket.org/TG1999/first_repo/src/master/new_folder/
        version = rest[1]
        subpath = "/".join(rest[2:])

    return PackageURL(
        type="bitbucket",
        namespace=namespace,
        name=name,
        version=version,
        subpath=subpath,
    )
@purl_router.route("https?://gitlab\\.com/.*")
def build_gitlab_purl(url):
    """
    Return a PackageURL of type gitlab from a Gitlab `url`, or None when the
    path of `url` has fewer than two segments.

    For example:
    https://gitlab.com/TG1999/firebase/-/tree/1a122122/views
    https://gitlab.com/TG1999/firebase/-/tree
    https://gitlab.com/TG1999/firebase/-/master
    https://gitlab.com/tg1999/Firebase/-/tree/master
    """
    parts = get_path_segments(url)
    if len(parts) < 2:
        return None

    namespace, name, *rest = parts
    version = None
    subpath = None
    if rest and rest[0] not in ("-", "tree"):
        # https://gitlab.com/TG1999/firebase/master
        version = rest[0]
        subpath = "/".join(rest[1:])
    elif len(rest) >= 3 and rest[0] == "-" and rest[1] == "tree":
        # https://gitlab.com/TG1999/firebase/-/tree/master
        version = rest[2]
        subpath = "/".join(rest[3:])

    return PackageURL(
        type="gitlab",
        namespace=namespace,
        name=name,
        version=version,
        subpath=subpath,
    )
# Regex for Hackage download URLs; captures `name` and `version` from the
# "<name>-<version>" directory and requires the same pair to start the file name.
# https://hackage.haskell.org/package/cli-extras-0.2.0.0/cli-extras-0.2.0.0.tar.gz
hackage_download_pattern = (
    r"^https?://hackage.haskell.org/package/"
    r"(?P<name>.+)-(?P<version>.+)/"
    r"(?P=name)-(?P=version).*"
    r"[^/]$"
)
register_pattern("hackage", hackage_download_pattern)
# Regex for Hackage project page URLs; captures `name` and `version` from the
# "<name>-<version>" directory.
# https://hackage.haskell.org/package/cli-extras-0.2.0.0/
hackage_project_pattern = r"^https?://hackage.haskell.org/package/(?P<name>.+)-(?P<version>[^/]+)/"
register_pattern("hackage", hackage_project_pattern)
@purl_router.route(
    "https?://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/.*"
)
def build_generic_google_code_archive_purl(uri):
    """
    Return a PackageURL of type generic, with the "code.google.com" namespace,
    for a Google Code archive download `uri`, or None when no project name
    follows the archive prefix.

    The first path segment after the archive prefix is used as the package
    name and `uri` is kept as the `download_url` qualifier. Both http and
    https URIs are handled, as the route accepts both.
    """
    # https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com
    # /android-notifier/android-notifier-desktop-0.5.1-1.i386.rpm
    # The marker leaves the scheme out so that "http://" URIs work too, and
    # partition() never raises, whether the marker is absent or repeated.
    marker = "://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/"
    _, _, remaining_uri = uri.partition(marker)

    # android-notifier/android-notifier-desktop-0.5.1-1.i386.rpm -> android-notifier
    name = remaining_uri.split("/")[0]
    if not name:
        return None

    return PackageURL(
        type="generic",
        namespace="code.google.com",
        name=name,
        qualifiers={"download_url": uri},
    )