Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
Size: Mime:
# -*- coding: utf-8 -*-
#
# Copyright (c) the purl authors
# SPDX-License-Identifier: MIT
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Visit https://github.com/package-url/packageurl-python for support and
# download.

import os
import re
from urllib.parse import unquote_plus
from urllib.parse import urlparse

from packageurl import PackageURL
from packageurl.contrib.route import NoRouteAvailable
from packageurl.contrib.route import Router

"""
This module helps build a PackageURL from an arbitrary URL.
This uses the routing mechanism available in the route.py module.

In order to make it easy to use, it contains all the conversion functions
in this single Python script.
"""


# Module-level router shared by this module: the `@purl_router.route(...)`
# decorators and `register_pattern(...)` calls below add their URL patterns to
# it, and `url2purl` asks it to process a URL.
purl_router = Router()


def url2purl(url):
    """
    Return a PackageURL inferred from the `url` string or None.

    The `url` is first handed to the module router. When no registered route
    accepts it, a generic PackageURL is attempted instead. An empty or None
    `url` yields None.
    """
    if not url:
        return None

    try:
        purl = purl_router.process(url)
    except NoRouteAvailable:
        # No registered route fits `url`: fall back to a generic PackageURL,
        # which may itself be None when `url` is not a parsable URL.
        purl = build_generic_purl(url)
    return purl


# Alias: `get_purl` is the same function object as `url2purl`.
get_purl = url2purl


def purl_from_pattern(type_, pattern, url, qualifiers=None):
    """
    Return a PackageURL of type `type_` built from the named groups captured
    when matching the `pattern` regex against `url`, or None if there is no
    match.

    `url` is percent/plus-decoded before matching and `pattern` is compiled
    with `re.VERBOSE` and anchored at the start of the string (`match`).
    Only the named groups that are PackageURL fields are used. A non-empty
    `version_prefix` group is stored as a `version_prefix` qualifier.

    `qualifiers` is an optional mapping of extra qualifiers to include in the
    PackageURL. It is never modified: a copy is made before adding to it.
    """
    url = unquote_plus(url)
    match = re.compile(pattern, re.VERBOSE).match(url)
    if not match:
        return

    captured = match.groupdict()
    purl_data = {
        field: value for field, value in captured.items() if field in PackageURL._fields
    }

    # Copy so that adding `version_prefix` below does not leak into the
    # mapping owned by the caller.
    qualifiers = dict(qualifiers) if qualifiers else {}

    # Include the `version_prefix` as a qualifier to infer valid URLs in purl2url
    version_prefix = captured.get("version_prefix")
    if version_prefix:
        qualifiers["version_prefix"] = version_prefix

    if qualifiers:
        if "qualifiers" in purl_data:
            # NOTE(review): a `qualifiers` named group would be captured as a
            # string (or None), which has no `update` method. No pattern in
            # this module defines such a group -- confirm before relying on it.
            purl_data["qualifiers"].update(qualifiers)
        else:
            purl_data["qualifiers"] = qualifiers

    return PackageURL(type_, **purl_data)


def register_pattern(type_, pattern, router=purl_router):
    """
    Add `pattern` to `router` with an endpoint that builds a PackageURL of
    type `type_` from any URL by applying that same `pattern`.
    """

    def endpoint(url):
        # `type_` and `pattern` are captured from the enclosing call.
        purl = purl_from_pattern(type_, pattern, url)
        return purl

    router.append(pattern, endpoint)


def get_path_segments(url):
    """
    Return the list of non-empty "/"-separated segments of the path of the
    `url` string. The path is percent/plus-decoded before being split.
    """
    decoded_path = unquote_plus(urlparse(url).path)
    return list(filter(None, decoded_path.split("/")))


def build_generic_purl(uri):
    """
    Return a "generic" PackageURL from `uri`, if `uri` is a parsable URL, or
    None.

    `uri` is assumed to be a download URL, e.g. https://example.com/example.tar.gz
    The last path segment becomes the name and `uri` itself is stored in the
    `download_url` qualifier.
    """
    parsed = urlparse(uri)
    if not (parsed.scheme and parsed.netloc and parsed.path):
        return None

    segments = get_path_segments(uri)
    if not segments:
        return None

    # The file name is the last path segment of `uri`
    return PackageURL(type="generic", name=segments[-1], qualifiers={"download_url": uri})


@purl_router.route(
    "https?://registry.npmjs.*/.*",
    "https?://registry.yarnpkg.com/.*",
    "https?://(www\\.)?npmjs.*/package.*",
    "https?://(www\\.)?yarnpkg.com/package.*",
)
def build_npm_purl(uri):
    """
    Return a PackageURL from an npm or yarn `uri` by dispatching to the web,
    download or API builder based on markers found in `uri`.
    """
    # npm URLs are difficult to disambiguate with regex, so plain substring
    # checks select the builder instead.
    if "/package/" in uri:
        builder = build_npm_web_purl
    elif "/-/" in uri:
        builder = build_npm_download_purl
    else:
        builder = build_npm_api_purl
    return builder(uri)


def build_npm_api_purl(uri):
    """
    Return an npm PackageURL from a registry API `uri` whose path has exactly
    two segments, or None otherwise.
    """
    decoded_path = unquote_plus(urlparse(uri).path)
    parts = [part for part in decoded_path.split("/") if part]
    if len(parts) != 2:
        return None

    first, second = parts

    # /@invisionag/eslint-config-ivx -> scoped package: namespace and name
    if first.startswith("@"):
        return PackageURL("npm", first, second)

    # /angular/1.6.6 -> name and version
    return PackageURL("npm", name=first, version=second)


def build_npm_download_purl(uri):
    """
    Return an npm PackageURL from a registry tarball download `uri`, or None
    when the path does not have the expected number of segments.

    The "-" path segment is ignored. The version is what remains of the file
    name once its extension and the leading package name are removed.
    """
    path = unquote_plus(urlparse(uri).path)
    segments = [seg for seg in path.split("/") if seg and seg != "-"]
    len_segments = len(segments)

    # /@invisionag/eslint-config-ivx/-/eslint-config-ivx-0.0.2.tgz
    if len_segments == 3:
        namespace, name, filename = segments

    # /automatta/-/automatta-0.0.1.tgz
    elif len_segments == 2:
        namespace = None
        name, filename = segments

    else:
        return

    base_filename, _ext = os.path.splitext(filename)

    if base_filename.startswith(name):
        # Strip the name only where it leads the file name. Removing every
        # occurrence would corrupt versions that contain the package name,
        # e.g. "a-1.0.0-alpha" for the package "a".
        version = base_filename[len(name):]
    else:
        # Unexpected file name: keep the historical behavior.
        version = base_filename.replace(name, "")

    if version.startswith("-"):
        version = version[1:]  # Removes the "-" prefix

    return PackageURL("npm", namespace, name, version)


def build_npm_web_purl(uri):
    """
    Return an npm PackageURL from a package web page `uri`, or None when the
    path does not have one of the supported shapes.

    A leading "/package/" is removed from the path before it is split.
    """
    web_prefix = "/package/"
    path = unquote_plus(urlparse(uri).path)
    if path.startswith(web_prefix):
        path = path[len(web_prefix):]

    parts = [part for part in path.split("/") if part]
    count = len(parts)
    namespace = None
    version = None

    if count == 4:
        # @angular/cli/v/10.1.2
        namespace, name, _, version = parts
    elif count == 3:
        # express/v/4.17.1
        name, _, version = parts
    elif count == 2:
        # @angular/cli
        namespace, name = parts
    elif count == 1 and not parts[0].startswith("@"):
        # express
        name = parts[0]
    else:
        return None

    return PackageURL("npm", namespace, name, version)


@purl_router.route(
    "https?://repo1.maven.org/maven2/.*",
    "https?://central.maven.org/maven2/.*",
    "maven-index://repo1.maven.org/.*",
)
def build_maven_purl(uri):
    """
    Return a maven PackageURL from a Maven repository `uri`, or None when the
    path has fewer than three usable segments or the trailing file name has
    no extension.

    The "maven2" path segment is ignored. The last two directory segments are
    the name and version; the ones before them are joined with "." to form
    the namespace.
    """
    decoded_path = unquote_plus(urlparse(uri).path)
    parts = [part for part in decoded_path.split("/") if part and part != "maven2"]

    if len(parts) < 3:
        return None

    # The last segment is treated as a file name when it contains the
    # segment that precedes it; it is then removed from the directory parts.
    filename = parts.pop() if parts[-2] in parts[-1] else None

    *namespace_parts, name, version = parts
    namespace = ".".join(namespace_parts)
    qualifiers = {}

    if filename:
        # What follows "{name}-{version}" is "[-classifier].extension"
        _, _, tail = filename.rpartition(f"{name}-{version}")
        classifier, _, extension = tail.partition(".")
        if not extension:
            return None

        qualifiers["classifier"] = classifier.strip("-")

        valid_types = ("aar", "ear", "mar", "pom", "rar", "rpm", "sar", "tar.gz", "war", "zip")
        if extension in valid_types:
            qualifiers["type"] = extension

    return PackageURL("maven", namespace, name, version, qualifiers)


# https://rubygems.org/gems/i18n-js-3.0.11.gem
@purl_router.route("https?://rubygems.org/(downloads|gems)/.*")
def build_rubygems_purl(uri):
    """
    Return a rubygems PackageURL from a `.gem` download `uri`, or None when
    `uri` is not a complete gem download URL.
    """
    # The route above is deliberately broader than the pattern below: every
    # rubygems download URL lands here, including incomplete ones, so that
    # url2purl does not fall back to a generic PackageURL for an invalid
    # rubygems download URL.

    # https://rubygems.org/downloads/jwt-0.1.8.gem
    # https://rubygems.org/gems/i18n-js-3.0.11.gem
    return purl_from_pattern(
        type_="rubygems",
        pattern=r"^https?://rubygems.org/(downloads|gems)/(?P<name>.+)-(?P<version>.+)(\.gem)$",
        url=uri,
    )


# https://pypi.python.org/packages/source/a/anyjson/anyjson-0.3.3.tar.gz
# https://pypi.python.org/packages/2.6/t/threadpool/threadpool-1.2.7-py2.6.egg
# https://pypi.python.org/packages/any/s/setuptools/setuptools-0.6c11-1.src.rpm
# https://files.pythonhosted.org/packages/84/d8/451842a5496844bb5c7634b231a2e4caf0d867d2e25f09b840d3b07f3d4b/multi_key_dict-2.0.win32.exe
#
# Matches a "{name}-{version}.{extension}" file name for the listed archive
# extensions. `build_pypi_purl` applies it to the last path segment of a URL
# (not to the whole URL) for every file that is not handled as a wheel.
pypi_pattern = r"(?P<name>(\w\.?)+(-\w+)*)-(?P<version>.+)\.(zip|tar.gz|tar.bz2|tgz|egg|rpm|exe)$"

# This pattern can be found in the following locations:
# - wheel.wheelfile.WHEEL_INFO_RE
# - distlib.wheel.FILENAME_RE
# - setuptools.wheel.WHEEL_NAME
# - pip._internal.wheel.Wheel.wheel_file_re
#
# Applied to a wheel file name; exposes the `name` and `version` groups used
# by `build_pypi_purl`. The pattern contains no whitespace or "#", so the
# `re.VERBOSE` flag does not change what it matches.
wheel_file_re = re.compile(
    r"^(?P<namever>(?P<name>.+?)-(?P<version>.*?))"
    r"((-(?P<build>\d[^-]*?))?-(?P<pyver>.+?)-(?P<abi>.+?)-(?P<plat>.+?)"
    r"\.whl)$",
    re.VERBOSE,
)


@purl_router.route("https?://.+python.+org/packages/.*")
def build_pypi_purl(uri):
    """
    Return a pypi PackageURL from a package file download `uri`, or None when
    the file name is not recognized.

    Only the last path segment (the file name) is inspected.
    """
    decoded_path = unquote_plus(urlparse(uri).path)
    _, _, filename = decoded_path.rpartition("/")

    # /wheel-0.29.0-py2.py3-none-any.whl
    wheel_match = wheel_file_re.match(filename) if filename.endswith(".whl") else None
    if wheel_match:
        return PackageURL(
            "pypi",
            name=wheel_match.group("name"),
            version=wheel_match.group("version"),
        )

    # Everything else, including a ".whl" name that is not a valid wheel file
    # name, goes through the generic archive pattern.
    return purl_from_pattern("pypi", pypi_pattern, filename)


# NuGet web and v2 API URLs: ".../package(s)/{name}/{version}"
# http://nuget.org/packages/EntityFramework/4.2.0.0
# https://www.nuget.org/api/v2/package/Newtonsoft.Json/11.0.1
nuget_www_pattern = r"^https?://.*nuget.org/(api/v2/)?packages?/(?P<name>.+)/(?P<version>.+)$"

register_pattern("nuget", nuget_www_pattern)


# NuGet v3 flat container URLs: ".../v3-flatcontainer/{name}/{version}/{file}.nupkg"
# https://api.nuget.org/v3-flatcontainer/newtonsoft.json/10.0.1/newtonsoft.json.10.0.1.nupkg
nuget_api_pattern = (
    r"^https?://api.nuget.org/v3-flatcontainer/"
    r"(?P<name>.+)/"
    r"(?P<version>.+)/"
    r".*(nupkg)$"  # ends with "nupkg"
)

register_pattern("nuget", nuget_api_pattern)


@purl_router.route("https?://.*sourceforge.net/project/.*")
def build_sourceforge_purl(uri):
    """
    Return a sourceforge PackageURL from a SourceForge project download `uri`,
    or None when no project name can be found in `uri`.

    A URL shaped as ".../project/{namespace}/{name}/{version}/{name}-{version}..."
    yields a PackageURL with namespace, name and version. Any other URL
    yields a PackageURL named after the first path segment that follows
    "/project/", with `uri` stored in the `download_url` qualifier.
    """
    # We use a more general route pattern instead of using `sourceforge_pattern`
    # below by itself because we want to capture all sourceforge download URLs,
    # even the ones that do not fit `sourceforge_pattern`. This helps prevent
    # url2purl from attempting to create a generic PackageURL from a sourceforge
    # URL that we can't handle.

    # http://master.dl.sourceforge.net/project/libpng/zlib/1.2.3/zlib-1.2.3.tar.bz2
    sourceforge_pattern = (
        r"^https?://.*sourceforge.net/project/"
        r"(?P<namespace>([^/]+))/"  # do not allow more "/" segments
        r"(?P<name>.+)/"
        r"(?P<version>[0-9\.]+)/"  # version restricted to digits and dots
        r"(?P=name)-(?P=version).*"  # {name}-{version} repeated in the filename
        r"[^/]$"  # not ending with "/"
    )

    sourceforge_purl = purl_from_pattern("sourceforge", sourceforge_pattern, uri)
    if sourceforge_purl:
        return sourceforge_purl

    # Get the project name from `uri` and use that as the Package name
    # http://master.dl.sourceforge.net/project/aloyscore/aloyscore/0.1a1%2520stable/0.1a1_stable_AloysCore.zip
    # -> aloyscore/aloyscore/0.1a1%2520stable/0.1a1_stable_AloysCore.zip
    _, separator, remaining_uri_path = uri.partition("/project/")
    if not separator:
        return None

    # aloyscore
    project_name = remaining_uri_path.split("/")[0]
    if not project_name:
        # e.g. a URL that ends right after "/project/". The name is a required
        # purl component, so there is nothing valid to build here.
        return None

    return PackageURL(type="sourceforge", name=project_name, qualifiers={"download_url": uri})


# crates.io download API URLs: ".../crates/{name}/{version}/download"
# https://crates.io/api/v1/crates/rand/0.7.2/download
cargo_pattern = r"^https?://crates.io/api/v1/crates/(?P<name>.+)/(?P<version>.+)(\/download)$"

register_pattern("cargo", cargo_pattern)


# Raw file URLs: ".../{namespace}/{name}/{version}/{subpath}" where the
# `version` group is the single path segment that follows the repository name
# ("master" in the example below).
# https://raw.githubusercontent.com/volatilityfoundation/dwarf2json/master/LICENSE.txt
github_raw_content_pattern = (
    r"https?://raw.githubusercontent.com/(?P<namespace>[^/]+)/(?P<name>[^/]+)/"
    r"(?P<version>[^/]+)/(?P<subpath>.*)$"
)

register_pattern("github", github_raw_content_pattern)


@purl_router.route("https?://api.github\\.com/repos/.*")
def build_github_api_purl(url):
    """
    Return a github PackageURL from a GitHub API `url`, or None when the path
    has fewer than three segments.

    For example:
    https://api.github.com/repos/nexB/scancode-toolkit/
    https://api.github.com/repos/nexB/scancode-toolkit/commits/40593af0df6c8378d2b180324b97cb439fa11d66
    """
    parts = get_path_segments(url)
    if len(parts) < 3:
        return None

    # The first segment ("repos" in the examples) is not used.
    _, namespace, name, *extra = parts
    version = None

    if len(extra) == 1 and extra[0] != "commits":
        # A single trailing segment other than "commits" is the version
        version = extra[0]
    elif len(extra) == 2 and extra[0] == "commits":
        # .../commits/40593af0df6c8378d2b180324b97cb439fa11d66
        version = extra[1]

    return PackageURL(type="github", namespace=namespace, name=name, version=version)


# Codeload archive URLs: ".../{namespace}/{name}/{archive type}/[.../]{version}"
# An optional leading "v" or "V" of the last segment is captured as
# `version_prefix`, which `purl_from_pattern` stores as a qualifier.
# https://codeload.github.com/nexB/scancode-toolkit/tar.gz/v3.1.1
# https://codeload.github.com/berngp/grails-rest/zip/release/0.7
github_codeload_pattern = (
    r"https?://codeload.github.com/(?P<namespace>.+)/(?P<name>.+)/"
    r"(zip|tar.gz|tar.bz2|tgz)/(.*/)*"
    r"(?P<version_prefix>v|V?)(?P<version>.+)$"
)

register_pattern("github", github_codeload_pattern)


@purl_router.route("https?://github\\.com/.*")
def build_github_purl(url):
    """
    Return a github PackageURL from a GitHub `url`, or None.

    Known URL shapes (archive, raw, blob, release download, legacy download
    and ".git") are tried in that order. Any other URL is split into path
    segments read as namespace, name, then an optional version and subpath.
    """

    # https://github.com/nexB/scancode-toolkit/archive/v3.1.1.zip
    archive_pattern = (
        r"https?://github.com/(?P<namespace>.+)/(?P<name>.+)"
        r"/archive/(.*/)*"
        r"((?P=name)(-|_|@))?"
        r"(?P<version_prefix>v|V?)(?P<version>.+).(zip|tar.gz|tar.bz2|.tgz)"
    )

    # https://github.com/downloads/mozilla/rhino/rhino1_7R4.zip
    download_pattern = (
        r"https?://github.com/downloads/(?P<namespace>.+)/(?P<name>.+)/"
        r"((?P=name)(-|@)?)?"
        r"(?P<version_prefix>v|V?)(?P<version>.+).(zip|tar.gz|tar.bz2|.tgz)"
    )

    # https://github.com/pypa/get-virtualenv/raw/20.0.31/public/virtualenv.pyz
    raw_pattern = (
        r"https?://github.com/(?P<namespace>.+)/(?P<name>.+)"
        r"/raw/(?P<version_prefix>v|V?)(?P<version>[^/]+)/(?P<subpath>.*)$"
    )

    # https://github.com/fanf2/unifdef/blob/master/unifdef.c
    blob_pattern = (
        r"https?://github.com/(?P<namespace>.+)/(?P<name>.+)"
        r"/blob/(?P<version>[^/]+)/(?P<subpath>.*)$"
    )

    # Release asset URLs: the original URL is kept as `download_url` below
    releases_download_pattern = (
        r"https?://github.com/(?P<namespace>.+)/(?P<name>.+)"
        r"/releases/download/(?P<version_prefix>v|V?)(?P<version>[^/]+)/.*$"
    )

    # https://github.com/pombredanne/schematics.git
    git_pattern = r"https?://github.com/(?P<namespace>.+)/(?P<name>.+).(git)"

    # Order matters: the first pattern found in `url` wins.
    candidates = (
        archive_pattern,
        raw_pattern,
        blob_pattern,
        releases_download_pattern,
        download_pattern,
        git_pattern,
    )

    for candidate in candidates:
        # `re.search` only selects the pattern; the PackageURL itself is
        # built by `purl_from_pattern`, which applies the pattern again.
        if not re.search(candidate, url):
            continue
        if candidate == releases_download_pattern:
            extra_qualifiers = {"download_url": url}
        else:
            extra_qualifiers = {}
        return purl_from_pattern(
            type_="github", pattern=candidate, url=url, qualifiers=extra_qualifiers
        )

    parts = get_path_segments(url)
    if len(parts) < 2:
        return None

    namespace, name = parts[:2]
    version = None
    subpath = None

    if len(parts) >= 3 and parts[2] != "tree":
        # https://github.com/TG1999/fetchcode/master
        version = parts[2]
        subpath = "/".join(parts[3:])
    elif len(parts) >= 4 and parts[2] == "tree":
        # https://github.com/TG1999/fetchcode/tree/master
        version = parts[3]
        subpath = "/".join(parts[4:])

    return PackageURL(
        type="github",
        namespace=namespace,
        name=name,
        version=version,
        subpath=subpath,
    )


@purl_router.route("https?://bitbucket\\.org/.*")
def build_bitbucket_purl(url):
    """
    Return a bitbucket PackageURL from a BitBucket `url`, or None when the
    path has fewer than two segments.

    For example:
    https://bitbucket.org/TG1999/first_repo/src/master or
    https://bitbucket.org/TG1999/first_repo/src or
    https://bitbucket.org/TG1999/first_repo/src/master/new_folder
    """
    parts = get_path_segments(url)
    if len(parts) < 2:
        return None

    namespace, name = parts[:2]

    bitbucket_download_pattern = (
        r"https?://bitbucket.org/"
        r"(?P<namespace>.+)/(?P<name>.+)/downloads/"
        r"(?P<version>.+).(zip|tar.gz|tar.bz2|.tgz|exe|msi)"
    )

    # Download URLs carry no version in the result: the pattern is only used
    # to detect them and the URL is kept in the `download_url` qualifier.
    if re.search(bitbucket_download_pattern, url):
        return PackageURL(
            type="bitbucket",
            namespace=namespace,
            name=name,
            qualifiers={"download_url": url},
        )

    version = None
    subpath = None

    if len(parts) >= 3 and parts[2] != "src":
        # https://bitbucket.org/TG1999/first_repo/new_folder/
        version = parts[2]
        subpath = "/".join(parts[3:])
    elif len(parts) >= 4 and parts[2] == "src":
        # https://bitbucket.org/TG1999/first_repo/src/master/new_folder/
        version = parts[3]
        subpath = "/".join(parts[4:])

    return PackageURL(
        type="bitbucket",
        namespace=namespace,
        name=name,
        version=version,
        subpath=subpath,
    )


@purl_router.route("https?://gitlab\\.com/.*")
def build_gitlab_purl(url):
    """
    Return a gitlab PackageURL from a Gitlab `url`, or None when the path has
    fewer than two segments.

    For example:
    https://gitlab.com/TG1999/firebase/-/tree/1a122122/views
    https://gitlab.com/TG1999/firebase/-/tree
    https://gitlab.com/TG1999/firebase/-/master
    https://gitlab.com/tg1999/Firebase/-/tree/master
    """
    parts = get_path_segments(url)
    if len(parts) < 2:
        return None

    namespace, name = parts[:2]
    version = None
    subpath = None

    if len(parts) >= 3 and parts[2] not in ("-", "tree"):
        # https://gitlab.com/TG1999/firebase/master
        version = parts[2]
        subpath = "/".join(parts[3:])
    elif len(parts) >= 5 and parts[2] == "-" and parts[3] == "tree":
        # https://gitlab.com/TG1999/firebase/-/tree/master
        version = parts[4]
        subpath = "/".join(parts[5:])

    return PackageURL(
        type="gitlab",
        namespace=namespace,
        name=name,
        version=version,
        subpath=subpath,
    )


# Hackage download URLs: ".../package/{name}-{version}/{name}-{version}..."
# The file name must repeat the same name and version as the directory and
# the URL must not end with "/".
# https://hackage.haskell.org/package/cli-extras-0.2.0.0/cli-extras-0.2.0.0.tar.gz
hackage_download_pattern = (
    r"^https?://hackage.haskell.org/package/"
    r"(?P<name>.+)-(?P<version>.+)/"
    r"(?P=name)-(?P=version).*"
    r"[^/]$"
)

register_pattern("hackage", hackage_download_pattern)


# Hackage project page URLs: ".../package/{name}-{version}/"
# https://hackage.haskell.org/package/cli-extras-0.2.0.0/
hackage_project_pattern = r"^https?://hackage.haskell.org/package/(?P<name>.+)-(?P<version>[^/]+)/"

register_pattern("hackage", hackage_project_pattern)


@purl_router.route(
    "https?://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/.*"
)
def build_generic_google_code_archive_purl(uri):
    """
    Return a generic PackageURL in the "code.google.com" namespace from a
    Google Code archive download `uri`, or None when no project name follows
    the archive prefix.

    The name is the first path segment after the archive prefix and `uri` is
    stored in the `download_url` qualifier.
    """
    # https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com
    # /android-notifier/android-notifier-desktop-0.5.1-1.i386.rpm
    #
    # The marker deliberately leaves out the scheme: the route accepts both
    # "http" and "https" URLs.
    archive_marker = "://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/"
    _, separator, remaining_uri = uri.partition(archive_marker)
    if not separator:
        return None

    # android-notifier/android-notifier-desktop-0.5.1-1.i386.rpm -> android-notifier
    name = remaining_uri.split("/")[0]
    if not name:
        # The name is a required purl component
        return None

    return PackageURL(
        type="generic",
        namespace="code.google.com",
        name=name,
        qualifiers={"download_url": uri},
    )