Why Gemfury? Push, build, and install: RubyGems, npm packages, Python packages, Maven artifacts, PHP packages, Go Modules, Bower components, Debian packages, RPM packages, NuGet packages

aaronreidsmith / pandas   python

Repository URL to install this package:

Version: 0.25.3 

/ io / s3.py

""" s3 support for remote file interactivity """
from typing import IO, Any, Optional, Tuple
from urllib.parse import urlparse as parse_url

from pandas.compat._optional import import_optional_dependency

from pandas._typing import FilePathOrBuffer

# s3fs is an optional dependency. It is resolved once, when this module is
# imported, through pandas' optional-import helper; the ``extra`` text is
# handed to that helper (presumably for the error raised when s3fs is not
# installed -- confirm against import_optional_dependency).
s3fs = import_optional_dependency(
    "s3fs", extra="The s3fs package is required to handle s3 files."
)


def _strip_schema(url):
    """
    Return ``url`` with its leading scheme (e.g. ``s3://``) removed.

    The result is the parsed network location immediately followed by the
    path, so ``s3://bucket/key`` becomes ``bucket/key``. Fragment splitting
    is disabled, which keeps a ``#`` that appears in the path; a query
    string, if present, is not part of the result.
    """
    # The parse result unpacks as
    # (scheme, netloc, path, params, query, fragment); only the netloc and
    # the path are needed here.
    _scheme, bucket, key_path, *_rest = parse_url(url, allow_fragments=False)
    return bucket + key_path


def get_file_and_filesystem(
    filepath_or_buffer: FilePathOrBuffer, mode: Optional[str] = None
) -> Tuple[IO, Any]:
    """
    Open an S3 object and return it together with the filesystem used.

    A credentialed ``S3FileSystem`` is tried first; if opening fails with
    ``FileNotFoundError`` or ``NoCredentialsError`` the open is retried
    once on an anonymous filesystem.

    Parameters
    ----------
    filepath_or_buffer : FilePathOrBuffer
        S3 location; its scheme prefix is stripped before opening.
    mode : str, optional
        Mode passed to ``fs.open``. ``None`` is treated as ``"rb"``.

    Returns
    -------
    tuple
        ``(file, fs)``: the opened file object and the ``S3FileSystem``
        instance (credentialed or anonymous) that opened it.
    """
    # Imported lazily so botocore is only needed when this function runs.
    from botocore.exceptions import NoCredentialsError

    open_mode = "rb" if mode is None else mode

    fs = s3fs.S3FileSystem(anon=False)
    s3_path = _strip_schema(filepath_or_buffer)
    try:
        handle = fs.open(s3_path, open_mode)
    except (FileNotFoundError, NoCredentialsError):
        # boto3 has trouble accessing a public file while credentialed, so
        # fall back to an anonymous filesystem. Per the original note on
        # this code: FileNotFoundError (an OSError subclass) is what shows
        # up when credentials exist but are not valid for the bucket, and
        # NoCredentialsError when there are no credentials for it.
        fs = s3fs.S3FileSystem(anon=True)
        handle = fs.open(s3_path, open_mode)
    return handle, fs


def get_filepath_or_buffer(
    filepath_or_buffer: FilePathOrBuffer,
    encoding: Optional[str] = None,
    compression: Optional[str] = None,
    mode: Optional[str] = None,
) -> Tuple[IO, Optional[str], Optional[str], bool]:
    """
    Open an S3 location and return it as a 4-tuple.

    Parameters
    ----------
    filepath_or_buffer : FilePathOrBuffer
        S3 location to open.
    encoding : str, optional
        Accepted for signature compatibility; not used by this function.
    compression : str, optional
        Returned unchanged as the third element of the result.
    mode : str, optional
        Forwarded to ``get_file_and_filesystem``.

    Returns
    -------
    tuple
        ``(file, None, compression, True)``. The second slot is always
        ``None`` and the last is always ``True`` (presumably the encoding
        and a "caller should close" flag -- confirm against the callers).
    """
    # The filesystem object is not needed here, only the opened file.
    opened, _unused_fs = get_file_and_filesystem(filepath_or_buffer, mode=mode)
    result = (opened, None, compression, True)
    return result