// Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages
//
// Repository URL to install this package:
//
// Details
// pyarrow / include / arrow / filesystem / filesystem.h
// Size: Mime:
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <chrono>
#include <cstdint>
#include <functional>
#include <iosfwd>
#include <memory>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "arrow/filesystem/type_fwd.h"
#include "arrow/io/interfaces.h"
#include "arrow/type_fwd.h"
#include "arrow/util/compare.h"
#include "arrow/util/macros.h"
#include "arrow/util/type_fwd.h"
#include "arrow/util/visibility.h"
#include "arrow/util/windows_fixup.h"

namespace arrow {
namespace fs {

using arrow::util::Uri;

/// A system clock time point expressed as a 64-bit (or more) number of
/// nanoseconds since the epoch.
///
/// Used for file modification times (see FileInfo::mtime()).
using TimePoint =
    std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>;

/// Convert a FileType to a std::string.
///
/// Defined out of line; the exact text produced is not visible in this header.
ARROW_EXPORT std::string ToString(FileType);

/// Stream insertion operator for FileType (defined out of line).
ARROW_EXPORT std::ostream& operator<<(std::ostream& os, FileType);

/// Sentinel used as the default FileInfo size, i.e. "size not available".
///
/// Declared `inline constexpr` so that every translation unit including this
/// header refers to the same object (a `static const` would create one copy
/// per translation unit).
inline constexpr int64_t kNoSize = -1;
/// Sentinel used as the default FileInfo modification time, i.e. "mtime not
/// available".  It is a time point whose duration since the epoch is -1 ns.
inline constexpr TimePoint kNoTime = TimePoint(TimePoint::duration(-1));

/// \brief FileSystem entry info
struct ARROW_EXPORT FileInfo : public util::EqualityComparable<FileInfo> {
  // Value type: default-constructible, copyable and movable.
  FileInfo() = default;
  FileInfo(FileInfo&&) = default;
  FileInfo& operator=(FileInfo&&) = default;
  FileInfo(const FileInfo&) = default;
  FileInfo& operator=(const FileInfo&) = default;

  /// Build an info for `path` with the given type.  Size and mtime keep
  /// their "not available" defaults (kNoSize / kNoTime).
  explicit FileInfo(std::string path, FileType type = FileType::Unknown)
      : path_(std::move(path)), type_(type) {}

  /// The file type
  FileType type() const { return type_; }
  void set_type(FileType type) { type_ = type; }

  /// The full file path in the filesystem
  const std::string& path() const { return path_; }
  void set_path(std::string path) { path_ = std::move(path); }

  /// The file base name (component after the last directory separator)
  std::string base_name() const;

  /// The directory base name (component before the file base name).
  std::string dir_name() const;

  /// The size in bytes, if available
  ///
  /// Only regular files are guaranteed to have a size.
  int64_t size() const { return size_; }
  void set_size(int64_t size) { size_ = size; }

  /// The file extension (excluding the dot)
  std::string extension() const;

  /// The time of last modification, if available
  TimePoint mtime() const { return mtime_; }
  void set_mtime(TimePoint mtime) { mtime_ = mtime; }

  /// Whether this entry's type is FileType::File.
  bool IsFile() const { return FileType::File == type_; }
  /// Whether this entry's type is FileType::Directory.
  bool IsDirectory() const { return FileType::Directory == type_; }

  /// Field-wise equality: type, path, size and mtime must all match.
  bool Equals(const FileInfo& other) const {
    if (type_ != other.type_) return false;
    if (path_ != other.path_) return false;
    if (size_ != other.size_) return false;
    return mtime_ == other.mtime_;
  }

  std::string ToString() const;

  /// Function object implementing less-than comparison and hashing by
  /// path, to support sorting infos, using them as keys, and other
  /// interactions with the STL.
  struct ByPath {
    /// Strict weak ordering: lexicographic comparison of the two paths.
    bool operator()(const FileInfo& l, const FileInfo& r) const {
      return l.path().compare(r.path()) < 0;
    }

    /// Hash of the path string only; other fields are ignored.
    size_t operator()(const FileInfo& i) const {
      std::hash<std::string> path_hasher;
      return path_hasher(i.path());
    }
  };

 protected:
  std::string path_;
  FileType type_ = FileType::Unknown;
  int64_t size_ = kNoSize;
  TimePoint mtime_ = kNoTime;
};

/// Stream insertion operator for FileInfo (defined out of line).
ARROW_EXPORT std::ostream& operator<<(std::ostream& os, const FileInfo&);

/// \brief File selector for filesystem APIs
struct ARROW_EXPORT FileSelector {
  /// The directory in which to select files.
  /// If the path exists but doesn't point to a directory, this should be an error.
  std::string base_dir;
  /// The behavior if `base_dir` isn't found in the filesystem.  If false,
  /// an error is returned.  If true, an empty selection is returned.
  bool allow_not_found = false;
  /// Whether to recurse into subdirectories.
  bool recursive = false;
  /// The maximum number of subdirectories to recurse into.
  int32_t max_recursion = INT32_MAX;

  // Deliberately user-provided (not `= default`) so the struct keeps its
  // non-aggregate status; all defaults come from the member initializers above.
  FileSelector() {}
};

/// \brief FileSystem, path pair
struct ARROW_EXPORT FileLocator {
  /// The filesystem half of the pair (shared ownership).
  std::shared_ptr<FileSystem> filesystem;
  /// The path half of the pair; presumably interpreted relative to
  /// `filesystem` -- confirm against callers such as CopyFiles.
  std::string path;
};

/// A batch of FileInfo entries.
using FileInfoVector = std::vector<FileInfo>;
/// A callable that, each time it is invoked, returns a Future for the next
/// batch of FileInfo entries (see FileSystem::GetFileInfoGenerator).
using FileInfoGenerator = std::function<Future<FileInfoVector>()>;

}  // namespace fs

/// Iteration traits for batches of FileInfo: an empty vector is the
/// end-of-iteration marker.
template <>
struct IterationTraits<fs::FileInfoVector> {
  /// The end marker is an empty vector.
  static fs::FileInfoVector End() { return fs::FileInfoVector(); }
  /// A batch is the end marker exactly when it holds no elements.
  static bool IsEnd(const fs::FileInfoVector& val) { return val.size() == 0; }
};

namespace fs {

/// \brief Abstract file system API
class ARROW_EXPORT FileSystem
    /// \cond false
    : public std::enable_shared_from_this<FileSystem>
/// \endcond
{  // NOLINT
 public:
  virtual ~FileSystem();

  /// A short name identifying the concrete filesystem implementation
  /// (for example the subclasses in this header return "subtree" and "slow").
  virtual std::string type_name() const = 0;

  /// EXPERIMENTAL: The IOContext associated with this filesystem.
  const io::IOContext& io_context() const { return io_context_; }

  /// Normalize path for the given filesystem
  ///
  /// The default implementation of this method is a no-op, but subclasses
  /// may allow normalizing irregular path forms (such as Windows local paths).
  virtual Result<std::string> NormalizePath(std::string path);

  /// \brief Ensure a URI (or path) is compatible with the given filesystem and return the
  ///        path
  ///
  /// \param uri_string A URI representing a resource in the given filesystem.
  ///
  /// This method will check to ensure the given filesystem is compatible with the
  /// URI. This can be useful when the user provides both a URI and a filesystem or
  /// when a user provides multiple URIs that should be compatible with the same
  /// filesystem.
  ///
  /// uri_string can be an absolute path instead of a URI.  In that case it will ensure
  /// the filesystem (if supplied) is the local filesystem (or some custom filesystem that
  /// is capable of reading local paths) and will normalize the path's file separators.
  ///
  /// Note, this method only checks to ensure the URI scheme is valid.  It will not detect
  /// inconsistencies like a mismatching region or endpoint override.
  ///
  /// \return The path inside the filesystem that is indicated by the URI.
  virtual Result<std::string> PathFromUri(const std::string& uri_string) const;

  /// \brief Make a URI from which FileSystemFromUri produces an equivalent filesystem
  /// \param path The path component to use in the resulting URI. Must be absolute.
  /// \return A URI string, or an error if an equivalent URI cannot be produced
  virtual Result<std::string> MakeUri(std::string path) const;

  /// Whether this filesystem is equivalent to `other` (implementation-defined).
  virtual bool Equals(const FileSystem& other) const = 0;

  /// Convenience overload taking a shared pointer.
  ///
  /// A null pointer never compares equal; it is checked before being
  /// dereferenced so that passing an empty shared_ptr is not undefined behavior.
  virtual bool Equals(const std::shared_ptr<FileSystem>& other) const {
    if (!other) {
      return false;
    }
    return Equals(*other);
  }

  /// Get info for the given target.
  ///
  /// Any symlink is automatically dereferenced, recursively.
  /// A nonexistent or unreachable file returns an Ok status and
  /// has a FileType of value NotFound.  An error status indicates
  /// a truly exceptional condition (low-level I/O error, etc.).
  virtual Result<FileInfo> GetFileInfo(const std::string& path) = 0;
  /// Same, for many targets at once.
  virtual Result<FileInfoVector> GetFileInfo(const std::vector<std::string>& paths);
  /// Same, according to a selector.
  ///
  /// The selector's base directory will not be part of the results, even if
  /// it exists.
  /// If it doesn't exist, see `FileSelector::allow_not_found`.
  virtual Result<FileInfoVector> GetFileInfo(const FileSelector& select) = 0;

  /// Async version of GetFileInfo
  virtual Future<FileInfoVector> GetFileInfoAsync(const std::vector<std::string>& paths);

  /// Streaming async version of GetFileInfo
  ///
  /// The returned generator is not async-reentrant, i.e. you need to wait for
  /// the returned future to complete before calling the generator again.
  virtual FileInfoGenerator GetFileInfoGenerator(const FileSelector& select);

  /// Create a directory and subdirectories.
  ///
  /// This function succeeds if the directory already exists.
  virtual Status CreateDir(const std::string& path, bool recursive) = 0;
  /// Same as above with `recursive` set to true.
  Status CreateDir(const std::string& path) { return CreateDir(path, true); }

  /// Delete a directory and its contents, recursively.
  virtual Status DeleteDir(const std::string& path) = 0;

  /// Delete a directory's contents, recursively.
  ///
  /// Like DeleteDir, but doesn't delete the directory itself.
  /// Passing an empty path ("" or "/") is disallowed, see DeleteRootDirContents.
  virtual Status DeleteDirContents(const std::string& path, bool missing_dir_ok) = 0;
  /// Same as above with `missing_dir_ok` set to false.
  Status DeleteDirContents(const std::string& path) {
    return DeleteDirContents(path, false);
  }

  /// Async version of DeleteDirContents.
  virtual Future<> DeleteDirContentsAsync(const std::string& path, bool missing_dir_ok);

  /// Async version of DeleteDirContents.
  ///
  /// This overload allows missing directories.
  // NOTE(review): the synchronous single-argument DeleteDirContents above
  // passes missing_dir_ok=false; confirm against the implementation that this
  // async overload really behaves differently, otherwise fix the sentence above.
  Future<> DeleteDirContentsAsync(const std::string& path);

  /// EXPERIMENTAL: Delete the root directory's contents, recursively.
  ///
  /// Implementations may decide to raise an error if this operation is
  /// too dangerous.
  // NOTE: may decide to remove this if it's deemed not useful
  virtual Status DeleteRootDirContents() = 0;

  /// Delete a file.
  virtual Status DeleteFile(const std::string& path) = 0;
  /// Delete many files.
  ///
  /// The default implementation issues individual delete operations in sequence.
  virtual Status DeleteFiles(const std::vector<std::string>& paths);

  /// Move / rename a file or directory.
  ///
  /// If the destination exists:
  /// - if it is a non-empty directory, an error is returned
  /// - otherwise, if it has the same type as the source, it is replaced
  /// - otherwise, behavior is unspecified (implementation-dependent).
  virtual Status Move(const std::string& src, const std::string& dest) = 0;

  /// Copy a file.
  ///
  /// If the destination exists and is a directory, an error is returned.
  /// Otherwise, it is replaced.
  virtual Status CopyFile(const std::string& src, const std::string& dest) = 0;

  /// Open an input stream for sequential reading.
  virtual Result<std::shared_ptr<io::InputStream>> OpenInputStream(
      const std::string& path) = 0;

  /// Open an input stream for sequential reading.
  ///
  /// This override assumes the given FileInfo validly represents the file's
  /// characteristics, and may optimize access depending on them (for example
  /// avoid querying the file size or its existence).
  virtual Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info);

  /// Open an input file for random access reading.
  virtual Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
      const std::string& path) = 0;

  /// Open an input file for random access reading.
  ///
  /// This override assumes the given FileInfo validly represents the file's
  /// characteristics, and may optimize access depending on them (for example
  /// avoid querying the file size or its existence).
  virtual Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
      const FileInfo& info);

  /// Async version of OpenInputStream
  virtual Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
      const std::string& path);

  /// Async version of OpenInputStream
  virtual Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
      const FileInfo& info);

  /// Async version of OpenInputFile
  virtual Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
      const std::string& path);

  /// Async version of OpenInputFile
  virtual Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
      const FileInfo& info);

  /// Open an output stream for sequential writing.
  ///
  /// If the target already exists, existing data is truncated.
  virtual Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
      const std::string& path,
      const std::shared_ptr<const KeyValueMetadata>& metadata) = 0;
  /// Overload without explicit metadata (defined out of line).
  Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(const std::string& path);

  /// Open an output stream for appending.
  ///
  /// If the target doesn't exist, a new empty file is created.
  ///
  /// Note: some filesystem implementations do not support efficient appending
  /// to an existing file, in which case this method will return NotImplemented.
  /// Consider writing to multiple files (using e.g. the dataset layer) instead.
  virtual Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
      const std::string& path,
      const std::shared_ptr<const KeyValueMetadata>& metadata) = 0;
  /// Overload without explicit metadata (defined out of line).
  Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(const std::string& path);

 protected:
  /// Only subclasses may construct a FileSystem; the IOContext is stored as-is.
  explicit FileSystem(io::IOContext io_context = io::default_io_context())
      : io_context_(std::move(io_context)) {}

  io::IOContext io_context_;
  // Whether metadata operations (such as GetFileInfo or OpenInputStream)
  // are cheap enough that the default async variants don't bother with
  // a thread pool.
  bool default_async_is_sync_ = true;
};

/// \brief A factory function for filesystems plus the source location at
/// which it was registered.
///
/// This is an aggregate: ARROW_REGISTER_FILESYSTEM initializes it as
/// `{factory_function, __FILE__, __LINE__}`.
struct FileSystemFactory {
  /// Produces a FileSystem for a Uri, given an IOContext and an output
  /// location for the path.
  std::function<Result<std::shared_ptr<FileSystem>>(
      const Uri& uri, const io::IOContext& io_context, std::string* out_path)>
      function;
  /// Source file of the registrar definition (non-owning view; the macro
  /// passes the __FILE__ string literal).
  std::string_view file;
  /// Source line of the registrar definition.
  int line;

  /// Two factories are considered equal when they were defined at the same
  /// file and line; the `function` member itself is not compared.
  bool operator==(const FileSystemFactory& other) const {
    // In the case where libarrow is linked statically both to the executable and to a
    // dynamically loaded filesystem implementation library, the library contains a
    // duplicate definition of the registry and duplicate definitions of any
    // FileSystemRegistrars which are statically linked to libarrow. When retrieving
    // factories from the filesystem implementation library, we use the file and line
    // of the registrar's definition to determine equivalence of the duplicate factories.
    return file == other.file && line == other.line;
  }

  /// Negation of operator==, provided explicitly because C++17 does not
  /// synthesize it.
  bool operator!=(const FileSystemFactory& other) const { return !(*this == other); }
};

/// \brief A FileSystem implementation that delegates to another
/// implementation after prepending a fixed base path.
///
/// This is useful to expose a logical view of a subtree of a filesystem,
/// for example a directory in a LocalFileSystem.
/// This works on abstract paths, i.e. paths using forward slashes and
/// a single root "/".  Windows paths are not guaranteed to work.
/// This makes no security guarantee.  For example, symlinks may allow to
/// "escape" the subtree and access other parts of the underlying filesystem.
class ARROW_EXPORT SubTreeFileSystem : public FileSystem {
 public:
  // This constructor may abort if base_path is invalid.
  /// \param base_path the fixed path prepended to every path before delegating
  /// \param base_fs the filesystem that operations are delegated to
  explicit SubTreeFileSystem(const std::string& base_path,
                             std::shared_ptr<FileSystem> base_fs);
  ~SubTreeFileSystem() override;

  std::string type_name() const override { return "subtree"; }
  /// The stored base path (returned by value, i.e. as a copy).
  std::string base_path() const { return base_path_; }
  /// The underlying filesystem.
  std::shared_ptr<FileSystem> base_fs() const { return base_fs_; }

  Result<std::string> NormalizePath(std::string path) override;
  Result<std::string> PathFromUri(const std::string& uri_string) const override;

  bool Equals(const FileSystem& other) const override;

  // Re-expose the base-class overloads that would otherwise be hidden by the
  // overrides declared below.
  /// \cond FALSE
  using FileSystem::CreateDir;
  using FileSystem::DeleteDirContents;
  using FileSystem::GetFileInfo;
  using FileSystem::OpenAppendStream;
  using FileSystem::OpenOutputStream;
  /// \endcond

  Result<FileInfo> GetFileInfo(const std::string& path) override;
  Result<FileInfoVector> GetFileInfo(const FileSelector& select) override;

  FileInfoGenerator GetFileInfoGenerator(const FileSelector& select) override;

  Status CreateDir(const std::string& path, bool recursive) override;

  Status DeleteDir(const std::string& path) override;
  Status DeleteDirContents(const std::string& path, bool missing_dir_ok) override;
  Status DeleteRootDirContents() override;

  Status DeleteFile(const std::string& path) override;

  Status Move(const std::string& src, const std::string& dest) override;

  Status CopyFile(const std::string& src, const std::string& dest) override;

  Result<std::shared_ptr<io::InputStream>> OpenInputStream(
      const std::string& path) override;
  Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info) override;
  Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
      const std::string& path) override;
  Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
      const FileInfo& info) override;

  Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
      const std::string& path) override;
  Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
      const FileInfo& info) override;
  Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
      const std::string& path) override;
  Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
      const FileInfo& info) override;

  Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
      const std::string& path,
      const std::shared_ptr<const KeyValueMetadata>& metadata) override;
  Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
      const std::string& path,
      const std::shared_ptr<const KeyValueMetadata>& metadata) override;

 protected:
  SubTreeFileSystem() = default;

  // The base path is immutable after construction (const member).
  const std::string base_path_;
  // The filesystem all operations are delegated to.
  std::shared_ptr<FileSystem> base_fs_;

  // Path-translation helpers, defined out of line.
  // NOTE(review): judging by the names, PrependBase/PrependBaseNonEmpty map a
  // subtree-relative path to a path in base_fs_, StripBase does the reverse,
  // and FixInfo rewrites the path of a FileInfo returned by base_fs_ --
  // confirm against the implementation.
  Result<std::string> PrependBase(const std::string& s) const;
  Result<std::string> PrependBaseNonEmpty(const std::string& s) const;
  Result<std::string> StripBase(const std::string& s) const;
  Status FixInfo(FileInfo* info) const;

  static Result<std::string> NormalizeBasePath(
      std::string base_path, const std::shared_ptr<FileSystem>& base_fs);
};

/// \brief A FileSystem implementation that delegates to another
/// implementation but inserts latencies at various points.
class ARROW_EXPORT SlowFileSystem : public FileSystem {
 public:
  /// Wrap `base_fs`, taking latencies from the given generator.
  SlowFileSystem(std::shared_ptr<FileSystem> base_fs,
                 std::shared_ptr<io::LatencyGenerator> latencies);
  /// Wrap `base_fs` with the given average latency.
  // NOTE(review): the unit of average_latency is not stated in this header
  // (presumably seconds) -- confirm against io::LatencyGenerator.
  SlowFileSystem(std::shared_ptr<FileSystem> base_fs, double average_latency);
  /// Same as above, with an explicit seed (presumably for the random latency
  /// generator -- confirm against the implementation).
  SlowFileSystem(std::shared_ptr<FileSystem> base_fs, double average_latency,
                 int32_t seed);

  std::string type_name() const override { return "slow"; }
  bool Equals(const FileSystem& other) const override;
  Result<std::string> PathFromUri(const std::string& uri_string) const override;

  // Re-expose the base-class overloads that would otherwise be hidden by the
  // overrides declared below.
  /// \cond FALSE
  using FileSystem::CreateDir;
  using FileSystem::DeleteDirContents;
  using FileSystem::GetFileInfo;
  using FileSystem::OpenAppendStream;
  using FileSystem::OpenOutputStream;
  /// \endcond

  Result<FileInfo> GetFileInfo(const std::string& path) override;
  Result<FileInfoVector> GetFileInfo(const FileSelector& select) override;

  Status CreateDir(const std::string& path, bool recursive) override;

  Status DeleteDir(const std::string& path) override;
  Status DeleteDirContents(const std::string& path, bool missing_dir_ok) override;
  Status DeleteRootDirContents() override;

  Status DeleteFile(const std::string& path) override;

  Status Move(const std::string& src, const std::string& dest) override;

  Status CopyFile(const std::string& src, const std::string& dest) override;

  Result<std::shared_ptr<io::InputStream>> OpenInputStream(
      const std::string& path) override;
  Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info) override;
  Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
      const std::string& path) override;
  Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
      const FileInfo& info) override;
  Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
      const std::string& path,
      const std::shared_ptr<const KeyValueMetadata>& metadata) override;
  Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
      const std::string& path,
      const std::shared_ptr<const KeyValueMetadata>& metadata) override;

 protected:
  // The filesystem all operations are delegated to.
  std::shared_ptr<FileSystem> base_fs_;
  // Source of the latencies inserted around delegated calls.
  std::shared_ptr<io::LatencyGenerator> latencies_;
};

/// \brief Ensure all registered filesystem implementations are finalized.
///
/// Individual finalizers may wait for concurrent calls to finish so as to avoid
/// race conditions. After this function has been called, all filesystem APIs
/// will fail with an error.
///
/// The user is responsible for synchronization of calls to this function.
// NOTE(review): unlike the other free functions declared in this header, this
// declaration carries no ARROW_EXPORT; confirm whether it is meant to be
// callable from outside the library and, if so, add the export macro.
void EnsureFinalized();

/// \defgroup filesystem-factories Functions for creating FileSystem instances
///
/// @{

/// \brief Create a new FileSystem by URI
///
/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3",
/// "gs" and "gcs".
///
/// Support for other schemes can be added using RegisterFileSystemFactory.
///
/// \param[in] uri a URI-based path, ex: file:///some/local/path
/// \param[out] out_path (optional) Path inside the filesystem.
///             May be left as NULLPTR (the default) if not needed.
/// \return the FileSystem instance, or an error.
ARROW_EXPORT
Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri,
                                                      std::string* out_path = NULLPTR);

/// \brief Create a new FileSystem by URI with a custom IO context
///
/// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3",
/// "gs" and "gcs".
///
/// Support for other schemes can be added using RegisterFileSystemFactory.
///
/// \param[in] uri a URI-based path, ex: file:///some/local/path
/// \param[in] io_context an IOContext which will be associated with the filesystem
/// \param[out] out_path (optional) Path inside the filesystem.
///             May be left as NULLPTR (the default) if not needed.
/// \return the FileSystem instance, or an error.
ARROW_EXPORT
Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri,
                                                      const io::IOContext& io_context,
                                                      std::string* out_path = NULLPTR);

/// \brief Create a new FileSystem by URI or absolute local path
///
/// Support for other schemes can be added using RegisterFileSystemFactory.
///
/// Same as FileSystemFromUri, but in addition also recognize non-URIs
/// and treat them as local filesystem paths.  Only absolute local filesystem
/// paths are allowed.
///
/// \param[in] uri a URI, or an absolute local filesystem path
/// \param[out] out_path (optional) Path inside the filesystem.
ARROW_EXPORT
Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(
    const std::string& uri, std::string* out_path = NULLPTR);

/// \brief Create a new FileSystem by URI or absolute local path, with a
/// custom IO context
///
/// Support for other schemes can be added using RegisterFileSystemFactory.
///
/// Same as FileSystemFromUri, but in addition also recognize non-URIs
/// and treat them as local filesystem paths.  Only absolute local filesystem
/// paths are allowed.
///
/// \param[in] uri a URI, or an absolute local filesystem path
/// \param[in] io_context an IOContext which will be associated with the filesystem
/// \param[out] out_path (optional) Path inside the filesystem.
ARROW_EXPORT
Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(
    const std::string& uri, const io::IOContext& io_context,
    std::string* out_path = NULLPTR);

/// @}

/// \defgroup filesystem-factory-registration Helpers for FileSystem registration
///
/// @{

/// \brief Register a FileSystem factory
///
/// Support for custom URI schemes can be added by registering a factory
/// for the corresponding FileSystem.
///
/// \param[in] scheme a Uri scheme which the factory will handle.
///            If a factory has already been registered for a scheme,
///            the new factory will be ignored.
/// \param[in] factory a function which can produce a FileSystem for Uris which match
///            scheme.
/// \param[in] finalizer a function which must be called to finalize the factory before
///            the process exits, or nullptr if no finalization is necessary.
/// \return raises KeyError if a name collision occurs.
// NOTE(review): the \p scheme description says a duplicate registration is
// ignored, while the \return line says a name collision raises KeyError.
// These two statements conflict; confirm the actual behavior in the
// implementation and fix whichever sentence is wrong.
ARROW_EXPORT Status RegisterFileSystemFactory(std::string scheme,
                                              FileSystemFactory factory,
                                              std::function<void()> finalizer = {});

/// \brief Register FileSystem factories from a shared library
///
/// FileSystem implementations may be housed in separate shared libraries and only
/// registered when the shared library is explicitly loaded. FileSystemRegistrar is
/// provided to simplify definition of such libraries: each instance at namespace scope
/// in the library will register a factory for a scheme. Any library which uses
/// FileSystemRegistrars and which must be dynamically loaded should be loaded using
/// LoadFileSystemFactories(), which will additionally merge registries if necessary
/// (static linkage to arrow can produce isolated registries).
///
/// \param[in] libpath path of the shared library to load
ARROW_EXPORT Status LoadFileSystemFactories(const char* libpath);

struct ARROW_EXPORT FileSystemRegistrar {
  /// \brief Register a FileSystem factory at load time
  ///
  /// Support for custom URI schemes can be added by registering a factory for the
  /// corresponding FileSystem. An instance of this helper can be defined at namespace
  /// scope to cause the factory to be registered at load time.
  ///
  /// Global constructors will finish execution before main() starts if the registrar is
  /// linked into the same binary as main(), or before dlopen()/LoadLibrary() returns if
  /// the library in which the registrar is defined is dynamically loaded.
  ///
  /// \code
  ///     FileSystemRegistrar kSlowFileSystemModule{
  ///       "slowfile",
  ///       [](const Uri& uri, const io::IOContext& io_context, std::string* out_path)
  ///           ->Result<std::shared_ptr<FileSystem>> {
  ///         auto local_uri = "file" + uri.ToString().substr(uri.scheme().size());
  ///         ARROW_ASSIGN_OR_RAISE(auto base_fs,
  ///             FileSystemFromUri(local_uri, io_context, out_path));
  ///         double average_latency = 1;
  ///         int32_t seed = 0xDEADBEEF;
  ///         ARROW_ASSIGN_OR_RAISE(auto params, uri.query_item());
  ///         for (const auto& [key, value] : params) {
  ///           if (key == "average_latency") {
  ///             average_latency = std::stod(value);
  ///           }
  ///           if (key == "seed") {
  ///             seed = std::stoi(value, nullptr, /*base=*/16);
  ///           }
  ///         }
  ///         return std::make_shared<SlowFileSystem>(base_fs, average_latency, seed);
  ///     }};
  /// \endcode
  ///
  /// \param[in] scheme a Uri scheme which the factory will handle.
  ///            If a factory has already been registered for a scheme, the
  ///            new factory will be ignored.
  /// \param[in] factory a function which can produce a FileSystem for Uris which match
  ///            scheme.
  /// \param[in] finalizer a function which must be called to finalize the factory before
  ///            the process exits, or nullptr if no finalization is necessary.
  // NOTE(review): the `factory` parameter is a FileSystemFactory (function,
  // file, line), which the ARROW_REGISTER_FILESYSTEM macro builds from a bare
  // function plus __FILE__/__LINE__.  Confirm that the example above, which
  // passes a lambda directly, compiles as written.
  FileSystemRegistrar(std::string scheme, FileSystemFactory factory,
                      std::function<void()> finalizer = {});
};

// Expands to a FileSystemRegistrar temporary/initializer for `scheme`, wrapping
// `factory_function` in a FileSystemFactory that records the __FILE__ and
// __LINE__ of the macro invocation (used by FileSystemFactory::operator==).
#define ARROW_REGISTER_FILESYSTEM(scheme, factory_function, finalizer)            \
  ::arrow::fs::FileSystemRegistrar {                                              \
    scheme, ::arrow::fs::FileSystemFactory{factory_function, __FILE__, __LINE__}, \
        finalizer                                                                 \
  }

/// @}

namespace internal {
// Returns an untyped pointer; presumably an opaque handle to the filesystem
// factory registry used when merging registries across libraries (see
// LoadFileSystemFactories) -- confirm against the implementation.
ARROW_EXPORT void* GetFileSystemRegistry();
}  // namespace internal

/// \brief Copy files, including from one FileSystem to another
///
/// If a source and destination are resident in the same FileSystem FileSystem::CopyFile
/// will be used, otherwise the file will be opened as a stream in both FileSystems and
/// chunks copied from the source to the destination. No directories will be created.
///
/// \param[in] sources the files to copy from
/// \param[in] destinations the files to copy to (presumably matched
///            index-by-index with `sources` -- confirm against the implementation)
/// \param[in] io_context the IOContext to use (defaults to io::default_io_context())
/// \param[in] chunk_size size of the chunks used for stream copies
///            (default 1024 * 1024; presumably in bytes)
/// \param[in] use_threads presumably whether copies may run in parallel (default true)
ARROW_EXPORT
Status CopyFiles(const std::vector<FileLocator>& sources,
                 const std::vector<FileLocator>& destinations,
                 const io::IOContext& io_context = io::default_io_context(),
                 int64_t chunk_size = 1024 * 1024, bool use_threads = true);

/// \brief Copy selected files, including from one FileSystem to another
///
/// Directories will be created under the destination base directory as needed.
///
/// \param[in] source_fs the filesystem to copy from
/// \param[in] source_sel selector describing which files of `source_fs` to copy
/// \param[in] destination_fs the filesystem to copy to
/// \param[in] destination_base_dir base directory in `destination_fs` under
///            which the copies are placed
/// \param[in] io_context the IOContext to use (defaults to io::default_io_context())
/// \param[in] chunk_size default 1024 * 1024; presumably the stream-copy chunk
///            size in bytes, as for the other CopyFiles overload
/// \param[in] use_threads presumably whether copies may run in parallel (default true)
ARROW_EXPORT
Status CopyFiles(const std::shared_ptr<FileSystem>& source_fs,
                 const FileSelector& source_sel,
                 const std::shared_ptr<FileSystem>& destination_fs,
                 const std::string& destination_base_dir,
                 const io::IOContext& io_context = io::default_io_context(),
                 int64_t chunk_size = 1024 * 1024, bool use_threads = true);

/// \brief Options accepted by the global Initialize() routine
struct FileSystemGlobalOptions {
  /// Path to a single PEM file holding all TLS CA certificates
  ///
  /// If empty, the underlying TLS library's defaults will be used.
  std::string tls_ca_file_path;

  /// Path to a directory holding TLS CA certificates in individual PEM files
  /// named along the OpenSSL "hashed" format.
  ///
  /// If empty, the underlying TLS library's defaults will be used.
  std::string tls_ca_dir_path;
};

/// EXPERIMENTAL: optional global initialization routine
///
/// This is for environments (such as manylinux) where the path
/// to TLS CA certificates needs to be configured at runtime.
///
/// \param[in] options the global options to apply (TLS CA file / directory paths)
ARROW_EXPORT
Status Initialize(const FileSystemGlobalOptions& options);

}  // namespace fs
}  // namespace arrow