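# NOTE: package-index page header captured together with this file; it is not
# part of the module itself.
#   Repository URL to install this package: <not captured>
#   Version: 3.0.3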
import ctypes
import pytest
import pandas.util._test_decorators as td
import pandas as pd
import pandas._testing as tm
pa = pytest.importorskip("pyarrow", minversion="16.0")
def test_series_arrow_interface():
    """Check the Arrow C stream export of a Series built from plain ints.

    Verifies that ``__arrow_c_stream__`` returns a capsule that is valid under
    the name ``arrow_array_stream`` and that ``pa.chunked_array`` can consume
    the Series both without and with an explicitly requested ``int32`` type.
    """
    ser = pd.Series([1, 4, 2])

    # The exported object must be a valid PyCapsule carrying the stream name.
    stream_capsule = ser.__arrow_c_stream__()
    capsule_ok = ctypes.pythonapi.PyCapsule_IsValid(
        ctypes.py_object(stream_capsule), b"arrow_array_stream"
    )
    assert capsule_ok == 1

    # Consume the Series through pyarrow without requesting a type.
    inferred = pa.chunked_array(ser)
    inferred_expected = pa.chunked_array([[1, 4, 2]])
    assert inferred.equals(inferred_expected)

    # Consume it again, this time requesting int32.
    as_int32 = pa.chunked_array(ser, type=pa.int32())
    int32_expected = pa.chunked_array([[1, 4, 2]], type=pa.int32())
    assert as_int32.equals(int32_expected)
def test_series_arrow_interface_arrow_dtypes():
    """Check the Arrow C stream export of a Series with a pyarrow-backed dtype.

    Same checks as for the plain-int Series: the capsule must be valid under
    the name ``arrow_array_stream`` and ``pa.chunked_array`` must accept the
    Series with and without a requested ``int32`` type.
    """
    ser = pd.Series([1, 4, 2], dtype="Int64[pyarrow]")

    # The exported object must be a valid PyCapsule carrying the stream name.
    stream_capsule = ser.__arrow_c_stream__()
    capsule_ok = ctypes.pythonapi.PyCapsule_IsValid(
        ctypes.py_object(stream_capsule), b"arrow_array_stream"
    )
    assert capsule_ok == 1

    # Consume the Series through pyarrow without requesting a type.
    inferred = pa.chunked_array(ser)
    inferred_expected = pa.chunked_array([[1, 4, 2]])
    assert inferred.equals(inferred_expected)

    # Consume it again, this time requesting int32.
    as_int32 = pa.chunked_array(ser, type=pa.int32())
    int32_expected = pa.chunked_array([[1, 4, 2]], type=pa.int32())
    assert as_int32.equals(int32_expected)
def test_series_arrow_interface_stringdtype():
    """Check the Arrow C stream export of a ``string[pyarrow]`` Series.

    The capsule must be valid under the name ``arrow_array_stream`` and the
    chunked array built from the Series must equal one of ``large_string``
    type.
    """
    ser = pd.Series(["foo", "bar"], dtype="string[pyarrow]")

    # The exported object must be a valid PyCapsule carrying the stream name.
    stream_capsule = ser.__arrow_c_stream__()
    capsule_ok = ctypes.pythonapi.PyCapsule_IsValid(
        ctypes.py_object(stream_capsule), b"arrow_array_stream"
    )
    assert capsule_ok == 1

    # The comparison target is explicitly typed as large_string.
    actual = pa.chunked_array(ser)
    wanted = pa.chunked_array([["foo", "bar"]], type=pa.large_string())
    assert actual.equals(wanted)
class ArrowArrayWrapper:
    """Thin wrapper exposing only ``__arrow_c_array__`` of a wrapped object.

    The wrapped object is stored on ``array``; calls to ``__arrow_c_array__``
    are forwarded to it unchanged.
    """

    def __init__(self, array):
        self.array = array

    def __arrow_c_array__(self, requested_schema=None):
        """Forward to the wrapped object's ``__arrow_c_array__``."""
        export = self.array.__arrow_c_array__
        return export(requested_schema)
class ArrowStreamWrapper:
    """Thin wrapper exposing only ``__arrow_c_stream__`` of a wrapped object.

    The wrapped object is stored on ``stream``; calls to
    ``__arrow_c_stream__`` are forwarded to it unchanged.
    """

    def __init__(self, chunked_array):
        self.stream = chunked_array

    def __arrow_c_stream__(self, requested_schema=None):
        """Forward to the wrapped object's ``__arrow_c_stream__``."""
        export = self.stream.__arrow_c_stream__
        return export(requested_schema)
@td.skip_if_no("pyarrow", min_version="14.0")
def test_dataframe_from_arrow():
    """Check ``pd.Series.from_arrow`` on stream-like and array-like inputs.

    Covers pyarrow chunked arrays and tables, pyarrow arrays, the local
    wrapper classes that only expose the protocol methods, and the
    ``TypeError`` raised for a plain Python list.
    """
    # --- inputs exposing __arrow_c_stream__ ---
    chunked = pa.chunked_array([[1, 2, 3], [4, 5]])
    flat_expected = pd.Series([1, 2, 3, 4, 5])
    tm.assert_series_equal(pd.Series.from_arrow(chunked), flat_expected)

    # a non-pyarrow object that only exposes the protocol method also works
    wrapped_stream = ArrowStreamWrapper(chunked)
    tm.assert_series_equal(pd.Series.from_arrow(wrapped_stream), flat_expected)

    # a table works as well, but will be seen as a StructArray
    tbl = pa.table({"a": [1, 2, 3], "b": ["a", "b", "c"]})
    from_table = pd.Series.from_arrow(tbl)
    struct_expected = pd.Series(
        [{"a": 1, "b": "a"}, {"a": 2, "b": "b"}, {"a": 3, "b": "c"}]
    )
    tm.assert_series_equal(from_table, struct_expected)

    # --- inputs exposing __arrow_c_array__ ---
    plain_array = pa.array([1, 2, 3])
    array_expected = pd.Series([1, 2, 3])
    tm.assert_series_equal(pd.Series.from_arrow(plain_array), array_expected)

    wrapped_array = ArrowArrayWrapper(plain_array)
    tm.assert_series_equal(pd.Series.from_arrow(wrapped_array), array_expected)

    # --- only actual Arrow objects are accepted ---
    msg = "Expected an Arrow-compatible array-like object"
    with pytest.raises(TypeError, match=msg):
        pd.Series.from_arrow([1, 2, 3])