"""Tests for Table Schema integration."""
from collections import OrderedDict
import json
import numpy as np
import pytest
from pandas.core.dtypes.dtypes import (
CategoricalDtype, DatetimeTZDtype, PeriodDtype)
import pandas as pd
from pandas import DataFrame
import pandas.util.testing as tm
from pandas.io.json.table_schema import (
as_json_table_type, build_table_schema, convert_json_field_to_pandas_type,
convert_pandas_type_to_json_field, set_default_names)
class TestBuildSchema(object):
def setup_method(self, method):
self.df = DataFrame(
{'A': [1, 2, 3, 4],
'B': ['a', 'b', 'c', 'c'],
'C': pd.date_range('2016-01-01', freq='d', periods=4),
'D': pd.timedelta_range('1H', periods=4, freq='T'),
},
index=pd.Index(range(4), name='idx'))
def test_build_table_schema(self):
result = build_table_schema(self.df, version=False)
expected = {
'fields': [{'name': 'idx', 'type': 'integer'},
{'name': 'A', 'type': 'integer'},
{'name': 'B', 'type': 'string'},
{'name': 'C', 'type': 'datetime'},
{'name': 'D', 'type': 'duration'},
],
'primaryKey': ['idx']
}
assert result == expected
result = build_table_schema(self.df)
assert "pandas_version" in result
def test_series(self):
s = pd.Series([1, 2, 3], name='foo')
result = build_table_schema(s, version=False)
expected = {'fields': [{'name': 'index', 'type': 'integer'},
{'name': 'foo', 'type': 'integer'}],
'primaryKey': ['index']}
assert result == expected
result = build_table_schema(s)
assert 'pandas_version' in result
def test_series_unnamed(self):
result = build_table_schema(pd.Series([1, 2, 3]), version=False)
expected = {'fields': [{'name': 'index', 'type': 'integer'},
{'name': 'values', 'type': 'integer'}],
'primaryKey': ['index']}
assert result == expected
def test_multiindex(self):
df = self.df.copy()
idx = pd.MultiIndex.from_product([('a', 'b'), (1, 2)])
df.index = idx
result = build_table_schema(df, version=False)
expected = {
'fields': [{'name': 'level_0', 'type': 'string'},
{'name': 'level_1', 'type': 'integer'},
{'name': 'A', 'type': 'integer'},
{'name': 'B', 'type': 'string'},
{'name': 'C', 'type': 'datetime'},
{'name': 'D', 'type': 'duration'},
],
'primaryKey': ['level_0', 'level_1']
}
assert result == expected
df.index.names = ['idx0', None]
expected['fields'][0]['name'] = 'idx0'
expected['primaryKey'] = ['idx0', 'level_1']
result = build_table_schema(df, version=False)
assert result == expected
class TestTableSchemaType(object):
@pytest.mark.parametrize('int_type', [
np.int, np.int16, np.int32, np.int64])
def test_as_json_table_type_int_data(self, int_type):
int_data = [1, 2, 3]
assert as_json_table_type(np.array(
int_data, dtype=int_type)) == 'integer'
@pytest.mark.parametrize('float_type', [
np.float, np.float16, np.float32, np.float64])
def test_as_json_table_type_float_data(self, float_type):
float_data = [1., 2., 3.]
assert as_json_table_type(np.array(
float_data, dtype=float_type)) == 'number'
@pytest.mark.parametrize('bool_type', [bool, np.bool])
def test_as_json_table_type_bool_data(self, bool_type):
bool_data = [True, False]
assert as_json_table_type(np.array(
bool_data, dtype=bool_type)) == 'boolean'
@pytest.mark.parametrize('date_data', [
pd.to_datetime(['2016']),
pd.to_datetime(['2016'], utc=True),
pd.Series(pd.to_datetime(['2016'])),
pd.Series(pd.to_datetime(['2016'], utc=True)),
pd.period_range('2016', freq='A', periods=3)
])
def test_as_json_table_type_date_data(self, date_data):
assert as_json_table_type(date_data) == 'datetime'
@pytest.mark.parametrize('str_data', [
pd.Series(['a', 'b']), pd.Index(['a', 'b'])])
def test_as_json_table_type_string_data(self, str_data):
assert as_json_table_type(str_data) == 'string'
@pytest.mark.parametrize('cat_data', [
pd.Categorical(['a']),
pd.Categorical([1]),
pd.Series(pd.Categorical([1])),
pd.CategoricalIndex([1]),
pd.Categorical([1])])
def test_as_json_table_type_categorical_data(self, cat_data):
assert as_json_table_type(cat_data) == 'any'
# ------
# dtypes
# ------
@pytest.mark.parametrize('int_dtype', [
np.int, np.int16, np.int32, np.int64])
def test_as_json_table_type_int_dtypes(self, int_dtype):
assert as_json_table_type(int_dtype) == 'integer'
@pytest.mark.parametrize('float_dtype', [
np.float, np.float16, np.float32, np.float64])
def test_as_json_table_type_float_dtypes(self, float_dtype):
assert as_json_table_type(float_dtype) == 'number'
@pytest.mark.parametrize('bool_dtype', [bool, np.bool])
def test_as_json_table_type_bool_dtypes(self, bool_dtype):
assert as_json_table_type(bool_dtype) == 'boolean'
@pytest.mark.parametrize('date_dtype', [
np.datetime64, np.dtype("<M8[ns]"), PeriodDtype('D'),
DatetimeTZDtype('ns', 'US/Central')])
def test_as_json_table_type_date_dtypes(self, date_dtype):
# TODO: datedate.date? datetime.time?
assert as_json_table_type(date_dtype) == 'datetime'
@pytest.mark.parametrize('td_dtype', [
np.timedelta64, np.dtype("<m8[ns]")])
def test_as_json_table_type_timedelta_dtypes(self, td_dtype):
assert as_json_table_type(td_dtype) == 'duration'
@pytest.mark.parametrize('str_dtype', [object]) # TODO
def test_as_json_table_type_string_dtypes(self, str_dtype):
assert as_json_table_type(str_dtype) == 'string'
def test_as_json_table_type_categorical_dtypes(self):
# TODO: I think before is_categorical_dtype(Categorical)
# returned True, but now it's False. Figure out why or
# if it matters
assert as_json_table_type(pd.Categorical(['a'])) == 'any'
assert as_json_table_type(CategoricalDtype()) == 'any'
class TestTableOrient(object):
def setup_method(self, method):
self.df = DataFrame(
{'A': [1, 2, 3, 4],
'B': ['a', 'b', 'c', 'c'],
'C': pd.date_range('2016-01-01', freq='d', periods=4),
'D': pd.timedelta_range('1H', periods=4, freq='T'),
'E': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'])),
'F': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'],
ordered=True)),
'G': [1., 2., 3, 4.],
'H': pd.date_range('2016-01-01', freq='d', periods=4,
tz='US/Central'),
},
index=pd.Index(range(4), name='idx'))
def test_build_series(self):
s = pd.Series([1, 2], name='a')
s.index.name = 'id'
result = s.to_json(orient='table', date_format='iso')
result = json.loads(result, object_pairs_hook=OrderedDict)
assert "pandas_version" in result['schema']
result['schema'].pop('pandas_version')
fields = [{'name': 'id', 'type': 'integer'},
{'name': 'a', 'type': 'integer'}]
schema = {
'fields': fields,
'primaryKey': ['id'],
}
expected = OrderedDict([
('schema', schema),
('data', [OrderedDict([('id', 0), ('a', 1)]),
OrderedDict([('id', 1), ('a', 2)])])])
assert result == expected
def test_to_json(self):
df = self.df.copy()
df.index.name = 'idx'
result = df.to_json(orient='table', date_format='iso')
result = json.loads(result, object_pairs_hook=OrderedDict)
assert "pandas_version" in result['schema']
result['schema'].pop('pandas_version')
fields = [
{'name': 'idx', 'type': 'integer'},
{'name': 'A', 'type': 'integer'},
{'name': 'B', 'type': 'string'},
{'name': 'C', 'type': 'datetime'},
{'name': 'D', 'type': 'duration'},
{'constraints': {'enum': ['a', 'b', 'c']},
'name': 'E',
'ordered': False,
'type': 'any'},
{'constraints': {'enum': ['a', 'b', 'c']},
'name': 'F',
'ordered': True,
'type': 'any'},
{'name': 'G', 'type': 'number'},
{'name': 'H', 'type': 'datetime', 'tz': 'US/Central'}
]
schema = {
'fields': fields,
'primaryKey': ['idx'],
}
data = [
OrderedDict([('idx', 0), ('A', 1), ('B', 'a'),
('C', '2016-01-01T00:00:00.000Z'),
('D', 'P0DT1H0M0S'),
('E', 'a'), ('F', 'a'), ('G', 1.),
('H', '2016-01-01T06:00:00.000Z')
]),
OrderedDict([('idx', 1), ('A', 2), ('B', 'b'),
('C', '2016-01-02T00:00:00.000Z'),
('D', 'P0DT1H1M0S'),
('E', 'b'), ('F', 'b'), ('G', 2.),
('H', '2016-01-02T06:00:00.000Z')
]),
OrderedDict([('idx', 2), ('A', 3), ('B', 'c'),
('C', '2016-01-03T00:00:00.000Z'),
('D', 'P0DT1H2M0S'),
('E', 'c'), ('F', 'c'), ('G', 3.),
('H', '2016-01-03T06:00:00.000Z')
]),
OrderedDict([('idx', 3), ('A', 4), ('B', 'c'),
('C', '2016-01-04T00:00:00.000Z'),
('D', 'P0DT1H3M0S'),
('E', 'c'), ('F', 'c'), ('G', 4.),
('H', '2016-01-04T06:00:00.000Z')
]),
]
expected = OrderedDict([('schema', schema), ('data', data)])
assert result == expected
def test_to_json_float_index(self):
data = pd.Series(1, index=[1., 2.])
result = data.to_json(orient='table', date_format='iso')
result = json.loads(result, object_pairs_hook=OrderedDict)
result['schema'].pop('pandas_version')
expected = (
OrderedDict([('schema', {
'fields': [{'name': 'index', 'type': 'number'},
{'name': 'values', 'type': 'integer'}],
'primaryKey': ['index']
}),
('data', [OrderedDict([('index', 1.0), ('values', 1)]),
OrderedDict([('index', 2.0), ('values', 1)])])])
)
assert result == expected
def test_to_json_period_index(self):
idx = pd.period_range('2016', freq='Q-JAN', periods=2)
data = pd.Series(1, idx)
result = data.to_json(orient='table', date_format='iso')
result = json.loads(result, object_pairs_hook=OrderedDict)
result['schema'].pop('pandas_version')
fields = [{'freq': 'Q-JAN', 'name': 'index', 'type': 'datetime'},
{'name': 'values', 'type': 'integer'}]
schema = {'fields': fields, 'primaryKey': ['index']}
data = [OrderedDict([('index', '2015-11-01T00:00:00.000Z'),
('values', 1)]),
OrderedDict([('index', '2016-02-01T00:00:00.000Z'),
('values', 1)])]
expected = OrderedDict([('schema', schema), ('data', data)])
assert result == expected
def test_to_json_categorical_index(self):
data = pd.Series(1, pd.CategoricalIndex(['a', 'b']))
result = data.to_json(orient='table', date_format='iso')
result = json.loads(result, object_pairs_hook=OrderedDict)
result['schema'].pop('pandas_version')
expected = (
OrderedDict([('schema',
{'fields': [{'name': 'index', 'type': 'any',
'constraints': {'enum': ['a', 'b']},
'ordered': False},
{'name': 'values', 'type': 'integer'}],
'primaryKey': ['index']}),
('data', [
OrderedDict([('index', 'a'),
('values', 1)]),
OrderedDict([('index', 'b'), ('values', 1)])])])
)
assert result == expected
def test_date_format_raises(self):
with pytest.raises(ValueError):
self.df.to_json(orient='table', date_format='epoch')
# others work
self.df.to_json(orient='table', date_format='iso')
self.df.to_json(orient='table')
@pytest.mark.parametrize('kind', [pd.Series, pd.Index])
def test_convert_pandas_type_to_json_field_int(self, kind):
data = [1, 2, 3]
result = convert_pandas_type_to_json_field(kind(data, name='name'))
expected = {"name": "name", "type": "integer"}
assert result == expected
@pytest.mark.parametrize('kind', [pd.Series, pd.Index])
Loading ...