import numpy as np
import pytest
import pytz
import pandas as pd
from pandas import Timedelta, merge_asof, read_csv, to_datetime
from pandas.core.reshape.merge import MergeError
from pandas.util.testing import assert_frame_equal
class TestAsOfMerge(object):
def read_data(self, datapath, name, dedupe=False):
path = datapath('reshape', 'merge', 'data', name)
x = read_csv(path)
if dedupe:
x = (x.drop_duplicates(['time', 'ticker'], keep='last')
.reset_index(drop=True)
)
x.time = to_datetime(x.time)
return x
@pytest.fixture(autouse=True)
def setup_method(self, datapath):
self.trades = self.read_data(datapath, 'trades.csv')
self.quotes = self.read_data(datapath, 'quotes.csv', dedupe=True)
self.asof = self.read_data(datapath, 'asof.csv')
self.tolerance = self.read_data(datapath, 'tolerance.csv')
self.allow_exact_matches = self.read_data(datapath,
'allow_exact_matches.csv')
self.allow_exact_matches_and_tolerance = self.read_data(
datapath, 'allow_exact_matches_and_tolerance.csv')
def test_examples1(self):
""" doc-string examples """
left = pd.DataFrame({'a': [1, 5, 10],
'left_val': ['a', 'b', 'c']})
right = pd.DataFrame({'a': [1, 2, 3, 6, 7],
'right_val': [1, 2, 3, 6, 7]})
expected = pd.DataFrame({'a': [1, 5, 10],
'left_val': ['a', 'b', 'c'],
'right_val': [1, 3, 7]})
result = pd.merge_asof(left, right, on='a')
assert_frame_equal(result, expected)
def test_examples2(self):
""" doc-string examples """
trades = pd.DataFrame({
'time': pd.to_datetime(['20160525 13:30:00.023',
'20160525 13:30:00.038',
'20160525 13:30:00.048',
'20160525 13:30:00.048',
'20160525 13:30:00.048']),
'ticker': ['MSFT', 'MSFT',
'GOOG', 'GOOG', 'AAPL'],
'price': [51.95, 51.95,
720.77, 720.92, 98.00],
'quantity': [75, 155,
100, 100, 100]},
columns=['time', 'ticker', 'price', 'quantity'])
quotes = pd.DataFrame({
'time': pd.to_datetime(['20160525 13:30:00.023',
'20160525 13:30:00.023',
'20160525 13:30:00.030',
'20160525 13:30:00.041',
'20160525 13:30:00.048',
'20160525 13:30:00.049',
'20160525 13:30:00.072',
'20160525 13:30:00.075']),
'ticker': ['GOOG', 'MSFT', 'MSFT',
'MSFT', 'GOOG', 'AAPL', 'GOOG',
'MSFT'],
'bid': [720.50, 51.95, 51.97, 51.99,
720.50, 97.99, 720.50, 52.01],
'ask': [720.93, 51.96, 51.98, 52.00,
720.93, 98.01, 720.88, 52.03]},
columns=['time', 'ticker', 'bid', 'ask'])
pd.merge_asof(trades, quotes,
on='time',
by='ticker')
pd.merge_asof(trades, quotes,
on='time',
by='ticker',
tolerance=pd.Timedelta('2ms'))
expected = pd.DataFrame({
'time': pd.to_datetime(['20160525 13:30:00.023',
'20160525 13:30:00.038',
'20160525 13:30:00.048',
'20160525 13:30:00.048',
'20160525 13:30:00.048']),
'ticker': ['MSFT', 'MSFT', 'GOOG', 'GOOG', 'AAPL'],
'price': [51.95, 51.95,
720.77, 720.92, 98.00],
'quantity': [75, 155,
100, 100, 100],
'bid': [np.nan, 51.97, np.nan,
np.nan, np.nan],
'ask': [np.nan, 51.98, np.nan,
np.nan, np.nan]},
columns=['time', 'ticker', 'price', 'quantity',
'bid', 'ask'])
result = pd.merge_asof(trades, quotes,
on='time',
by='ticker',
tolerance=pd.Timedelta('10ms'),
allow_exact_matches=False)
assert_frame_equal(result, expected)
def test_examples3(self):
""" doc-string examples """
# GH14887
left = pd.DataFrame({'a': [1, 5, 10],
'left_val': ['a', 'b', 'c']})
right = pd.DataFrame({'a': [1, 2, 3, 6, 7],
'right_val': [1, 2, 3, 6, 7]})
expected = pd.DataFrame({'a': [1, 5, 10],
'left_val': ['a', 'b', 'c'],
'right_val': [1, 6, np.nan]})
result = pd.merge_asof(left, right, on='a', direction='forward')
assert_frame_equal(result, expected)
def test_examples4(self):
""" doc-string examples """
# GH14887
left = pd.DataFrame({'a': [1, 5, 10],
'left_val': ['a', 'b', 'c']})
right = pd.DataFrame({'a': [1, 2, 3, 6, 7],
'right_val': [1, 2, 3, 6, 7]})
expected = pd.DataFrame({'a': [1, 5, 10],
'left_val': ['a', 'b', 'c'],
'right_val': [1, 6, 7]})
result = pd.merge_asof(left, right, on='a', direction='nearest')
assert_frame_equal(result, expected)
def test_basic(self):
expected = self.asof
trades = self.trades
quotes = self.quotes
result = merge_asof(trades, quotes,
on='time',
by='ticker')
assert_frame_equal(result, expected)
def test_basic_categorical(self):
expected = self.asof
trades = self.trades.copy()
trades.ticker = trades.ticker.astype('category')
quotes = self.quotes.copy()
quotes.ticker = quotes.ticker.astype('category')
expected.ticker = expected.ticker.astype('category')
result = merge_asof(trades, quotes,
on='time',
by='ticker')
assert_frame_equal(result, expected)
def test_basic_left_index(self):
# GH14253
expected = self.asof
trades = self.trades.set_index('time')
quotes = self.quotes
result = merge_asof(trades, quotes,
left_index=True,
right_on='time',
by='ticker')
# left-only index uses right's index, oddly
expected.index = result.index
# time column appears after left's columns
expected = expected[result.columns]
assert_frame_equal(result, expected)
def test_basic_right_index(self):
expected = self.asof
trades = self.trades
quotes = self.quotes.set_index('time')
result = merge_asof(trades, quotes,
left_on='time',
right_index=True,
by='ticker')
assert_frame_equal(result, expected)
def test_basic_left_index_right_index(self):
expected = self.asof.set_index('time')
trades = self.trades.set_index('time')
quotes = self.quotes.set_index('time')
result = merge_asof(trades, quotes,
left_index=True,
right_index=True,
by='ticker')
assert_frame_equal(result, expected)
def test_multi_index(self):
# MultiIndex is prohibited
trades = self.trades.set_index(['time', 'price'])
quotes = self.quotes.set_index('time')
with pytest.raises(MergeError):
merge_asof(trades, quotes,
left_index=True,
right_index=True)
trades = self.trades.set_index('time')
quotes = self.quotes.set_index(['time', 'bid'])
with pytest.raises(MergeError):
merge_asof(trades, quotes,
left_index=True,
right_index=True)
def test_on_and_index(self):
# 'on' parameter and index together is prohibited
trades = self.trades.set_index('time')
quotes = self.quotes.set_index('time')
with pytest.raises(MergeError):
merge_asof(trades, quotes,
left_on='price',
left_index=True,
right_index=True)
trades = self.trades.set_index('time')
quotes = self.quotes.set_index('time')
with pytest.raises(MergeError):
merge_asof(trades, quotes,
right_on='bid',
left_index=True,
right_index=True)
def test_basic_left_by_right_by(self):
# GH14253
expected = self.asof
trades = self.trades
quotes = self.quotes
result = merge_asof(trades, quotes,
on='time',
left_by='ticker',
right_by='ticker')
assert_frame_equal(result, expected)
def test_missing_right_by(self):
expected = self.asof
trades = self.trades
quotes = self.quotes
q = quotes[quotes.ticker != 'MSFT']
result = merge_asof(trades, q,
on='time',
by='ticker')
expected.loc[expected.ticker == 'MSFT', ['bid', 'ask']] = np.nan
assert_frame_equal(result, expected)
def test_multiby(self):
# GH13936
trades = pd.DataFrame({
'time': pd.to_datetime(['20160525 13:30:00.023',
'20160525 13:30:00.023',
'20160525 13:30:00.046',
'20160525 13:30:00.048',
'20160525 13:30:00.050']),
'ticker': ['MSFT', 'MSFT',
'GOOG', 'GOOG', 'AAPL'],
'exch': ['ARCA', 'NSDQ', 'NSDQ', 'BATS', 'NSDQ'],
'price': [51.95, 51.95,
720.77, 720.92, 98.00],
'quantity': [75, 155,
100, 100, 100]},
columns=['time', 'ticker', 'exch',
'price', 'quantity'])
quotes = pd.DataFrame({
'time': pd.to_datetime(['20160525 13:30:00.023',
'20160525 13:30:00.023',
'20160525 13:30:00.030',
'20160525 13:30:00.041',
'20160525 13:30:00.045',
'20160525 13:30:00.049']),
'ticker': ['GOOG', 'MSFT', 'MSFT',
'MSFT', 'GOOG', 'AAPL'],
'exch': ['BATS', 'NSDQ', 'ARCA', 'ARCA',
'NSDQ', 'ARCA'],
'bid': [720.51, 51.95, 51.97, 51.99,
720.50, 97.99],
'ask': [720.92, 51.96, 51.98, 52.00,
720.93, 98.01]},
columns=['time', 'ticker', 'exch', 'bid', 'ask'])
expected = pd.DataFrame({
'time': pd.to_datetime(['20160525 13:30:00.023',
'20160525 13:30:00.023',
'20160525 13:30:00.046',
'20160525 13:30:00.048',
'20160525 13:30:00.050']),
'ticker': ['MSFT', 'MSFT',
'GOOG', 'GOOG', 'AAPL'],
'exch': ['ARCA', 'NSDQ', 'NSDQ', 'BATS', 'NSDQ'],
'price': [51.95, 51.95,
720.77, 720.92, 98.00],
'quantity': [75, 155,
100, 100, 100],
'bid': [np.nan, 51.95, 720.50, 720.51, np.nan],
'ask': [np.nan, 51.96, 720.93, 720.92, np.nan]},
columns=['time', 'ticker', 'exch',
'price', 'quantity', 'bid', 'ask'])
result = pd.merge_asof(trades, quotes, on='time',
by=['ticker', 'exch'])
assert_frame_equal(result, expected)
def test_multiby_heterogeneous_types(self):
# GH13936
trades = pd.DataFrame({
'time': pd.to_datetime(['20160525 13:30:00.023',
'20160525 13:30:00.023',
'20160525 13:30:00.046',
'20160525 13:30:00.048',
'20160525 13:30:00.050']),
'ticker': [0, 0, 1, 1, 2],
'exch': ['ARCA', 'NSDQ', 'NSDQ', 'BATS', 'NSDQ'],
Loading ...