Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / pandas   python

Repository URL to install this package:

Version: 0.24.2 

/ tests / reshape / merge / test_merge_asof.py

import numpy as np
import pytest
import pytz

import pandas as pd
from pandas import Timedelta, merge_asof, read_csv, to_datetime
from pandas.core.reshape.merge import MergeError
from pandas.util.testing import assert_frame_equal


class TestAsOfMerge(object):

    def read_data(self, datapath, name, dedupe=False):
        path = datapath('reshape', 'merge', 'data', name)
        x = read_csv(path)
        if dedupe:
            x = (x.drop_duplicates(['time', 'ticker'], keep='last')
                  .reset_index(drop=True)
                 )
        x.time = to_datetime(x.time)
        return x

    @pytest.fixture(autouse=True)
    def setup_method(self, datapath):

        self.trades = self.read_data(datapath, 'trades.csv')
        self.quotes = self.read_data(datapath, 'quotes.csv', dedupe=True)
        self.asof = self.read_data(datapath, 'asof.csv')
        self.tolerance = self.read_data(datapath, 'tolerance.csv')
        self.allow_exact_matches = self.read_data(datapath,
                                                  'allow_exact_matches.csv')
        self.allow_exact_matches_and_tolerance = self.read_data(
            datapath, 'allow_exact_matches_and_tolerance.csv')

    def test_examples1(self):
        """ doc-string examples """

        left = pd.DataFrame({'a': [1, 5, 10],
                             'left_val': ['a', 'b', 'c']})
        right = pd.DataFrame({'a': [1, 2, 3, 6, 7],
                              'right_val': [1, 2, 3, 6, 7]})

        expected = pd.DataFrame({'a': [1, 5, 10],
                                 'left_val': ['a', 'b', 'c'],
                                 'right_val': [1, 3, 7]})

        result = pd.merge_asof(left, right, on='a')
        assert_frame_equal(result, expected)

    def test_examples2(self):
        """ doc-string examples """

        trades = pd.DataFrame({
            'time': pd.to_datetime(['20160525 13:30:00.023',
                                    '20160525 13:30:00.038',
                                    '20160525 13:30:00.048',
                                    '20160525 13:30:00.048',
                                    '20160525 13:30:00.048']),
            'ticker': ['MSFT', 'MSFT',
                       'GOOG', 'GOOG', 'AAPL'],
            'price': [51.95, 51.95,
                      720.77, 720.92, 98.00],
            'quantity': [75, 155,
                         100, 100, 100]},
            columns=['time', 'ticker', 'price', 'quantity'])

        quotes = pd.DataFrame({
            'time': pd.to_datetime(['20160525 13:30:00.023',
                                    '20160525 13:30:00.023',
                                    '20160525 13:30:00.030',
                                    '20160525 13:30:00.041',
                                    '20160525 13:30:00.048',
                                    '20160525 13:30:00.049',
                                    '20160525 13:30:00.072',
                                    '20160525 13:30:00.075']),
            'ticker': ['GOOG', 'MSFT', 'MSFT',
                       'MSFT', 'GOOG', 'AAPL', 'GOOG',
                       'MSFT'],
            'bid': [720.50, 51.95, 51.97, 51.99,
                    720.50, 97.99, 720.50, 52.01],
            'ask': [720.93, 51.96, 51.98, 52.00,
                    720.93, 98.01, 720.88, 52.03]},
            columns=['time', 'ticker', 'bid', 'ask'])

        pd.merge_asof(trades, quotes,
                      on='time',
                      by='ticker')

        pd.merge_asof(trades, quotes,
                      on='time',
                      by='ticker',
                      tolerance=pd.Timedelta('2ms'))

        expected = pd.DataFrame({
            'time': pd.to_datetime(['20160525 13:30:00.023',
                                    '20160525 13:30:00.038',
                                    '20160525 13:30:00.048',
                                    '20160525 13:30:00.048',
                                    '20160525 13:30:00.048']),
            'ticker': ['MSFT', 'MSFT', 'GOOG', 'GOOG', 'AAPL'],
            'price': [51.95, 51.95,
                      720.77, 720.92, 98.00],
            'quantity': [75, 155,
                         100, 100, 100],
            'bid': [np.nan, 51.97, np.nan,
                    np.nan, np.nan],
            'ask': [np.nan, 51.98, np.nan,
                    np.nan, np.nan]},
            columns=['time', 'ticker', 'price', 'quantity',
                     'bid', 'ask'])

        result = pd.merge_asof(trades, quotes,
                               on='time',
                               by='ticker',
                               tolerance=pd.Timedelta('10ms'),
                               allow_exact_matches=False)
        assert_frame_equal(result, expected)

    def test_examples3(self):
        """ doc-string examples """
        # GH14887

        left = pd.DataFrame({'a': [1, 5, 10],
                             'left_val': ['a', 'b', 'c']})
        right = pd.DataFrame({'a': [1, 2, 3, 6, 7],
                              'right_val': [1, 2, 3, 6, 7]})

        expected = pd.DataFrame({'a': [1, 5, 10],
                                 'left_val': ['a', 'b', 'c'],
                                 'right_val': [1, 6, np.nan]})

        result = pd.merge_asof(left, right, on='a', direction='forward')
        assert_frame_equal(result, expected)

    def test_examples4(self):
        """ doc-string examples """
        # GH14887

        left = pd.DataFrame({'a': [1, 5, 10],
                             'left_val': ['a', 'b', 'c']})
        right = pd.DataFrame({'a': [1, 2, 3, 6, 7],
                              'right_val': [1, 2, 3, 6, 7]})

        expected = pd.DataFrame({'a': [1, 5, 10],
                                 'left_val': ['a', 'b', 'c'],
                                 'right_val': [1, 6, 7]})

        result = pd.merge_asof(left, right, on='a', direction='nearest')
        assert_frame_equal(result, expected)

    def test_basic(self):

        expected = self.asof
        trades = self.trades
        quotes = self.quotes

        result = merge_asof(trades, quotes,
                            on='time',
                            by='ticker')
        assert_frame_equal(result, expected)

    def test_basic_categorical(self):

        expected = self.asof
        trades = self.trades.copy()
        trades.ticker = trades.ticker.astype('category')
        quotes = self.quotes.copy()
        quotes.ticker = quotes.ticker.astype('category')
        expected.ticker = expected.ticker.astype('category')

        result = merge_asof(trades, quotes,
                            on='time',
                            by='ticker')
        assert_frame_equal(result, expected)

    def test_basic_left_index(self):

        # GH14253
        expected = self.asof
        trades = self.trades.set_index('time')
        quotes = self.quotes

        result = merge_asof(trades, quotes,
                            left_index=True,
                            right_on='time',
                            by='ticker')
        # left-only index uses right's index, oddly
        expected.index = result.index
        # time column appears after left's columns
        expected = expected[result.columns]
        assert_frame_equal(result, expected)

    def test_basic_right_index(self):

        expected = self.asof
        trades = self.trades
        quotes = self.quotes.set_index('time')

        result = merge_asof(trades, quotes,
                            left_on='time',
                            right_index=True,
                            by='ticker')
        assert_frame_equal(result, expected)

    def test_basic_left_index_right_index(self):

        expected = self.asof.set_index('time')
        trades = self.trades.set_index('time')
        quotes = self.quotes.set_index('time')

        result = merge_asof(trades, quotes,
                            left_index=True,
                            right_index=True,
                            by='ticker')
        assert_frame_equal(result, expected)

    def test_multi_index(self):

        # MultiIndex is prohibited
        trades = self.trades.set_index(['time', 'price'])
        quotes = self.quotes.set_index('time')
        with pytest.raises(MergeError):
            merge_asof(trades, quotes,
                       left_index=True,
                       right_index=True)

        trades = self.trades.set_index('time')
        quotes = self.quotes.set_index(['time', 'bid'])
        with pytest.raises(MergeError):
            merge_asof(trades, quotes,
                       left_index=True,
                       right_index=True)

    def test_on_and_index(self):

        # 'on' parameter and index together is prohibited
        trades = self.trades.set_index('time')
        quotes = self.quotes.set_index('time')
        with pytest.raises(MergeError):
            merge_asof(trades, quotes,
                       left_on='price',
                       left_index=True,
                       right_index=True)

        trades = self.trades.set_index('time')
        quotes = self.quotes.set_index('time')
        with pytest.raises(MergeError):
            merge_asof(trades, quotes,
                       right_on='bid',
                       left_index=True,
                       right_index=True)

    def test_basic_left_by_right_by(self):

        # GH14253
        expected = self.asof
        trades = self.trades
        quotes = self.quotes

        result = merge_asof(trades, quotes,
                            on='time',
                            left_by='ticker',
                            right_by='ticker')
        assert_frame_equal(result, expected)

    def test_missing_right_by(self):

        expected = self.asof
        trades = self.trades
        quotes = self.quotes

        q = quotes[quotes.ticker != 'MSFT']
        result = merge_asof(trades, q,
                            on='time',
                            by='ticker')
        expected.loc[expected.ticker == 'MSFT', ['bid', 'ask']] = np.nan
        assert_frame_equal(result, expected)

    def test_multiby(self):
        # GH13936
        trades = pd.DataFrame({
            'time': pd.to_datetime(['20160525 13:30:00.023',
                                    '20160525 13:30:00.023',
                                    '20160525 13:30:00.046',
                                    '20160525 13:30:00.048',
                                    '20160525 13:30:00.050']),
            'ticker': ['MSFT', 'MSFT',
                       'GOOG', 'GOOG', 'AAPL'],
            'exch': ['ARCA', 'NSDQ', 'NSDQ', 'BATS', 'NSDQ'],
            'price': [51.95, 51.95,
                      720.77, 720.92, 98.00],
            'quantity': [75, 155,
                         100, 100, 100]},
            columns=['time', 'ticker', 'exch',
                     'price', 'quantity'])

        quotes = pd.DataFrame({
            'time': pd.to_datetime(['20160525 13:30:00.023',
                                    '20160525 13:30:00.023',
                                    '20160525 13:30:00.030',
                                    '20160525 13:30:00.041',
                                    '20160525 13:30:00.045',
                                    '20160525 13:30:00.049']),
            'ticker': ['GOOG', 'MSFT', 'MSFT',
                       'MSFT', 'GOOG', 'AAPL'],
            'exch': ['BATS', 'NSDQ', 'ARCA', 'ARCA',
                     'NSDQ', 'ARCA'],
            'bid': [720.51, 51.95, 51.97, 51.99,
                    720.50, 97.99],
            'ask': [720.92, 51.96, 51.98, 52.00,
                    720.93, 98.01]},
            columns=['time', 'ticker', 'exch', 'bid', 'ask'])

        expected = pd.DataFrame({
            'time': pd.to_datetime(['20160525 13:30:00.023',
                                    '20160525 13:30:00.023',
                                    '20160525 13:30:00.046',
                                    '20160525 13:30:00.048',
                                    '20160525 13:30:00.050']),
            'ticker': ['MSFT', 'MSFT',
                       'GOOG', 'GOOG', 'AAPL'],
            'exch': ['ARCA', 'NSDQ', 'NSDQ', 'BATS', 'NSDQ'],
            'price': [51.95, 51.95,
                      720.77, 720.92, 98.00],
            'quantity': [75, 155,
                         100, 100, 100],
            'bid': [np.nan, 51.95, 720.50, 720.51, np.nan],
            'ask': [np.nan, 51.96, 720.93, 720.92, np.nan]},
            columns=['time', 'ticker', 'exch',
                     'price', 'quantity', 'bid', 'ask'])

        result = pd.merge_asof(trades, quotes, on='time',
                               by=['ticker', 'exch'])
        assert_frame_equal(result, expected)

    def test_multiby_heterogeneous_types(self):
        # GH13936
        trades = pd.DataFrame({
            'time': pd.to_datetime(['20160525 13:30:00.023',
                                    '20160525 13:30:00.023',
                                    '20160525 13:30:00.046',
                                    '20160525 13:30:00.048',
                                    '20160525 13:30:00.050']),
            'ticker': [0, 0, 1, 1, 2],
            'exch': ['ARCA', 'NSDQ', 'NSDQ', 'BATS', 'NSDQ'],
Loading ...