Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
pandas / tests / io / test_gbq.py
Size: Mime:
import pytest
from datetime import datetime
import pytz
import platform
from time import sleep
import os

import numpy as np
import pandas as pd
from pandas import compat, DataFrame

from pandas.compat import range

pandas_gbq = pytest.importorskip('pandas_gbq')

PROJECT_ID = None
PRIVATE_KEY_JSON_PATH = None
PRIVATE_KEY_JSON_CONTENTS = None

if compat.PY3:
    DATASET_ID = 'pydata_pandas_bq_testing_py3'
else:
    DATASET_ID = 'pydata_pandas_bq_testing_py2'

TABLE_ID = 'new_test'
DESTINATION_TABLE = "{0}.{1}".format(DATASET_ID + "1", TABLE_ID)

VERSION = platform.python_version()


def _skip_if_no_project_id():
    if not _get_project_id():
        pytest.skip(
            "Cannot run integration tests without a project id")


def _skip_if_no_private_key_path():
    if not _get_private_key_path():
        pytest.skip("Cannot run integration tests without a "
                    "private key json file path")


def _in_travis_environment():
    return 'TRAVIS_BUILD_DIR' in os.environ and \
           'GBQ_PROJECT_ID' in os.environ


def _get_project_id():
    if _in_travis_environment():
        return os.environ.get('GBQ_PROJECT_ID')
    else:
        return PROJECT_ID


def _get_private_key_path():
    if _in_travis_environment():
        return os.path.join(*[os.environ.get('TRAVIS_BUILD_DIR'), 'ci',
                              'travis_gbq.json'])
    else:
        return PRIVATE_KEY_JSON_PATH


def clean_gbq_environment(private_key=None):
    dataset = pandas_gbq.gbq._Dataset(_get_project_id(),
                                      private_key=private_key)

    for i in range(1, 10):
        if DATASET_ID + str(i) in dataset.datasets():
            dataset_id = DATASET_ID + str(i)
            table = pandas_gbq.gbq._Table(_get_project_id(), dataset_id,
                                          private_key=private_key)
            for j in range(1, 20):
                if TABLE_ID + str(j) in dataset.tables(dataset_id):
                    table.delete(TABLE_ID + str(j))

            dataset.delete(dataset_id)


def make_mixed_dataframe_v2(test_size):
    # create df to test for all BQ datatypes except RECORD
    bools = np.random.randint(2, size=(1, test_size)).astype(bool)
    flts = np.random.randn(1, test_size)
    ints = np.random.randint(1, 10, size=(1, test_size))
    strs = np.random.randint(1, 10, size=(1, test_size)).astype(str)
    times = [datetime.now(pytz.timezone('US/Arizona'))
             for t in range(test_size)]
    return DataFrame({'bools': bools[0],
                      'flts': flts[0],
                      'ints': ints[0],
                      'strs': strs[0],
                      'times': times[0]},
                     index=range(test_size))


@pytest.mark.single
class TestToGBQIntegrationWithServiceAccountKeyPath(object):

    @classmethod
    def setup_class(cls):
        # - GLOBAL CLASS FIXTURES -
        # put here any instruction you want to execute only *ONCE* *BEFORE*
        # executing *ALL* tests described below.

        _skip_if_no_project_id()
        _skip_if_no_private_key_path()

        clean_gbq_environment(_get_private_key_path())
        pandas_gbq.gbq._Dataset(_get_project_id(),
                                private_key=_get_private_key_path()
                                ).create(DATASET_ID + "1")

    @classmethod
    def teardown_class(cls):
        # - GLOBAL CLASS FIXTURES -
        # put here any instruction you want to execute only *ONCE* *AFTER*
        # executing all tests.

        clean_gbq_environment(_get_private_key_path())

    def test_roundtrip(self):
        destination_table = DESTINATION_TABLE + "1"

        test_size = 20001
        df = make_mixed_dataframe_v2(test_size)

        df.to_gbq(destination_table, _get_project_id(), chunksize=10000,
                  private_key=_get_private_key_path())

        sleep(30)  # <- Curses Google!!!

        result = pd.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}"
                             .format(destination_table),
                             project_id=_get_project_id(),
                             private_key=_get_private_key_path())
        assert result['num_rows'][0] == test_size