Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
sarus_statistics / tests / unit / conftest.py
Size: Mime:
import decimal
import string
from datetime import datetime, timedelta
from random import randrange

import numpy as np
import pandas as pd
import pytest

# Names of the administrative columns handed out by the `admin_cols` fixture
# and appended to the dataframe built by `ops_data`.
PUBLIC = 'public'
PU_COLUMN = 'user_col'
WEIGHTS = 'weights'

# Number of rows generated for every column of the synthetic dataset.
N_EX = 1000

# Seed numpy's global RNG so the numpy-drawn columns are reproducible.
# NOTE(review): the stdlib `random` generator used by `random_date` is not
# seeded here, so date/time columns presumably differ between runs - confirm.
np.random.seed(10)


def random_date(start, end):
    """Return a random datetime in the half-open interval ``[start, end)``.

    The result is ``start`` plus a whole number of seconds drawn uniformly
    with ``random.randrange``; sub-second parts of the interval are ignored.

    Args:
        start: lower bound (inclusive), a ``datetime``.
        end: upper bound (exclusive), a ``datetime``.

    Returns:
        A ``datetime`` ``d`` such that ``start <= d < end``. When the
        interval spans less than one whole second (including
        ``start == end``), ``start`` itself is returned.

    Raises:
        ValueError: if ``end`` is earlier than ``start``.
    """
    delta = end - start
    # Whole seconds in the interval; microseconds are deliberately dropped.
    int_delta = (delta.days * 24 * 60 * 60) + delta.seconds
    if int_delta < 0:
        raise ValueError('end must not be earlier than start')
    if int_delta == 0:
        # randrange(0) would raise "empty range"; the only candidate is start.
        return start
    random_second = randrange(int_delta)
    return start + timedelta(seconds=random_second)


@pytest.fixture()
def admin_cols():
    """Provide the administrative column names as a 3-tuple.

    The order is ``(PUBLIC, PU_COLUMN, WEIGHTS)``; consumers unpack it
    positionally.
    """
    column_names = (PUBLIC, PU_COLUMN, WEIGHTS)
    return column_names


@pytest.fixture(scope='session')
def pandas_data():
    """Build the synthetic DataFrame shared by the unit tests.

    The frame has ``N_EX`` rows and one column per kind of data exercised by
    the tests: booleans, integers, floats, decimals, text, optional values,
    datetimes, dates and times. The ``*_pv`` columns only take a handful of
    distinct values.

    The fixture is session-scoped, so every test receives the very same
    DataFrame object; copy it before mutating.

    Returns:
        pandas.DataFrame: the generated dataset.
    """
    # Bounds of the random date window, parsed once instead of once per row.
    date_format = '%m/%d/%Y %I:%M %p'
    start = datetime.strptime('1/1/2008 1:30 PM', date_format)
    end = datetime.strptime('1/1/2009 4:50 AM', date_format)

    letters = list(string.ascii_lowercase)

    # NOTE: the dict values are evaluated in order and draw from the global
    # numpy / stdlib random generators, so reordering the columns changes
    # the generated data.
    df_users = pd.DataFrame(
        {
            'bool': np.random.choice(
                [True, False], size=N_EX, replace=True, p=[0.4, 0.6]
            ),
            'integer': np.random.choice(
                list(range(1000)), size=N_EX, replace=True
            ).astype('int64'),
            # Deterministic cycle 0..9 repeated to fill the column.
            'integer_pv': list(range(10)) * (N_EX // 10),
            'enum': np.random.choice(
                ['a', 'b', 'c'], size=N_EX, replace=True, p=[0.4, 0.5, 0.1]
            ),
            'float': np.random.uniform(16, 64, size=N_EX).astype('float64'),
            'float_pv': np.random.choice(
                [integer + 0.5 for integer in range(10)],
                size=N_EX,
                replace=True,
            ).astype('float64'),
            'decimals': [decimal.Decimal(i + 0.5) for i in range(N_EX)],
            # Random 10-letter lowercase strings.
            'text': [
                ''.join(np.random.choice(letters) for _ in range(10))
                for _ in range(N_EX)
            ],
            'text_pv': np.random.choice(
                ['a', 'b', 'c'], size=N_EX, replace=True, p=[0.4, 0.5, 0.1]
            ),
            'empty_text': [''] * N_EX,
            # Datetimes rendered as text using the same format as the bounds.
            'text_datetimes': [
                random_date(start, end).strftime(date_format)
                for _ in range(N_EX)
            ],
            'optional': np.random.choice(
                ['a', 'b', None], size=N_EX, replace=True, p=[0.4, 0.5, 0.1]
            ),
            'optional_full_none': [None] * N_EX,
            'datetime': [random_date(start, end) for _ in range(N_EX)],
            'datetime_pv': np.random.choice(
                [start, end],
                size=N_EX,
                replace=True,
            ),
            'date': [random_date(start, end).date() for _ in range(N_EX)],
            'date_pv': np.random.choice(
                [start.date(), end.date()],
                size=N_EX,
                replace=True,
            ),
            'time': [random_date(start, end).time() for _ in range(N_EX)],
            'time_pv': np.random.choice(
                [start.time(), end.time()],
                size=N_EX,
                replace=True,
            ),
        }
    )
    return df_users


@pytest.fixture()
def ops_data(pandas_data, admin_cols):
    """Return a fresh dataframe for the ops tests.

    Takes a subset of the columns of ``pandas_data`` and appends the three
    administrative columns named by ``admin_cols``:

    * the user column cycles through ``0..99``,
    * the weights column is ``1`` on every row,
    * the public column is ``False`` on every row.

    Args:
        pandas_data: the session-scoped synthetic DataFrame.
        admin_cols: tuple ``(public, user_col, weights)`` of column names.

    Returns:
        pandas.DataFrame: an independent copy, safe to mutate in a test.
    """
    public, user_col, weights = admin_cols
    # Explicit copy: `pandas_data` is shared across the whole session, and
    # assigning columns onto a bare column-subset triggers pandas'
    # SettingWithCopyWarning.
    df = pandas_data[
        ['integer', 'integer_pv', 'enum', 'bool', 'optional_full_none']
    ].copy()
    df[user_col] = list(range(100)) * (N_EX // 100)
    df[weights] = [1] * N_EX
    df[public] = [False] * N_EX
    return df