Repository URL to install this package:
|
Version:
1.0.0 ▾
|
import os
import pathlib
import regression_model
import pandas as pd
pd.options.display.max_rows = 10
pd.options.display.max_columns = 10
PACKAGE_ROOT = pathlib.Path(regression_model.__file__).resolve().parent
TRAINED_MODEL_DIR = PACKAGE_ROOT / 'trained_models'
DATASET_DIR = PACKAGE_ROOT / 'datasets'
# data
TESTING_DATA_FILE = 'test.csv'
TRAINING_DATA_FILE = 'train.csv'
TARGET = 'SalePrice'
# variables
FEATURES = ['MSSubClass', 'MSZoning', 'Neighborhood',
'OverallQual', 'OverallCond', 'YearRemodAdd',
'RoofStyle', 'MasVnrType', 'BsmtQual', 'BsmtExposure',
'HeatingQC', 'CentralAir', '1stFlrSF', 'GrLivArea',
'BsmtFullBath', 'KitchenQual', 'Fireplaces', 'FireplaceQu',
'GarageType', 'GarageFinish', 'GarageCars', 'PavedDrive',
'LotFrontage',
# this one is only to calculate temporal variable:
'YrSold']
# this variable is to calculate the temporal variable,
# can be dropped afterwards
DROP_FEATURES = 'YrSold'
# numerical variables with NA in train set
NUMERICAL_VARS_WITH_NA = ['LotFrontage']
# categorical variables with NA in train set
CATEGORICAL_VARS_WITH_NA = ['MasVnrType', 'BsmtQual', 'BsmtExposure',
'FireplaceQu', 'GarageType', 'GarageFinish']
TEMPORAL_VARS = 'YearRemodAdd'
# variables to log transform
NUMERICALS_LOG_VARS = ['LotFrontage', '1stFlrSF', 'GrLivArea']
# categorical variables to encode
CATEGORICAL_VARS = ['MSZoning', 'Neighborhood', 'RoofStyle', 'MasVnrType',
'BsmtQual', 'BsmtExposure', 'HeatingQC', 'CentralAir',
'KitchenQual', 'FireplaceQu', 'GarageType', 'GarageFinish',
'PavedDrive']
NUMERICAL_NA_NOT_ALLOWED = [
feature for feature in FEATURES
if feature not in CATEGORICAL_VARS + NUMERICAL_VARS_WITH_NA
]
CATEGORICAL_NA_NOT_ALLOWED = [
feature for feature in CATEGORICAL_VARS
if feature not in CATEGORICAL_VARS_WITH_NA
]
PIPELINE_NAME = 'lasso_regression'
PIPELINE_SAVE_FILE = f'{PIPELINE_NAME}_output_v'
# used for differential testing
ACCEPTABLE_MODEL_DIFFERENCE = 0.05