# Why Gemfury? Push, build, and install RubyGems, npm packages, Python packages, Maven artifacts, PHP packages, Go Modules, Debian packages, RPM packages, NuGet packages
#
# Repository URL to install this package:
#
# Details
# Size: Mime:
from gradient_boosting_model.config.core import config

import numpy as np
import pandas as pd
from marshmallow import fields, Schema, ValidationError


class HouseDataInputSchema(Schema):
    """Marshmallow schema describing a single house record fed to the model.

    Each attribute declares the expected type of one input column. Fields
    declared with ``allow_none=True`` accept ``None`` as a value; callers in
    this module convert numpy NaN to ``None`` before loading so that missing
    values in those columns can be validated.
    """

    Alley = fields.Str(allow_none=True)
    BedroomAbvGr = fields.Integer()
    BldgType = fields.Str()
    BsmtCond = fields.Str(allow_none=True)
    BsmtExposure = fields.Str(allow_none=True)
    BsmtFinSF1 = fields.Float(allow_none=True)
    BsmtFinSF2 = fields.Float(allow_none=True)
    BsmtFinType1 = fields.Str(allow_none=True)
    BsmtFinType2 = fields.Str(allow_none=True)
    BsmtFullBath = fields.Float(allow_none=True)
    BsmtHalfBath = fields.Float(allow_none=True)
    BsmtQual = fields.Str(allow_none=True)
    BsmtUnfSF = fields.Float()
    CentralAir = fields.Str()
    Condition1 = fields.Str()
    Condition2 = fields.Str()
    Electrical = fields.Str(allow_none=True)
    EnclosedPorch = fields.Integer()
    ExterCond = fields.Str()
    ExterQual = fields.Str()
    Exterior1st = fields.Str(allow_none=True)
    Exterior2nd = fields.Str(allow_none=True)
    Fence = fields.Str(allow_none=True)
    FireplaceQu = fields.Str(allow_none=True)
    Fireplaces = fields.Integer()
    Foundation = fields.Str()
    FullBath = fields.Integer()
    Functional = fields.Str(allow_none=True)
    GarageArea = fields.Float()
    GarageCars = fields.Float()
    GarageCond = fields.Str(allow_none=True)
    GarageFinish = fields.Str(allow_none=True)
    GarageQual = fields.Str(allow_none=True)
    GarageType = fields.Str(allow_none=True)
    GarageYrBlt = fields.Float(allow_none=True)
    GrLivArea = fields.Integer()
    HalfBath = fields.Integer()
    Heating = fields.Str()
    HeatingQC = fields.Str()
    HouseStyle = fields.Str()
    Id = fields.Integer()
    KitchenAbvGr = fields.Integer()
    KitchenQual = fields.Str(allow_none=True)
    LandContour = fields.Str()
    LandSlope = fields.Str()
    LotArea = fields.Integer()
    LotConfig = fields.Str()
    LotFrontage = fields.Float(allow_none=True)
    LotShape = fields.Str()
    LowQualFinSF = fields.Integer()
    MSSubClass = fields.Integer()
    MSZoning = fields.Str(allow_none=True)
    MasVnrArea = fields.Float(allow_none=True)
    MasVnrType = fields.Str(allow_none=True)
    MiscFeature = fields.Str(allow_none=True)
    MiscVal = fields.Integer()
    MoSold = fields.Integer()
    Neighborhood = fields.Str()
    OpenPorchSF = fields.Integer()
    OverallCond = fields.Integer()
    OverallQual = fields.Integer()
    PavedDrive = fields.Str()
    PoolArea = fields.Integer()
    PoolQC = fields.Str(allow_none=True)
    RoofMatl = fields.Str()
    RoofStyle = fields.Str()
    SaleCondition = fields.Str()
    SaleType = fields.Str(allow_none=True)
    ScreenPorch = fields.Integer()
    Street = fields.Str()
    TotRmsAbvGrd = fields.Integer()
    TotalBsmtSF = fields.Float()
    Utilities = fields.Str(allow_none=True)
    WoodDeckSF = fields.Integer()
    YearBuilt = fields.Integer()
    YearRemodAdd = fields.Integer()
    YrSold = fields.Integer()
    # NOTE(review): the next three look like the renamed forms of source
    # columns whose original names begin with digits (validate_inputs renames
    # columns via config.gradient_boosting_model_config.variables_to_rename)
    # -- confirm against the config.
    FirstFlrSF = fields.Integer()
    SecondFlrSF = fields.Integer()
    # NOTE(review): the "Portch" spelling presumably has to match the rename
    # target in the config; do not correct it here in isolation.
    ThreeSsnPortch = fields.Integer()


def drop_na_inputs(*, input_data: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of the inputs without rows missing required numerics.

    The columns listed in
    ``config.gradient_boosting_model_config.numerical_na_not_allowed`` must
    not contain nulls. Rows with a null in any of those columns are removed;
    ``input_data`` itself is never modified.

    Args:
        input_data: Raw model inputs, one row per record.

    Returns:
        A new DataFrame with the offending rows dropped, or a plain copy of
        ``input_data`` when none of the required columns contain nulls.
    """
    required_columns = (
        config.gradient_boosting_model_config.numerical_na_not_allowed
    )
    cleaned = input_data.copy()

    # Nothing to filter when every required column is fully populated.
    has_missing = input_data[required_columns].isnull().any().any()
    if not has_missing:
        return cleaned

    return cleaned.dropna(axis=0, subset=required_columns)


def validate_inputs(
    *, input_data: pd.DataFrame
) -> tuple[pd.DataFrame, dict | None]:
    """Rename, filter and schema-validate raw model inputs.

    The inputs are processed in three steps: columns are renamed according to
    ``config.gradient_boosting_model_config.variables_to_rename``, rows with
    nulls in columns where they are not allowed are dropped via
    ``drop_na_inputs``, and the remaining records are validated against
    ``HouseDataInputSchema``.

    Args:
        input_data: Raw model inputs, one row per record. The frame is not
            modified; renaming happens on a new frame.

    Returns:
        A ``(validated_data, errors)`` tuple. ``validated_data`` is the
        renamed and filtered DataFrame. ``errors`` is ``None`` when the
        schema accepted every record, otherwise a dict of validation
        messages. A validation failure is always reported as a dict, never
        silently collapsed to ``None``.
    """
    # Convert field names that would be syntax errors as Python identifiers
    # (beginning with numbers). Rename into a new frame so the caller's
    # DataFrame is not mutated as a hidden side effect.
    renamed_data = input_data.rename(
        columns=config.gradient_boosting_model_config.variables_to_rename
    )
    validated_data = drop_na_inputs(input_data=renamed_data)

    # Set many=True to allow passing in a list of records.
    schema = HouseDataInputSchema(many=True)
    errors = None

    try:
        # Replace numpy nans so that Marshmallow can validate.
        schema.load(validated_data.replace({np.nan: None}).to_dict(orient="records"))
    except ValidationError as exc:
        # normalized_messages() always yields a dict: the messages themselves
        # when they are already a dict, otherwise wrapped under the schema
        # key. This avoids reporting a failed validation as "no errors".
        errors = exc.normalized_messages()

    return validated_data, errors