aac prediction

from ai4water import Model
from ai4water.preprocessing import DataSet
from SeqMetrics import RegressionMetrics
from aac_utils import make_whole_data

# Load the data through the project helper; the result exposes `.columns`,
# so it is presumably a pandas DataFrame (confirm in aac_utils).
data = make_whole_data("aac_coppml")

# Every column except the last is treated as an input feature; the last
# column is the single output feature (kept as a one-element list).
column_names = data.columns.tolist()
input_features = column_names[:-1]
output_features = column_names[-1:]

# NOTE(review): train_fraction=1.0 is passed, yet the captured run output
# reports 299 training and 75 validation examples, so the validation share
# appears to come from a separate default -- confirm against ai4water docs.
dataset = DataSet(
    data,
    train_fraction=1.0,
    split_random=True,
    seed=891,
)

train_x, train_y = dataset.training_data()
# The "test" arrays used in the rest of this script are the validation split.
test_x, test_y = dataset.validation_data()
********** Removing Examples with nan in labels  **********

***** Training *****
input_x shape:  (299, 6)
target shape:  (299, 1)

********** Removing Examples with nan in labels  **********

***** Validation *****
input_x shape:  (75, 6)
target shape:  (75, 1)
# Estimator name mapped to the keyword arguments it is configured with.
regressor_config = {
    "XGBRegressor": {
        "n_estimators": 31,
        "learning_rate": 0.4016665322343523,
        "booster": "gbtree",
        "random_state": 313,
    },
}

# One transformation entry per input feature, listed in the original order.
feature_transformations = [
    {
        "method": "sqrt",
        "features": ["wat_temp_c"],
        "treat_negatives": True,
    },
    {
        "method": "quantile",
        "features": ["tide_cm"],
        "n_quantiles": 239,
    },
    {
        "method": "scale",
        "features": ["sal_psu"],
    },
    {
        "method": "log10",
        "features": ["pcp_mm"],
        "treat_negatives": True,
        "replace_zeros": True,
    },
    {
        "method": "minmax",
        "features": ["wind_speed_mps"],
    },
    {
        "method": "log2",
        "features": ["air_p_hpa"],
        "treat_negatives": True,
        "replace_zeros": True,
    },
]

# Transformation applied to the target column.
target_transformations = [
    {
        "method": "quantile",
        "features": ["aac_coppml"],
        "n_quantiles": 239,
    },
]

# The seed matches the one given to DataSet above (891).
model = Model(
    model=regressor_config,
    x_transformation=feature_transformations,
    y_transformation=target_transformations,
    seed=891,
    input_features=input_features,
    output_features=output_features,
)
building ML model for
regression problem using XGBRegressor
# Fit the model on the training split; the value returned by fit() is
# deliberately discarded by binding it to `_`.
_ = model.fit(x=train_x, y=train_y)

Training data

# Predict on the training inputs. With return_true=True the result is
# unpacked as a (true values, predicted values) pair.
train_true, train_pred = model.predict(x=train_x, y=train_y, return_true=True)
  • random split
  • random split
invalid value encountered in true_divide
divide by zero encountered in log10
divide by zero encountered in log10
invalid value encountered in subtract
invalid value encountered in true_divide
# Compute the regression metrics for the training predictions; the result
# is looked up by metric name below.
metrics = RegressionMetrics(train_true, train_pred).calculate_all()

# Subset of metrics to report, printed as "<name> <value>" one per line.
reported_metrics = (
    "r2",
    "r2_score",
    "nrmse",
    "rmsle",
    "mape",
    "pbias",
    "rmse",
)
for metric in reported_metrics:
    print(metric, metrics[metric])
r2 0.8830533252834483
r2_score 0.8624608968614176
nrmse 0.031080068298255197
rmsle 0.3525408219692622
mape 24.76423779418666
pbias -21.265096613639898
rmse 2396543.139622922

Test data (held-out validation split)

# Predict on the held-out inputs (test_x/test_y come from
# dataset.validation_data()). With return_true=True the result is unpacked
# as a (true values, predicted values) pair.
test_true, test_pred = model.predict(x=test_x, y=test_y, return_true=True)
  • random split
  • random split
# Compute the regression metrics for the held-out predictions; the result
# is looked up by metric name below.
metrics = RegressionMetrics(test_true, test_pred).calculate_all()

# Subset of metrics to report, printed as "<name> <value>" one per line.
# NOTE(review): unlike the training report, "rmse" is not printed here.
reported_metrics = (
    "r2",
    "r2_score",
    "nrmse",
    "rmsle",
    "mape",
    "pbias",
)
for metric in reported_metrics:
    print(metric, metrics[metric])
r2 0.0005852536477711349
r2_score -0.5654572123811603
nrmse 0.20182411019869514
rmsle 3.0184472772487467
mape 3107.1865241828286
pbias -62.41986690005942

Total running time of the script: ( 0 minutes 12.201 seconds)

Gallery generated by Sphinx-Gallery