Note
Go to the end to download the full example code or to run this example in your browser via Binder
aac prediction
from ai4water import Model
from ai4water.preprocessing import DataSet
from SeqMetrics import RegressionMetrics
from aac_utils import make_whole_data
# Load the full table for the "aac_coppml" target.
data = make_whole_data("aac_coppml")

# All columns except the last are inputs; the last column is the output.
columns = data.columns.tolist()
input_features = columns[:-1]
output_features = columns[-1:]

# Random split with a fixed seed so the example is reproducible.
dataset = DataSet(
    data,
    train_fraction=1.0,
    split_random=True,
    seed=891,
)

# The validation split is what this example refers to as the "test" data.
train_x, train_y = dataset.training_data()
test_x, test_y = dataset.validation_data()
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (299, 6)
target shape: (299, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (75, 6)
target shape: (75, 1)
# Estimator name mapped to the keyword arguments it is configured with.
xgb_config = {
    "XGBRegressor": {
        "n_estimators": 31,
        "learning_rate": 0.4016665322343523,
        "booster": "gbtree",
        "random_state": 313,
    },
}

# One transformation entry per input feature, in the original order.
x_transformation = [
    {
        "method": "sqrt",
        "features": ["wat_temp_c"],
        "treat_negatives": True,
    },
    {
        "method": "quantile",
        "features": ["tide_cm"],
        "n_quantiles": 239,
    },
    {
        "method": "scale",
        "features": ["sal_psu"],
    },
    {
        "method": "log10",
        "features": ["pcp_mm"],
        "treat_negatives": True,
        "replace_zeros": True,
    },
    {
        "method": "minmax",
        "features": ["wind_speed_mps"],
    },
    {
        "method": "log2",
        "features": ["air_p_hpa"],
        "treat_negatives": True,
        "replace_zeros": True,
    },
]

# Transformation applied to the target column.
y_transformation = [
    {
        "method": "quantile",
        "features": ["aac_coppml"],
        "n_quantiles": 239,
    },
]

model = Model(
    model=xgb_config,
    x_transformation=x_transformation,
    y_transformation=y_transformation,
    seed=891,
    input_features=input_features,
    output_features=output_features,
)
building ML model for
regression problem using XGBRegressor
# Fit on the training split; the return value is deliberately discarded.
_ = model.fit(
    x=train_x,
    y=train_y,
)
Training data
# Predict on the training split and get the true values back alongside
# the predictions (return_true=True).
train_true, train_pred = model.predict(
    x=train_x,
    y=train_y,
    return_true=True,
)
invalid value encountered in true_divide
divide by zero encountered in log10
divide by zero encountered in log10
invalid value encountered in subtract
invalid value encountered in true_divide
# Compute the metrics for the training predictions, then print the
# selected ones as "<name> <value>" lines.
train_metric_names = ("r2", "r2_score", "nrmse", "rmsle", "mape", "pbias", "rmse")
metrics = RegressionMetrics(train_true, train_pred).calculate_all()
for name in train_metric_names:
    print(name, metrics[name])
r2 0.8830533252834483
r2_score 0.8624608968614176
nrmse 0.031080068298255197
rmsle 0.3525408219692622
mape 24.76423779418666
pbias -21.265096613639898
rmse 2396543.139622922
Test data (the held-out validation split)
# Predict on the held-out split and get the true values back alongside
# the predictions (return_true=True).
test_true, test_pred = model.predict(
    x=test_x,
    y=test_y,
    return_true=True,
)

# Compute the metrics for the held-out predictions, then print the
# selected ones as "<name> <value>" lines.
test_metric_names = ("r2", "r2_score", "nrmse", "rmsle", "mape", "pbias")
metrics = RegressionMetrics(test_true, test_pred).calculate_all()
for name in test_metric_names:
    print(name, metrics[name])
r2 0.0005852536477711349
r2_score -0.5654572123811603
nrmse 0.20182411019869514
rmsle 3.0184472772487467
mape 3107.1865241828286
pbias -62.41986690005942
Total running time of the script: ( 0 minutes 12.201 seconds)



