Note
Go to the end to download the full example code or to run this example in your browser via Binder
tetx prediction
from ai4water import Model
from ai4water.preprocessing import DataSet
from SeqMetrics import RegressionMetrics
from tetx_utils import make_whole_data
# Load the table used throughout this example.
# NOTE(review): presumably "tetx_coppml" selects the target and the result is a
# pandas DataFrame whose last column is that target -- confirm in tetx_utils.
data = make_whole_data("tetx_coppml")

# Every column except the last is a model input; the last one is the output.
columns = data.columns.tolist()
input_features, output_features = columns[:-1], columns[-1:]

# Split settings gathered in one place so they are easy to audit.
split_options = {
    "train_fraction": 1.0,
    "split_random": True,
    "seed": 891,
}
dataset = DataSet(data, **split_options)

train_x, train_y = dataset.training_data()
# Despite the "test" naming, these arrays come from the validation split.
test_x, test_y = dataset.validation_data()
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (299, 6)
target shape: (299, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (75, 6)
target shape: (75, 1)
# Settings listed under the "XGBRegressor" key of the model configuration.
xgb_params = {
    "n_estimators": 164,
    "learning_rate": 0.012245084737106074,
    "booster": "gbtree",
    "random_state": 313,
}

# Input transformations: each entry names one method and the single input
# column it is configured for.
# NOTE(review): the exact meaning of "treat_negatives" / "replace_zeros" is
# defined by ai4water -- confirm before changing them.
x_transformation = [
    {
        "method": "log",
        "features": ["wat_temp_c"],
        "treat_negatives": True,
        "replace_zeros": True,
    },
    {"method": "vast", "features": ["tide_cm"]},
    {"method": "pareto", "features": ["sal_psu"]},
    {
        "method": "log2",
        "features": ["pcp_mm"],
        "treat_negatives": True,
        "replace_zeros": True,
    },
    {"method": "minmax", "features": ["wind_speed_mps"]},
    {"method": "pareto", "features": ["air_p_hpa"]},
]

# Target transformation configured for the "tetx_coppml" column.
y_transformation = [
    {
        "method": "sqrt",
        "features": ["tetx_coppml"],
        "treat_negatives": True,
    },
]

model = Model(
    model={"XGBRegressor": xgb_params},
    x_transformation=x_transformation,
    y_transformation=y_transformation,
    seed=891,
    input_features=input_features,
    output_features=output_features,
)
building ML model for
regression problem using XGBRegressor
# Fit the model on the training arrays.
# NOTE(review): the return value is bound to `_`, presumably so the rendered
# example does not display it -- confirm before removing the assignment.
_ = model.fit(x=train_x, y=train_y)
Training data
# Predict on the training arrays; with return_true=True the call yields the
# observed values alongside the predictions.
train_true, train_pred = model.predict(x=train_x, y=train_y, return_true=True)
metrics = RegressionMetrics(train_true, train_pred).calculate_all()

# Subset of the computed metrics that is printed for the training data.
reported = ("r2", "r2_score", "nrmse", "rmsle", "mape", "pbias", "rmse")
for metric in reported:
    print(metric, metrics[metric])
r2 0.9545851708151529
r2_score 0.6015761588182542
nrmse 0.05832919820103902
rmsle 2.765711886634883
mape 8956.576258941443
pbias -54.442626817269776
rmse 6940252.117051477
Test data
# Predict on the held-out arrays (test_x / test_y were taken from
# dataset.validation_data() earlier in this script).
test_true, test_pred = model.predict(x=test_x, y=test_y, return_true=True)
metrics = RegressionMetrics(test_true, test_pred).calculate_all()

# Subset of the computed metrics that is printed for the held-out data.
# "rmse" is not printed here, unlike in the training report.
reported = ("r2", "r2_score", "nrmse", "rmsle", "mape", "pbias")
for metric in reported:
    print(metric, metrics[metric])
r2 0.008689549820795043
r2_score -0.040898827093047485
nrmse 0.11936500045037066
rmsle 3.3317352202244788
mape 7123.521954165846
pbias -90.0158273636899
Total running time of the script: ( 0 minutes 12.024 seconds)



