Note
Go to the end to download the full example code or to run this example in your browser via Binder
sul1 prediction
from ai4water import Model
from ai4water.preprocessing import DataSet
from SeqMetrics import RegressionMetrics
from sul1_utils import make_whole_data
# Load the complete table for the "sul1_coppml" target.
data = make_whole_data("sul1_coppml")

# The last column is used as the prediction target; every column before it
# is used as an input feature.
input_features = data.columns.tolist()[:-1]
output_features = data.columns.tolist()[-1:]

# Presumably a seeded random split, so the partition is repeatable between
# runs -- confirm against the DataSet documentation.
dataset = DataSet(
    data,
    train_fraction=1.0,
    split_random=True,
    seed=891,
)

# NOTE: the arrays named ``test_*`` hold the *validation* split of ``dataset``.
train_x, train_y = dataset.training_data()
test_x, test_y = dataset.validation_data()
********** Removing Examples with nan in labels **********
***** Training *****
input_x shape: (299, 6)
target shape: (299, 1)
********** Removing Examples with nan in labels **********
***** Validation *****
input_x shape: (75, 6)
target shape: (75, 1)
# Build an XGBRegressor-based model. Each input feature gets its own
# transformation and the target gets a square-root transformation.
model = Model(
    model={
        "XGBRegressor": {
            "n_estimators": 200,
            "learning_rate": 0.003140974364964209,
            "booster": "gbtree",
            "random_state": 313,
        },
    },
    x_transformation=[
        {"method": "vast", "features": ["wat_temp_c"]},
        {"method": "minmax", "features": ["tide_cm"]},
        {
            "method": "log10",
            "features": ["sal_psu"],
            "treat_negatives": True,
            "replace_zeros": True,
        },
        {"method": "robust", "features": ["pcp_mm"]},
        {
            "method": "log",
            "features": ["wind_speed_mps"],
            "treat_negatives": True,
            "replace_zeros": True,
        },
        {"method": "minmax", "features": ["air_p_hpa"]},
    ],
    y_transformation=[
        {
            "method": "sqrt",
            "features": ["sul1_coppml"],
            "treat_negatives": True,
        },
    ],
    seed=891,
    input_features=input_features,
    output_features=output_features,
)
building ML model for
regression problem using XGBRegressor
# Fit the model on the training split; the return value is bound to ``_``
# because it is not used afterwards.
_ = model.fit(x=train_x, y=train_y)
Training data
# Predict on the training split, asking ``predict`` to also hand back the
# true values, then print a selection of the computed regression metrics.
train_true, train_pred = model.predict(
    x=train_x,
    y=train_y,
    return_true=True,
)
metrics = RegressionMetrics(train_true, train_pred).calculate_all()
for metric in ("r2", "r2_score", "nrmse", "rmsle", "mape", "pbias", "rmse"):
    print(metric, metrics[metric])
r2 0.7151341354730658
r2_score 0.1070858853657799
nrmse 0.06256899261189287
rmsle 1.6559089749668063
mape 86.68399740950403
pbias -87.49351274725765
rmse 37595952.29005516
Test data (validation split)
# Predict on the held-out arrays (``test_x``/``test_y``), asking ``predict``
# to also hand back the true values, then print a selection of the metrics.
test_true, test_pred = model.predict(
    x=test_x,
    y=test_y,
    return_true=True,
)
metrics = RegressionMetrics(test_true, test_pred).calculate_all()
for metric in ("r2", "r2_score", "nrmse", "rmsle", "mape", "pbias"):
    print(metric, metrics[metric])
r2 0.0008533183125757705
r2_score -0.019974936723706316
nrmse 0.11564400181975958
rmsle 2.5163598919662986
mape 313.85829702959865
pbias -97.88572564677507
Total running time of the script: ( 0 minutes 12.053 seconds)



