sul1 prediction

from ai4water import Model
from ai4water.preprocessing import DataSet
from SeqMetrics import RegressionMetrics
from sul1_utils import make_whole_data

# Build the data for the "sul1_coppml" target with the project helper; the
# result exposes `.columns`, which is used below to derive feature names.
data = make_whole_data("sul1_coppml")

# The last column is the prediction target; every column before it is an input.
column_names = data.columns.tolist()
input_features = column_names[:-1]
output_features = column_names[-1:]

# NOTE(review): with train_fraction=1.0 there is presumably no separate test
# split, so the validation split is what the rest of the script treats as
# "test" data -- confirm against the ai4water DataSet documentation.
dataset = DataSet(
    data,
    train_fraction=1.0,
    split_random=True,
    seed=891,
)
train_x, train_y = dataset.training_data()
test_x, test_y = dataset.validation_data()
********** Removing Examples with nan in labels  **********

***** Training *****
input_x shape:  (299, 6)
target shape:  (299, 1)

********** Removing Examples with nan in labels  **********

***** Validation *****
input_x shape:  (75, 6)
target shape:  (75, 1)
# XGBRegressor wrapped in an ai4water Model. Each input feature is given its
# own transformation entry, and the target column gets a "sqrt" transformation.
model = Model(
    model={
        "XGBRegressor": {
            "n_estimators": 200,
            "learning_rate": 0.003140974364964209,
            "booster": "gbtree",
            "random_state": 313,
        },
    },
    x_transformation=[
        {"method": "vast", "features": ["wat_temp_c"]},
        {"method": "minmax", "features": ["tide_cm"]},
        {
            "method": "log10",
            "features": ["sal_psu"],
            "treat_negatives": True,
            "replace_zeros": True,
        },
        {"method": "robust", "features": ["pcp_mm"]},
        {
            "method": "log",
            "features": ["wind_speed_mps"],
            "treat_negatives": True,
            "replace_zeros": True,
        },
        {"method": "minmax", "features": ["air_p_hpa"]},
    ],
    y_transformation=[
        {
            "method": "sqrt",
            "features": ["sul1_coppml"],
            "treat_negatives": True,
        },
    ],
    seed=891,
    input_features=input_features,
    output_features=output_features,
)
building ML model for
regression problem using XGBRegressor
# Fit the model on the training split; the return value is deliberately
# discarded by binding it to `_`.
_ = model.fit(x=train_x, y=train_y)

Training data

# Predict on the training inputs. With return_true=True the call is unpacked
# into a (true values, predictions) pair for metric computation.
train_true, train_pred = model.predict(x=train_x, y=train_y, return_true=True)
  • random split
  • random split
# Compute every available regression metric for the training predictions,
# then print only the subset of interest.
train_evaluator = RegressionMetrics(train_true, train_pred)
metrics = train_evaluator.calculate_all()

for name in ("r2", "r2_score", "nrmse", "rmsle", "mape", "pbias", "rmse"):
    print(name, metrics[name])
r2 0.7151341354730658
r2_score 0.1070858853657799
nrmse 0.06256899261189287
rmsle 1.6559089749668063
mape 86.68399740950403
pbias -87.49351274725765
rmse 37595952.29005516

Test data

# Predict on the held-out inputs (test_x/test_y hold the split returned by
# dataset.validation_data()). With return_true=True the call is unpacked into
# a (true values, predictions) pair for metric computation.
test_true, test_pred = model.predict(x=test_x, y=test_y, return_true=True)
  • random split
  • random split
# Compute every available regression metric for the held-out predictions,
# then print the subset of interest (rmse is not reported for this split).
test_evaluator = RegressionMetrics(test_true, test_pred)
metrics = test_evaluator.calculate_all()

for name in ("r2", "r2_score", "nrmse", "rmsle", "mape", "pbias"):
    print(name, metrics[name])
r2 0.0008533183125757705
r2_score -0.019974936723706316
nrmse 0.11564400181975958
rmsle 2.5163598919662986
mape 313.85829702959865
pbias -97.88572564677507

Total running time of the script: ( 0 minutes 12.053 seconds)

Gallery generated by Sphinx-Gallery