Ocean Temperature Prediction Using CalCOFI Environmental Data

Ocean Temperature Prediction Using CalCOFI Environmental Data

CalCOFI: Over 60 years of oceanographic data

import pandas as pd
import numpy as np

# Load the bottle-level and cast-level tables. low_memory=False turns off
# pandas' chunked parsing, which can otherwise infer mixed dtypes per column.
bottle = pd.read_csv("../data/bottle.csv", low_memory=False)
cast = pd.read_csv("../data/cast.csv", low_memory=False)

# Cast columns to attach to each bottle row; "Cst_Cnt" is the join key.
cast_cols = [
    "Cst_Cnt",
    "Year", "Month", "Quarter",
    "Lat_Dec", "Lon_Dec",
    "Bottom_D", "Distance",
]

# Left join keeps every bottle row, including any whose cast id is not
# present in the cast table (those rows get NaN in the cast columns).
df = pd.merge(bottle, cast[cast_cols], how="left", on="Cst_Cnt")

df.shape
(864863, 81)
# Keep only rows that have a temperature reading: it is the regression target.
df = df[df["T_degC"].notna()].copy()

# Original column name -> descriptive name used in the rest of the analysis.
rename_map = {
    # target
    "T_degC": "target_temperature_celsius",
    # bottle measurements
    "Depthm": "depth_meters",
    "Salnty": "salinity",
    "O2ml_L": "oxygen_ml_per_liter",
    "PO4uM": "phosphate_umol",
    "SiO3uM": "silicate_umol",
    "NO2uM": "nitrite_umol",
    "NO3uM": "nitrate_umol",
    # cast metadata
    "Lat_Dec": "latitude",
    "Lon_Dec": "longitude",
    "Bottom_D": "bottom_depth",
    "Distance": "distance_from_coast",
    "Year": "year",
    "Month": "month",
    "Quarter": "quarter",
}
df = df.rename(columns=rename_map)

target = "target_temperature_celsius"

# Model inputs, in the column order handed to every estimator.
# "quarter" is renamed above but is not used as a feature.
features = [
    "depth_meters",
    "salinity",
    "oxygen_ml_per_liter",
    "phosphate_umol",
    "silicate_umol",
    "nitrite_umol",
    "nitrate_umol",
    "year",
    "month",
    "latitude",
    "longitude",
    "bottom_depth",
    "distance_from_coast",
]

# Chronological hold-out: years up to 2014 are used for training and
# cross-validation, 2015 onward is the test set. Rows with a missing year
# satisfy neither comparison and therefore end up in neither frame.
in_train_period = df["year"] <= 2014
in_test_period = df["year"] >= 2015

# Both frames are ordered by time, then cast, then depth.
sort_keys = ["year", "month", "Cst_Cnt", "depth_meters"]

train_cv_df = df[in_train_period].copy()
train_cv_df = train_cv_df.sort_values(sort_keys).reset_index(drop=True)

test_df = df[in_test_period].copy()
test_df = test_df.sort_values(sort_keys).reset_index(drop=True)

X_train_cv, y_train_cv = train_cv_df[features], train_cv_df[target]
X_test, y_test = test_df[features], test_df[target]
from sklearn.model_selection import TimeSeriesSplit

# Distinct years of the training period, ascending. The splitter below is
# applied to this array of years, not to individual rows.
years = np.sort(train_cv_df["year"].unique())

# Five forward-chaining folds: each fold validates on years that come after
# the years it trains on.
tscv = TimeSeriesSplit(n_splits=5)

for fold, (train_year_idx, val_year_idx) in enumerate(tscv.split(years), start=1):
    train_years = years[train_year_idx]
    val_years = years[val_year_idx]

    # Print "first–last" for each range. The en dash is required: without a
    # separator the two years run together (e.g. "19491963").
    print(
        f"Fold {fold}: "
        f"Train {train_years[0]}–{train_years[-1]} | "
        f"Validate {val_years[0]}–{val_years[-1]}"
    )
Fold 1: Train 1949–1963 | Validate 1964–1973
Fold 2: Train 1949–1973 | Validate 1974–1984
Fold 3: Train 1949–1984 | Validate 1985–1994
Fold 4: Train 1949–1994 | Validate 1995–2004
Fold 5: Train 1949–2004 | Validate 2005–2014
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge

# Baseline linear pipeline: fill missing values with the column median (and
# append missing-value indicator columns), standardise, then fit Ridge.
ridge_steps = [
    ("imputer", SimpleImputer(add_indicator=True, strategy="median")),
    ("scaler", StandardScaler()),
    ("model", Ridge(alpha=10.0)),
]
ridge_pipeline = Pipeline(steps=ridge_steps)
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def regression_metrics(y_true, y_pred):
    """Return ``(MAE, RMSE, R2)`` for predictions ``y_pred`` against ``y_true``.

    RMSE is the square root of the mean squared error.
    """
    mse = mean_squared_error(y_true, y_pred)
    return (
        mean_absolute_error(y_true, y_pred),
        np.sqrt(mse),
        r2_score(y_true, y_pred),
    )


# One record (dict) per fold; turned into a DataFrame after the loop.
cv_results = []

for fold, (train_year_idx, val_year_idx) in enumerate(tscv.split(years), start=1):
    train_years = years[train_year_idx]
    val_years = years[val_year_idx]

    # Select rows by year membership so that every sample of a given year
    # falls entirely in the training part or entirely in the validation part.
    train_mask = train_cv_df["year"].isin(train_years)
    val_mask = train_cv_df["year"].isin(val_years)

    X_train_fold = train_cv_df.loc[train_mask, features]
    y_train_fold = train_cv_df.loc[train_mask, target]

    X_val_fold = train_cv_df.loc[val_mask, features]
    y_val_fold = train_cv_df.loc[val_mask, target]

    # The same pipeline object is refit from scratch on each fold.
    ridge_pipeline.fit(X_train_fold, y_train_fold)

    val_pred = ridge_pipeline.predict(X_val_fold)

    mae, rmse, r2 = regression_metrics(y_val_fold, val_pred)

    cv_results.append({
        "fold": fold,
        # "first–last" year labels; the en dash keeps the two years from
        # running together (e.g. "19491963").
        "train_years": f"{train_years[0]}–{train_years[-1]}",
        "validation_years": f"{val_years[0]}–{val_years[-1]}",
        "MAE": mae,
        "RMSE": rmse,
        "R2": r2
    })

cv_results_df = pd.DataFrame(cv_results)
cv_results_df
fold train_years validation_years MAE RMSE R2
0 1 1949–1963 1964–1973 2.502009 3.128031 0.483301
1 2 1949–1973 1974–1984 1.815000 2.334075 0.669461
2 3 1949–1984 1985–1994 1.289910 1.696874 0.801674
3 4 1949–1994 1995–2004 1.126286 1.518651 0.833472
4 5 1949–2004 2005–2014 1.096580 1.470248 0.831887
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
import pandas as pd

def regression_metrics(y_true, y_pred):
    """Score predictions against the truth and return ``(MAE, RMSE, R2)``."""
    scores = (
        mean_absolute_error(y_true, y_pred),
        np.sqrt(mean_squared_error(y_true, y_pred)),  # RMSE = sqrt(MSE)
        r2_score(y_true, y_pred),
    )
    return scores


def evaluate_model_timeseries(model, model_name, scale=True):
    """Evaluate ``model`` with year-based, forward-chaining cross-validation.

    For each split that ``tscv`` produces over ``years``, rows of
    ``train_cv_df`` are selected by year, a pipeline (median imputation with
    missing-value indicators, optional standardisation, then ``model``) is
    fitted on the training years, and MAE / RMSE / R2 are computed on both the
    training rows and the validation rows.

    The function reads the module-level names ``tscv``, ``years``,
    ``train_cv_df``, ``features`` and ``target``.

    Parameters
    ----------
    model
        Estimator used as the last pipeline step. The instance passed in is
        fitted in place on every fold; it is not cloned here.
    model_name
        Label stored in the ``"model"`` column of every returned row.
    scale
        When true, a ``StandardScaler`` step is inserted between the imputer
        and the model.

    Returns
    -------
    pandas.DataFrame
        One row per fold with the model label, fold number, train/validation
        year ranges (formatted ``"first–last"``) and the train/validation
        MAE, RMSE and R2.
    """
    results = []

    for fold, (train_year_idx, val_year_idx) in enumerate(tscv.split(years), start=1):
        train_years = years[train_year_idx]
        val_years = years[val_year_idx]

        # Split rows by year membership so a year never straddles both sides.
        train_mask = train_cv_df["year"].isin(train_years)
        val_mask = train_cv_df["year"].isin(val_years)

        X_train_fold = train_cv_df.loc[train_mask, features]
        y_train_fold = train_cv_df.loc[train_mask, target]

        X_val_fold = train_cv_df.loc[val_mask, features]
        y_val_fold = train_cv_df.loc[val_mask, target]

        # Imputer and model are always present; the scaler is optional.
        steps = [("imputer", SimpleImputer(strategy="median", add_indicator=True))]
        if scale:
            steps.append(("scaler", StandardScaler()))
        steps.append(("model", model))
        pipe = Pipeline(steps)

        pipe.fit(X_train_fold, y_train_fold)

        train_pred = pipe.predict(X_train_fold)
        val_pred = pipe.predict(X_val_fold)

        train_mae, train_rmse, train_r2 = regression_metrics(y_train_fold, train_pred)
        val_mae, val_rmse, val_r2 = regression_metrics(y_val_fold, val_pred)

        results.append({
            "model": model_name,
            "fold": fold,
            # The en dash separates first and last year; without it the two
            # years run together (e.g. "19491963").
            "train_years": f"{train_years[0]}–{train_years[-1]}",
            "validation_years": f"{val_years[0]}–{val_years[-1]}",
            "train_MAE": train_mae,
            "train_RMSE": train_rmse,
            "train_R2": train_r2,
            "val_MAE": val_mae,
            "val_RMSE": val_rmse,
            "val_R2": val_r2
        })

    return pd.DataFrame(results)
from sklearn.linear_model import Ridge

# Regularisation strengths to compare.
ridge_alphas = [0.01, 0.1, 1, 10, 50, 100, 500, 1000]

# One per-fold results frame for each alpha (features are standardised).
all_ridge_results = [
    evaluate_model_timeseries(
        model=Ridge(alpha=alpha),
        model_name=f"Ridge_alpha_{alpha}",
        scale=True,
    )
    for alpha in ridge_alphas
]

ridge_tuning_df = pd.concat(all_ridge_results, ignore_index=True)

# Named aggregation: mean of every train/val metric across folds, with
# columns ordered MAE, RMSE, R2 and train before val within each metric.
ridge_mean_spec = {
    f"mean_{split}_{metric}": (f"{split}_{metric}", "mean")
    for metric in ("MAE", "RMSE", "R2")
    for split in ("train", "val")
}

ridge_by_model = ridge_tuning_df.groupby("model")
ridge_summary = ridge_by_model.agg(**ridge_mean_spec).sort_values("mean_val_MAE")

ridge_summary
mean_train_MAE mean_val_MAE mean_train_RMSE mean_val_RMSE mean_train_R2 mean_val_R2
model
Ridge_alpha_0.01 1.682884 1.565956 2.262360 2.029577 0.741926 0.723958
Ridge_alpha_0.1 1.682884 1.565956 2.262360 2.029577 0.741926 0.723958
Ridge_alpha_1 1.682883 1.565956 2.262360 2.029577 0.741926 0.723958
Ridge_alpha_10 1.682882 1.565957 2.262360 2.029576 0.741926 0.723959
Ridge_alpha_50 1.682877 1.565962 2.262361 2.029572 0.741926 0.723962
Ridge_alpha_100 1.682870 1.565969 2.262361 2.029568 0.741926 0.723966
Ridge_alpha_500 1.682817 1.566023 2.262366 2.029537 0.741924 0.723998
Ridge_alpha_1000 1.682758 1.566095 2.262382 2.029504 0.741921 0.724035
from sklearn.tree import DecisionTreeRegressor

# Decision trees that differ only in their maximum depth.
tree_models = [
    (
        f"DecisionTree_depth_{depth}",
        DecisionTreeRegressor(max_depth=depth, random_state=42),
    )
    for depth in (5, 10, 15, 20)
]

# The pipeline is built without a scaling step for these models.
tree_results = [
    evaluate_model_timeseries(model=estimator, model_name=label, scale=False)
    for label, estimator in tree_models
]

tree_results_df = pd.concat(tree_results, ignore_index=True)

# Mean of each train/val metric across folds, best validation MAE first.
tree_mean_spec = {
    f"mean_{split}_{metric}": (f"{split}_{metric}", "mean")
    for metric in ("MAE", "RMSE", "R2")
    for split in ("train", "val")
}

tree_by_model = tree_results_df.groupby("model")
tree_summary = tree_by_model.agg(**tree_mean_spec).sort_values("mean_val_MAE")

tree_summary
mean_train_MAE mean_val_MAE mean_train_RMSE mean_val_RMSE mean_train_R2 mean_val_R2
model
DecisionTree_depth_15 0.365398 0.599433 0.568175 0.979580 0.983640 0.933012
DecisionTree_depth_20 0.155387 0.620787 0.285406 1.017554 0.995810 0.927901
DecisionTree_depth_10 0.586091 0.654021 0.887020 1.009999 0.960305 0.930502
DecisionTree_depth_5 0.961570 0.858932 1.409718 1.243463 0.899870 0.898872
from sklearn.ensemble import HistGradientBoostingRegressor

# 2 x 2 grid over learning rate and max depth; every other setting is fixed.
# Iteration order (rate outer, depth inner) reproduces the candidate order
# lr 0.05/depth 6, lr 0.05/depth 10, lr 0.1/depth 6, lr 0.1/depth 10.
hgb_models = [
    (
        f"HGB_lr_{rate}_depth_{depth}",
        HistGradientBoostingRegressor(
            learning_rate=rate,
            max_iter=200,
            max_leaf_nodes=31,
            max_depth=depth,
            l2_regularization=0.0,
            random_state=42,
        ),
    )
    for rate in (0.05, 0.1)
    for depth in (6, 10)
]

# The pipeline is built without a scaling step for these models.
hgb_results = [
    evaluate_model_timeseries(model=estimator, model_name=label, scale=False)
    for label, estimator in hgb_models
]

hgb_results_df = pd.concat(hgb_results, ignore_index=True)

# Mean of each train/val metric across folds, best validation MAE first.
hgb_mean_spec = {
    f"mean_{split}_{metric}": (f"{split}_{metric}", "mean")
    for metric in ("MAE", "RMSE", "R2")
    for split in ("train", "val")
}

hgb_by_model = hgb_results_df.groupby("model")
hgb_summary = hgb_by_model.agg(**hgb_mean_spec).sort_values("mean_val_MAE")

hgb_summary
mean_train_MAE mean_val_MAE mean_train_RMSE mean_val_RMSE mean_train_R2 mean_val_R2
model
HGB_lr_0.1_depth_6 0.440810 0.607332 0.653321 0.819093 0.978479 0.951920
HGB_lr_0.05_depth_6 0.480591 0.609946 0.710915 0.823207 0.974516 0.953166
HGB_lr_0.05_depth_10 0.470636 0.633857 0.692547 0.847645 0.975819 0.949125
HGB_lr_0.1_depth_10 0.432193 0.642879 0.635330 0.855562 0.979645 0.945839
from sklearn.ensemble import RandomForestRegressor

# Two 100-tree forests that differ only in maximum depth.
rf_models = [
    (
        f"RF_100_depth_{depth}",
        RandomForestRegressor(
            n_estimators=100,
            max_depth=depth,
            min_samples_leaf=5,
            n_jobs=-1,
            random_state=42,
        ),
    )
    for depth in (10, 15)
]

# The pipeline is built without a scaling step for these models.
rf_results = [
    evaluate_model_timeseries(model=estimator, model_name=label, scale=False)
    for label, estimator in rf_models
]

rf_results_df = pd.concat(rf_results, ignore_index=True)

# Mean of each train/val metric across folds, best validation MAE first.
rf_mean_spec = {
    f"mean_{split}_{metric}": (f"{split}_{metric}", "mean")
    for metric in ("MAE", "RMSE", "R2")
    for split in ("train", "val")
}

rf_by_model = rf_results_df.groupby("model")
rf_summary = rf_by_model.agg(**rf_mean_spec).sort_values("mean_val_MAE")

rf_summary
mean_train_MAE mean_val_MAE mean_train_RMSE mean_val_RMSE mean_train_R2 mean_val_R2
model
RF_100_depth_15 0.335501 0.514797 0.512193 0.798657 0.986723 0.954657
RF_100_depth_10 0.544814 0.612853 0.822920 0.913149 0.965817 0.943263
# Stack the per-family summaries into one table and rank every candidate by
# its mean validation MAE (lowest first).
family_summaries = [ridge_summary, tree_summary, hgb_summary, rf_summary]
model_comparison = pd.concat(family_summaries, axis=0)
model_comparison = model_comparison.sort_values("mean_val_MAE")

model_comparison
mean_train_MAE mean_val_MAE mean_train_RMSE mean_val_RMSE mean_train_R2 mean_val_R2
model
RF_100_depth_15 0.335501 0.514797 0.512193 0.798657 0.986723 0.954657
DecisionTree_depth_15 0.365398 0.599433 0.568175 0.979580 0.983640 0.933012
HGB_lr_0.1_depth_6 0.440810 0.607332 0.653321 0.819093 0.978479 0.951920
HGB_lr_0.05_depth_6 0.480591 0.609946 0.710915 0.823207 0.974516 0.953166
RF_100_depth_10 0.544814 0.612853 0.822920 0.913149 0.965817 0.943263
DecisionTree_depth_20 0.155387 0.620787 0.285406 1.017554 0.995810 0.927901
HGB_lr_0.05_depth_10 0.470636 0.633857 0.692547 0.847645 0.975819 0.949125
HGB_lr_0.1_depth_10 0.432193 0.642879 0.635330 0.855562 0.979645 0.945839
DecisionTree_depth_10 0.586091 0.654021 0.887020 1.009999 0.960305 0.930502
DecisionTree_depth_5 0.961570 0.858932 1.409718 1.243463 0.899870 0.898872
Ridge_alpha_0.01 1.682884 1.565956 2.262360 2.029577 0.741926 0.723958
Ridge_alpha_0.1 1.682884 1.565956 2.262360 2.029577 0.741926 0.723958
Ridge_alpha_1 1.682883 1.565956 2.262360 2.029577 0.741926 0.723958
Ridge_alpha_10 1.682882 1.565957 2.262360 2.029576 0.741926 0.723959
Ridge_alpha_50 1.682877 1.565962 2.262361 2.029572 0.741926 0.723962
Ridge_alpha_100 1.682870 1.565969 2.262361 2.029568 0.741926 0.723966
Ridge_alpha_500 1.682817 1.566023 2.262366 2.029537 0.741924 0.723998
Ridge_alpha_1000 1.682758 1.566095 2.262382 2.029504 0.741921 0.724035
# (n_estimators, max_depth, min_samples_leaf) for each tuned candidate.
# All of them sample sqrt(n_features) candidate features per split.
rf_tuned_grid = [
    (150, 15, 5),
    (200, 15, 5),
    (150, 18, 5),
    (150, 15, 3),
]

rf_tuned_models = [
    (
        f"RF_{n_trees}_depth_{depth}_leaf_{leaf}",
        RandomForestRegressor(
            n_estimators=n_trees,
            max_depth=depth,
            min_samples_leaf=leaf,
            max_features="sqrt",
            n_jobs=-1,
            random_state=42,
        ),
    )
    for n_trees, depth, leaf in rf_tuned_grid
]

# The pipeline is built without a scaling step for these models.
rf_tuned_results = [
    evaluate_model_timeseries(model=estimator, model_name=label, scale=False)
    for label, estimator in rf_tuned_models
]

rf_tuned_df = pd.concat(rf_tuned_results, ignore_index=True)

# Mean of each train/val metric across folds, best validation MAE first.
rf_tuned_mean_spec = {
    f"mean_{split}_{metric}": (f"{split}_{metric}", "mean")
    for metric in ("MAE", "RMSE", "R2")
    for split in ("train", "val")
}

rf_tuned_by_model = rf_tuned_df.groupby("model")
rf_tuned_summary = (
    rf_tuned_by_model.agg(**rf_tuned_mean_spec).sort_values("mean_val_MAE")
)

rf_tuned_summary
mean_train_MAE mean_val_MAE mean_train_RMSE mean_val_RMSE mean_train_R2 mean_val_R2
model
RF_150_depth_18_leaf_5 0.366232 0.479001 0.549087 0.707585 0.984800 0.965903
RF_150_depth_15_leaf_5 0.454583 0.493641 0.669212 0.725675 0.977409 0.964364
RF_200_depth_15_leaf_5 0.454409 0.493948 0.668912 0.724926 0.977428 0.964419
RF_150_depth_15_leaf_3 0.446824 0.497121 0.655967 0.723273 0.978287 0.964589
# Candidate label -> the hyperparameters that vary between candidates.
# Labels are spelled out because they are not a plain formatting of the
# values (l2_regularization=0.0 is labelled "l2_0").
hgb_tuned_params = {
    "HGB_lr_0.08_depth_6_iter_300_l2_0": dict(
        learning_rate=0.08, max_iter=300, max_depth=6, l2_regularization=0.0
    ),
    "HGB_lr_0.05_depth_6_iter_400_l2_0": dict(
        learning_rate=0.05, max_iter=400, max_depth=6, l2_regularization=0.0
    ),
    "HGB_lr_0.1_depth_6_iter_300_l2_0.01": dict(
        learning_rate=0.1, max_iter=300, max_depth=6, l2_regularization=0.01
    ),
    "HGB_lr_0.08_depth_8_iter_300_l2_0.01": dict(
        learning_rate=0.08, max_iter=300, max_depth=8, l2_regularization=0.01
    ),
}

# max_leaf_nodes and random_state are shared by every candidate.
hgb_tuned_models = [
    (
        label,
        HistGradientBoostingRegressor(max_leaf_nodes=31, random_state=42, **params),
    )
    for label, params in hgb_tuned_params.items()
]

# The pipeline is built without a scaling step for these models.
hgb_tuned_results = [
    evaluate_model_timeseries(model=estimator, model_name=label, scale=False)
    for label, estimator in hgb_tuned_models
]

hgb_tuned_df = pd.concat(hgb_tuned_results, ignore_index=True)

# Mean of each train/val metric across folds, best validation MAE first.
hgb_tuned_mean_spec = {
    f"mean_{split}_{metric}": (f"{split}_{metric}", "mean")
    for metric in ("MAE", "RMSE", "R2")
    for split in ("train", "val")
}

hgb_tuned_by_model = hgb_tuned_df.groupby("model")
hgb_tuned_summary = (
    hgb_tuned_by_model.agg(**hgb_tuned_mean_spec).sort_values("mean_val_MAE")
)

hgb_tuned_summary
mean_train_MAE mean_val_MAE mean_train_RMSE mean_val_RMSE mean_train_R2 mean_val_R2
model
HGB_lr_0.1_depth_6_iter_300_l2_0.01 0.418488 0.588808 0.621999 0.801887 0.980496 0.953815
HGB_lr_0.08_depth_6_iter_300_l2_0 0.430455 0.605328 0.639342 0.820997 0.979390 0.951453
HGB_lr_0.05_depth_6_iter_400_l2_0 0.439307 0.607189 0.651902 0.818582 0.978573 0.952200
HGB_lr_0.08_depth_8_iter_300_l2_0.01 0.423710 0.636932 0.625902 0.852140 0.980245 0.945434
# The RF_100_depth_15 and HGB_lr_0.1_depth_6 rows from the first comparison,
# kept as reference points next to the tuned candidates.
untuned_finalists = model_comparison.loc[["RF_100_depth_15", "HGB_lr_0.1_depth_6"]]

finalist_frames = [untuned_finalists, rf_tuned_summary, hgb_tuned_summary]
finalist_comparison = pd.concat(finalist_frames, axis=0)
finalist_comparison = finalist_comparison.sort_values("mean_val_MAE")

finalist_comparison
mean_train_MAE mean_val_MAE mean_train_RMSE mean_val_RMSE mean_train_R2 mean_val_R2
model
RF_150_depth_18_leaf_5 0.366232 0.479001 0.549087 0.707585 0.984800 0.965903
RF_150_depth_15_leaf_5 0.454583 0.493641 0.669212 0.725675 0.977409 0.964364
RF_200_depth_15_leaf_5 0.454409 0.493948 0.668912 0.724926 0.977428 0.964419
RF_150_depth_15_leaf_3 0.446824 0.497121 0.655967 0.723273 0.978287 0.964589
RF_100_depth_15 0.335501 0.514797 0.512193 0.798657 0.986723 0.954657
HGB_lr_0.1_depth_6_iter_300_l2_0.01 0.418488 0.588808 0.621999 0.801887 0.980496 0.953815
HGB_lr_0.08_depth_6_iter_300_l2_0 0.430455 0.605328 0.639342 0.820997 0.979390 0.951453
HGB_lr_0.05_depth_6_iter_400_l2_0 0.439307 0.607189 0.651902 0.818582 0.978573 0.952200
HGB_lr_0.1_depth_6 0.440810 0.607332 0.653321 0.819093 0.978479 0.951920
HGB_lr_0.08_depth_8_iter_300_l2_0.01 0.423710 0.636932 0.625902 0.852140 0.980245 0.945434

Final model: tuned Random Forest evaluated on the held-out test set

from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Random forest with the RF_150_depth_18_leaf_5 settings.
final_rf_model = RandomForestRegressor(
    n_estimators=150,
    max_depth=18,
    min_samples_leaf=5,
    max_features="sqrt",
    n_jobs=-1,
    random_state=42,
)

# Same preprocessing as the unscaled cross-validation pipelines: median
# imputation plus missing-value indicator columns, no scaling step.
final_rf_pipeline = Pipeline(steps=[
    ("imputer", SimpleImputer(add_indicator=True, strategy="median")),
    ("model", final_rf_model),
])

# Fit on the full training/CV period, then score the held-out test rows.
final_rf_pipeline.fit(X_train_cv, y_train_cv)
final_test_pred = final_rf_pipeline.predict(X_test)

final_test_mae, final_test_rmse, final_test_r2 = regression_metrics(
    y_test, final_test_pred
)

print("Final Tuned Random Forest Test Results")
for metric_label, metric_value in (
    ("MAE :", final_test_mae),
    ("RMSE:", final_test_rmse),
    ("R²  :", final_test_r2),
):
    print(metric_label, metric_value)
Final Tuned Random Forest Test Results
MAE : 0.43265300737751977
RMSE: 0.6991607339656488
R²  : 0.9706738906233219