Linear Regression with PEST++ GLM

This notebooks is a small proof of concept / benchmark comparison between PEST and SciPy. It shows how to easily setup the PEST++ solver via pyeumu. The same setup is used for Pastas under the hood of the PestGlmSolver.

Packages

import shutil
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyemu
import scipy as sc

Create data

np.random.seed(pyemu.en.SEED)  # set seed

x = np.linspace(0.0, 1.0, 101)  # x coordinates
a = 1.0  # slope
b = 10.0  # y-intercept
y = a * x + b  # y coordinates
y_noise = y + np.random.normal(
    loc=0.0, scale=0.1, size=len(x)
)  # y coordinates with noise

Scipy Linear Regression

res = sc.stats.linregress(x=x, y=y_noise)  # result object
a_fit = res.slope  # fitted slope
b_fit = res.intercept  # fitted y-intercept

# # add confidence interval
# dof = len(x) - 2 # degrees of freedom
# alpha = 0.05 # probability of confidence interval
# tinv = abs(sc.stats.distributions.t.ppf(alpha/2.0, dof)) # Two-sided inverse Students t-distribution
# a_fit_ci = tinv * res.stderr
# b_fit_ci = tinv * res.intercept_stderr
# lower_bound = (a_fit - a_fit_ci) * x + b_fit - b_fit_ci
# upper_bound = (a_fit + a_fit_ci) * x + b_fit + b_fit_ci

# visualize scipy result
fig, ax = plt.subplots(figsize=(6.75, 3.0))
ax.plot(x, y, label="True")
ax.plot(x, y_noise, marker=".", linestyle="", label="Noisy")
ax.plot(
    x, a_fit * x + b_fit, color="k", linestyle="--", label="SciPy Linear Regression"
)
# ax.fill_between(x, lower_bound, upper_bound, color="k", alpha=0.2, label=f"{1-alpha:0.0%} Confidence Interval")
ax.legend()
ax.grid()
../_images/4f4b37282c69c6ee0179086d6df5c87c1ce205f21a7b71da8a46b87db81fc3ff.png

Pest Linear Regression

Define run function

This function will be called each iteration by PEST++ GLM to create new simulated values.

def run():
    from numpy import linspace
    from pandas import Index, Series, read_csv

    parameters = read_csv("parameters_sel.csv", index_col=0)
    print("parameters view\n", parameters)
    # simulation = read_csv("simulation.csv")
    # x = simulation.index
    x = linspace(0.0, 1.0, 101)
    y = parameters.at["a", "optimal"] * x + parameters.at["b", "optimal"]
    simulation_new = Series(y, index=Index(x, name="x"), name="Simulation")
    simulation_new.to_csv("simulation.csv")

Setup PEST++ GLM files with Pyemu

# setup directories
model_dir = Path("model")
model_dir.mkdir() if not model_dir.exists() else None
temp_dir = Path("template")
temp_dir.mkdir() if not temp_dir.exists() else None
bin_dir = Path("bin")

# parameter bounds
bound = 10.0
a_ini = 0.8
b_ini = 0.2
parameters = pd.DataFrame(
    {
        "optimal": [a, b],
        "pmin": [a - bound, b - bound],
        "pmax": [a + bound, b + bound],
        "initial": [a_ini, b_ini],
    },
    index=pd.Index(["a", "b"], name="parnames"),
    dtype=float,
)
# setup files
par_sel = parameters.loc[:, ["optimal"]]
par_sel.to_csv(model_dir / "parameters_sel.csv")

observations = pd.Series(y_noise, index=pd.Index(x, name="x"), name="Observations")
observations.to_csv(model_dir / "simulation.csv")

# create pest optimization with pyemu
pf = pyemu.utils.PstFrom(original_d=model_dir, new_d=temp_dir, remove_existing=True)
# parameters
pf.add_parameters(
    "parameters_sel.csv",
    index_cols=[par_sel.index.name],
    use_cols=par_sel.columns.to_list(),
    par_type="grid",
    par_style="d",
)
# observations
pf.add_observations(
    "simulation.csv", index_cols=[observations.index.name], use_cols=[observations.name]
)
# python scripts to run
pf.add_py_function(file_name=run, call_str="run()", is_pre_cmd=None)
pf.mod_py_cmds.append("run()")

# create control file
pst = pf.build_pst()
pst.parameter_data.loc[:, ["parlbnd", "parubnd"]] = parameters.loc[
    :, ["pmin", "pmax"]
].values  # parameter bounds
pst.parameter_data["partrans"] = "none"  # parameter transformation not logarithmic
pst.control_data.noptmax = 100  # optimization runs
pst.write(pf.new_d / "pest.pst", version=2)

# execute
executable = "pestpp-glm"
shutil.copy(bin_dir / executable, temp_dir)
pyemu.os_utils.run(f"{executable} pest.pst", cwd=pf.new_d)

# get pets results
ipar = pd.read_csv(temp_dir / "pest.ipar", index_col=0).transpose()
a_pest, b_pest = ipar.iloc[:, -1].values

Hide code cell output

2026-03-10 15:44:45.399847 starting: opening PstFrom.log for logging
2026-03-10 15:44:45.400039 starting PstFrom process
2026-03-10 15:44:45.400068 starting: setting up dirs
2026-03-10 15:44:45.400115 starting: removing existing new_d 'template'
2026-03-10 15:44:45.403885 finished: removing existing new_d 'template' took: 0:00:00.003770
2026-03-10 15:44:45.403919 starting: copying original_d 'model' to new_d 'template'
2026-03-10 15:44:45.404254 finished: copying original_d 'model' to new_d 'template' took: 0:00:00.000335
2026-03-10 15:44:45.404369 finished: setting up dirs took: 0:00:00.004301
2026-03-10 15:44:45.404459 transform was not passed, setting default transform to 'log'
2026-03-10 15:44:45.404522 starting: adding grid type d style parameters for file(s) ['parameters_sel.csv']
2026-03-10 15:44:45.404577 starting: loading list-style template/parameters_sel.csv
2026-03-10 15:44:45.404612 starting: reading list-style file: template/parameters_sel.csv
2026-03-10 15:44:45.405640 finished: reading list-style file: template/parameters_sel.csv took: 0:00:00.001028
2026-03-10 15:44:45.405714 loaded list-style 'template/parameters_sel.csv' of shape (2, 2)
2026-03-10 15:44:45.406517 finished: loading list-style template/parameters_sel.csv took: 0:00:00.001940
2026-03-10 15:44:45.406586 starting: writing list-style template file 'template/parameters_sel.csv.tpl'
2026-03-10 15:44:45.412535 finished: writing list-style template file 'template/parameters_sel.csv.tpl' took: 0:00:00.005949
2026-03-10 15:44:45.414856 finished: adding grid type d style parameters for file(s) ['parameters_sel.csv'] took: 0:00:00.010334
2026-03-10 15:44:45.414962 starting: adding observations from output file simulation.csv
2026-03-10 15:44:45.414996 starting: adding observations from tabular output file '['simulation.csv']'
2026-03-10 15:44:45.415052 starting: reading list-style file: template/simulation.csv
2026-03-10 15:44:45.415805 finished: reading list-style file: template/simulation.csv took: 0:00:00.000753
2026-03-10 15:44:45.416204 starting: building insfile for tabular output file simulation.csv
2026-03-10 15:44:45.418967 finished: building insfile for tabular output file simulation.csv took: 0:00:00.002763
2026-03-10 15:44:45.419047 starting: adding observation from instruction file 'template/simulation.csv.ins'
2026-03-10 15:44:45.422234 finished: adding observation from instruction file 'template/simulation.csv.ins' took: 0:00:00.003187
2026-03-10 15:44:45.422672 finished: adding observations from tabular output file '['simulation.csv']' took: 0:00:00.007676
2026-03-10 15:44:45.422730 finished: adding observations from output file simulation.csv took: 0:00:00.007768
2026-03-10 15:44:45.423327 WARNING: add_py_function() command: run() is not being called directly
noptmax:0, npar_adj:2, nnz_obs:101
noptmax:100, npar_adj:2, nnz_obs:101
/home/docs/checkouts/readthedocs.org/user_builds/pastas-plugins/envs/stable/lib/python3.11/site-packages/pyemu/logger.py:100: PyemuWarning: 2026-03-10 15:44:45.423327 WARNING: add_py_function() command: run() is not being called directly
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[5], line 58
     56 # execute
     57 executable = "pestpp-glm"
---> 58 shutil.copy(bin_dir / executable, temp_dir)
     59 pyemu.os_utils.run(f"{executable} pest.pst", cwd=pf.new_d)
     61 # get pets results

File ~/.asdf/installs/python/3.11.12/lib/python3.11/shutil.py:431, in copy(src, dst, follow_symlinks)
    429 if os.path.isdir(dst):
    430     dst = os.path.join(dst, os.path.basename(src))
--> 431 copyfile(src, dst, follow_symlinks=follow_symlinks)
    432 copymode(src, dst, follow_symlinks=follow_symlinks)
    433 return dst

File ~/.asdf/installs/python/3.11.12/lib/python3.11/shutil.py:256, in copyfile(src, dst, follow_symlinks)
    254     os.symlink(os.readlink(src), dst)
    255 else:
--> 256     with open(src, 'rb') as fsrc:
    257         try:
    258             with open(dst, 'wb') as fdst:
    259                 # macOS

FileNotFoundError: [Errno 2] No such file or directory: 'bin/pestpp-glm'

Compare result

fig, ax = plt.subplots(figsize=(6.75, 3))
ax.plot(x, y, label=f"True {a=}, {b=}")
ax.plot(x, y_noise, marker=".", linestyle="", label="Noisy")
ax.plot(
    x,
    a_fit * x + b_fit,
    color="k",
    linestyle="--",
    label=f"SciPy Linear Regression a={a_fit:0.3f}, b={b_fit:0.3f}",
)
ax.plot(
    x,
    a_pest * x + b_pest,
    color="r",
    linestyle="-.",
    label=f"{executable} a={a_pest:0.3f}, b={b_pest:0.3f}",
)
ax.legend(handlelength=2.5)
ax.grid(True)
../_images/ac52b7b88dc2f2cdb89c54b43929cf326392dc8d09571415fde3a3708694b2b3.png