Linear Regression with PEST++ GLM

This notebooks is a small proof of concept / benchmark comparison between PEST and SciPy. It shows how to easily setup the PEST++ solver via pyeumu. The same setup is used for Pastas under the hood of the PestGlmSolver.

Packages

import shutil
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyemu
import scipy as sc

Create data

np.random.seed(pyemu.en.SEED)  # set seed

x = np.linspace(0.0, 1.0, 101)  # x coordinates
a = 1.0  # slope
b = 10.0  # y-intercept
y = a * x + b  # y coordinates
y_noise = y + np.random.normal(
    loc=0.0, scale=0.1, size=len(x)
)  # y coordinates with noise

Scipy Linear Regression

res = sc.stats.linregress(x=x, y=y_noise)  # result object
a_fit = res.slope  # fitted slope
b_fit = res.intercept  # fitted y-intercept

# # add confidence interval
# dof = len(x) - 2 # degrees of freedom
# alpha = 0.05 # probability of confidence interval
# tinv = abs(sc.stats.distributions.t.ppf(alpha/2.0, dof)) # Two-sided inverse Students t-distribution
# a_fit_ci = tinv * res.stderr
# b_fit_ci = tinv * res.intercept_stderr
# lower_bound = (a_fit - a_fit_ci) * x + b_fit - b_fit_ci
# upper_bound = (a_fit + a_fit_ci) * x + b_fit + b_fit_ci

# visualize scipy result
fig, ax = plt.subplots(figsize=(6.75, 3.0))
ax.plot(x, y, label="True")
ax.plot(x, y_noise, marker=".", linestyle="", label="Noisy")
ax.plot(
    x, a_fit * x + b_fit, color="k", linestyle="--", label="SciPy Linear Regression"
)
# ax.fill_between(x, lower_bound, upper_bound, color="k", alpha=0.2, label=f"{1-alpha:0.0%} Confidence Interval")
ax.legend()
ax.grid()
../_images/4f4b37282c69c6ee0179086d6df5c87c1ce205f21a7b71da8a46b87db81fc3ff.png

Pest Linear Regression

Define run function

This function will be called each iteration by PEST++ GLM to create new simulated values.

def run():
    from numpy import linspace
    from pandas import Index, Series, read_csv

    parameters = read_csv("parameters_sel.csv", index_col=0)
    print("parameters view\n", parameters)
    # simulation = read_csv("simulation.csv")
    # x = simulation.index
    x = linspace(0.0, 1.0, 101)
    y = parameters.at["a", "optimal"] * x + parameters.at["b", "optimal"]
    simulation_new = Series(y, index=Index(x, name="x"), name="Simulation")
    simulation_new.to_csv("simulation.csv")

Setup PEST++ GLM files with Pyemu

# setup directories
model_dir = Path("model")
model_dir.mkdir() if not model_dir.exists() else None
temp_dir = Path("template")
temp_dir.mkdir() if not temp_dir.exists() else None
bin_dir = Path("bin")

# parameter bounds
bound = 10.0
a_ini = 0.8
b_ini = 0.2
parameters = pd.DataFrame(
    {
        "optimal": [a, b],
        "pmin": [a - bound, b - bound],
        "pmax": [a + bound, b + bound],
        "initial": [a_ini, b_ini],
    },
    index=pd.Index(["a", "b"], name="parnames"),
    dtype=float,
)
# setup files
par_sel = parameters.loc[:, ["optimal"]]
par_sel.to_csv(model_dir / "parameters_sel.csv")

observations = pd.Series(y_noise, index=pd.Index(x, name="x"), name="Observations")
observations.to_csv(model_dir / "simulation.csv")

# create pest optimization with pyemu
pf = pyemu.utils.PstFrom(original_d=model_dir, new_d=temp_dir, remove_existing=True)
# parameters
pf.add_parameters(
    "parameters_sel.csv",
    index_cols=[par_sel.index.name],
    use_cols=par_sel.columns.to_list(),
    par_type="grid",
    par_style="d",
)
# observations
pf.add_observations(
    "simulation.csv", index_cols=[observations.index.name], use_cols=[observations.name]
)
# python scripts to run
pf.add_py_function(file_name=run, call_str="run()", is_pre_cmd=None)
pf.mod_py_cmds.append("run()")

# create control file
pst = pf.build_pst()
pst.parameter_data.loc[:, ["parlbnd", "parubnd"]] = parameters.loc[
    :, ["pmin", "pmax"]
].values  # parameter bounds
pst.parameter_data["partrans"] = "none"  # parameter transformation not logarithmic
pst.control_data.noptmax = 100  # optimization runs
pst.write(pf.new_d / "pest.pst", version=2)

# execute
executable = "pestpp-glm"
shutil.copy(bin_dir / executable, temp_dir)
pyemu.os_utils.run(f"{executable} pest.pst", cwd=pf.new_d)

# get pets results
ipar = pd.read_csv(temp_dir / "pest.ipar", index_col=0).transpose()
a_pest, b_pest = ipar.iloc[:, -1].values

Hide code cell output

2026-03-10 15:43:01.741112 starting: opening PstFrom.log for logging
2026-03-10 15:43:01.741289 starting PstFrom process
2026-03-10 15:43:01.741319 starting: setting up dirs
2026-03-10 15:43:01.741368 starting: removing existing new_d 'template'
2026-03-10 15:43:01.746293 finished: removing existing new_d 'template' took: 0:00:00.004925
2026-03-10 15:43:01.746335 starting: copying original_d 'model' to new_d 'template'
2026-03-10 15:43:01.746682 finished: copying original_d 'model' to new_d 'template' took: 0:00:00.000347
2026-03-10 15:43:01.746794 finished: setting up dirs took: 0:00:00.005475
2026-03-10 15:43:01.746890 transform was not passed, setting default transform to 'log'
2026-03-10 15:43:01.746958 starting: adding grid type d style parameters for file(s) ['parameters_sel.csv']
2026-03-10 15:43:01.747017 starting: loading list-style template/parameters_sel.csv
2026-03-10 15:43:01.747054 starting: reading list-style file: template/parameters_sel.csv
2026-03-10 15:43:01.748117 finished: reading list-style file: template/parameters_sel.csv took: 0:00:00.001063
2026-03-10 15:43:01.748211 loaded list-style 'template/parameters_sel.csv' of shape (2, 2)
2026-03-10 15:43:01.749020 finished: loading list-style template/parameters_sel.csv took: 0:00:00.002003
2026-03-10 15:43:01.749089 starting: writing list-style template file 'template/parameters_sel.csv.tpl'
2026-03-10 15:43:01.755153 finished: writing list-style template file 'template/parameters_sel.csv.tpl' took: 0:00:00.006064
2026-03-10 15:43:01.757530 finished: adding grid type d style parameters for file(s) ['parameters_sel.csv'] took: 0:00:00.010572
2026-03-10 15:43:01.757639 starting: adding observations from output file simulation.csv
2026-03-10 15:43:01.757674 starting: adding observations from tabular output file '['simulation.csv']'
2026-03-10 15:43:01.757719 starting: reading list-style file: template/simulation.csv
2026-03-10 15:43:01.758506 finished: reading list-style file: template/simulation.csv took: 0:00:00.000787
2026-03-10 15:43:01.758906 starting: building insfile for tabular output file simulation.csv
2026-03-10 15:43:01.761671 finished: building insfile for tabular output file simulation.csv took: 0:00:00.002765
2026-03-10 15:43:01.761742 starting: adding observation from instruction file 'template/simulation.csv.ins'
2026-03-10 15:43:01.764991 finished: adding observation from instruction file 'template/simulation.csv.ins' took: 0:00:00.003249
2026-03-10 15:43:01.765472 finished: adding observations from tabular output file '['simulation.csv']' took: 0:00:00.007798
2026-03-10 15:43:01.765536 finished: adding observations from output file simulation.csv took: 0:00:00.007897
2026-03-10 15:43:01.766135 WARNING: add_py_function() command: run() is not being called directly
noptmax:0, npar_adj:2, nnz_obs:101
noptmax:100, npar_adj:2, nnz_obs:101
/home/docs/checkouts/readthedocs.org/user_builds/pastas-plugins/envs/latest/lib/python3.11/site-packages/pyemu/logger.py:100: PyemuWarning: 2026-03-10 15:43:01.766135 WARNING: add_py_function() command: run() is not being called directly
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[5], line 58
     56 # execute
     57 executable = "pestpp-glm"
---> 58 shutil.copy(bin_dir / executable, temp_dir)
     59 pyemu.os_utils.run(f"{executable} pest.pst", cwd=pf.new_d)
     61 # get pets results

File ~/.asdf/installs/python/3.11.12/lib/python3.11/shutil.py:431, in copy(src, dst, follow_symlinks)
    429 if os.path.isdir(dst):
    430     dst = os.path.join(dst, os.path.basename(src))
--> 431 copyfile(src, dst, follow_symlinks=follow_symlinks)
    432 copymode(src, dst, follow_symlinks=follow_symlinks)
    433 return dst

File ~/.asdf/installs/python/3.11.12/lib/python3.11/shutil.py:256, in copyfile(src, dst, follow_symlinks)
    254     os.symlink(os.readlink(src), dst)
    255 else:
--> 256     with open(src, 'rb') as fsrc:
    257         try:
    258             with open(dst, 'wb') as fdst:
    259                 # macOS

FileNotFoundError: [Errno 2] No such file or directory: 'bin/pestpp-glm'

Compare result

fig, ax = plt.subplots(figsize=(6.75, 3))
ax.plot(x, y, label=f"True {a=}, {b=}")
ax.plot(x, y_noise, marker=".", linestyle="", label="Noisy")
ax.plot(
    x,
    a_fit * x + b_fit,
    color="k",
    linestyle="--",
    label=f"SciPy Linear Regression a={a_fit:0.3f}, b={b_fit:0.3f}",
)
ax.plot(
    x,
    a_pest * x + b_pest,
    color="r",
    linestyle="-.",
    label=f"{executable} a={a_pest:0.3f}, b={b_pest:0.3f}",
)
ax.legend(handlelength=2.5)
ax.grid(True)
../_images/ac52b7b88dc2f2cdb89c54b43929cf326392dc8d09571415fde3a3708694b2b3.png