[ ]:
from mimic.utilities import *
from mimic.utilities.utilities import plot_CRM, plot_CRM_with_intervals
from mimic.model_infer.infer_CRM_bayes import *
from mimic.model_infer import *
from mimic.model_simulate import *
from mimic.model_simulate.sim_CRM import *
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import arviz as azS
import pymc as pm
import pytensor.tensor as at
import pickle
import cloudpickle
from scipy import stats
from scipy.integrate import odeint
WARNING (pytensor.tensor.blas): Using NumPy C-API based implementation for BLAS functions.
Bayesian inference to infer the parameters of a Consumer Resource model¶
The Consumer Resource equation based on the MacArthur model takes the form
where:
is the concentration of each species is the species timescale is the species mortality rate is the concentration of each resource is the resource timescale is the quality of each resource is the resource capacity is each species’ preference for each resource
Unlike the gLV, the CRM is not linearised, so the DifferentialEquation function from pymc is utilised to solve the ODEs within the inference function. This can take a while if inferring all parameters, so below we demonstrate run_inference by inferring only one parameter (c) while the rest remain fixed, where inferring all parameters takes longer.
Read in simulated data¶
The data was simulated examples-sim-CRM.ipynb
[2]:
with open("params-s2-r2.pkl", "rb") as f:
params = pickle.load(f)
tau = params["tau"]
m = params["m"]
r = params["r"]
w = params["w"]
K = params["K"]
c = params["c"]
# read in the data
data = pd.read_csv("data-s2-r2.csv")
times = data.iloc[:, 0].values
yobs = data.iloc[:, 1:6].values
Infer parameter c only¶
While parameters tau, m, r, w and K remain fixed to true values generated by the simulation. Any combination of parameters can be inferred by providing the prior_mean and prior_sigma instead of the true value
[3]:
num_species = 2
num_resources = 2
# fixed parameters
tau = params["tau"]
m = params["m"]
r = params["r"]
w = params["w"]
K = params["K"]
# Define prior for c (resource preference matrix)
prior_c_mean = 0.6
prior_c_sigma = 0.1
# Sampling conditions
draws = 1000
tune = 1000
chains = 4
cores = 4
inference = inferCRMbayes()
inference.set_parameters(times=times, yobs=yobs, num_species=num_species, num_resources=num_resources,
tau=tau, m=m, r=r, w=w, K=K,
prior_c_mean=prior_c_mean, prior_c_sigma=prior_c_sigma,
draws=draws, tune=tune, chains=chains, cores=cores)
idata = inference.run_inference()
# To plot posterior distributions
inference.plot_posterior(idata)
summary = az.summary(idata, var_names=["c_hat", "sigma"])
print(summary[["mean", "sd", "r_hat"]])
# Save posterior samples to file
az.to_netcdf(idata, 'model_posterior.nc')
times shape: (100,)
yobs shape: (100, 4)
Number of species: 2
Number of resources: 2
tau_hat is fixed
w_hat is fixed
c_hat is inferred
m_hat is fixed
r_hat is fixed
K_hat is fixed
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sigma, c_hat_vals]
/home/cclare/projects/CRM/MIMIC/MIMIC_env/lib/python3.10/site-packages/pymc/ode/ode.py:131: ODEintWarning: Illegal input detected (internal error). Run with full_output = 1 to get quantitative information.
sol = scipy.integrate.odeint(
/home/cclare/projects/CRM/MIMIC/MIMIC_env/lib/python3.10/site-packages/pymc/ode/ode.py:131: ODEintWarning: Excess accuracy requested (tolerances too small). Run with full_output = 1 to get quantitative information.
sol = scipy.integrate.odeint(
lsoda-- warning..internal t (=r1) and h (=r2) are such that in the machine, t + h = t on the next step
(h = step size). solver will continue anyway in above, r1 = 0.0000000000000D+00 r2 = 0.0000000000000D+00
intdy-- t (=r1) illegal in above message, r1 = 0.1000000000000D+00
t not in interval tcur - hu (= r1) to tcur (=r2)
in above, r1 = 0.0000000000000D+00 r2 = 0.0000000000000D+00
intdy-- t (=r1) illegal in above message, r1 = 0.2000000000000D+00
t not in interval tcur - hu (= r1) to tcur (=r2)
in above, r1 = 0.0000000000000D+00 r2 = 0.0000000000000D+00
lsoda-- trouble from intdy. itask = i1, tout = r1 in above message, i1 = 1
in above message, r1 = 0.2000000000000D+00
lsoda-- at t (=r1), too much accuracy requested for precision of machine.. see tolsf (=r2) in above, r1 = 0.1000000000000D+00 r2 = NaN
/home/cclare/projects/CRM/MIMIC/MIMIC_env/lib/python3.10/site-packages/pymc/ode/ode.py:131: ODEintWarning: Excess work done on this call (perhaps wrong Dfun type). Run with full_output = 1 to get quantitative information.
sol = scipy.integrate.odeint(
/home/cclare/projects/CRM/MIMIC/MIMIC_env/lib/python3.10/site-packages/pymc/ode/ode.py:131: ODEintWarning: Excess accuracy requested (tolerances too small). Run with full_output = 1 to get quantitative information.
sol = scipy.integrate.odeint(
/home/cclare/projects/CRM/MIMIC/MIMIC_env/lib/python3.10/site-packages/pymc/ode/ode.py:131: ODEintWarning: Excess accuracy requested (tolerances too small). Run with full_output = 1 to get quantitative information.
sol = scipy.integrate.odeint(
lsoda-- at t (=r1), too much accuracy requested for precision of machine.. see tolsf (=r2) in above, r1 = 0.1000000000000D+00 r2 = NaN
lsoda-- at t (=r1), too much accuracy requested for precision of machine.. see tolsf (=r2) in above, r1 = 0.1000000000000D+00 r2 = NaN
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 664 seconds.
Parameter tau_hat not found in posterior samples, skipping plot.
Parameter w_hat not found in posterior samples, skipping plot.
Plotting posterior for c_hat

Parameter m_hat not found in posterior samples, skipping plot.
Parameter r_hat not found in posterior samples, skipping plot.
Parameter K_hat not found in posterior samples, skipping plot.
mean sd r_hat
c_hat[0, 0] 0.597 0.043 1.0
c_hat[0, 1] 0.903 0.108 1.0
c_hat[1, 0] 0.647 0.040 1.0
c_hat[1, 1] 1.068 0.067 1.0
sigma[0] 2.840 0.093 1.0
[3]:
'model_posterior.nc'
[4]:
# Plot the trace of the posterior samples
az.plot_trace(idata, var_names=["c_hat", "sigma"])
[4]:
array([[<Axes: title={'center': 'c_hat'}>,
<Axes: title={'center': 'c_hat'}>],
[<Axes: title={'center': 'sigma'}>,
<Axes: title={'center': 'sigma'}>]], dtype=object)

[5]:
# Plot the CRM
init_species = 10 * np.ones(num_species+num_resources)
# inferred parameters
c_h = np.median(idata.posterior["c_hat"].values, axis=(0,1))
compare_params(c=(c, c_h))
predictor = sim_CRM()
predictor.set_parameters(num_species = num_species,
num_resources = num_resources,
tau = tau,
w = w,
c = c_h,
m = m,
r = r,
K = K)
#predictor.print_parameters()
observed_species, observed_resources = predictor.simulate(times, init_species)
observed_data = np.hstack((observed_species, observed_resources))
# plot predicted species and resouce dynamics against observed data
plot_CRM(observed_species, observed_resources, times, 'data-s2-r2.csv')
c_hat/c:
[[0.6 0.9 ]
[0.65 1.07]]
[[0.67557518 0.43848517]
[0.88461136 0.64786379]]


[5]:
(<Figure size 1000x600 with 1 Axes>,
<Axes: title={'center': 'CRM Growth Curves: Simulated vs Observed'}, xlabel='Time', ylabel='Concentration'>)
[6]:
## Plot CRM with confidence intervals
# Get posterior samples for c_hat
c_posterior_samples = idata.posterior["c_hat"].values
lower_percentile = 2.5
upper_percentile = 97.5
n_samples = 1000
random_indices = np.random.choice(c_posterior_samples.shape[1], size=n_samples, replace=False)
# Store simulation results
all_species_trajectories = []
all_resource_trajectories = []
# Run simulations with different posterior samples
for i in range(n_samples):
chain_idx = np.random.randint(0, c_posterior_samples.shape[0])
c_sample = c_posterior_samples[chain_idx, random_indices[i]]
sample_predictor = sim_CRM()
sample_predictor.set_parameters(num_species=num_species,
num_resources=num_resources,
tau=tau,
w=w,
c=c_sample,
m=m,
r=r,
K=K)
sample_species, sample_resources = sample_predictor.simulate(times, init_species)
# Store results
all_species_trajectories.append(sample_species)
all_resource_trajectories.append(sample_resources)
# Convert to numpy arrays
all_species_trajectories = np.array(all_species_trajectories)
all_resource_trajectories = np.array(all_resource_trajectories)
# Calculate percentiles across samples for each time point and species/resource
species_lower = np.percentile(all_species_trajectories, lower_percentile, axis=0)
species_upper = np.percentile(all_species_trajectories, upper_percentile, axis=0)
resource_lower = np.percentile(all_resource_trajectories, lower_percentile, axis=0)
resource_upper = np.percentile(all_resource_trajectories, upper_percentile, axis=0)
# plot the CRM with confidence intervals
plot_CRM_with_intervals(observed_species, observed_resources,
species_lower, species_upper,
resource_lower, resource_upper,
times, 'data-s2-r2.csv')
