[1]:
from mimic.utilities import *
from mimic.model_infer.infer_gLV_bayes import *
from mimic.model_infer import *
from mimic.model_simulate import *
from mimic.model_simulate.sim_gLV import *
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import arviz as az
import pymc as pm
import pytensor.tensor as at
import pickle
import cloudpickle
WARNING (pytensor.tensor.blas): Using NumPy C-API based implementation for BLAS functions.
Using Bayesian inference to infer the parameters of a (linearised) gLV model
The generalised Lotka-Volterra equation takes the form
\[\frac{dX_i}{dt} = \mu_i X_i + X_i \sum_j M_{ij} X_j + X_i \sum_l \epsilon_{il} u_l\]
where:
\(X_i\) is the concentration of species \(i\)
\(\mu_i\) is its specific growth rate
\(M_{ij}\) is the effect of species \(j\) on the growth of species \(i\)
\(\epsilon_{il}\) is the susceptibility of species \(i\) to the time-dependent perturbation \(u_l\)
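The inference below works on a linearised form of this model. Dividing the gLV equation through by \(X_i\) gives an equation for \(\log X_i\) that is linear in the parameters, so each pair of adjacent time points contributes one row of a linear regression. As a sketch, using a forward-difference discretisation (the exact scheme inside linearize_time_course_16S may differ):
\[\frac{\log X_i(t_{k+1}) - \log X_i(t_k)}{t_{k+1} - t_k} \approx \mu_i + \sum_j M_{ij} X_j(t_k)\]
This is consistent with the shapes printed below: X has num_species + 1 columns (plausibly the species abundances plus a constant column for the growth-rate term) and F has one column per species.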
Bayesian inference with no shrinkage
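run_bayes_gLV fits this regression with independent (non-shrinkage) priors. The library's exact model is internal to infergLVbayes (the sampler output below shows, for example, a reparameterised M_ii_hat_p for the diagonal), but a minimal PyMC sketch of a model of this shape, assuming simple independent Normal priors and ignoring the diagonal/off-diagonal split, would be:

# hypothetical sketch only, not the library's run_bayes_gLV implementation;
# assumes X = [abundances | 1] and F = discretised log-growth rates
def sketch_glv_regression(X, F, prior_mu_mean=1.0, prior_mu_sigma=0.5, prior_Mij_sigma=0.1):
    nS = F.shape[1]
    with pm.Model() as model:
        mu_hat = pm.Normal("mu_hat", mu=prior_mu_mean, sigma=prior_mu_sigma, shape=(1, nS))
        M_hat = pm.Normal("M_hat", mu=0.0, sigma=prior_Mij_sigma, shape=(nS, nS))
        sigma = pm.HalfNormal("sigma", sigma=1.0)
        # predicted log-growth: row k is mu + M X(t_k)
        F_mean = at.dot(X[:, :nS], M_hat.T) + mu_hat
        pm.Normal("F_obs", mu=F_mean, sigma=sigma, observed=F)
    return model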
[2]:
# read in pickled simulated parameters, mu, M, epsilon
num_species = 5
with open("params-s5.pkl", "rb") as f:
    params = pickle.load(f)
M = params["M"]
mu = params["mu"]
epsilon = params["epsilon"]

# read in the data
num_timecourses = 1
data = pd.read_csv("data-s5-r1.csv")
times = data.iloc[:, 0].values
yobs = data.iloc[:, 1:6].values

# linearise the timecourse for the regression
X, F = linearize_time_course_16S(yobs, times)

# Define priors
prior_mu_mean = 1.0
prior_mu_sigma = 0.5
# NB the prior on M_ii is centred on zero
prior_Mii_mean = 0.0
prior_Mii_sigma = 0.1
prior_Mij_sigma = 0.1

# Sampling conditions
draws = 500
tune = 500
chains = 4
cores = 4

inference = infergLVbayes()
inference.set_parameters(X=X, F=F, prior_mu_mean=prior_mu_mean, prior_mu_sigma=prior_mu_sigma,
                         prior_Mii_sigma=prior_Mii_sigma, prior_Mii_mean=prior_Mii_mean,
                         prior_Mij_sigma=prior_Mij_sigma,
                         draws=draws, tune=tune, chains=chains, cores=cores)
idata = inference.run_bayes_gLV()

summary = az.summary(idata, var_names=["mu_hat", "M_ii_hat", "M_ij_hat", "M_hat", "sigma"])
print(summary[["mean", "sd", "r_hat"]])

# Save posterior samples to file
az.to_netcdf(idata, 'model_posterior.nc')

# get median mu_hat and M_hat
mu_h = np.median(idata.posterior["mu_hat"].values, axis=(0, 1)).reshape(-1)
M_h = np.median(idata.posterior["M_hat"].values, axis=(0, 1))

# compare fitted with simulated parameters
compare_params(mu=(mu, mu_h), M=(M, M_h))

predictor = sim_gLV(num_species=num_species, M=M_h, mu=mu_h)
yobs_h, _, _, _, _ = predictor.simulate(times=times, init_species=yobs[0])
plot_fit_gLV(yobs, yobs_h, times)
X shape: (99, 6)
F shape: (99, 5)
Number of species: 5
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sigma, mu_hat, M_ii_hat_p, M_ij_hat]
Sampling 4 chains for 500 tune and 500 draw iterations (2_000 + 2_000 draws total) took 80 seconds.
Chain 1 reached the maximum tree depth. Increase `max_treedepth`, increase `target_accept` or reparameterize.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
/Users/chaniaclare/anaconda3/bin/python3.12/lib/python3.12/site-packages/arviz/stats/diagnostics.py:592: RuntimeWarning: invalid value encountered in scalar divide
(between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
mean sd r_hat
mu_hat[0, 0] 1.137 0.453 1.00
mu_hat[0, 1] 0.946 0.435 1.00
mu_hat[0, 2] 1.316 0.498 1.01
mu_hat[0, 3] 1.003 0.448 1.00
mu_hat[0, 4] 1.200 0.089 1.00
M_ii_hat[0] -0.079 0.060 1.00
M_ii_hat[1] -0.081 0.063 1.00
M_ii_hat[2] -0.078 0.060 1.00
M_ii_hat[3] -0.079 0.061 1.00
M_ii_hat[4] -0.225 0.016 1.00
M_ij_hat[0, 0] -0.109 0.053 1.00
M_ij_hat[0, 1] -0.027 0.053 1.00
M_ij_hat[0, 2] 0.099 0.054 1.00
M_ij_hat[0, 3] 0.014 0.054 1.00
M_ij_hat[1, 0] -0.021 0.035 1.00
M_ij_hat[1, 1] -0.106 0.036 1.01
M_ij_hat[1, 2] 0.045 0.036 1.00
M_ij_hat[1, 3] 0.018 0.036 1.00
M_ij_hat[2, 0] 0.047 0.051 1.00
M_ij_hat[2, 1] 0.008 0.051 1.00
M_ij_hat[2, 2] -0.204 0.051 1.00
M_ij_hat[2, 3] -0.015 0.050 1.00
M_ij_hat[3, 0] 0.015 0.022 1.00
M_ij_hat[3, 1] 0.055 0.023 1.01
M_ij_hat[3, 2] -0.030 0.023 1.00
M_ij_hat[3, 3] -0.021 0.023 1.00
M_ij_hat[4, 0] 0.001 0.036 1.00
M_ij_hat[4, 1] -0.019 0.037 1.01
M_ij_hat[4, 2] 0.016 0.038 1.00
M_ij_hat[4, 3] -0.024 0.037 1.00
M_hat[0, 0] -0.109 0.053 1.00
M_hat[0, 1] -0.027 0.053 1.00
M_hat[0, 2] 0.099 0.054 1.00
M_hat[0, 3] 0.014 0.054 1.00
M_hat[0, 4] 0.000 0.000 NaN
M_hat[1, 0] -0.021 0.035 1.00
M_hat[1, 1] -0.106 0.036 1.01
M_hat[1, 2] 0.045 0.036 1.00
M_hat[1, 3] 0.018 0.036 1.00
M_hat[1, 4] 0.000 0.000 NaN
M_hat[2, 0] 0.047 0.051 1.00
M_hat[2, 1] 0.008 0.051 1.00
M_hat[2, 2] -0.204 0.051 1.00
M_hat[2, 3] -0.015 0.050 1.00
M_hat[2, 4] 0.000 0.000 NaN
M_hat[3, 0] 0.015 0.022 1.00
M_hat[3, 1] 0.055 0.023 1.01
M_hat[3, 2] -0.030 0.023 1.00
M_hat[3, 3] -0.021 0.023 1.00
M_hat[3, 4] 0.000 0.000 NaN
M_hat[4, 0] 0.001 0.036 1.00
M_hat[4, 1] -0.019 0.037 1.01
M_hat[4, 2] 0.016 0.038 1.00
M_hat[4, 3] -0.024 0.037 1.00
M_hat[4, 4] -0.225 0.016 1.00
sigma[0] 0.118 0.004 1.00
mu_hat/mu:
[1.13231041 0.93116339 1.31844754 0.99030916 1.20111417]
[1.27853844 0.55683415 2.06752757 0.86387608 0.70448068]
M_hat/M:
[[-0.11 -0.03 0.1 0.01 0. ]
[-0.02 -0.11 0.05 0.02 0. ]
[ 0.05 0.01 -0.2 -0.01 0. ]
[ 0.02 0.05 -0.03 -0.02 0. ]
[ 0. -0.02 0.02 -0.02 -0.23]]
[[-0.05 0. -0.025 0. 0. ]
[ 0. -0.1 0. 0.05 0. ]
[ 0. 0. -0.15 0. 0. ]
[ 0. 0. 0. -0.01 0. ]
[ 0.02 0. 0. 0. -0.2 ]]
Using the following parameters for gLV simulation: {'num_species': 5, 'mu': array([1.13231041, 0.93116339, 1.31844754, 0.99030916, 1.20111417]), 'M': array([[-0.10877948, -0.02778685, 0.09988316, 0.01282891, 0. ],
[-0.0215632 , -0.10622212, 0.04504069, 0.01855462, 0. ],
[ 0.04557527, 0.00821253, -0.20316988, -0.013968 , 0. ],
[ 0.01521565, 0.05476717, -0.03063867, -0.02148651, 0. ],
[ 0.00113054, -0.01876978, 0.01532805, -0.02466473, -0.22526982]]), 'epsilon': array([], shape=(5, 0), dtype=float64)}
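Because the posterior was written to model_posterior.nc, it can be reloaded later without re-running the sampler. A minimal sketch using standard ArviZ calls:

# reload the saved posterior and inspect convergence without re-sampling
idata_reloaded = az.from_netcdf("model_posterior.nc")
az.plot_trace(idata_reloaded, var_names=["mu_hat", "sigma"])
plt.show()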
[3]:
# read in pickled simulated parameters, mu, M, epsilon
num_species = 5
with open("params-s5.pkl", "rb") as f:
    params = pickle.load(f)
M = params["M"]
mu = params["mu"]
epsilon = params["epsilon"]

# read in the data: three replicate timecourses, stacked column-wise in the csv
num_timecourses = 3
data = pd.read_csv("data-s5-r3.csv")
times = data.iloc[:, 0].values
yobs_1 = data.iloc[:, 1:(num_species + 1)].values
yobs_2 = data.iloc[:, (num_species + 1):(2 * num_species + 1)].values
yobs_3 = data.iloc[:, (2 * num_species + 1):(3 * num_species + 1)].values
ryobs = np.array([yobs_1, yobs_2, yobs_3])

# linearise each timecourse and stack the regression blocks
X = np.array([], dtype=np.double).reshape(0, num_species + 1)
F = np.array([], dtype=np.double).reshape(0, num_species)
for timecourse_idx in range(num_timecourses):
    Xs, Fs = linearize_time_course_16S(ryobs[timecourse_idx], times)
    X = np.vstack([X, Xs])
    F = np.vstack([F, Fs])

# Define priors
prior_mu_mean = [1.0]
prior_mu_sigma = 0.5
# NB the prior on M_ii is centred on zero
prior_Mii_mean = 0.0
prior_Mii_sigma = 0.1
prior_Mij_sigma = 0.1

# Sampling conditions
draws = 500
tune = 500
chains = 4
cores = 4

inference = infergLVbayes()
inference.set_parameters(X=X, F=F, prior_mu_mean=prior_mu_mean, prior_mu_sigma=prior_mu_sigma,
                         prior_Mii_sigma=prior_Mii_sigma, prior_Mii_mean=prior_Mii_mean,
                         prior_Mij_sigma=prior_Mij_sigma,
                         draws=draws, tune=tune, chains=chains, cores=cores)
idata = inference.run_bayes_gLV()

summary = az.summary(idata, var_names=["mu_hat", "M_ii_hat", "M_ij_hat", "M_hat", "sigma"])
# print(summary[["mean", "sd", "r_hat"]])

# Save posterior samples to file
az.to_netcdf(idata, 'model_posterior.nc')

# get median mu_hat and M_hat
mu_h = np.median(idata.posterior["mu_hat"].values, axis=(0, 1)).reshape(-1)
M_h = np.median(idata.posterior["M_hat"].values, axis=(0, 1))

# compare fitted with simulated parameters
compare_params(mu=(mu, mu_h), M=(M, M_h))

predictor = sim_gLV(num_species=num_species, M=M_h, mu=mu_h)

# plot comparison of simulated and predicted timecourses
for timecourse_idx in range(num_timecourses):
    yobs_h, _, _, _, _ = predictor.simulate(times=times, init_species=ryobs[timecourse_idx, 0, :])
    plot_fit_gLV(ryobs[timecourse_idx], yobs_h, times)
X shape: (297, 6)
F shape: (297, 5)
Number of species: 5
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sigma, mu_hat, M_ii_hat_p, M_ij_hat]
Sampling 4 chains for 500 tune and 500 draw iterations (2_000 + 2_000 draws total) took 41 seconds.
There were 1 divergences after tuning. Increase `target_accept` or reparameterize.
/Users/chaniaclare/anaconda3/bin/python3.12/lib/python3.12/site-packages/arviz/stats/diagnostics.py:592: RuntimeWarning: invalid value encountered in scalar divide
(between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
mu_hat/mu:
[1.3672287 0.74213139 1.76629376 0.96536547 0.80465853]
[1.27853844 0.55683415 2.06752757 0.86387608 0.70448068]
M_hat/M:
[[-0.06 -0. 0. -0. 0. ]
[ 0. -0.09 0. 0. 0. ]
[-0.03 -0.01 -0.13 -0. 0. ]
[-0. 0.04 0. -0.01 0. ]
[ 0. 0.01 0. 0. -0.15]]
[[-0.05 0. -0.025 0. 0. ]
[ 0. -0.1 0. 0.05 0. ]
[ 0. 0. -0.15 0. 0. ]
[ 0. 0. 0. -0.01 0. ]
[ 0.02 0. 0. 0. -0.2 ]]
Using the following parameters for gLV simulation: {'num_species': 5, 'mu': array([1.3672287 , 0.74213139, 1.76629376, 0.96536547, 0.80465853]), 'M': array([[-5.52650375e-02, -1.27299037e-03, 4.73617599e-04,
-2.24961179e-03, 0.00000000e+00],
[ 2.35583261e-03, -9.00693975e-02, 8.11131991e-05,
4.56646138e-04, 0.00000000e+00],
[-2.51252422e-02, -1.35618057e-02, -1.30670217e-01,
-4.90212464e-03, 0.00000000e+00],
[-1.34249757e-03, 4.42079830e-02, 1.78249633e-04,
-1.01354679e-02, 0.00000000e+00],
[ 2.36000085e-03, 6.16244691e-03, 1.04110199e-03,
1.28691079e-03, -1.51785839e-01]]), 'epsilon': array([], shape=(5, 0), dtype=float64)}
Bayesian inference with shrinkage: Horseshoe prior
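To encourage sparsity in the off-diagonal interactions, run_bayes_gLV_shrinkage places a shrinkage prior on M_ij. The exact construction is internal to the library, but the sampled variables shown below (c2, tau, lam) are consistent with a regularised ("Finnish") horseshoe prior, in which DA, DA0, N and noise_stddev would set the global shrinkage scale as in Piironen & Vehtari (2017). A hypothetical standalone sketch of such a prior (not the library's code):

# illustrative values mirroring the cell below: 5 species, so DA = 20 off-diagonal terms
DA, DA0, N, noise_stddev = 20, 3, 97, 0.1
tau0 = (DA0 / (DA - DA0)) * noise_stddev / np.sqrt(N)  # global scale heuristic (assumption)

with pm.Model() as horseshoe_sketch:
    tau = pm.HalfCauchy("tau", beta=tau0)            # global shrinkage
    lam = pm.HalfCauchy("lam", beta=1.0, shape=DA)   # local shrinkage, one per off-diagonal M_ij
    c2 = pm.InverseGamma("c2", alpha=2.0, beta=1.0)  # slab width, regularises large coefficients
    # slab-regularised local scales: small signals are shrunk hard,
    # large ones are capped at roughly sqrt(c2)
    lam_tilde = at.sqrt(c2 * lam**2 / (c2 + tau**2 * lam**2))
    M_ij_hat = pm.Normal("M_ij_hat", mu=0.0, sigma=tau * lam_tilde, shape=DA)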
[4]:
# read in pickled simulated parameters, mu, M, epsilon
num_species = 5
with open("params-s5.pkl", "rb") as f:
    params = pickle.load(f)
M = params["M"]
mu = params["mu"]
epsilon = params["epsilon"]

# read in the data (single timecourse) and linearise it
num_timecourses = 1
data = pd.read_csv("data-s5-r1.csv")
times = data.iloc[:, 0].values
yobs = data.iloc[:, 1:(num_species + 1)].values
X, F = linearize_time_course_16S(yobs, times)

# Define priors
prior_mu_mean = [1.0]
prior_mu_sigma = 0.5
# NB the prior on M_ii is centred on zero
prior_Mii_mean = 0.0
prior_Mii_sigma = 0.1
prior_Mij_sigma = 0.1

# Define parameters for shrinkage on M_ij (off-diagonal elements)
n_obs = times.shape[0] - 1
nX = num_species
noise_stddev = 0.1
DA = nX * nX - nX   # number of off-diagonal elements
DA0 = 3             # expected number of non-zero entries in M_ij
N = n_obs - 2

# Sampling conditions
draws = 500
tune = 500
chains = 4
cores = 4

# Run inference
inference = infergLVbayes()
inference.set_parameters(X=X, F=F, prior_mu_mean=prior_mu_mean, prior_mu_sigma=prior_mu_sigma,
                         prior_Mii_sigma=prior_Mii_sigma, prior_Mii_mean=prior_Mii_mean,
                         prior_Mij_sigma=prior_Mij_sigma,
                         DA=DA, DA0=DA0, N=N, noise_stddev=noise_stddev,
                         draws=draws, tune=tune, chains=chains, cores=cores)
idata = inference.run_bayes_gLV_shrinkage()

# print summary
summary = az.summary(idata, var_names=["mu_hat", "M_ii_hat", "M_ij_hat", "M_hat", "sigma"])
print(summary[["mean", "sd", "r_hat"]])

# Write posterior samples to file
az.to_netcdf(idata, 'model_posterior.nc')

# get median mu_hat and M_hat
mu_h = np.median(idata.posterior["mu_hat"].values, axis=(0, 1)).reshape(-1)
M_h = np.median(idata.posterior["M_hat"].values, axis=(0, 1))

# compare fitted with simulated parameters
compare_params(mu=(mu, mu_h), M=(M, M_h))

predictor = sim_gLV(num_species=num_species, M=M_h, mu=mu_h)
yobs_h, _, _, _, _ = predictor.simulate(times=times, init_species=yobs[0])
plot_fit_gLV(yobs, yobs_h, times)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sigma, mu_hat, M_ii_hat_p, c2, tau, lam, M_ij_hat]
Sampling 4 chains for 500 tune and 500 draw iterations (2_000 + 2_000 draws total) took 60 seconds.
There were 3 divergences after tuning. Increase `target_accept` or reparameterize.
/Users/chaniaclare/anaconda3/bin/python3.12/lib/python3.12/site-packages/arviz/stats/diagnostics.py:592: RuntimeWarning: invalid value encountered in scalar divide
(between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
mean sd r_hat
mu_hat[0, 0] 1.373 0.261 1.00
mu_hat[0, 1] 0.718 0.265 1.00
mu_hat[0, 2] 1.753 0.274 1.00
mu_hat[0, 3] 0.947 0.275 1.00
mu_hat[0, 4] 0.805 0.012 1.00
M_ii_hat[0] -0.083 0.058 1.00
M_ii_hat[1] -0.080 0.061 1.00
M_ii_hat[2] -0.083 0.062 1.00
M_ii_hat[3] -0.077 0.059 1.00
M_ii_hat[4] -0.152 0.002 1.00
M_ij_hat[0, 0] -0.056 0.009 1.00
M_ij_hat[0, 1] -0.000 0.009 1.00
M_ij_hat[0, 2] 0.002 0.010 1.00
M_ij_hat[0, 3] -0.002 0.010 1.00
M_ij_hat[1, 0] 0.003 0.004 1.00
M_ij_hat[1, 1] -0.090 0.004 1.00
M_ij_hat[1, 2] -0.000 0.004 1.00
M_ij_hat[1, 3] 0.000 0.004 1.01
M_ij_hat[2, 0] -0.025 0.010 1.00
M_ij_hat[2, 1] -0.013 0.010 1.00
M_ij_hat[2, 2] -0.131 0.010 1.00
M_ij_hat[2, 3] -0.005 0.010 1.00
M_ij_hat[3, 0] -0.002 0.002 1.00
M_ij_hat[3, 1] 0.045 0.002 1.00
M_ij_hat[3, 2] 0.000 0.002 1.00
M_ij_hat[3, 3] -0.010 0.002 1.01
M_ij_hat[4, 0] 0.002 0.004 1.00
M_ij_hat[4, 1] 0.006 0.004 1.00
M_ij_hat[4, 2] 0.001 0.004 1.00
M_ij_hat[4, 3] 0.001 0.004 1.00
M_hat[0, 0] -0.056 0.009 1.00
M_hat[0, 1] -0.000 0.009 1.00
M_hat[0, 2] 0.002 0.010 1.00
M_hat[0, 3] -0.002 0.010 1.00
M_hat[0, 4] 0.000 0.000 NaN
M_hat[1, 0] 0.003 0.004 1.00
M_hat[1, 1] -0.090 0.004 1.00
M_hat[1, 2] -0.000 0.004 1.00
M_hat[1, 3] 0.000 0.004 1.01
M_hat[1, 4] 0.000 0.000 NaN
M_hat[2, 0] -0.025 0.010 1.00
M_hat[2, 1] -0.013 0.010 1.00
M_hat[2, 2] -0.131 0.010 1.00
M_hat[2, 3] -0.005 0.010 1.00
M_hat[2, 4] 0.000 0.000 NaN
M_hat[3, 0] -0.002 0.002 1.00
M_hat[3, 1] 0.045 0.002 1.00
M_hat[3, 2] 0.000 0.002 1.00
M_hat[3, 3] -0.010 0.002 1.01
M_hat[3, 4] 0.000 0.000 NaN
M_hat[4, 0] 0.002 0.004 1.00
M_hat[4, 1] 0.006 0.004 1.00
M_hat[4, 2] 0.001 0.004 1.00
M_hat[4, 3] 0.001 0.004 1.00
M_hat[4, 4] -0.152 0.002 1.00
sigma[0] 0.118 0.002 1.01
mu_hat/mu:
[1.36620739 0.71402644 1.76006706 0.9497201 0.80509239]
[1.27853844 0.55683415 2.06752757 0.86387608 0.70448068]
M_hat/M:
[[-0.06 -0. 0. -0. 0. ]
[ 0. -0.09 -0. 0. 0. ]
[-0.02 -0.01 -0.13 -0. 0. ]
[-0. 0.04 0. -0.01 0. ]
[ 0. 0.01 0. 0. -0.15]]
[[-0.05 0. -0.025 0. 0. ]
[ 0. -0.1 0. 0.05 0. ]
[ 0. 0. -0.15 0. 0. ]
[ 0. 0. 0. -0.01 0. ]
[ 0.02 0. 0. 0. -0.2 ]]
Using the following parameters for gLV simulation: {'num_species': 5, 'mu': array([1.36620739, 0.71402644, 1.76006706, 0.9497201 , 0.80509239]), 'M': array([[-5.55189265e-02, -9.15353703e-06, 1.84429845e-03,
-1.90496623e-03, 0.00000000e+00],
[ 2.56584422e-03, -9.04844299e-02, -4.55343481e-04,
2.92222338e-04, 0.00000000e+00],
[-2.47203957e-02, -1.30517608e-02, -1.31416647e-01,
-4.88621827e-03, 0.00000000e+00],
[-1.47705073e-03, 4.45336261e-02, 4.05379448e-04,
-1.00789444e-02, 0.00000000e+00],
[ 2.37280302e-03, 6.18986248e-03, 1.30611726e-03,
1.34196919e-03, -1.51759470e-01]]), 'epsilon': array([], shape=(5, 0), dtype=float64)}
Bayesian inference with shrinkage and a perturbation with unknown interactions
Now we perform inference with the Horseshoe prior for shrinkage, but this time we also include a perturbation, assuming its interaction terms are unknown. This improves identifiability.
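With the perturbation signal \(u_l(t)\) included, the linearised regression gains one column per perturbation, and the susceptibilities \(\epsilon_{il}\) are inferred alongside \(\mu\) and \(M\):
\[\frac{\log X_i(t_{k+1}) - \log X_i(t_k)}{t_{k+1} - t_k} \approx \mu_i + \sum_j M_{ij} X_j(t_k) + \sum_l \epsilon_{il} u_l(t_k)\]
which is why linearize_time_course_16S_u below builds a design matrix with num_species + 2 columns.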
[5]:
num_timecourses = 3
num_perturbations = 1
data = pd.read_csv("data-s5-r3-p1.csv")
times = data.iloc[:, 0].values
yobs_1 = data.iloc[:, 1:(num_species + 1)].values
yobs_2 = data.iloc[:, (num_species + 1):(2 * num_species + 1)].values
yobs_3 = data.iloc[:, (2 * num_species + 1):(3 * num_species + 1)].values
ryobs = np.array([yobs_1, yobs_2, yobs_3])

# create the perturbation signal: three short pulses
def pert_fn(t):
    if 2.0 <= t < 2.2 or 3.0 <= t < 3.2 or 4.0 <= t < 4.2:
        return np.array([1])
    else:
        return np.array([0])

u = np.array([pert_fn(t)[0] for t in times]).astype(int)

# linearise each timecourse (with the perturbation column) and stack
X = np.array([], dtype=np.double).reshape(0, num_species + 2)
F = np.array([], dtype=np.double).reshape(0, num_species)
for timecourse_idx in range(num_timecourses):
    Xs, Fs = linearize_time_course_16S_u(ryobs[timecourse_idx], times, u)
    X = np.vstack([X, Xs])
    F = np.vstack([F, Fs])

# Define priors
prior_mu_mean = [1.0]
prior_mu_sigma = 0.5
# NB the prior on M_ii is centred on zero
prior_Mii_mean = 0.0
prior_Mii_sigma = 0.1
prior_Mij_sigma = 0.1
prior_eps_mean = 0.1
prior_eps_sigma = 0.1

# Define parameters for shrinkage on M_ij (off-diagonal elements)
n_obs = times.shape[0] - 1
nX = num_species
noise_stddev = 0.1
DA = nX * nX - nX   # number of off-diagonal elements
DA0 = 3             # expected number of non-zero entries in M_ij
N = n_obs - 2

# Sampling conditions
draws = 500
tune = 500
chains = 4
cores = 4

# Run inference
inference = infergLVbayes()
inference.set_parameters(X=X, F=F, prior_mu_mean=prior_mu_mean, prior_mu_sigma=prior_mu_sigma,
                         prior_Mii_sigma=prior_Mii_sigma, prior_Mii_mean=prior_Mii_mean,
                         prior_Mij_sigma=prior_Mij_sigma,
                         prior_eps_mean=prior_eps_mean, prior_eps_sigma=prior_eps_sigma,
                         DA=DA, DA0=DA0, N=N, noise_stddev=noise_stddev,
                         draws=draws, tune=tune, chains=chains, cores=cores)
idata = inference.run_bayes_gLV_shrinkage_pert()

# print summary
summary = az.summary(idata, var_names=["mu_hat", "M_ii_hat", "M_ij_hat", "M_hat", "epsilon_hat", "sigma"])
print(summary[["mean", "sd", "r_hat"]])

# Write posterior samples to file
az.to_netcdf(idata, 'model_posterior.nc')

# read in the simulated parameters for comparison
num_species = 5
with open("params-s5.pkl", "rb") as f:
    params = pickle.load(f)
M = params["M"]
mu = params["mu"]
epsilon = params["epsilon"]

# get median mu_hat, M_hat and epsilon_hat
mu_h = np.median(idata.posterior["mu_hat"].values, axis=(0, 1)).reshape(-1)
M_h = np.median(idata.posterior["M_hat"].values, axis=(0, 1))
e_h = np.median(idata.posterior["epsilon_hat"].values, axis=(0, 1)).T
print(e_h.shape)

predictor = sim_gLV(num_species=num_species,
                    num_perturbations=1,
                    M=M_h,
                    mu=mu_h,
                    epsilon=e_h)

# plot comparison of simulated and predicted timecourses
for timecourse_idx in range(num_timecourses):
    yobs_h, _, _, _, _ = predictor.simulate(times=times, init_species=ryobs[timecourse_idx, 0, :], u=pert_fn)
    plot_fit_gLV(ryobs[timecourse_idx], yobs_h, times)

compare_params(mu=(mu, mu_h), M=(M, M_h), e=(epsilon, e_h))
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sigma, mu_hat, epsilon_hat, M_ii_hat_p, c2, tau, lam, M_ij_hat]
Sampling 4 chains for 500 tune and 500 draw iterations (2_000 + 2_000 draws total) took 43 seconds.
/Users/chaniaclare/anaconda3/bin/python3.12/lib/python3.12/site-packages/arviz/stats/diagnostics.py:592: RuntimeWarning: invalid value encountered in scalar divide
(between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
mean sd r_hat
mu_hat[0, 0] 1.234 0.124 1.0
mu_hat[0, 1] 0.595 0.127 1.0
mu_hat[0, 2] 1.682 0.130 1.0
mu_hat[0, 3] 0.874 0.131 1.0
mu_hat[0, 4] 0.854 0.016 1.0
... ... ... ...
epsilon_hat[0, 1] -0.897 0.029 1.0
epsilon_hat[0, 2] 0.849 0.028 1.0
epsilon_hat[0, 3] -0.882 0.026 1.0
epsilon_hat[0, 4] 0.905 0.029 1.0
sigma[0] 0.116 0.002 1.0
[61 rows x 3 columns]
(5, 1)
Using the following parameters for gLV simulation: {'num_species': 5, 'mu': array([1.235932 , 0.59504845, 1.67902925, 0.87447153, 0.85468732]), 'M': array([[-4.81204937e-02, 1.39836766e-04, 1.01777966e-03,
-1.27672070e-03, 0.00000000e+00],
[ 2.01160570e-03, -9.07691992e-02, -1.57861381e-03,
2.20328233e-03, 0.00000000e+00],
[-2.27590891e-02, -6.89935402e-03, -1.27635697e-01,
-1.70301440e-03, 0.00000000e+00],
[-1.45651050e-03, 4.49332464e-02, 1.38178260e-03,
-1.09125279e-02, 0.00000000e+00],
[ 1.08967172e-04, 7.04120914e-03, 4.25886942e-03,
1.10626607e-03, -1.59250496e-01]]), 'epsilon': array([[ 0.87394546],
[-0.89655916],
[ 0.84899661],
[-0.88254273],
[ 0.90439728]])}
mu_hat/mu:
[1.235932 0.59504845 1.67902925 0.87447153 0.85468732]
[1.27853844 0.55683415 2.06752757 0.86387608 0.70448068]
M_hat/M:
[[-0.05 0. 0. -0. 0. ]
[ 0. -0.09 -0. 0. 0. ]
[-0.02 -0.01 -0.13 -0. 0. ]
[-0. 0.04 0. -0.01 0. ]
[ 0. 0.01 0. 0. -0.16]]
[[-0.05 0. -0.025 0. 0. ]
[ 0. -0.1 0. 0.05 0. ]
[ 0. 0. -0.15 0. 0. ]
[ 0. 0. 0. -0.01 0. ]
[ 0.02 0. 0. 0. -0.2 ]]
e_hat/e:
[[ 0.87]
[-0.9 ]
[ 0.85]
[-0.88]
[ 0.9 ]]
[[ 1.]
[-1.]
[ 1.]
[-1.]
[ 1.]]
[ ]: