# Source code for mimic.model_simulate.sim_VAR

from typing import List, Optional, Union, Dict

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

from mimic.model_simulate.base_model import BaseModel


class sim_VAR(BaseModel):
    """Simulate data from first-order Vector Autoregression (VAR) models.

    Inherits from BaseModel and adds data generation for a VAR(1) process
    ``X_t = A @ X_{t-1} + noise`` and for a coupled secondary process
    ``S_t = B @ X_{t-1} + noise`` (called "MVAR" in this class), together
    with plotting helpers for the generated series.

    Attributes:
        model (str): Model identifier, always ``"VAR"``.
        n_obs (Optional[int]): Number of observations to generate.
        coefficients (Optional[np.ndarray]): Coefficient matrix ``A`` of the
            VAR(1) process.
        initial_values (Optional[np.ndarray]): Initial state of the X
            process, either a flat vector or a column vector of shape
            (n_variables, 1).
        noise_stddev (Optional[Union[int, float]]): Standard deviation of
            the Gaussian noise. Defaults to 0.1 at simulation time when
            left unset.
        output (Optional[str]): Plot action after a simulation
            ('show', 'save' or 'both'); ``None`` disables plotting.
        dataM (Optional[np.ndarray]): Generated data of the S process.
        coefficientsM (Optional[np.ndarray]): Coefficient matrix ``B`` of
            the S process.
        initial_valuesM (Optional[np.ndarray]): Initial state of the S
            process.
        parameters (dict): Snapshot of the parameters, handed to
            ``check_params`` by :meth:`simulate`.

    Methods:
        set_parameters: Set or update any subset of the model parameters.
        generate_var1_data: Simulate the VAR(1) process.
        generate_mvar1_data: Simulate the X process and the coupled S
            process.
        simulate: Dispatch to one of the generators by command name.
        make_plot_overlay: Overlay all series of a process on one axis.
        make_plot_stacked: Stack plot for X and heatmap for S.
        make_plot: One line plot per series.
    """

    def __init__(self):
        """Initialise the instance with every model parameter unset."""
        super().__init__()
        self.model: str = "VAR"
        self.n_obs: Optional[int] = None
        self.coefficients: Optional[np.ndarray] = None
        self.initial_values: Optional[np.ndarray] = None
        self.noise_stddev: Optional[Union[int, float]] = None
        self.output: Optional[str] = None
        self.dataM: Optional[np.ndarray] = None
        self.coefficientsM: Optional[np.ndarray] = None
        self.initial_valuesM: Optional[np.ndarray] = None

        self.parameters: Dict[str, Optional[Union[int, float, np.ndarray, str]]] = {
            "n_obs": self.n_obs,
            "coefficients": self.coefficients,
            "initial_values": self.initial_values,
            "noise_stddev": self.noise_stddev,
            "output": self.output,
        }

    def set_parameters(
        self,
        n_obs: Optional[int] = None,
        coefficients: Optional[List[List[Union[int, float]]]] = None,
        initial_values: Optional[List[List[int]]] = None,
        noise_stddev: Optional[Union[int, float]] = None,
        output: Optional[str] = None,
        coefficientsM: Optional[List[List[Union[int, float]]]] = None,
        initial_valuesM: Optional[List[List[Union[int, float]]]] = None,
    ) -> None:
        """Set or update model parameters.

        Arguments left as ``None`` keep their current value. List-like
        arguments are converted to numpy arrays. The ``parameters``
        dictionary is rebuilt from the resulting attribute values.

        Parameters:
            n_obs: Number of observations to generate.
            coefficients: Coefficients of the VAR model.
            initial_values: Initial values for the VAR model simulation.
            noise_stddev: Standard deviation of the noise.
            output: Output action for plots ('show', 'save', or 'both').
            coefficientsM: Coefficients for the multivariate VAR model.
            initial_valuesM: Initial values for the multivariate VAR model
                simulation.
        """
        if n_obs is not None:
            self.n_obs = n_obs
        if coefficients is not None:
            self.coefficients = np.array(coefficients)
        if initial_values is not None:
            self.initial_values = np.array(initial_values)
        if noise_stddev is not None:
            self.noise_stddev = noise_stddev
        if output is not None:
            self.output = output
        if coefficientsM is not None:
            self.coefficientsM = np.array(coefficientsM)
        if initial_valuesM is not None:
            self.initial_valuesM = np.array(initial_valuesM)

        self.parameters = {
            "n_obs": self.n_obs,
            "coefficients": self.coefficients,
            "initial_values": self.initial_values,
            "noise_stddev": self.noise_stddev,
            "output": self.output,
            "coefficientsM": self.coefficientsM,
            "initial_valuesM": self.initial_valuesM,
        }

    @staticmethod
    def _initial_vector(values) -> np.ndarray:
        """Return initial values as a flat vector (1-D input or first column)."""
        arr = np.asarray(values)
        if arr.ndim == 0:
            return arr.reshape(1)
        return arr if arr.ndim == 1 else arr[:, 0]

    @staticmethod
    def _wrap_scalar(value):
        """Wrap a bare int/float/str in a one-element array; pass others through."""
        if isinstance(value, (int, float, str)):
            return np.array([value])
        return value

    def _noise_scale(self) -> Union[int, float]:
        """Return the noise standard deviation, defaulting (and storing) 0.1."""
        if self.noise_stddev is None:
            self.noise_stddev = 0.1
            print("noise_stddev not provided, setting to default value 0.1")
        return self.noise_stddev

    def _simulate_var1(self) -> np.ndarray:
        """Run the VAR(1) recursion for the X process and return its samples.

        Callers must already have verified that ``coefficients``,
        ``initial_values`` and ``n_obs`` are set.
        """
        if self.n_obs < 1:
            raise ValueError("n_obs must be at least 1")

        start = self._initial_vector(self.initial_values)
        dim = len(start)
        data = np.zeros((self.n_obs, dim))
        data[0, :] = start

        scale = self._noise_scale()
        for t in range(1, self.n_obs):
            # VAR(1) process: X_t = A * X_{t-1} + noise
            noise = np.random.normal(scale=scale, size=dim)
            data[t, :] = np.dot(self.coefficients, data[t - 1, :]) + noise
        return data

    def generate_var1_data(self) -> np.ndarray:
        """Generate simulated data from a VAR(1) process.

        Stores the result in ``self.data``. When ``self.output`` is set, an
        overlay plot is produced before the data is stored.

        Returns:
            np.ndarray: Generated data of shape (n_obs, number of variables).

        Raises:
            ValueError: If ``coefficients``, ``initial_values`` or ``n_obs``
                is unset, or if ``n_obs`` is smaller than 1.
        """
        if self.coefficients is None or self.initial_values is None or self.n_obs is None:
            raise ValueError(
                "coefficients, number of observations and initial_values must be provided for VARsim")

        data = self._simulate_var1()

        if self.output is not None:
            self.make_plot_overlay(data, None, self.output)

        self.data = data  # the generated data
        return data

    def generate_mvar1_data(self,
                            coefficientsM: np.ndarray,
                            initial_valuesM: np.ndarray) -> tuple[np.ndarray,
                                                                  np.ndarray]:
        """Generate data for the X process and the coupled S process.

        X follows the VAR(1) recursion defined by ``self.coefficients``;
        S is driven by the previous X state:
        ``S_t = coefficientsM @ X_{t-1} + noise``. All X noise is drawn
        before any S noise. Results are stored in ``self.data`` and
        ``self.dataM``.

        Parameters:
            coefficientsM (np.ndarray): Coefficients mapping X to S.
            initial_valuesM (np.ndarray): Initial values of the S process,
                flat or as a column vector.

        Returns:
            tuple: ``(data, dataM)`` with shapes (n_obs, number of X
            variables) and (n_obs, number of S variables).

        Raises:
            ValueError: If any required parameter is missing, or if
                ``n_obs`` is smaller than 1.
        """
        if coefficientsM is None or initial_valuesM is None:
            raise ValueError(
                "coefficientsM and initial_valuesM must be provided for MVARsim")
        if self.initial_values is None or self.n_obs is None or self.coefficients is None:
            raise ValueError(
                "coefficients, initial_values and n_obs must be provided for MVARsim")

        coefficientsM = np.array(coefficientsM)
        start_s = self._initial_vector(initial_valuesM)
        nS = len(start_s)

        # X process first, so its noise is drawn before the S noise.
        data = self._simulate_var1()
        nX = data.shape[1]

        dataM = np.zeros((self.n_obs, nS))
        dataM[0, :] = start_s

        scale = self._noise_scale()
        for t in range(1, self.n_obs):
            # process: S_t = B * X_{t-1} + noise
            noise = np.random.normal(scale=scale, size=nS)
            previous_x = data[t - 1, :].reshape((nX, 1))
            product = coefficientsM @ previous_x
            dataM[t, :] = product[:, 0] + noise

        if self.output is not None:
            self.make_plot_overlay(data, dataM, self.output)

        self.data, self.dataM = data, dataM  # the generated data
        return data, dataM

    def simulate(self, command: str) -> None:
        """Run the simulation selected by ``command``.

        Parameters:
            command (str): 'VARsim' for VAR simulation, 'MVARsim' for
                multivariate VAR simulation.

        Raises:
            ValueError: If the command is invalid, or if 'MVARsim' is
                requested without ``coefficientsM`` / ``initial_valuesM``.
        """
        if command == "MVARsim":
            self.check_params(self.parameters, "sVAR")
            # .get(): the *M keys only exist once set_parameters has run;
            # a missing key must reach the explicit ValueError below.
            self.coefficientsM = self._wrap_scalar(
                self.parameters.get("coefficientsM"))
            self.initial_valuesM = self._wrap_scalar(
                self.parameters.get("initial_valuesM"))
            if self.coefficientsM is None or self.initial_valuesM is None:
                raise ValueError(
                    "coefficientsM and initial_valuesM must be provided for MVARsim")
            self.generate_mvar1_data(self.coefficientsM, self.initial_valuesM)
        elif command == "VARsim":
            self.check_params(self.parameters, "VAR")
            self.generate_var1_data()
        else:
            raise ValueError("Invalid command. Must be 'VARsim' or 'MVARsim'")

    def make_plot_overlay(
            self,
            dataX: np.ndarray,
            dataS: Optional[np.ndarray] = None,
            output: Optional[str] = 'show') -> None:
        """Overlay all series of each process on a shared axis.

        Parameters:
            dataX (np.ndarray): Primary data series.
                Shape: (n_obs, num_variables).
            dataS (np.ndarray, optional): Secondary data series, drawn on a
                second subplot when given. Shape: (n_obs, num_variables).
            output (str): 'show', 'save', or 'both'. Any other value
                builds the figure without saving or showing it.

        Note:
            Saves the plot as "plot-data-overlay.pdf" if 'save' or 'both'
            is selected as output.
        """
        n_rows = 2 if dataS is not None else 1
        fig, axs = plt.subplots(n_rows, 1, figsize=(10, 4))
        # A single subplot comes back as a bare Axes; make it indexable.
        axs = np.atleast_1d(axs)

        for i in range(dataX.shape[1]):
            axs[0].plot(dataX[:, i], label=f"X{i}")
        axs[0].set_title("Abundance changes, X")

        if dataS is not None:
            for i in range(dataS.shape[1]):
                axs[1].plot(dataS[:, i], label=f"S{i}")
            axs[1].set_title("Abundance changes, S")

        # Keep the subplots from overlapping.
        plt.tight_layout()

        if output in ['save', 'both']:
            plt.savefig("plot-data-overlay.pdf")
        if output in ['show', 'both']:
            plt.show()

    def make_plot_stacked(self, dataX: np.ndarray, dataS: np.ndarray) -> None:
        """Draw a stack plot of X above a heatmap of S and save the figure.

        Parameters:
            dataX (np.ndarray): Primary data series.
                Shape: (n_obs, num_variables).
            dataS (np.ndarray): Secondary data series for the heatmap.
                Shape: (n_obs, num_variables).

        Note:
            Always saves the figure as "plot-data-XS-stacked.pdf"; the
            figure is not shown.
        """
        nX = len(dataX[0])  # Number of columns in dataX
        nS = len(dataS[0])  # Number of columns in dataS
        nobs = dataS.shape[0]

        fig, axs = plt.subplots(2, 1, figsize=(10, 4))

        axs[0].stackplot(
            range(len(dataX)),
            *dataX.T,
            labels=[f"X{i}" for i in range(nX)])
        axs[0].set_title("Abundance changes over time")
        axs[0].set_ylabel("ΔX")
        axs[0].set_xlim(0, nobs - 1)

        sns.heatmap(
            dataS.T,
            annot=False,
            cmap="YlGnBu",
            yticklabels=[f"S{i}" for i in range(nS)],
            ax=axs[1],
            cbar=False,
        )
        axs[1].set_title("Metabolites, S")
        axs[1].set_ylabel("S")
        axs[1].set_xlabel("time (t)")
        axs[1].set_xlim(0, nobs)

        plt.tight_layout()  # Adjust the layout
        plt.savefig("plot-data-XS-stacked.pdf")

    def make_plot(
            self,
            dataX: np.ndarray,
            dataS: Optional[np.ndarray] = None,
            output: Optional[str] = 'show') -> None:
        """Draw one line plot per series of X and, optionally, of S.

        Parameters:
            dataX (np.ndarray): Primary data series.
                Shape: (n_obs, num_variables).
            dataS (np.ndarray, optional): Secondary data series.
                Shape: (n_obs, num_variables).
            output (str): 'show', 'save', or 'both'.

        Note:
            Saves the figure as "plot-data-stacked.pdf" if 'save' or
            'both' is selected as output.
        """
        nX = len(dataX[0])  # Number of columns in dataX
        # Number of columns in dataS if it is provided
        nS = len(dataS[0]) if dataS is not None else 0

        fig, axs = plt.subplots(nX + nS, 1, figsize=(10, 2 * (nX + nS)))
        # With exactly one series plt.subplots returns a bare Axes, which
        # is not iterable; normalise so the loop below always works.
        axs = np.atleast_1d(axs)

        # Adjust the vertical spacing between subplots
        plt.subplots_adjust(hspace=0.5)

        for i, ax in enumerate(axs):
            if i < nX:
                ax.plot(dataX[:, i])
                ax.set_title(f"X{i}")
            elif dataS is not None:
                ax.plot(dataS[:, i - nX])
                ax.set_title(f"S{i - nX}")
            # Set the y-axis label
            ax.set_ylabel('Abundance')

        if output in {'save', 'both'}:
            plt.savefig("plot-data-stacked.pdf")
        if output in {'show', 'both'}:
            plt.show()