import matplotlib.pyplot as plt
import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.linear_model import Lasso, LinearRegression
from sklearn.model_selection import KFold, RepeatedKFold, cross_val_score
class MultiPenaltyLasso(BaseEstimator, RegressorMixin):
"""Linear regression with non-uniform L1 regularizer.
Minimizes objective function::
1 / (2 * n_samples) * ||y - X * w||^2_2 + ||alpha * w||_1
where::
alpha = diag(alpha_1, alpha_2, ..., alpha_{num_samples})
and all alpha are non-zero
"""
def __init__(self, alpha):
self.coef_ = None
self.alpha = alpha
    def fit(self, X, y):
        # Use the simple rescaling trick when every coefficient is penalized;
        # otherwise project out the unpenalized coefficients first.
        if not np.any(self.alpha == 0):
            self.coef_ = self.non_zero_penalties(X, y)
        else:
            self.coef_ = self.penalised_lasso(X, y)
        return self
def predict(self, X) -> np.ndarray:
if self.coef_ is None:
raise ValueError("Model not fitted")
return X @ self.coef_.T
def get_params(self, deep=True) -> dict:
return {"alpha": self.alpha}
    def non_zero_penalties(self, X, y) -> np.ndarray:
        # All penalties are non-zero, so the problem can be rescaled to a
        # standard lasso: substituting w = Lambda^-1 * v turns ||Lambda * w||_1
        # into ||v||_1, i.e. a lasso with alpha = 1 on the column-rescaled
        # design matrix X * Lambda^-1.
        lambda_p = np.diag(self.alpha)
        X_p = X @ np.linalg.inv(lambda_p)
        model = Lasso(fit_intercept=False, max_iter=int(1e6), alpha=1)
        model.fit(X_p, y)
        # Undo the rescaling to recover the coefficients of the original problem.
        return model.coef_ @ np.linalg.inv(lambda_p)
    def penalised_lasso(self, X, y) -> np.ndarray:
        n_samples, n_features = X.shape
        n_targets = y.shape[1]
        alpha_zeros_idx = np.nonzero(self.alpha == 0)[0]
        alpha_nonzeros_idx = np.nonzero(self.alpha != 0)[0]
        X_NP = X[:, alpha_zeros_idx]    # non-penalised columns (lambda_i = 0)
        X_P = X[:, alpha_nonzeros_idx]  # penalised columns (lambda_i > 0)
        # 1. Project out the non-penalised coefficients: build the residual-maker
        #    matrix of the non-penalised design,
        #        M_NP = I - X_NP (X_NP^T X_NP)^-1 X_NP^T,
        #    and apply it to the response variable (y -> M_NP y).
        M_NP = np.identity(n_samples) - \
            X_NP @ np.linalg.inv(X_NP.T @ X_NP) @ X_NP.T
        y_prime = M_NP @ y
        # 2. Rescale the projected design matrix by the diagonal matrix of the
        #    lasso penalties: M_NP X_P -> M_NP X_P Lambda_P^-1.
        lambda_p = np.diag(self.alpha[alpha_nonzeros_idx])
        projected_X_P = M_NP @ X_P @ np.linalg.inv(lambda_p)
        # 3. Apply a lasso: regress the projected response M_NP y on the projected
        #    and rescaled design M_NP X_P Lambda_P^-1 with penalty lambda = 1, then
        #    undo the rescaling to obtain the penalised parameters beta_P_hat.
        if len(alpha_nonzeros_idx) > 0:
            model = Lasso(fit_intercept=False, max_iter=int(1e6), alpha=1)
            model.fit(projected_X_P, y_prime)
            beta_P_hat = model.coef_ @ np.linalg.inv(lambda_p)  # (n_targets, n_P)
        else:
            beta_P_hat = np.empty((n_targets, 0))
        # 4. Apply ordinary least squares: regress the residual y - X_P beta_P_hat
        #    on the non-penalised design X_NP to obtain beta_NP_hat.
        if len(alpha_zeros_idx) > 0:
            model2 = LinearRegression(fit_intercept=False)
            model2.fit(X_NP, y - X_P @ beta_P_hat.T)
            beta_NP_hat = model2.coef_  # (n_targets, n_NP)
        else:
            beta_NP_hat = np.empty((n_targets, 0))
        # Reassemble the full coefficient matrix in the original column order,
        # matching the (n_targets, n_features) layout returned by sklearn and
        # expected by ``predict``.
        coefs = np.empty([n_targets, n_features])
        coefs[:, alpha_zeros_idx] = beta_NP_hat
        coefs[:, alpha_nonzeros_idx] = beta_P_hat
        return coefs
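
# Illustrative usage sketch (not part of the original API): it fits a
# MultiPenaltyLasso on synthetic data with one unpenalized column, assuming a
# 2-D target array as used throughout this module. The helper name and the
# synthetic shapes below are hypothetical.
def _demo_multi_penalty_lasso():
    rng = np.random.default_rng(0)
    X = rng.normal(size=(50, 3))
    true_coef = np.array([[1.5, 0.0, -2.0],
                          [0.5, 0.0, 1.0]])  # (n_targets, n_features)
    y = X @ true_coef.T + 0.01 * rng.normal(size=(50, 2))
    # Penalize the first two columns and leave the third unpenalized
    # (alpha = 0), which routes fit() through penalised_lasso().
    model = MultiPenaltyLasso(alpha=np.array([0.1, 0.1, 0.0]))
    model.fit(X, y)
    print("estimated coefficients:\n", model.coef_)
    print("prediction shape:", model.predict(X).shape)
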
def fit_alpha_MPLasso(X, y, n_a) -> np.ndarray:
    """Select the penalty pair (one value for M, one for mu) for
    MultiPenaltyLasso by cross-validated grid search over ``n_a``
    logarithmically spaced candidates per penalty."""
    candidate_alpha_1 = np.logspace(-9, 2, n_a)
    candidate_alpha_2 = np.logspace(-9, 2, n_a)
    # Cartesian product of the two candidate grids.
    candidate_alphas = np.transpose([np.tile(candidate_alpha_1, len(candidate_alpha_2)),
                                     np.repeat(candidate_alpha_2, len(candidate_alpha_1))])
    candidate_regressors = []
    for a in candidate_alphas:
        # This sets the diag(Lambda) penalty matrix with an a[0] penalty for M
        # and an a[1] penalty for mu.
        lambda_p = np.append(np.ones(y.shape[1]) * a[0], a[1])
        candidate_regressors.append(MultiPenaltyLasso(alpha=lambda_p))
# cv = RepeatedKFold(n_splits=5, n_repeats=10)
cv = KFold(n_splits=5, shuffle=True)
cv_results = [-cross_val_score(r,
X,
y,
scoring='neg_root_mean_squared_error',
cv=cv,
n_jobs=-1) for r in candidate_regressors]
    n_est = np.array([len(x) for x in cv_results])
    cv_means = np.array([np.mean(x) for x in cv_results])
    # Standard error of each CV estimate, based on the actual number of folds.
    cv_se = np.array([np.std(x) for x in cv_results]) / np.sqrt(n_est)
min_i = np.argmin(cv_means)
print("minimum found: a/error:", candidate_alphas[min_i], cv_means[min_i])
fig, axs = plt.subplots(1, 2, layout='constrained')
for i in range(2):
        axs[i].scatter(candidate_alphas[:, i],
                       cv_means, marker='o', color='blue')
axs[i].scatter([candidate_alphas[min_i, i]], [cv_means[min_i]],
marker='o', color='red', label='Minimum rule')
axs[i].legend()
axs[i].set_xscale('log')
        axs[i].set_xlabel('alpha')
        axs[i].set_ylabel('RMSE')
return candidate_alphas[min_i]
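
# Hedged usage sketch for the cross-validated penalty search: the shapes below
# follow the convention in fit_alpha_MPLasso, where X has y.shape[1] + 1
# columns (one per target plus one for mu). The synthetic data and this
# __main__ guard are illustrative assumptions, not part of the original module.
if __name__ == "__main__":
    rng = np.random.default_rng(1)
    n_samples, n_targets = 100, 3
    X_demo = rng.normal(size=(n_samples, n_targets + 1))
    W_demo = rng.normal(size=(n_targets, n_targets + 1))
    y_demo = X_demo @ W_demo.T + 0.05 * rng.normal(size=(n_samples, n_targets))
    best_alpha = fit_alpha_MPLasso(X_demo, y_demo, n_a=3)
    print("selected (M, mu) penalties:", best_alpha)
    plt.show()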