Source code for dae.pheno.utils.lin_regress

from __future__ import annotations

from typing import Any

import numpy as np
import pandas as pd
import scipy as sp
from scipy.stats import t
from sklearn.linear_model import LinearRegression as LinearRegressionSK


class LinearRegression(LinearRegressionSK):
    """Linear regression model that also reports t-values and p-values.

    Extends scikit-learn's ``LinearRegression``.  After
    :meth:`calc_regression` has been called, the per-coefficient statistics
    are available through the :attr:`tvalues` and :attr:`pvalues`
    properties; before that both are ``None``.
    """

    def __init__(self, **kwargs: Any) -> None:
        """Forward all keyword arguments to the scikit-learn constructor."""
        super().__init__(**kwargs)
        # Filled in by calc_regression(); None until it has been run.
        self._pvalues: np.ndarray | None = None
        self._tvalues: np.ndarray | None = None

    def calc_regression(
        self,
        x_values: np.ndarray,
        y_values: pd.Series | np.ndarray,
        sample_weight: np.ndarray | float | None = None,
    ) -> LinearRegression:
        """Calculate regression for given X and Y values.

        Fits the underlying scikit-learn model and then computes t-values
        and two-sided p-values for the intercept and every column of
        ``x_values``.

        Args:
            x_values: Design matrix; one row per observation.
            y_values: Observed responses, one per row of ``x_values``.
            sample_weight: Optional weights forwarded unchanged to the
                scikit-learn ``fit`` call.

        Returns:
            This instance, so calls can be chained.

        Note:
            ``sample_weight`` only affects the fitted model (and therefore
            the residuals); the coefficient estimates and standard errors
            used for the t-values come from the unweighted pseudo-inverse
            of the design matrix.
        """
        super().fit(x_values, y_values, sample_weight)

        n = len(y_values)  # pylint: disable=invalid-name

        # Prepend a column of ones so the first statistic belongs to the
        # intercept and the remaining ones to the columns of x_values.
        x_consts = np.column_stack([np.ones(x_values.shape[0]), x_values])
        pinv_x, rank = sp.linalg.pinv(x_consts, return_rank=True)

        # Residual degrees of freedom.  The rank reported by pinv is reused
        # for both the variance scale and the t-distribution, so the two
        # can never disagree and no second rank computation is needed.
        # NOTE(review): if the number of observations equals the rank this
        # is zero and the division below is unguarded.
        df_resid = n - rank

        resid = y_values - self.predict(x_values)
        scale = np.dot(resid, resid) / df_resid
        cov_params = np.dot(pinv_x, pinv_x.T) * scale

        beta = np.dot(pinv_x, y_values)
        bse = np.sqrt(np.diag(cov_params))

        if np.any(bse == 0):
            # A zero standard error would make beta / bse blow up; report
            # all-zero statistics instead.
            tvalues = beta * 0
            pvalues = beta * 0
        else:
            tvalues = beta / bse
            # Two-sided p-value from the Student t survival function.
            pvalues = t.sf(np.abs(tvalues), df_resid) * 2

        self._tvalues = tvalues
        self._pvalues = pvalues
        return self

    @property
    def tvalues(self) -> np.ndarray | None:
        """Return the t-values (intercept first), or None if not computed."""
        return self._tvalues

    @property
    def pvalues(self) -> np.ndarray | None:
        """Return the p-values (intercept first), or None if not computed."""
        return self._pvalues