Source code for dae.pheno.utils.lin_regress

from __future__ import annotations

from typing import Any

import numpy as np
import pandas as pd
import scipy as sp
from scipy.stats import t
from sklearn.linear_model import LinearRegression as LinearRegressionSK



[docs]
class LinearRegression(LinearRegressionSK):
    """Linear regression model that also reports t-values and p-values.

    Extends the scikit-learn ``LinearRegression`` estimator with
    per-coefficient t-statistics and two-sided p-values. Both are exposed
    through the ``tvalues`` and ``pvalues`` properties once
    ``calc_regression`` has been called.
    """

    def __init__(self, **kwargs: Any) -> None:
        """Forward all keyword arguments to the scikit-learn estimator."""
        super().__init__(**kwargs)

        # Both stay ``None`` until ``calc_regression`` has been run.
        self._pvalues: np.ndarray | None = None
        self._tvalues: np.ndarray | None = None

    def calc_regression(
        self, x_values: np.ndarray, y_values: pd.Series | np.ndarray,
        sample_weight: float | np.ndarray | None = None,
    ) -> LinearRegression:
        """Fit the model and compute t-values and p-values.

        The estimator is fitted through the parent ``fit`` method. The
        statistics are then derived from the pseudo-inverse of the design
        matrix formed by prepending a column of ones to ``x_values``, so
        the first entry of ``tvalues``/``pvalues`` belongs to the constant
        term and the remaining entries follow the columns of ``x_values``.

        Args:
            x_values: Predictor matrix, one row per observation.
            y_values: One-dimensional response values, one per observation.
            sample_weight: Optional weight(s) passed to the parent ``fit``.
                When given, the statistics are computed for the same
                weighted least-squares problem (rows scaled by the square
                root of their weight).

        Returns:
            The fitted estimator itself, to allow call chaining.

        Notes:
            If any coefficient has a standard error of exactly zero, all
            t-values and p-values are set to zero.
        """
        super().fit(x_values, y_values, sample_weight)

        n_obs = x_values.shape[0]

        # NOTE(review): the constant column is always added, even if the
        # estimator was created with ``fit_intercept=False`` - confirm
        # that this is intended.
        x_consts = np.column_stack([np.ones(n_obs), x_values])
        targets = np.asarray(y_values)
        resid = targets - self.predict(x_values)

        if sample_weight is not None:
            # Weighted least squares equals ordinary least squares on rows
            # scaled by sqrt(weight). Scaling the design matrix, targets
            # and residuals alike keeps the statistics consistent with the
            # weighted fit performed above. A scalar weight is expanded to
            # one value per observation.
            root_w = (
                np.sqrt(np.asarray(sample_weight, dtype=float))
                * np.ones(n_obs)
            )
            x_consts = x_consts * root_w[:, np.newaxis]
            targets = targets * root_w
            resid = resid * root_w

        # ``pinv`` already reports the effective rank, so no separate
        # rank computation is needed; the same residual degrees of
        # freedom are used for the variance estimate and the t-test.
        pinv_x, rank = sp.linalg.pinv(x_consts, return_rank=True)
        df_resid = n_obs - rank

        # Residual variance estimate and coefficient covariance matrix.
        scale = np.dot(resid, resid) / df_resid
        cov_params = np.dot(pinv_x, pinv_x.T) * scale

        beta = np.dot(pinv_x, targets)
        bse = np.sqrt(np.diag(cov_params))

        if np.any(bse == 0):
            # Avoid dividing by a zero standard error; report zeros for
            # every coefficient instead.
            tvalues = beta * 0
            pvalues = beta * 0
        else:
            tvalues = beta / bse
            # Two-sided p-value from Student's t survival function.
            pvalues = t.sf(np.abs(tvalues), df_resid) * 2

        self._tvalues = tvalues
        self._pvalues = pvalues

        return self

    @property
    def tvalues(self) -> np.ndarray | None:
        """Return the t-values, or ``None`` before ``calc_regression``."""
        return self._tvalues

    @property
    def pvalues(self) -> np.ndarray | None:
        """Return the p-values, or ``None`` before ``calc_regression``."""
        return self._pvalues