Source code for probnet.models.grnn

#!/usr/bin/env python
# Created by "Thieu" at 19:28, 02/05/2025 ----------%                                                                               
#       Email: nguyenthieu2102@gmail.com            %                                                    
#       Github: https://github.com/thieu1995        %                         
# --------------------------------------------------%

import numpy as np
from sklearn.base import RegressorMixin
from sklearn.neighbors import NearestNeighbors
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from probnet.models.base_net import BaseNet


[docs]class GrnnRegressor(BaseNet, RegressorMixin):
    """
    General Regression Neural Network (GRNN) implementation.

    Attributes
    ----------
    sigma : float
        The bandwidth parameter for the kernel function.
    kernel : str
        The kernel function to use ('gaussian', 'laplace', 'epanechnikov',...).
    dist : str
        The distance method to use ('euclidean', 'manhattan', 'cosine',...).
    k_neighbors : int or None
        The number of nearest neighbors to consider. If None, all training samples are used.
    normalize_output : bool
        Whether to normalize the output predictions.
    kwargs : dict
        Additional keyword arguments for customization.
    """

    SUPPORTED_KERNELS = ['gaussian', 'laplace', 'cauchy', 'epanechnikov']
    SUPPORTED_METRICS = ['euclidean', 'manhattan']

    def __init__(self, sigma=1.0, kernel='gaussian', dist='euclidean',
                 k_neighbors=None, normalize_output=True, **kwargs):
        """
        Initialize the GRNN regressor.

        Parameters
        ----------
        sigma : float, default=1.0
            The bandwidth parameter for the kernel function.
        kernel : str, default='gaussian'
            The kernel function to use ('gaussian', 'laplace', 'epanechnikov',...).
        dist : str, default='euclidean'
            The distance method to use ('euclidean', 'manhattan', 'cosine',...).
        k_neighbors : int or None, default=None
            The number of nearest neighbors to consider. If None, all training samples are used.
        normalize_output : bool, default=True
            Whether to normalize the output predictions.
        kwargs : dict
            Additional keyword arguments for customization.
        """
        super().__init__(sigma, kernel, dist, **kwargs)
        self.k_neighbors = k_neighbors
        self.normalize_output = normalize_output

[docs]    def fit(self, X, y):
        """
        Fit the GRNN model to the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.
        y : array-like of shape (n_samples,)
            Target values.

        Returns
        -------
        self : GrnnRegressor
            The fitted model.
        """
        X, y = check_X_y(X, y)
        self.X_train_ = X
        self.y_train_ = y

        # Dùng NearestNeighbors nếu có k_neighbors
        if self.k_neighbors is not None:
            self.nn_ = NearestNeighbors(n_neighbors=self.k_neighbors, metric=self.dist)
            self.nn_.fit(X)

        return self

[docs]    def predict(self, X):
        """
        Predict target values for the given input data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data.

        Returns
        -------
        predictions : array-like of shape (n_samples,)
            Predicted target values.
        """
        check_is_fitted(self, ["X_train_", "y_train_"])
        X = check_array(X)

        if self.k_neighbors is not None:
            # Chỉ dùng k hàng xóm gần nhất
            distances, indices = self.nn_.kneighbors(X, return_distance=True)
            weights = self.kernel_func(distances, self.sigma)
            preds = []
            for i in range(X.shape[0]):
                y_neighbors = self.y_train_[indices[i]]
                w = weights[i]
                if self.normalize_output:
                    w_sum = np.sum(w)
                    if w_sum == 0:
                        preds.append(0.0)
                    else:
                        preds.append(np.dot(w, y_neighbors) / w_sum)
                else:
                    preds.append(np.dot(w, y_neighbors))
            return np.array(preds)

        else:
            # Dùng toàn bộ dữ liệu huấn luyện
            distances = self.dist_func(X, self.X_train_)  # shape (n_test, n_train)
            weights = self.kernel_func(distances, self.sigma)               # shape (n_test, n_train)

            numerator = weights @ self.y_train_                   # (n_test,)
            if self.normalize_output:
                denominator = np.sum(weights, axis=1)
                return numerator / np.where(denominator == 0, 1e-8, denominator)
            else:
                return numerator

[docs]    def score(self, X, y):
        """Return the real R2 (Coefficient of Determination) method, not (Pearson’s Correlation Index)^2 like Scikit-Learn library.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples. For some estimators this may be a precomputed kernel matrix or a list of generic objects instead with shape
            ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted`` is the number of samples used in the fitting for the estimator.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            True values for `X`.

        Returns
        -------
        result : float
            The result of selected method
        """
        return self._BaseNet__score_reg(X, y, "R2")

[docs]    def scores(self, X, y, list_metrics=("MSE", "MAE")):
        """Return the list of regression metrics of the prediction.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples. For some estimators this may be a precomputed kernel matrix or a list of generic objects instead with shape
            ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted`` is the number of samples used in the fitting for the estimator.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            True values for `X`.

        list_metrics : list, default=("MSE", "MAE")
            You can get regression metrics from Permetrics library: https://permetrics.readthedocs.io/en/latest/pages/regression.html

        Returns
        -------
        results : dict
            The results of the list metrics
        """
        return self._BaseNet__scores_reg(X, y, list_metrics)

[docs]    def evaluate(self, y_true, y_pred, list_metrics=("MSE", "MAE")):
        """Return the list of performance metrics of the prediction.

        Parameters
        ----------
        y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
            True values for `X`.

        y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Predicted values for `X`.

        list_metrics : list
            You can get metrics from Permetrics library: https://github.com/thieu1995/permetrics

        Returns
        -------
        results : dict
            The results of the list metrics
        """
        return self._BaseNet__evaluate_reg(y_true, y_pred, list_metrics)