Source code for probnet.models.base_net

#!/usr/bin/env python
# Created by "Thieu" at 11:03, 02/05/2025 ----------%                                                                               
#       Email: nguyenthieu2102@gmail.com            %                                                    
#       Github: https://github.com/thieu1995        %                         
# --------------------------------------------------%

import inspect
import pickle
import pprint
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.base import BaseEstimator
from permetrics import RegressionMetric, ClassificationMetric
from probnet.helpers import validator
from probnet.helpers import kernel as kernel_module
from probnet.helpers import distance as distance_module
from probnet.helpers.metrics import get_all_regression_metrics, get_all_classification_metrics


class BaseNet(BaseEstimator):
    """
    Base class for neural network models.

    Inherits from `BaseEstimator` to integrate with scikit-learn pipelines.
    The learning interface (`fit`, `predict`, `predict_proba`, `score`,
    `scores`, `evaluate`) is declared here as no-op stubs; concrete models
    are expected to override them. The name-mangled ``__evaluate_*`` /
    ``__score*`` helpers implement the regression and classification scoring
    logic on top of `permetrics`.

    Attributes
    ----------
    SUPPORTED_CLS_METRICS : dict
        Dictionary of supported classification metrics.
    SUPPORTED_REG_METRICS : dict
        Dictionary of supported regression metrics.
    CLS_OBJ_LOSSES : dict
        Dictionary of classification objective losses (empty in this base class).
    SUPPORTED_KERNELS : list
        List of supported kernel functions.
    SUPPORTED_METRICS : list
        List of supported distance metrics.

    Parameters
    ----------
    sigma : float, default=1.0
        The bandwidth parameter for the kernel function.
    kernel : str, default='gaussian'
        The kernel function to use.
    dist : str, default='euclidean'
        The distance metric to use.
    kwargs : dict
        Additional keyword arguments for customization.
    """

    SUPPORTED_CLS_METRICS = get_all_classification_metrics()
    SUPPORTED_REG_METRICS = get_all_regression_metrics()
    CLS_OBJ_LOSSES = {}
    SUPPORTED_KERNELS = [
        "gaussian", "laplace", "cauchy", "epanechnikov", "uniform", "triangular",
        "quartic", "cosine", "logistic", "sigmoid", "multiquadric",
        "inverse_multiquadric", "rational_quadratic", "exponential", "power",
        "linear", "bessel", "vonmises", "vonmises_fisher",
    ]
    # NOTE(review): "cosin" may be a typo for "cosine"; it is kept unchanged
    # because set_dist() resolves f"{dist}_distance" in the distance module.
    # Confirm the helper's actual name before renaming.
    SUPPORTED_METRICS = [
        "euclidean", "manhattan", "chebyshev", "minkowski", "hamming", "canberra",
        "braycurtis", "jaccard", "sokalmichener", "sokalsneath", "russellrao",
        "yule", "kulsinski", "rogers_tanimoto", "kulczynski", "morisita",
        "morisita_horn", "dice", "kappa", "rogers", "jensen", "jensen_shannon",
        "hellinger", "bhattacharyya", "cityblock", "cosin", "correlation",
        "mahalanobis",
    ]

    def __init__(self, sigma=1.0, kernel='gaussian', dist='euclidean', **kwargs):
        """
        Initialize the BaseNet class.

        Parameters
        ----------
        sigma : float, default=1.0
            The bandwidth parameter for the kernel function.
        kernel : str, default='gaussian'
            The kernel function to use.
        dist : str, default='euclidean'
            The distance metric to use.
        kwargs : dict
            Additional keyword arguments for customization.
        """
        super().__init__()
        self.sigma = sigma
        self.set_kernel(kernel)
        self.set_dist(dist)
        self.kwargs = kwargs
        # Stays None until a subclass sets it; the classification scoring
        # helpers compare it against 2.
        self.n_labels = None

    def set_kernel(self, kernel):
        """
        Set the kernel function.

        The name is checked against `SUPPORTED_KERNELS` and the callable named
        ``<kernel>_kernel`` is looked up in the kernel helper module.

        Parameters
        ----------
        kernel : str
            The kernel function to use ('gaussian', 'laplace', 'cauchy', 'epanechnikov',...).
        """
        self.kernel = validator.check_str("kernel", kernel, self.SUPPORTED_KERNELS)
        self.kernel_func = getattr(kernel_module, f"{self.kernel}_kernel")

    def set_dist(self, dist):
        """
        Set the distance metric.

        The name is checked against `SUPPORTED_METRICS` and the callable named
        ``<dist>_distance`` is looked up in the distance helper module.

        Parameters
        ----------
        dist : str
            The distance metric to use ('euclidean', 'manhattan',...).
        """
        self.dist = validator.check_str("dist", dist, self.SUPPORTED_METRICS)
        self.dist_func = getattr(distance_module, f"{self.dist}_distance")

    def __repr__(self, **kwargs):
        """Pretty-print parameters like scikit-learn's Estimator.

        Every parameter name in the ``__init__`` signature is read back from
        the instance. The one-line form is used when the parameter string is
        at most 80 characters; otherwise each parameter goes on its own line.
        """
        param_order = list(inspect.signature(self.__init__).parameters.keys())
        param_dict = {k: getattr(self, k) for k in param_order}
        param_str = ", ".join(f"{k}={repr(v)}" for k, v in param_dict.items())
        if len(param_str) <= 80:
            return f"{self.__class__.__name__}({param_str})"
        # NOTE(review): the continuation indent inside these literals is
        # reproduced as seen; confirm it against the upstream file.
        formatted_params = ",\n ".join(f"{k}={pprint.pformat(v)}" for k, v in param_dict.items())
        return f"{self.__class__.__name__}(\n {formatted_params}\n)"

    def fit(self, X, y):
        """
        Fit the model to the training data.

        Stub in the base class (does nothing and returns None).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.
        y : array-like of shape (n_samples,)
            Target values.
        """
        pass

    def predict(self, X):
        """
        Predict target values for the given input data.

        Stub in the base class (does nothing and returns None).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data.

        Returns
        -------
        array-like
            Predicted target values.
        """
        pass

    def predict_proba(self, X):
        """
        Predict class probabilities for the given input data.

        Stub in the base class (does nothing and returns None).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data.

        Returns
        -------
        array-like
            Predicted class probabilities.
        """
        pass

    def __evaluate_reg(self, y_true, y_pred, list_metrics=("MSE", "MAE")):
        """
        Compute several regression metrics for given predictions.

        Parameters
        ----------
        y_true : array-like
            Ground truth (correct) target values.
        y_pred : array-like
            Estimated target values.
        list_metrics : tuple/list of str, optional
            List of metric names to evaluate. Default is ("MSE", "MAE").

        Returns
        -------
        The result of `RegressionMetric.get_metrics_by_list_names`.
        """
        rm = RegressionMetric(y_true=y_true, y_pred=y_pred)
        return rm.get_metrics_by_list_names(list_metrics)

    def __evaluate_cls(self, y_true, y_pred, list_metrics=("AS", "RS")):
        """
        Compute several classification metrics for given predictions.

        Parameters
        ----------
        y_true : array-like of shape (n_samples,)
            True class labels.
        y_pred : array-like of shape (n_samples,)
            Predicted class labels by the classifier.
        list_metrics : tuple/list of str, optional
            List of metric names to evaluate, by default ("AS", "RS").

        Returns
        -------
        The result of `ClassificationMetric.get_metrics_by_list_names`.
        """
        cm = ClassificationMetric(y_true, y_pred)
        return cm.get_metrics_by_list_names(list_metrics)

    def __score_reg(self, X, y, metric="RMSE"):
        """
        Predict on ``X`` and return a single regression metric.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples used for prediction.
        y : array-like of shape (n_samples,)
            The true target values.
        metric : str, optional, default="RMSE"
            The regression metric to be used for scoring. It is passed to
            `RegressionMetric.get_metric_by_name` without prior validation.

        Returns
        -------
        float
            The calculated regression metric.
        """
        y_pred = self.predict(X)
        return RegressionMetric(y, y_pred).get_metric_by_name(metric)[metric]

    def __scores_reg(self, X, y, list_metrics=("MSE", "MAE")):
        """
        Predict on ``X`` and return several regression metrics.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data.
        y : array-like of shape (n_samples,)
            True values for X.
        list_metrics : tuple of str, optional
            List of evaluation metrics to be used. Default is ("MSE", "MAE").

        Returns
        -------
        The metrics computed for the requested names.
        """
        y_pred = self.predict(X)
        return self.__evaluate_reg(y_true=y, y_pred=y_pred, list_metrics=list_metrics)

    def __score_cls(self, X, y, metric="AS"):
        """
        Predict on ``X`` and return a single classification metric.

        Class probabilities (`predict_proba`) are used only when there are
        more than two labels and the metric is one of `CLS_OBJ_LOSSES`;
        otherwise hard predictions (`predict`) are used.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples to score.
        y : array-like of shape (n_samples,)
            True labels for X.
        metric : str, default="AS"
            Scoring metric to use. Must be one of the keys in
            self.SUPPORTED_CLS_METRICS.

        Returns
        -------
        float
            Computed score for the specified metric.

        Raises
        ------
        TypeError
            If `n_labels` is still None (it is compared against 2).
        """
        metric = validator.check_str("metric", metric, list(self.SUPPORTED_CLS_METRICS.keys()))
        use_proba = self.n_labels > 2 and metric in self.CLS_OBJ_LOSSES
        y_pred = self.predict_proba(X) if use_proba else self.predict(X)
        cm = ClassificationMetric(y_true=y, y_pred=y_pred)
        return cm.get_metric_by_name(metric)[metric]

    def __scores_cls(self, X, y, list_metrics=("AS", "RS")):
        """
        Predict on ``X`` and return several classification metrics.

        Requested names are split in two groups: names found in
        `CLS_OBJ_LOSSES` (evaluated on `predict_proba` output when there are
        more than two labels) and the remaining names found in
        `SUPPORTED_CLS_METRICS` (evaluated on `predict` output). Names in
        neither group are ignored.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Feature matrix for the samples for which predictions are to be made.
        y : array-like of shape (n_samples,)
            True labels for the samples.
        list_metrics : tuple of str, optional
            List of metric names to evaluate. Possible values include 'AS',
            'RS', etc. Default is ('AS', 'RS').

        Returns
        -------
        dict
            A dictionary with the performance metrics from the selected names
            listed in `list_metrics`; score metrics come first, then losses.

        Raises
        ------
        TypeError
            If at least one loss is requested while `n_labels` is still None.
        """
        # Deduplicate while keeping the caller's order, so the result's key
        # order is deterministic (set intersections are not ordered).
        requested = list(dict.fromkeys(list_metrics))
        loss_names = [name for name in requested if name in self.CLS_OBJ_LOSSES]
        score_names = [
            name for name in requested
            if name in self.SUPPORTED_CLS_METRICS and name not in self.CLS_OBJ_LOSSES
        ]
        loss_results = {}
        if loss_names:
            y_pred = self.predict_proba(X) if self.n_labels > 2 else self.predict(X)
            loss_results = self.__evaluate_cls(y_true=y, y_pred=y_pred, list_metrics=loss_names)
        y_pred = self.predict(X)
        score_results = self.__evaluate_cls(y_true=y, y_pred=y_pred, list_metrics=score_names)
        return {**score_results, **loss_results}

    def score(self, X, y):
        """Default interface for score function (stub in the base class)."""
        pass

    def scores(self, X, y, list_metrics=None):
        """Return the list of metrics of the prediction (stub in the base class)."""
        pass

    def evaluate(self, y_true, y_pred, list_metrics=None):
        """Return the list of performance metrics of the prediction (stub in the base class)."""
        pass

    def save_metrics(self, y_true, y_pred, list_metrics=("RMSE", "MAE"), save_path="history", filename="metrics.csv"):
        """
        Save evaluation metrics to csv file

        The metrics come from `self.evaluate`, which is a stub in this base
        class, so this method is only usable on classes that override it.

        Parameters
        ----------
        y_true : ground truth data
        y_pred : predicted output
        list_metrics : list of evaluation metrics
        save_path : saved path (relative path, consider from current executed script path)
        filename : name of the file, needs to have ".csv" extension
        """
        folder = Path(save_path)
        folder.mkdir(parents=True, exist_ok=True)
        results = self.evaluate(y_true, y_pred, list_metrics)
        # One row, with one column per metric name.
        df = pd.DataFrame.from_dict(results, orient='index').T
        df.to_csv(folder / filename, index=False)

    def save_y_predicted(self, X, y_true, save_path="history", filename="y_predicted.csv"):
        """
        Save the predicted results to csv file

        Parameters
        ----------
        X : The features data, nd.ndarray
        y_true : The ground truth data
        save_path : saved path (relative path, consider from current executed script path)
        filename : name of the file, needs to have ".csv" extension
        """
        folder = Path(save_path)
        folder.mkdir(parents=True, exist_ok=True)
        y_pred = self.predict(X)
        data = {
            "y_true": np.squeeze(np.asarray(y_true)),
            "y_pred": np.squeeze(np.asarray(y_pred)),
        }
        pd.DataFrame(data).to_csv(folder / filename, index=False)

    def save_model(self, save_path="history", filename="model.pkl"):
        """
        Save model to pickle file

        Parameters
        ----------
        save_path : saved path (relative path, consider from current executed script path)
        filename : name of the file; ".pkl" is appended when it is missing
        """
        folder = Path(save_path)
        folder.mkdir(parents=True, exist_ok=True)
        if not filename.endswith(".pkl"):
            filename += ".pkl"
        # The context manager closes the file even when pickling fails.
        with open(folder / filename, 'wb') as file:
            pickle.dump(self, file)

    @staticmethod
    def load_model(load_path="history", filename="model.pkl"):
        """
        Load a model from a pickle file.

        Parameters
        ----------
        load_path : str, optional
            Directory path where the model file is located. Defaults to "history".
        filename : str
            Name of the file to be loaded. If the filename doesn't end with ".pkl",
            the extension is automatically added.

        Returns
        -------
        object
            The model loaded from the specified pickle file.
        """
        if not filename.endswith(".pkl"):
            filename += ".pkl"
        # SECURITY: unpickling can execute arbitrary code; only load files
        # that come from a trusted source.
        with open(Path(load_path) / filename, 'rb') as file:
            return pickle.load(file)