"""Utilities for the neural network modules"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import numpy as np
from scipy.special import expit as logistic_sigmoid
from scipy.special import xlogy


def inplace_identity(X):
    """Simply leave the input array unchanged.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Data, where `n_samples` is the number of samples
        and `n_features` is the number of features.
"""
# Nothing to do


def inplace_exp(X):
    """Compute the exponential inplace.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        The input data.
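
    Examples
    --------
    A minimal check of the in-place update (exp(0) is exactly 1):

    >>> import numpy as np
    >>> X = np.zeros((1, 2))
    >>> inplace_exp(X)
    >>> X
    array([[1., 1.]])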
"""
np.exp(X, out=X)


def inplace_logistic(X):
    """Compute the logistic function inplace.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        The input data.
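
    Examples
    --------
    A minimal check: the logistic function maps 0 to exactly 0.5.

    >>> import numpy as np
    >>> X = np.zeros((1, 2))
    >>> inplace_logistic(X)
    >>> X
    array([[0.5, 0.5]])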
"""
logistic_sigmoid(X, out=X)


def inplace_tanh(X):
    """Compute the hyperbolic tangent function inplace.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        The input data.
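
    Examples
    --------
    A minimal check: tanh maps 0 to 0 and saturates at -1 and 1 for large
    negative and positive inputs.

    >>> import numpy as np
    >>> X = np.array([[-1000.0, 0.0, 1000.0]])
    >>> inplace_tanh(X)
    >>> X
    array([[-1.,  0.,  1.]])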
"""
np.tanh(X, out=X)


def inplace_relu(X):
    """Compute the rectified linear unit function inplace.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        The input data.
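
    Examples
    --------
    A minimal check: negative entries are clipped to zero, the rest pass
    through unchanged.

    >>> import numpy as np
    >>> X = np.array([[-2.0, 0.0, 3.0]])
    >>> inplace_relu(X)
    >>> X
    array([[0., 0., 3.]])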
"""
np.maximum(X, 0, out=X)


def inplace_softmax(X):
    """Compute the K-way softmax function inplace.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        The input data.
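
    Examples
    --------
    A minimal check: equal scores yield a uniform distribution, and each row
    sums to one.

    >>> import numpy as np
    >>> X = np.zeros((1, 4))
    >>> inplace_softmax(X)
    >>> X
    array([[0.25, 0.25, 0.25, 0.25]])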
"""
tmp = X - X.max(axis=1)[:, np.newaxis]
np.exp(tmp, out=X)
X /= X.sum(axis=1)[:, np.newaxis]


ACTIVATIONS = {
"identity": inplace_identity,
"exp": inplace_exp,
"tanh": inplace_tanh,
"logistic": inplace_logistic,
"relu": inplace_relu,
"softmax": inplace_softmax,
}
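
# An illustrative usage sketch (not scikit-learn's actual training loop): an
# activation is looked up by name and applied in place to a layer's output.
#
#     hidden = np.array([[-1.0, 2.0]])
#     ACTIVATIONS["relu"](hidden)
#     # hidden is now array([[0., 2.]])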


def inplace_identity_derivative(Z, delta):
    """Apply the derivative of the identity function: do nothing.

    Parameters
    ----------
    Z : {array-like, sparse matrix}, shape (n_samples, n_features)
        The data which was output from the identity activation function during
        the forward pass.

    delta : {array-like}, shape (n_samples, n_features)
        The backpropagated error signal to be modified inplace.
"""
# Nothing to do


def inplace_logistic_derivative(Z, delta):
    """Apply the derivative of the logistic sigmoid function.

    It exploits the fact that the derivative is a simple function of the
    output value of the logistic function.

    Parameters
    ----------
    Z : {array-like, sparse matrix}, shape (n_samples, n_features)
        The data which was output from the logistic activation function during
        the forward pass.

    delta : {array-like}, shape (n_samples, n_features)
        The backpropagated error signal to be modified inplace.
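
    Examples
    --------
    A minimal worked example: for a logistic output of 0.5 the derivative is
    0.5 * (1 - 0.5) = 0.25, so the error signal is scaled by 0.25.

    >>> import numpy as np
    >>> Z = np.array([[0.5]])
    >>> delta = np.array([[1.0]])
    >>> inplace_logistic_derivative(Z, delta)
    >>> delta
    array([[0.25]])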
"""
delta *= Z
delta *= 1 - Z


def inplace_tanh_derivative(Z, delta):
    """Apply the derivative of the hyperbolic tangent function.

    It exploits the fact that the derivative is a simple function of the
    output value of the hyperbolic tangent.

    Parameters
    ----------
    Z : {array-like, sparse matrix}, shape (n_samples, n_features)
        The data which was output from the hyperbolic tangent activation
        function during the forward pass.

    delta : {array-like}, shape (n_samples, n_features)
        The backpropagated error signal to be modified inplace.
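
    Examples
    --------
    A minimal worked example: since tanh'(z) = 1 - tanh(z)**2, an output of
    0.5 scales the error signal by 1 - 0.25 = 0.75.

    >>> import numpy as np
    >>> Z = np.array([[0.5]])
    >>> delta = np.array([[2.0]])
    >>> inplace_tanh_derivative(Z, delta)
    >>> delta
    array([[1.5]])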
"""
delta *= 1 - Z**2


def inplace_relu_derivative(Z, delta):
    """Apply the derivative of the rectified linear unit (relu) function.

    It exploits the fact that the derivative is a simple function of the
    output value of the rectified linear unit activation function.

    Parameters
    ----------
    Z : {array-like, sparse matrix}, shape (n_samples, n_features)
        The data which was output from the rectified linear unit activation
        function during the forward pass.

    delta : {array-like}, shape (n_samples, n_features)
        The backpropagated error signal to be modified inplace.
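
    Examples
    --------
    A minimal check: the error signal is zeroed where the relu output is zero
    (the unit was inactive) and passes through unchanged elsewhere.

    >>> import numpy as np
    >>> Z = np.array([[0.0, 2.0]])
    >>> delta = np.array([[5.0, 5.0]])
    >>> inplace_relu_derivative(Z, delta)
    >>> delta
    array([[0., 5.]])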
"""
delta[Z == 0] = 0


DERIVATIVES = {
"identity": inplace_identity_derivative,
"tanh": inplace_tanh_derivative,
"logistic": inplace_logistic_derivative,
"relu": inplace_relu_derivative,
}
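
# Note that "softmax" and "exp" have no derivative entry: they are only used as
# output-layer activations, where the backpropagated error is derived directly
# from the loss (for the canonical loss/activation pairings it reduces to
# `activation - y_true`), so their derivatives are never needed here.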


def squared_loss(y_true, y_pred, sample_weight=None):
    """Compute the squared loss for regression.

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) values.

    y_pred : array-like or label indicator matrix
        Predicted values, as returned by a regression estimator.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    Returns
    -------
    loss : float
        Half the mean squared error; lower values indicate better
        predictions.
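
    Examples
    --------
    A minimal worked example: errors of 0 and 2 give squared errors of 0 and
    4, whose mean is 2, halved to 1.

    >>> import numpy as np
    >>> y_true = np.array([[0.0], [2.0]])
    >>> y_pred = np.array([[0.0], [0.0]])
    >>> float(squared_loss(y_true, y_pred))
    1.0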
"""
return (
0.5 * np.average((y_true - y_pred) ** 2, weights=sample_weight, axis=0).mean()
)


def poisson_loss(y_true, y_pred, sample_weight=None):
    """Compute (half of the) Poisson deviance loss for regression.

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) target values.

    y_pred : array-like or label indicator matrix
        Predicted values, as returned by a regression estimator.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    Returns
    -------
    loss : float
        The mean half Poisson deviance; lower values indicate better
        predictions.
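
    Examples
    --------
    A minimal check: a perfect prediction has zero deviance, because
    xlogy(y, y / y) - y + y = 0 for every sample.

    >>> import numpy as np
    >>> y = np.array([[1.0], [3.0]])
    >>> float(poisson_loss(y, y))
    0.0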
"""
# TODO: Decide what to do with the term `xlogy(y_true, y_true) - y_true`. For now,
# it is included. But the _loss module doesn't use it (for performance reasons) and
# only adds it as return of constant_to_optimal_zero (mainly for testing).
return np.average(
xlogy(y_true, y_true / y_pred) - y_true + y_pred, weights=sample_weight, axis=0
).sum()


def log_loss(y_true, y_prob, sample_weight=None):
    """Compute logistic loss for classification.

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) labels.

    y_prob : array-like of float, shape = (n_samples, n_classes)
        Predicted probabilities, as returned by a classifier's
        predict_proba method.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    Returns
    -------
    loss : float
        The mean cross-entropy loss; lower values indicate better
        predictions.
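
    Examples
    --------
    A minimal worked example: a uniform prediction over two classes costs
    -log(1/2) = log(2) per sample.

    >>> import numpy as np
    >>> y_true = np.array([[1.0, 0.0]])
    >>> y_prob = np.array([[0.5, 0.5]])
    >>> float(log_loss(y_true, y_prob))
    0.6931471805599453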
"""
eps = np.finfo(y_prob.dtype).eps
y_prob = np.clip(y_prob, eps, 1 - eps)
if y_prob.shape[1] == 1:
y_prob = np.append(1 - y_prob, y_prob, axis=1)
if y_true.shape[1] == 1:
y_true = np.append(1 - y_true, y_true, axis=1)
return -np.average(xlogy(y_true, y_prob), weights=sample_weight, axis=0).sum()


def binary_log_loss(y_true, y_prob, sample_weight=None):
    """Compute binary logistic loss for classification.

    This is identical to log_loss in the binary classification case,
    but is kept for its use in the multilabel case.

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) labels.

    y_prob : array-like of float, shape = (n_samples, n_outputs)
        Predicted probabilities, as returned by a classifier's
        predict_proba method.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    Returns
    -------
    loss : float
        The mean binary cross-entropy loss; lower values indicate better
        predictions.
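
    Examples
    --------
    A minimal check: an indifferent prediction of 0.5 for a positive label
    costs -(log(0.5) + 0) = log(2).

    >>> import numpy as np
    >>> y_true = np.array([[1.0]])
    >>> y_prob = np.array([[0.5]])
    >>> float(binary_log_loss(y_true, y_prob))
    0.6931471805599453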
"""
eps = np.finfo(y_prob.dtype).eps
y_prob = np.clip(y_prob, eps, 1 - eps)
return -np.average(
xlogy(y_true, y_prob) + xlogy(1 - y_true, 1 - y_prob),
weights=sample_weight,
axis=0,
).sum()


LOSS_FUNCTIONS = {
"squared_error": squared_loss,
"poisson": poisson_loss,
"log_loss": log_loss,
"binary_log_loss": binary_log_loss,
}
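
# An illustrative usage sketch (`y_batch` and `prob_batch` are hypothetical
# placeholders): a loss is looked up by name and evaluated on one batch of
# targets and predicted probabilities.
#
#     loss_fn = LOSS_FUNCTIONS["log_loss"]
#     batch_loss = loss_fn(y_batch, prob_batch)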