Source code for prosper_nn.utils.neuron_correlation_hidden_layers

""""""
"""
Prosper_nn provides implementations for specialized time series forecasting
neural networks and related utility functions.

Copyright (C) 2022 Nico Beck, Julia Schemm, Henning Frechen, Jacob Fidorra,
    Denni Schmidt, Sai Kiran Srivatsav Gollapalli

This file is part of Prosper_nn.

Prosper_nn is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import torch
import torch.nn as nn
import numpy as np
from typing import List, Optional, Union, Tuple
from . import visualization


def corrcoef(m: torch.Tensor, rowvar: bool = True) -> torch.Tensor:
    """Return Pearson product-moment correlation coefficients.

    Implementation of the numpy function of the same name.

    Parameters
    ----------
    m : torch.Tensor
        A 1-D or 2-D torch.Tensor containing multiple variables and
        observations. Each row of `m` represents a variable, and each column
        a single observation of all those variables.
    rowvar : bool
        If `rowvar` is True, then each row represents a variable, with
        observations in the columns. Otherwise, the relationship is
        transposed: each column represents a variable, while the rows
        contain observations.

    Returns
    -------
    torch.Tensor
        The correlation matrix of the variables.
    """
    if m.dim() > 2:
        raise ValueError("m has more than 2 dimensions")
    if m.dim() < 2:
        m = m.view(1, -1)
    if not rowvar and m.size(0) != 1:
        m = m.t()
    # m = m.type(torch.double)  # uncomment this line if desired
    fact = 1.0 / (m.size(1) - 1)
    m -= torch.mean(m, dim=1, keepdim=True)
    mt = m.t()
    cov = fact * m.matmul(mt).squeeze()
    try:
        d = torch.diag(cov)
    except ValueError:
        # scalar covariance
        # nan if incorrect value (nan, inf, 0), 1 otherwise
        return cov / cov
    stddev = torch.sqrt(d)
    corr = cov / stddev[:, None]
    corr = corr / stddev[None, :]
    return corr
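

# Usage sketch (illustrative, not part of the original module; it would normally
# live in user code): the activations below are made up. With the default
# rowvar=True each row is treated as one variable. Note that corrcoef centers
# its input in place, so a clone is passed here to leave the original untouched.
example_activations = torch.tensor(
    [
        [1.0, 2.0, 3.0, 4.0],  # variable 0
        [2.0, 4.0, 6.0, 8.0],  # variable 1: scaled copy of variable 0
        [4.0, 3.0, 2.0, 1.0],  # variable 2: variable 0 reversed
    ]
)
example_corr = corrcoef(example_activations.clone())
# example_corr[0, 1] is approximately 1.0 (perfect positive correlation) and
# example_corr[0, 2] is approximately -1.0 (perfect negative correlation).

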
def _create_corr_matrix(output_layer: torch.Tensor) -> torch.Tensor:
    """
    Creates a correlation matrix of the layer neurons with the corrcoef function.

    Parameters
    ----------
    output_layer : torch.Tensor
        The output of the layer you want to investigate.
        shape = (sample size, n_neurons).

    Returns
    -------
    corr_matrix : torch.Tensor
        A 2D tensor, the correlation matrix of the neurons in the layer.
    """
    # transpose output_layer so that the rows represent the neurons in the layer
    output_layer_t = torch.transpose(output_layer, 0, 1)
    corr_matrix = corrcoef(output_layer_t)
    corr_matrix = torch.round(corr_matrix * 100) / 100
    return corr_matrix


def _determine_annotations(
    corr_matrix: torch.Tensor, min_absolute_corr: float
) -> np.ndarray:
    """
    Determines and formats the annotations for the plot of the correlation matrix.

    Parameters
    ----------
    corr_matrix : torch.Tensor
        The correlation matrix of the neurons in the layer.
    min_absolute_corr : float
        A value between 0 and 1 which defines the minimal absolute correlation
        coefficient that is displayed in the plotted correlation matrix.

    Returns
    -------
    np.ndarray
        The annotations for the heatmap; coefficients below the threshold are
        replaced by empty strings.
    """
    annotations = corr_matrix.clone()
    annotations[
        (annotations > -min_absolute_corr) * (annotations < min_absolute_corr)
    ] = 0
    annotations = annotations.to("cpu").numpy()
    annotations = np.where(np.equal(annotations, 0), "", annotations)
    return annotations


def _find_max_correlation(
    corr_matrix: torch.Tensor, print_values: bool
) -> Union[List[torch.Tensor], Tuple[torch.Tensor]]:
    """
    Finds the strongest correlation and the corresponding neurons in the
    correlation matrix.

    Parameters
    ----------
    corr_matrix : torch.Tensor
        The correlation matrix of the neurons in the layer.
    print_values : bool
        Whether the value of the strongest correlation and the corresponding
        neuron indices are printed.

    Returns
    -------
    List[torch.Tensor]
        The first entry contains the correlation coefficient(s) indicating the
        strongest correlation. The second entry is a tensor containing the
        indices of the corresponding neurons.
    """
    # find the absolute maximum entry NOT on the diagonal
    corr_matrix_0diag = torch.tril(corr_matrix.clone())
    corr_matrix_0diag = corr_matrix_0diag.fill_diagonal_(0)
    abs_max_corr = torch.max(torch.abs(corr_matrix_0diag))

    # find the indices of the most correlated neurons
    ind_neurons_pos = torch.nonzero(corr_matrix_0diag == abs_max_corr, as_tuple=False)
    ind_neurons_neg = torch.nonzero(corr_matrix_0diag == -abs_max_corr, as_tuple=False)
    ind_neurons = torch.cat((ind_neurons_pos, ind_neurons_neg), 0)

    abs_max_corr = round(abs_max_corr.tolist(), 2)
    print_most_corr = "The most correlated neurons are the ones with indices "
    if ind_neurons_neg.nelement() == 0:
        if print_values:
            print(print_most_corr, ind_neurons.squeeze().tolist())
            print("The corresponding Pearson correlation coefficient is", abs_max_corr)
        return (abs_max_corr, ind_neurons)
    elif ind_neurons_pos.nelement() == 0:
        if print_values:
            print(print_most_corr, ind_neurons.squeeze().tolist())
            print("The corresponding Pearson correlation coefficient is", -abs_max_corr)
        return (-abs_max_corr, ind_neurons)
    else:
        if print_values:
            print(print_most_corr, ind_neurons.squeeze().tolist())
            print(
                "The corresponding Pearson correlation coefficients are +/-",
                abs_max_corr,
            )
        return [torch.tensor([[-abs_max_corr], [abs_max_corr]]), ind_neurons]
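

# Illustrative sketch (not part of the original module; it would normally live
# in user code): how the private helpers chain together on a made-up layer
# output of shape (sample size, n_neurons). Note that _create_corr_matrix
# (via corrcoef) centers the activations in place.
_example_output = torch.rand(100, 5)  # 100 samples, 5 hidden neurons
_example_corr = _create_corr_matrix(_example_output)
_example_annotations = _determine_annotations(_example_corr, min_absolute_corr=0.5)
_example_max_corr, _example_neuron_indices = _find_max_correlation(
    _example_corr, print_values=False
)

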
def hl_size_analysis(
    output_layer: torch.Tensor,
    min_absolute_corr: float = 0.5,
    print_values: bool = True,
    xlabel: str = "Neuron Index",
    ylabel: str = "Neuron Index",
    title: str = "Correlation of neurons in layer",
) -> List[torch.Tensor]:
    """
    Analyses the correlation of the neurons in a layer to see if more neurons
    are needed. If the strongest correlation is small, it can be helpful to
    increase the number of neurons.
    Plots the correlation matrix of the layer neurons and returns the
    correlation matrix together with the strongest correlation and the
    corresponding neurons.

    Parameters
    ----------
    output_layer : torch.Tensor
        The output of the layer you want to investigate.
        shape = (sample size, n_neurons).
    min_absolute_corr : float
        See _determine_annotations. The default is 0.5.
    print_values : bool
        See _find_max_correlation.
    xlabel : str
        Set the label for the x-axis.
    ylabel : str
        Set the label for the y-axis.
    title : str
        Set a title for the axes.

    Returns
    -------
    List[torch.Tensor]
        Contains the correlation matrix as a 2D PyTorch tensor, the value of
        the strongest correlation and the indices of the corresponding neurons.
    """
    corr_matrix = _create_corr_matrix(output_layer)

    # plot a heatmap of the lower triangle of the correlation matrix
    mask = np.zeros_like(corr_matrix)
    mask[np.triu_indices_from(mask)] = True
    visualization.plot_heatmap(
        corr_matrix,
        xlabel=xlabel,
        ylabel=ylabel,
        title=title,
        cbar_kws={"label": "Pearson Corr Coef"},
        vmin=-1,
        vmax=1,
        annot=_determine_annotations(corr_matrix, min_absolute_corr),
        mask=mask,
        fmt="",
        square=True,
        figsize=(10, 10),
    )

    max_corr, ind_neurons = _find_max_correlation(corr_matrix, print_values)
    return [corr_matrix, max_corr, ind_neurons]
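

# Usage sketch (illustrative, not part of the original module; it would normally
# live in user code): `hidden_output` stands in for the recorded output of a
# hidden layer over a batch of samples, here 200 samples of 10 neurons. A plot
# of the correlation matrix is produced as a side effect.
hidden_output = torch.rand(200, 10)
corr_matrix_example, max_corr_example, corr_neurons_example = hl_size_analysis(
    hidden_output, min_absolute_corr=0.5, print_values=True
)

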
def hl_size_analysis_Sequential(
    loaded_model: nn.Sequential,
    model_input: torch.Tensor,
    index_layer: Optional[int] = None,
    name_layer: Optional[str] = None,
    min_absolute_corr: float = 0.5,
    print_values: bool = True,
    xlabel: str = "Neuron Index",
    ylabel: str = "Neuron Index",
    title: str = "Correlation of neurons in layer",
) -> List[torch.Tensor]:
    """
    Analyses the correlation of the neurons in a layer to see if more neurons
    are needed. If the strongest correlation is small, it can be helpful to
    increase the number of neurons.
    Plots the correlation matrix of the layer neurons and returns the
    correlation matrix together with the strongest correlation and the
    corresponding neurons.
    In contrast to hl_size_analysis, the layer output is computed automatically
    from the loaded model, the model input and the name OR the index of the
    layer (module) in the model.
    Be careful: always check the print-out to see whether you are actually
    analyzing the layer you want to analyze. If the layers in your model are
    not initialized in the order they are used in the Sequential, you might
    analyze the wrong layer.

    Parameters
    ----------
    loaded_model : nn.Sequential
        The loaded model you want to analyze.
    model_input : torch.Tensor
        The input for the model.
    index_layer : int
        The index of the layer you want to analyze in
        list(loaded_model.modules()). Bear in mind that the first entry in this
        list is the pre-trained model itself and that submodules cannot be
        analyzed individually but only the top-level module they are part of.
        EITHER index_layer OR name_layer has to be forwarded.
    name_layer : str
        The name of the layer you want to analyze. For this to suffice, all
        modules at the top level have to be named. Bear in mind that submodules
        cannot be analyzed individually but only the top-level module they are
        part of.
        EITHER index_layer OR name_layer has to be forwarded.
    min_absolute_corr : float
        See _determine_annotations.
    print_values : bool
        See _find_max_correlation.
    xlabel : str
        Set the label for the x-axis.
    ylabel : str
        Set the label for the y-axis.
    title : str
        Set a title for the axes.

    Returns
    -------
    List[torch.Tensor]
        Contains the correlation matrix as a 2D PyTorch tensor, the value of
        the strongest correlation and the indices of the corresponding neurons.
    """
    if index_layer is None:
        if name_layer is None:
            raise ValueError(
                "Error: Either name_layer or index_layer has to be forwarded."
            )
        else:
            index_layer = (
                list(dict(loaded_model.named_modules()).keys()).index(name_layer) + 1
            )
    else:
        if name_layer is not None:
            raise ValueError(
                "Error: EITHER name_layer OR index_layer has to be forwarded."
            )

    # rebuild the loaded model up to and including the layer that is to be analyzed
    shorter_model = torch.nn.Sequential(*list(loaded_model.modules())[1:index_layer])
    print("The analysis refers to the last module of the following model: ")
    print(shorter_model)
    with torch.no_grad():
        output_shorter_model = shorter_model(model_input)

    corr_matrix = _create_corr_matrix(output_shorter_model)

    # plot a heatmap of the lower triangle of the correlation matrix
    mask = np.zeros_like(corr_matrix)
    mask[np.triu_indices_from(mask)] = True
    visualization.plot_heatmap(
        corr_matrix,
        xlabel=xlabel,
        ylabel=ylabel,
        title=title,
        cbar_kws={"label": "Pearson Corr Coef"},
        vmin=-1,
        vmax=1,
        annot=_determine_annotations(corr_matrix, min_absolute_corr),
        mask=mask,
        fmt="",
        square=True,
        figsize=(10, 10),
    )

    max_corr, ind_neurons = _find_max_correlation(corr_matrix, print_values)
    return [corr_matrix, max_corr, ind_neurons]
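

# Usage sketch (illustrative, not part of the original module; it would normally
# live in user code): the small Sequential below is made up. With
# name_layer="activation", the model is rebuilt up to and including the Tanh
# module, so the correlation of the 20 tanh activations is analysed.
from collections import OrderedDict

example_model = nn.Sequential(
    OrderedDict(
        [
            ("hidden", nn.Linear(5, 20)),
            ("activation", nn.Tanh()),
            ("output", nn.Linear(20, 1)),
        ]
    )
)
example_input = torch.rand(100, 5)
example_results = hl_size_analysis_Sequential(
    example_model, example_input, name_layer="activation"
)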