Source code for vampire.plot

import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
from scipy import cluster

from . import analysis


[docs]def set_plot_style(): """ Set matplotlib plot settings of ``rcParams`` for better visualization. References ---------- [1] Customizing Matplotlib with style sheets and rcParams. https://matplotlib.org/stable/tutorials/introductory/customizing.html """ plt.rcParams.update({ 'font.family': 'Arial', 'font.weight': 'normal', 'mathtext.fontset': 'cm', 'font.size': 18, 'lines.linewidth': 2, 'axes.linewidth': 2, 'axes.spines.top': False, 'axes.spines.right': False, 'axes.titleweight': 'bold', 'axes.titlesize': 18, 'axes.labelweight': 'bold', 'xtick.major.size': 8, 'xtick.major.width': 2, 'ytick.major.size': 8, 'ytick.major.width': 2, 'figure.dpi': 80, 'savefig.dpi': 300, 'legend.framealpha': 1, 'legend.edgecolor': 'black', 'legend.fancybox': False, 'legend.fontsize': 14, })
[docs]def save_fig( fig, output_path, fig_type, extension='.png', model_name=None, apply_name=None ): """ Save figure to local directory. Parameters ---------- fig : Figure Figure to be saved. output_path : str Path to the output directory. fig_type : str General description/type of the figure. extension : str, optional File extension. model_name : str, optional Name of the built model. apply_name : str, optional Name of the image set being applied to. """ time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') if model_name is None: fig_path = os.path.join(output_path, f'{fig_type}_{time_stamp}{extension}') else: if apply_name is None: # build model fig_path = os.path.join(output_path, f'{fig_type}_build_{model_name}{extension}') else: # apply model fig_path = os.path.join(output_path, f'{fig_type}_apply_{model_name}_on_{apply_name}{extension}') if os.path.exists(fig_path): fig_name, extension = os.path.splitext(fig_path) fig.savefig(f'{fig_name}_{time_stamp}{extension}', dpi=300) else: fig.savefig(fig_path, dpi=300) return
[docs]def plot_dendrogram(model, ax=None, fig_size=(6, 2)): """ Plots dendrogram of mean contours. Parameters ---------- model : vampire.model.Vampire Built VAMPIRE model. ax : Axes, optional Figure axis to be plotted on. Cannot be not None with ``output_path`` at the same time. fig_size : (float, float), optional Width, height in inches. Default (6, 2). Returns ------- ax : matplotlib.axes.Axes See Also -------- scipy.cluster.hierarchy.dendrogram """ if ax is None: fig, ax = plt.subplots(figsize=fig_size) cluster.hierarchy.set_link_color_palette(['k']) cluster.hierarchy.dendrogram( model.linkage_matrix, ax=ax, p=0, truncate_mode='lastp', orientation='bottom', above_threshold_color='k' ) ax.axis('off') return ax
[docs]def plot_contours( model, apply_properties_df=None, contour_scale=3, ax=None, fig_size=(6, 2), colors=None, alpha=1, lw=2 ): """ Plots mean contours. Parameters ---------- model : vampire.model.Vampire Built VAMPIRE model. apply_properties_df : DataFrame, optional Properties output of VAMPIRE model applied to data. contour_scale : float, optional Scale of the contour shapes. ax : Axes, optional Figure axis to be plotted on. Cannot be not None with ``output_path`` at the same time. fig_size : (float, float), optional Width, height in inches. Default (6, 2). colors : str or list, optional Colors of mean contours. alpha : float, optional Alpha of mean contours. lw : float, optional Line width of mean contours. Returns ------- ax : matplotlib.axes.Axes """ if apply_properties_df is None: mean_cluster_contours = model.mean_cluster_contours else: contours = np.vstack(apply_properties_df['normalized_contour'].to_numpy()) cluster_id_df = apply_properties_df[['cluster_id', 'distance_to_centroid']] labeled_contours_df = analysis.get_labeled_contours_df(contours, cluster_id_df) mean_cluster_contours = analysis.get_mean_cluster_contours(labeled_contours_df) if ax is None: fig, ax = plt.subplots(figsize=fig_size) if colors is None: colors = [plt.get_cmap('twilight')(cluster_i) for cluster_i in np.linspace(0.1, 0.9, model.n_clusters)] elif type(colors) == str: colors = [colors] * model.n_clusters x_first = 5 # offset of first contour x_offset = 10 # offset between contours for i in range(model.n_clusters): # read in contour coordinates x = mean_cluster_contours[i, :model.n_points] y = mean_cluster_contours[i, model.n_points:] # form close shape when plotting x = np.append(x, x[0]) y = np.append(y, y[0]) # place shape into right location x = x * contour_scale + x_first + x_offset * i y = y * contour_scale # plot shape of objects corresponding to the branches ax.plot(x, y, color=colors[i], alpha=alpha, lw=lw) ax.axis('equal') ax.axis('off') return ax
[docs]def plot_representatives( model, apply_properties_df, n_samples=10, random_state=None, ax=None, fig_size=(17, 2), colors=None, alpha=None, lw=None ): """ Plots representative object contours. Parameters ---------- model : vampire.model.Vampire Built VAMPIRE model. apply_properties_df : DataFrame, optional Properties output of VAMPIRE model applied to data. n_samples : int, optional Number of sample drawn from each cluster. Default 10. If n_samples > number of total available samples in the smallest cluster, it is set to the that number. random_state : int, optional Random state of sampling representative contours. ax : Axes, optional Figure axis to be plotted on. Cannot be not None with ``output_path`` at the same time. fig_size : (float, float), optional Width, height in inches. Default (17, 2). colors : str or list, optional Colors of representative contours of clusters. alpha : float, optional Alpha of representative contours. lw : float, optional Line width of representative contours. Returns ------- ax : matplotlib.axes.Axes """ if ax is None: fig, ax = plt.subplots(figsize=fig_size) if colors is None: colors = [plt.get_cmap('twilight')(cluster_i) for cluster_i in np.linspace(0.1, 0.9, model.n_clusters)] elif type(colors) == str: colors = [colors] * model.n_clusters x_offset = 5 # move center of another cluster to new location # determine sample size cluster_id = apply_properties_df['cluster_id'].values unique, counts = np.unique(cluster_id, return_counts=True) max_n_samples = np.min(counts) if n_samples > max_n_samples: n_samples = max_n_samples # sample contours from all clusters contours = np.vstack(apply_properties_df['normalized_contour'].to_numpy()) cluster_id_df = apply_properties_df['cluster_id'] labeled_contours_df = analysis.get_labeled_contours_df(contours, cluster_id_df) all_cluster_samples_df = labeled_contours_df.groupby('cluster_id') \ .sample( n=n_samples, random_state=random_state ) # plotting each sample contour for cluster_i in range(model.n_clusters): # sample contour in current cluster cluster_samples_df = all_cluster_samples_df[all_cluster_samples_df['cluster_id'] == cluster_i] cluster_samples = cluster_samples_df.drop(columns='cluster_id').values for sample_i in range(n_samples): # plot each sample contour by... # read in sample contour coordinates x = cluster_samples[sample_i, :model.n_points] y = cluster_samples[sample_i, model.n_points:] # form close shape when plotting x = np.append(x, x[0]) y = np.append(y, y[0]) # place shape into right location x = x + x_offset * cluster_i y = y # sample shape of objects corresponding to the clusters ax.plot(x, y, color=colors[cluster_i], alpha=alpha, lw=lw) plt.axis('equal') plt.axis('off') return ax
[docs]def plot_distribution(properties_df, ax=None): """ Plots the distribution of mean contours in a bar graph. Parameters ---------- properties_df : DataFrame DataFrame containing contour coordinates of objects and a column named 'cluster_id' that indicates the cluster that an object belongs. ax : Axes, optional Figure axis to be plotted on. Cannot be not None with ``output_path`` at the same time. Returns ------- ax : matplotlib.axes.Axes """ if ax is None: fig, ax = plt.subplots() distribution = analysis.get_distribution(properties_df) * 100 # unit: percent n_clusters = len(distribution) x_first = 5 # offset of first contour x_offset = 10 # offset between contours width = x_offset / 2 x = np.arange( x_first, n_clusters * x_offset + x_offset / 2, x_offset ) colors = [ plt.get_cmap('twilight')(cluster_i) for cluster_i in np.linspace(0.1, 0.9, n_clusters) ] ax.bar( x=x, height=distribution, color=colors, align='center', width=width ) ax.set_ylabel(f'Distribution [%]') ax.set_xticks([]) # clear x tick and tick labels # figure settings after plotting plt.tight_layout() return ax
[docs]def plot_contour_dendrogram(model, fig_size=(6, 2)): """ Plots dendrogram with mean contours. Parameters ---------- model : vampire.model.Vampire Built VAMPIRE model. fig_size : (float, float), optional Width, height in inches. Default (6, 2). Returns ------- fig : matplotlib.figure.Figure axs : matplotlib.axes.Axes See Also -------- plot_dendrogram, plot_contours """ fig, axs = plt.subplots(2, 1, figsize=fig_size, frameon=False, sharex='all') plot_dendrogram(model, ax=axs[1]) plot_contours(model, ax=axs[0]) return fig, axs
[docs]def plot_distribution_contour( model, apply_properties_df=None, fig_size=(5, 5), height_ratio=(4, 1) ): """ Plots the distribution of mean contours in a bar graph with labeling of mean contours. Parameters ---------- model : vampire.model.Vampire Built VAMPIRE model. apply_properties_df : DataFrame, optional Properties output of VAMPIRE model applied to data. fig_size : (float, float), optional Width, height in inches. Default (5, 5). height_ratio : list[float], optional Ratio between height of distribution plot, shape mode contours, and shape mode dendrogram. Default [4, 1, 1]. Recommended values: * [4, 1, 1] for 5 clusters (shape modes) * [4, 0.5, 1] for 10 clusters (shape modes) * [4, 0.35, 1] for 15 clusters (shape modes) Returns ------- fig : matplotlib.figure.Figure axs : matplotlib.axes.Axes See Also -------- plot_contours, plot_distribution """ fig, axs = plt.subplots( 2, 1, figsize=fig_size, sharex='all', gridspec_kw={'height_ratios': height_ratio} ) plot_contours(model, ax=axs[1]) if apply_properties_df is None: plot_distribution(model.cluster_id_df, ax=axs[0]) else: plot_distribution(apply_properties_df, ax=axs[0]) plt.tight_layout() fig.subplots_adjust(hspace=0) return fig, axs
[docs]def plot_distribution_contour_dendrogram( model, apply_properties_df=None, fig_size=(5, 5), height_ratio=(4, 1, 1) ): """ Plots the distribution of mean contours in a bar graph with labeling of mean contours and their dendrogram. Parameters ---------- model : vampire.model.Vampire Built VAMPIRE model. apply_properties_df : DataFrame, optional Properties output of VAMPIRE model applied to data. fig_size : (float, float), optional Width, height in inches. Default (6, 2). height_ratio : list[float], optional Ratio between height of distribution plot, shape mode contours, and shape mode dendrogram. Default [4, 1, 1]. Recommended values: * [4, 1, 1] for 5 clusters (shape modes) * [4, 0.5, 1] for 10 clusters (shape modes) * [4, 0.35, 1] for 15 clusters (shape modes) Returns ------- fig : matplotlib.figure.Figure axs : matplotlib.axes.Axes See Also -------- plot_dendrogram, plot_contours, plot_distribution """ # figure structure height_ratio = list(height_ratio) # prevent mutable default argument fig, axs = plt.subplots( 3, 1, figsize=fig_size, sharex='all', gridspec_kw={'height_ratios': height_ratio} ) if apply_properties_df is None: # build model plot plot_dendrogram(model, ax=axs[2]) plot_contours(model, ax=axs[1]) plot_distribution(model.cluster_id_df, ax=axs[0]) else: # apply model plot plot_dendrogram(model, ax=axs[2]) plot_contours(model, ax=axs[1]) plot_contours( model, apply_properties_df, ax=axs[1], colors='black', alpha=0.3 ) plot_distribution(apply_properties_df, ax=axs[0]) # figure settings after plotting plt.tight_layout() fig.subplots_adjust(hspace=0) return fig, axs