Source code for vampire.plot

import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
from scipy import cluster, spatial


def _get_df_info(contours_df):
    """
    Summarize a contour table: per-cluster mean contours and basic sizes.

    Parameters
    ----------
    contours_df : DataFrame
        DataFrame containing contour coordinates of objects and a column
        named 'cluster_id' that indicates the cluster that an object belongs.

    Returns
    -------
    mean_contours : ndarray
        Column-wise mean of the contours within each cluster, one row per
        cluster in the order produced by ``groupby('cluster_id')``.
    num_clusters : int
        Number of clusters (rows of ``mean_contours``).
    num_coords : int
        Number of contour coordinates (columns of ``mean_contours``).
    num_points : int
        Number of points in a contour, i.e. ``num_coords // 2``.

    Raises
    ------
    ValueError
        If the number of coordinate columns is odd, so it cannot hold a
        matching number of x and y coordinates.

    """
    cluster_means = contours_df.groupby('cluster_id').mean()
    mean_contours = cluster_means.values
    num_clusters = mean_contours.shape[0]
    num_coords = mean_contours.shape[1]
    # every point contributes one x and one y column
    num_points, leftover = divmod(num_coords, 2)
    if leftover != 0:
        raise ValueError('Coordinate does not have matching number of x and y coordinates.')
    return mean_contours, num_clusters, num_coords, num_points
def set_plot_style():
    """
    Apply the module's preferred matplotlib look by updating ``rcParams``.

    The update is global: it changes ``plt.rcParams`` for every figure
    created afterwards in the current session.

    References
    ----------
    [1] Customizing Matplotlib with style sheets and rcParams.
    https://matplotlib.org/stable/tutorials/introductory/customizing.html

    """
    font_style = {
        'font.family': 'Arial',  # alternatives: Times New Roman, Calibri
        'font.weight': 'normal',
        'mathtext.fontset': 'cm',
        'font.size': 18,
    }
    line_and_axes_style = {
        'lines.linewidth': 2,
        'axes.linewidth': 2,
        'axes.spines.top': False,
        'axes.spines.right': False,
        'axes.titleweight': 'bold',
        'axes.titlesize': 18,
        'axes.labelweight': 'bold',
    }
    tick_style = {
        'xtick.major.size': 8,
        'xtick.major.width': 2,
        'ytick.major.size': 8,
        'ytick.major.width': 2,
    }
    resolution_style = {
        'figure.dpi': 80,
        'savefig.dpi': 300,
    }
    legend_style = {
        'legend.framealpha': 1,
        'legend.edgecolor': 'black',
        'legend.fancybox': False,
        'legend.fontsize': 14,
    }

    # merge the groups in their declared order and push them in one update
    combined_style = {
        **font_style,
        **line_and_axes_style,
        **tick_style,
        **resolution_style,
        **legend_style,
    }
    plt.rcParams.update(combined_style)
def save_fig(fig, output_path, fig_type, build_name=None, apply_name=None):
    """
    Save figure to local directory as a 300 dpi PNG.

    The file name depends on which names are given:

    * no ``build_name``: ``{fig_type}_{time_stamp}.png``
    * ``build_name`` only: ``{fig_type}_build_{build_name}.png``
    * both names: ``{fig_type}_apply_{build_name}_on_{apply_name}.png``

    If a file with the chosen name already exists, the current time stamp
    is appended to the name so the existing file is not overwritten.

    Parameters
    ----------
    fig : Figure
        Figure to be saved. Only its ``savefig`` method is used.
    output_path : str
        Path to the output directory. It is created if it does not exist.
    fig_type : str
        General description/type of the figure.
    build_name : str, optional
        Name of the built model.
    apply_name : str, optional
        Name of the image set being applied to. Ignored when ``build_name``
        is None.

    """
    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

    if build_name is None:
        file_name = f'{fig_type}_{time_stamp}.png'
    elif apply_name is None:  # build model
        file_name = f'{fig_type}_build_{build_name}.png'
    else:  # apply model
        file_name = f'{fig_type}_apply_{build_name}_on_{apply_name}.png'

    # savefig does not create missing directories; an empty path means the
    # current working directory, which needs no creation
    if output_path:
        os.makedirs(output_path, exist_ok=True)
    fig_path = os.path.join(output_path, file_name)

    # never overwrite an earlier figure: disambiguate with the time stamp
    if os.path.exists(fig_path):
        stem, extension = os.path.splitext(fig_path)
        fig_path = f'{stem}_{time_stamp}{extension}'
    fig.savefig(fig_path, dpi=300)
def plot_dendrogram(contours_df, output_path=None, build_name=None, apply_name=None,
                    ax=None, fig_size=(6, 2)):
    """
    Plots dendrogram of mean contours.

    The mean contour of every cluster is computed, the clusters are linked
    by complete linkage on their pairwise Euclidean distances, and the
    resulting tree is drawn in black with the root at the bottom.

    Parameters
    ----------
    contours_df : DataFrame
        DataFrame containing contour coordinates of objects and a column
        named 'cluster_id' that indicates the cluster that an object belongs.
    output_path : str, optional
        Path to the output directory. Default None, does not save figure.
        Cannot be not None at the same time with ``ax``.
    build_name : str, optional
        Name of the built model.
    apply_name : str, optional
        Name of the image set being applied to.
    ax : Axes, optional
        Figure axis to be plotted on. Cannot be not None with ``output_path``
        at the same time. When None, a new figure is created.
    fig_size : (float, float), optional
        Width, height in inches of the new figure. Default (6, 2). Unused
        when ``ax`` is given.

    Returns
    -------
    object_index : ndarray of int
        Leaf labels of the dendrogram in plotting order. Each label is the
        row of the mean-contour array that the leaf represents.

    Raises
    ------
    ValueError
        If both ``ax`` and ``output_path`` are given.

    See Also
    --------
    scipy.cluster.hierarchy.linkage, scipy.cluster.hierarchy.dendrogram

    """
    mean_contours = _get_df_info(contours_df)[0]
    if ax is None:
        fig, ax = plt.subplots(figsize=fig_size)
    elif output_path is not None:
        # the figure owning a caller-supplied axis is not saved from here
        raise ValueError('Unable to save figure. '
                         'output_path and ax cannot be not None at the same time.')

    pair_distance = spatial.distance.pdist(mean_contours, 'euclidean')
    linkage_mat = cluster.hierarchy.linkage(pair_distance, method='complete')
    linkage_mat[:, 2] *= 5  # scale distance for less cluttered visualization

    # plot dendrogram
    # NOTE: set_link_color_palette changes SciPy's module-wide palette and is
    # left set to black afterwards, as before.
    cluster.hierarchy.set_link_color_palette(['k'])
    # No truncation: the full tree is drawn. This replaces the deprecated
    # truncate_mode='mlab', which SciPy documents as never having worked.
    branches = cluster.hierarchy.dendrogram(linkage_mat, ax=ax,
                                            truncate_mode=None,
                                            orientation='bottom',
                                            above_threshold_color='k')
    # make connection between item and dendrogram
    object_index = np.array(branches['ivl'], dtype=int)
    ax.axis('off')

    # save figure
    if output_path is not None:
        save_fig(fig, output_path, 'dendrogram', build_name, apply_name)
    return object_index
def plot_contours(contours_df, object_index, output_path=None, build_name=None, apply_name=None,
                  contour_scale=3, ax=None, fig_size=(6, 2),
                  color=None, alpha=None, linewidth=None):
    """
    Plots mean contours.

    One closed outline is drawn per cluster, side by side along the x axis
    in the order given by ``object_index``.

    Parameters
    ----------
    contours_df : DataFrame
        DataFrame containing contour coordinates of objects and a column
        named 'cluster_id' that indicates the cluster that an object belongs.
    object_index : ndarray of int or str
        Labels corresponding to the dendrogram leaf nodes. Each label is
        converted to int and used as the row of the mean-contour array to
        draw at that position.
    output_path : str, optional
        Path to the output directory. Default None, does not save figure.
        Cannot be not None at the same time with ``ax``.
    build_name : str, optional
        Name of the built model.
    apply_name : str, optional
        Name of the image set being applied to.
    contour_scale : float, optional
        Scale of the contour shapes. Default 3.
    ax : Axes, optional
        Figure axis to be plotted on. Cannot be not None with ``output_path``
        at the same time. When None, a new figure is created.
    fig_size : (float, float), optional
        Width, height in inches of the new figure. Default (6, 2). Unused
        when ``ax`` is given.
    color : str, optional
        Color of mean contours.
    alpha : float, optional
        Alpha of mean contours.
    linewidth : float, optional
        Line width of mean contours.

    Raises
    ------
    ValueError
        If both ``ax`` and ``output_path`` are given.

    """
    mean_contours, num_clusters, _, num_points = _get_df_info(contours_df)
    if ax is None:
        fig, ax = plt.subplots(figsize=fig_size)
    elif output_path is not None:
        raise ValueError('Unable to save figure. '
                         'output_path and ax cannot be not None at the same time.')

    # Labels may arrive as strings; cast them like the sibling plot functions
    # do so they can index the mean-contour rows.
    leaf_rows = np.asarray(object_index).astype(int)

    x_first = 5  # offset of first contour
    x_offset = 10  # offset between contours
    for i in range(num_clusters):
        # read in contour coordinates: first half of a row is x, second is y
        row = mean_contours[leaf_rows[i]]
        x = row[:num_points]
        y = row[num_points:]
        # form close shape when plotting
        x = np.append(x, x[0])
        y = np.append(y, y[0])
        # place shape into right location
        x = x * contour_scale + x_first + x_offset * i
        y = y * contour_scale
        # plot shape of objects corresponding to the branches
        ax.plot(x, y, color=color, alpha=alpha, lw=linewidth)
    ax.axis('equal')
    ax.axis('off')

    # save figure
    if output_path is not None:
        save_fig(fig, output_path, 'contours', build_name, apply_name)
def plot_representatives(contours_df, object_index, output_path=None, build_name=None, apply_name=None,
                         num_sample=10, random_state=None, ax=None, fig_size=(17, 2),
                         color=None, alpha=None, linewidth=None):
    """
    Plots representative object contours.

    The same number of contours is sampled from every cluster and the
    samples of each cluster are overlaid, with clusters placed side by side
    in the order given by ``object_index``.

    Parameters
    ----------
    contours_df : DataFrame
        DataFrame containing contour coordinates of objects and a column
        named 'cluster_id' that indicates the cluster that an object belongs.
    object_index : ndarray of int or str
        Labels corresponding to the dendrogram leaf nodes. Each label is
        converted to int and matched against 'cluster_id'.
    output_path : str, optional
        Path to the output directory. Default None, does not save figure.
        Cannot be not None at the same time with ``ax``.
    build_name : str, optional
        Name of the built model.
    apply_name : str, optional
        Name of the image set being applied to.
    num_sample : int, optional
        Number of samples drawn from each cluster. Default 10. If num_sample >
        number of total available samples in the smallest cluster, it is
        set to that number.
    random_state : int, optional
        Random state of sampling representative contours.
    ax : Axes, optional
        Figure axis to be plotted on. Cannot be not None with ``output_path``
        at the same time. When None, a new figure is created.
    fig_size : (float, float), optional
        Width, height in inches of the new figure. Default (17, 2). Unused
        when ``ax`` is given.
    color : str, optional
        Color of representative contours.
    alpha : float, optional
        Alpha of representative contours.
    linewidth : float, optional
        Line width of representative contours.

    Raises
    ------
    ValueError
        If both ``ax`` and ``output_path`` are given.

    """
    _, num_clusters, _, num_points = _get_df_info(contours_df)
    if ax is None:
        fig, ax = plt.subplots(figsize=fig_size)
    elif output_path is not None:
        raise ValueError('Unable to save figure. '
                         'output_path and ax cannot be not None at the same time.')

    x_offset = 5  # move center of another cluster to new location

    # determine sample size: cannot draw more than the smallest cluster holds
    cluster_id = contours_df['cluster_id'].values
    _, counts = np.unique(cluster_id, return_counts=True)
    max_num_sample = np.min(counts)
    if num_sample > max_num_sample:
        num_sample = max_num_sample

    # sample contours from all clusters
    all_cluster_samples_df = contours_df.groupby('cluster_id').sample(n=num_sample,
                                                                      random_state=random_state)
    sorting_index = np.asarray(object_index).astype(int)

    # plotting each sample contour
    for cluster_i in range(num_clusters):
        # sample contour in current cluster
        in_cluster = all_cluster_samples_df['cluster_id'] == sorting_index[cluster_i]
        cluster_samples = all_cluster_samples_df[in_cluster].drop(columns='cluster_id').values
        for sample_i in range(num_sample):
            # read in sample contour coordinates
            x = cluster_samples[sample_i, :num_points]
            y = cluster_samples[sample_i, num_points:]
            # form close shape when plotting
            x = np.append(x, x[0])
            y = np.append(y, y[0])
            # place shape into right location
            x = x + x_offset * cluster_i
            # sample shape of objects corresponding to the clusters
            ax.plot(x, y, color=color, alpha=alpha, lw=linewidth)

    # Configure the axis that was drawn on; pyplot's current axis may be a
    # different one when the caller supplied ``ax``.
    ax.axis('equal')
    ax.axis('off')

    # save figure
    if output_path is not None:
        save_fig(fig, output_path, 'representatives', build_name, apply_name)
def plot_distribution(contours_df, object_index, output_path=None, build_name=None, apply_name=None,
                      ax=None):
    """
    Plots the distribution of mean contours in a bar graph.

    The percentage of objects in each cluster is drawn as one bar per
    cluster, ordered by ``object_index`` and placed at the same x positions
    that ``plot_contours`` uses for the mean contours.

    Parameters
    ----------
    contours_df : DataFrame
        DataFrame containing contour coordinates of objects and a column
        named 'cluster_id' that indicates the cluster that an object belongs.
    object_index : ndarray of int or str
        Labels corresponding to the dendrogram leaf nodes. Each label is
        converted to int and used to reorder the per-cluster percentages.
    output_path : str, optional
        Path to the output directory. Default None, does not save figure.
        Cannot be not None at the same time with ``ax``.
    build_name : str, optional
        Name of the built model.
    apply_name : str, optional
        Name of the image set being applied to.
    ax : Axes, optional
        Figure axis to be plotted on. Cannot be not None with ``output_path``
        at the same time. When None, a new figure is created.

    Raises
    ------
    ValueError
        If both ``ax`` and ``output_path`` are given.

    """
    _, num_clusters, _, _ = _get_df_info(contours_df)
    if ax is None:
        fig, ax = plt.subplots()
    elif output_path is not None:
        raise ValueError('Unable to save figure. '
                         'output_path and ax cannot be not None at the same time.')

    x_first = 5  # offset of first contour
    x_offset = 10  # offset between contours

    cluster_id = contours_df['cluster_id'].values
    _, counts = np.unique(cluster_id, return_counts=True)
    # sort the distributions corresponding to the dendrogram
    sorting_index = np.asarray(object_index).astype(int)
    distribution = counts / np.sum(counts) * 100  # unit: percent
    distribution = distribution[sorting_index]

    # plot shape mode distribution bar plot
    width = x_offset / 2
    x = np.arange(x_first, num_clusters * x_offset + x_offset / 2, x_offset)
    tab10 = plt.get_cmap('tab10')
    colors = [tab10(color_i) for color_i in range(10)] * 5  # cycle through the 10 colors
    ax.bar(x=x, height=distribution, color=colors, alpha=0.7, align='center', width=width)
    ax.set_ylabel('Distribution [%]')
    ax.set_xticks([])  # clear x tick and tick labels

    # Figure settings after plotting: lay out the figure that owns ``ax``,
    # which need not be pyplot's current figure when ``ax`` was supplied.
    ax.figure.tight_layout()

    # save figure
    if output_path is not None:
        save_fig(fig, output_path, 'distribution', build_name, apply_name)
def plot_contour_dendrogram(contours_df, output_path=None, build_name=None, apply_name=None,
                            fig_size=(6, 2), contour_scale=3):
    """
    Plots dendrogram with mean contours.

    A two-row figure is created: mean contours on top and the dendrogram of
    the mean contours below, sharing the x axis.

    Parameters
    ----------
    contours_df : DataFrame
        DataFrame containing contour coordinates of objects and a column
        named 'cluster_id' that indicates the cluster that an object belongs.
    output_path : str, optional
        Path to the output directory. Default None, does not save figure.
    build_name : str, optional
        Name of the built model.
    apply_name : str, optional
        Name of the image set being applied to.
    fig_size : (float, float), optional
        Width, height in inches. Default (6, 2).
    contour_scale : float, optional
        Scale of the contour shapes. Default 3.

    Returns
    -------
    object_index : ndarray of int
        The cluster that each dendrogram leaf represents, as returned by
        ``plot_dendrogram``.

    See Also
    --------
    plot_dendrogram, plot_contours

    """
    # figure structure
    fig, axs = plt.subplots(2, 1, figsize=fig_size, frameon=False, sharex='all')

    # plot dendrogram (bottom) first: it defines the left-to-right order
    # that the contours (top) must follow
    object_index = plot_dendrogram(contours_df, ax=axs[1])
    plot_contours(contours_df, object_index, ax=axs[0], contour_scale=contour_scale)

    # Save figure under its own type name. It was previously saved as
    # 'distribution', although no distribution is drawn here.
    if output_path is not None:
        save_fig(fig, output_path, 'contour_dendrogram', build_name, apply_name)
    return object_index
def plot_distribution_contour(contours_df, output_path=None, build_name=None, apply_name=None,
                              fig_size=(5, 5), height_ratio=(4, 1), contour_scale=3):
    """
    Plots the distribution of mean contours in a bar graph with labeling of
    mean contours.

    A two-row figure is created: the distribution bar graph on top and the
    mean contours below, both ordered like the leaves of the dendrogram of
    the mean contours. The dendrogram itself is not shown.

    Parameters
    ----------
    contours_df : DataFrame
        DataFrame containing contour coordinates of objects and a column
        named 'cluster_id' that indicates the cluster that an object belongs.
    output_path : str, optional
        Path to the output directory. Default None, does not save figure.
    build_name : str, optional
        Name of the built model.
    apply_name : str, optional
        Name of the image set being applied to.
    fig_size : (float, float), optional
        Width, height in inches. Default (5, 5).
    height_ratio : (float, float), optional
        Ratio between height of distribution plot and shape mode contours.
        Default (4, 1).
    contour_scale : float, optional
        Scale of the contour shapes. Default 3.

    See Also
    --------
    plot_contours, plot_distribution

    """
    # Only the leaf order of the dendrogram is needed. Draw it on a scratch
    # figure and close that exact figure, even if plotting fails, rather
    # than relying on whichever figure pyplot considers current.
    scratch_fig, scratch_ax = plt.subplots()
    try:
        object_index = plot_dendrogram(contours_df, ax=scratch_ax)
    finally:
        plt.close(scratch_fig)

    # figure structure
    fig, axs = plt.subplots(2, 1, figsize=fig_size, frameon=False, sharex='all',
                            gridspec_kw={'height_ratios': height_ratio})

    # plot contours (bottom) and distribution (top)
    plot_contours(contours_df, object_index, ax=axs[1], contour_scale=contour_scale)
    plot_distribution(contours_df, object_index, ax=axs[0])

    # figure settings after plotting
    fig.tight_layout()
    fig.subplots_adjust(hspace=0)

    # save figure
    if output_path is not None:
        save_fig(fig, output_path, 'distribution', build_name, apply_name)
def plot_distribution_contour_dendrogram(build_contours_df, apply_contours_df=None,
                                         output_path=None, build_name=None, apply_name=None,
                                         fig_size=(5, 5), height_ratio=(4, 1, 1), contour_scale=3):
    """
    Plots the distribution of mean contours in a bar graph with labeling of
    mean contours and their dendrogram.

    A three-row figure is created: distribution bar graph (top), mean
    contours (middle) and dendrogram (bottom). The dendrogram always comes
    from ``build_contours_df``. When ``apply_contours_df`` is given, its
    mean contours are overlaid on the build contours (drawn in black) and
    the distribution shown is that of ``apply_contours_df``.

    Parameters
    ----------
    build_contours_df : DataFrame
        DataFrame containing contour coordinates of objects used to build
        model and a column named 'cluster_id' that indicates the cluster
        that an object belongs.
    apply_contours_df : DataFrame, optional
        DataFrame containing contour coordinates of objects used to apply
        model and a column named 'cluster_id' that indicates the cluster
        that an object belongs. Default None, plots the build model only.
        Must not be empty.
    output_path : str, optional
        Path to the output directory. Default None, does not save figure.
    build_name : str, optional
        Name of the built model.
    apply_name : str, optional
        Name of the image set being applied to.
    fig_size : (float, float), optional
        Width, height in inches. Default (5, 5).
    height_ratio : list[float], optional
        Ratio between height of distribution plot, shape mode contours, and
        shape mode dendrogram. Default (4, 1, 1). Recommended values:

        * [4, 1, 1] for 5 clusters (shape modes)
        * [4, 0.5, 1] for 10 clusters (shape modes)
        * [4, 0.35, 1] for 15 clusters (shape modes)

    contour_scale : float, optional
        Scale of the contour shapes. Default 3.

    Raises
    ------
    ValueError
        If ``apply_contours_df`` is an empty DataFrame.

    See Also
    --------
    plot_dendrogram, plot_contours, plot_distribution

    """
    # validate before creating the figure so a bad call leaves no open figure
    if apply_contours_df is not None and apply_contours_df.empty:
        raise ValueError('Got an empty DataFrame apply_contours_df. '
                         'Expect None or non-empty DataFrame.')

    # figure structure
    height_ratio = list(height_ratio)  # hand gridspec its own list copy
    fig, axs = plt.subplots(3, 1, figsize=fig_size, frameon=False, sharex='all',
                            gridspec_kw={'height_ratios': height_ratio})

    # the dendrogram (bottom) is built from the model data in both modes and
    # fixes the left-to-right order of everything above it
    object_index = plot_dendrogram(build_contours_df, ax=axs[2])

    if apply_contours_df is None:  # build model plot
        plot_contours(build_contours_df, object_index, ax=axs[1],
                      contour_scale=contour_scale)
        plot_distribution(build_contours_df, object_index, ax=axs[0])
    else:  # apply model plot
        # build contours in translucent black as reference ...
        plot_contours(build_contours_df, object_index, ax=axs[1],
                      contour_scale=contour_scale,
                      color='black', alpha=0.5, linewidth=2)
        # ... with the applied data's mean contours drawn on top
        plot_contours(apply_contours_df, object_index, ax=axs[1],
                      contour_scale=contour_scale,
                      alpha=0.7, linewidth=2)
        plot_distribution(apply_contours_df, object_index, ax=axs[0])

    # figure settings after plotting
    fig.tight_layout()
    fig.subplots_adjust(hspace=0)

    # save figure
    if output_path is not None:
        save_fig(fig, output_path, 'shape_mode', build_name, apply_name)
# def plot_dendrogram(contours_df, # output_path=None, build_name=None, apply_name=None, # fig_size=(6, 2), contour_scale=3): # """ # Plots shape mode contours and shape mode dendrogram in one figure. # # Used in conjunction with shape mode distribution plot via # `plot_distribution`. # # Parameters # ---------- # contours_df : DataFrame # DataFrame containing contour coordinates of objects and a column # named 'cluster_id' that indicates the cluster that an object belongs. # output_path : str, optional # Path to the output directory. Default None, does not save figure. # build_name : str, optional # Name of the built model. # apply_name : str, optional # Name of the image set being applied to. # fig_size : (float, float), optional # Width, height in inches. Default (6, 2). # contour_scale : float, optional # Scale of contour size. Default 3. Recommended range [2, 3]. # # Returns # ------- # fig : Figure # ax : array of Axes # object_index : ndarray of str # A list of labels corresponding to the leaf nodes. # # See Also # -------- # scipy.cluster.hierarchy.linkage : Computes the linkage matrix. # scipy.cluster.hierarchy.dendrogram : Plots the dendrogram. # plot_distribution : Plots shape mode distribution. 
# # """ # # testing code below # # mean_contours = bdst0 # # plot_dendrogram(Z, mean_contours) # # # calculate mean and useful numbers # mean_contours = contours_df.groupby('cluster_id').mean().values # num_clusters, num_coords = mean_contours.shape # if num_coords % 2: # raise ValueError('Coordinate does not have matching number of x and y coordinates.') # num_points = num_coords // 2 # # # figure structure # fig, axs = plt.subplots(2, 1, figsize=fig_size, frameon=False, sharex='all') # # ########################################## # # plot dendrogram (bottom) axs[1] # # ########################################## # # calculate dendrogram-required linkage matrix # pair_distance = spatial.distance.pdist(mean_contours, 'euclidean') # linkage_mat = cluster.hierarchy.linkage(pair_distance, method='complete') # linkage_mat[:, 2] = linkage_mat[:, 2] * 5 # scale distance for less cluttered visualization # # # plot dendrogram # cluster.hierarchy.set_link_color_palette(['k']) # branches = cluster.hierarchy.dendrogram(linkage_mat, ax=axs[1], p=0, # truncate_mode='mlab', # orientation='bottom', # above_threshold_color='k') # object_index = np.array(branches['ivl'], dtype=int) # make connection between item and dendrogram # axs[1].axis('off') # # ########################################## # # plot contours of objects (mid) axs[0] # # ########################################## # # plot each object # x_first = 5 # offset of first item # x_offset = 10 # offset between items # y_offset = -5 # for i in range(num_clusters): # # read in contour coordinates # x = mean_contours[object_index[i], :num_points] # y = mean_contours[object_index[i], num_points:] # # form close shape when plotting # x = np.append(x, x[0]) # y = np.append(y, y[0]) # # place shape into right location # x = x * contour_scale + x_first + x_offset * i # y = y * contour_scale + y_offset # # shape of objects corresponding to the branches # axs[0].plot(x, y) # axs[0].axis('equal') # axs[0].axis('off') # # # save 
figure # if output_path is not None: # save_fig(fig, output_path, 'dendrogram', build_name, apply_name) # return fig, axs, object_index # # # def plot_representatives(contours_df, object_index, # output_path=None, build_name=None, apply_name=None, # fig_size=(17, 2), num_sample=10, random_state=None): # """ # Plots representative object contours. # # Parameters # ---------- # contours_df : DataFrame # DataFrame containing contour coordinates of objects and a column # named 'cluster_id' that indicates the cluster that an object belongs. # object_index : ndarray of str # A list of labels corresponding to the leaf nodes. # output_path : str, optional # Path to the output directory. Default None, does not save figure. # build_name : str, optional # Name of the built model. # apply_name : str, optional # Name of the image set being applied to. # fig_size : (float, float), optional # Width, height in inches. Default (17, 2). # num_sample : int, optional # Number of sample drawn from each cluster. Default 10. If num_sample > # number of total available samples in the smallest cluster, it is # set to the that number. # random_state : int, optional # Random state of sampling representative contours. # # Returns # ------- # fig : Figure # ax : array of Axes # # See Also # -------- # plot_dendrogram : Representative shapes should resemble the mean shape mode. 
# # """ # # testing code below # # contours_df = pd.DataFrame(bdpc) # # contours_df['cluster_id'] = IDX # # plot_representatives(contours_df) # # # calculate mean and useful numbers # mean_contours = contours_df.groupby('cluster_id').mean().values # num_clusters, num_coords = mean_contours.shape # if num_coords % 2: # raise ValueError('Coordinate does not have matching number of x and y coordinates.') # num_points = num_coords // 2 # x_offset = 5 # move center of another cluster to new location # # # determine sample size # cluster_id = contours_df['cluster_id'].values # unique, counts = np.unique(cluster_id, return_counts=True) # max_num_sample = np.min(counts) # if num_sample > max_num_sample: # num_sample = max_num_sample # # sample contours from all clusters # all_cluster_samples_df = contours_df.groupby('cluster_id').sample(n=num_sample, # random_state=random_state) # sorting_index = object_index.astype(int) # # # plotting each sample contour # fig, ax = plt.subplots(figsize=fig_size) # for cluster_i in range(num_clusters): # # sample contour in current cluster # cluster_samples_df = all_cluster_samples_df[all_cluster_samples_df['cluster_id'] == sorting_index[cluster_i]] # cluster_samples = cluster_samples_df.drop(columns='cluster_id').values # for sample_i in range(num_sample): # # plot each sample contour by... 
# # read in sample contour coordinates # x = cluster_samples[sample_i, :num_points] # y = cluster_samples[sample_i, num_points:] # # form close shape when plotting # x = np.append(x, x[0]) # y = np.append(y, y[0]) # # place shape into right location # x = x + x_offset * cluster_i # y = y # # sample shape of objects corresponding to the clusters # ax.plot(x, y, 'r', alpha=0.5) # plt.axis('equal') # plt.axis('off') # # # save figure # if output_path is not None: # save_fig(fig, output_path, 'representatives', build_name, apply_name) # return fig, ax # # # def plot_distribution(contours_df, object_index, # output_path=None, build_name=None, apply_name=None, # fig_size=(6, 4)): # """ # Plots shape mode distribution bar graph. # # Used in conjunction with shape mode dendrogram via `plot_dendrogram`. # # Parameters # ---------- # contours_df : DataFrame # DataFrame containing contour coordinates of objects and a column # named 'cluster_id' that indicates the cluster that an object belongs. # object_index : ndarray of str # A list of labels corresponding to the leaf nodes. # output_path : str, optional # Path to the output directory. Default None, does not save figure. # build_name : str, optional # Name of the built model. # apply_name : str, optional # Name of the image set being applied to. # fig_size : (float, float), optional # Width, height in inches. Default (6, 4). # # Returns # ------- # fig : Figure # ax : array of Axes # # See Also # -------- # plot_dendrogram : Plots shape mode contours and shape mode dendrogram. 
# # """ # # testing code below # # import pandas as pd # # contours_df = pd.DataFrame(bdpc) # # contours_df['cluster_id'] = IDX # # object_index = dendidx # # # calculate distribution from counts # cluster_id = contours_df['cluster_id'].values # unique, counts = np.unique(cluster_id, return_counts=True) # num_clusters = unique.size # # sort the distributions corresponding to the dendrogram # sorting_index = object_index.astype(int) # distribution = counts / np.sum(counts) * 100 # unit: percent # distribution = distribution[sorting_index] # # x_first = 5 # x_offset = 10 # width = x_offset / 2 # # # plot shape mode distribution bar plot # fig, ax = plt.subplots(figsize=fig_size) # # x = np.arange(num_clusters).astype(str) # x = np.arange(x_first, num_clusters * x_offset + x_offset / 2, x_offset) # colors = [plt.get_cmap('tab10')(cluster_i) for cluster_i in range(10)] * 5 # cycle through the 10 colors # ax.bar(x=x, height=distribution, # color=colors, alpha=0.7, # align='center', width=width) # # ax.bar(x=x, height=distribution, align='center') # ax.set_xlabel('Shape mode') # ax.set_ylabel('Distribution [%]') # plt.xticks(x, labels=[''] * num_clusters) # clear x tick labels # plt.tight_layout() # # # save figure # if output_path is not None: # save_fig(fig, output_path, 'distribution', build_name, apply_name) # return fig, ax # # # def plot_full_old(contours_df, # output_path=None, build_name=None, apply_name=None, # fig_size=(5, 5), height_ratio=(4, 1, 1), contour_scale=3): # """ # Plots shape mode distribution, mean contours, and dendrogram in one figure. # # Parameters # ---------- # contours_df : DataFrame # DataFrame containing contour coordinates of objects and a column # named 'cluster_id' that indicates the cluster that an object belongs. # output_path : str, optional # Path to the output directory. Default None, does not save figure. # build_name : str, optional # Name of the built model. # apply_name : str, optional # Name of the image set being applied to. 
# fig_size : (float, float), optional # Width, height in inches. Default (5, 5). # height_ratio : list[float], optional # Ratio between height of distribution plot, shape mode contours, and # shape mode dendrogram. Default [4, 1, 1]. Recommended values: # # * [4, 1, 1] for 5 clusters (shape modes) # * [4, 0.5, 1] for 10 clusters (shape modes) # * [4, 0.35, 1] for 15 clusters (shape modes) # # contour_scale : float, optional # Scale of contour size. Default 3. Recommended range [2, 3]. # # Returns # ------- # fig : Figure # ax : array of Axes # # See Also # -------- # plot_distribution : Plots shape mode distribution. # plot_dendrogram : Plots shape mode contours and dendrogram in one figure. # # """ # # calculate mean and useful numbers # height_ratio = list(height_ratio) # prevent mutable default argument # mean_contours = contours_df.groupby('cluster_id').mean().values # num_clusters, num_coords = mean_contours.shape # if num_coords % 2: # raise ValueError('Coordinate does not have matching number of x and y coordinates.') # num_points = num_coords // 2 # # # figure structure # fig, axs = plt.subplots(3, 1, figsize=fig_size, frameon=False, sharex='all', # gridspec_kw={'height_ratios': height_ratio}) # # ########################################## # # plot dendrogram (bottom) axs[2] # # ########################################## # # calculate dendrogram-required linkage matrix # # contours = contours_df.drop(columns='cluster_id').values # # contours = mean_contours # pair_distance = spatial.distance.pdist(mean_contours, 'euclidean') # linkage_mat = cluster.hierarchy.linkage(pair_distance, method='complete') # linkage_mat[:, 2] = linkage_mat[:, 2] * 5 # scale distance for less cluttered visualization # # # plot dendrogram # cluster.hierarchy.set_link_color_palette(['k']) # branches = cluster.hierarchy.dendrogram(linkage_mat, ax=axs[2], p=0, # truncate_mode='mlab', # orientation='bottom', # above_threshold_color='k') # object_index = np.array(branches['ivl'], 
dtype=int) # make connection between item and dendrogram # axs[2].axis('off') # # ########################################## # # plot contours of objects (mid) axs[1] # # ########################################## # x_first = 5 # offset of first contour # x_offset = 10 # offset between contours # for i in range(num_clusters): # # read in contour coordinates # x = mean_contours[object_index[i], :num_points] # y = mean_contours[object_index[i], num_points:] # # form close shape when plotting # x = np.append(x, x[0]) # y = np.append(y, y[0]) # # place shape into right location # x = x * contour_scale + x_first + x_offset * i # y = y * contour_scale # # plot shape of objects corresponding to the branches # axs[1].plot(x, y) # axs[1].axis('equal') # axs[1].axis('off') # # ########################################## # # distribution bar plot (top) axs[0] # # ########################################## # # calculate distribution from counts # cluster_id = contours_df['cluster_id'].values # unique, counts = np.unique(cluster_id, return_counts=True) # # sort the distributions corresponding to the dendrogram # sorting_index = object_index.astype(int) # distribution = counts / np.sum(counts) * 100 # unit: percent # distribution = distribution[sorting_index] # # # plot shape mode distribution bar plot # width = x_offset / 2 # x = np.arange(x_first, num_clusters*x_offset + x_offset/2, x_offset) # colors = [plt.get_cmap('tab10')(cluster_i) for cluster_i in range(10)] * 5 # cycle through the 10 colors # axs[0].bar(x=x, height=distribution, # color=colors, alpha=0.7, # align='center', width=width) # axs[0].set_ylabel(f'Distribution [%]') # axs[0].set_xticks([]) # clear x tick and tick labels # # # figure setting after plotting # plt.tight_layout() # fig.subplots_adjust(hspace=0) # # # save figure # if output_path is not None: # save_fig(fig, output_path, 'shape_mode', build_name, apply_name) # return fig, axs # def scree_plot(latent): # """Scree plot for PCA, currently testing use 
only.""" # import matplotlib.pyplot as plt # fig, axs = plt.subplots(1, 2, figsize=(8, 4)) # axs[0].semilogy(latent, 'o', alpha=0.5) # axs[1].plot(np.cumsum(latent)/np.sum(latent), 'o', alpha=0.5) # return fig, axs