import numpy as np
from . import analysis, processing, amath
class Vampire:
    """
    Visually Aided Morpho-Phenotyping Image Recognition (VAMPIRE) model.

    Attributes
    ----------
    model_name : str
        Name of the VAMPIRE model.
    n_points : int
        Number of sample points in a contour.
    n_coords : int
        Number of coordinates in a contour.
        n_coords = 2*n_points
    n_clusters : int
        Number of clusters for K-Means clustering.
    n_pcs : int
        Number of principal components kept for analysis.
    random_state : int
        Random state of random processes.
    mean_registered_contour : ndarray
        Mean registered contour.
    mean_aligned_contour : ndarray
        Mean aligned contour.
    contours : ndarray
        Contours after sampling, registration, and alignment.
    principal_directions : ndarray
        Principal directions of PCA.
    explained_variance : ndarray
        Explained variance returned by the PCA.
    explained_variance_ratio : ndarray
        Explained variance ratio derived from `explained_variance`.
    cum_explained_variance_ratio : ndarray
        Cumulative explained variance ratio.
    cluster_id_df : DataFrame
        Contour's cluster id and min distance to centroid.
    labeled_contours_df : DataFrame
        Contour coordinates, cluster id, and min distance to centroid.
    centroids : ndarray
        Coordinate of centroids.
    inertia : float
        Inertia value returned by `analysis.cluster_contours`
        (presumably the K-Means inertia; confirm against that helper).
    mean_cluster_contours : ndarray
        Mean contours of each contour cluster.
    pair_distance : ndarray
        Pair distance between each cluster.
    linkage_matrix : ndarray
        Linkage matrix for cluster dendrogram.
    branches : dict
        A dictionary of data structures computed to render the dendrogram.

    Notes
    -----
    All attributes below the hyperparameters are ``None`` until `fit`
    has been called.
    """

    # Dendrogram entries that take part in the equality comparison.
    _BRANCH_KEYS = ('icoord', 'dcoord', 'ivl', 'leaves')

    # Instances are mutable and define __eq__, so they are deliberately
    # unhashable (this is also what Python does implicitly).
    __hash__ = None

    def __init__(
            self,
            model_name,
            n_points=50,
            n_clusters=5,
            n_pcs=None,
            random_state=None
    ):
        """
        Initialize VAMPIRE model with hyperparameters.

        Parameters
        ----------
        model_name : str
            Name of the VAMPIRE model.
        n_points : int, optional
            Number of sample points for contour.
        n_clusters : int, optional
            Number of cluster for K-Means clustering.
        n_pcs : int, optional
            Number of principal components to keep in analysis.
            If None, it is chosen during `fit`.
        random_state : int or None, optional
            Determines random number generation for K-Means clustering.
            Use an int to make the randomness deterministic.
            If None, a random integer in [0, 100000) is drawn and stored.
        """
        # model hyperparameters
        self.model_name = model_name
        self.n_points = n_points
        self.n_coords = n_points * 2
        self.n_clusters = n_clusters
        self.n_pcs = n_pcs
        if random_state is not None:
            self.random_state = random_state
        else:
            # store a plain Python int so the attribute matches its documented type
            self.random_state = int(np.random.default_rng().integers(100000))
        # contour info
        self.mean_registered_contour = None
        self.mean_aligned_contour = None
        self.contours = None
        # pca analysis info
        self.principal_directions = None
        self.explained_variance = None
        self.explained_variance_ratio = None
        self.cum_explained_variance_ratio = None
        # k-means clustering info
        self.cluster_id_df = None
        self.labeled_contours_df = None
        self.centroids = None
        self.inertia = None
        self.mean_cluster_contours = None
        # hierarchical clustering info
        self.pair_distance = None
        self.linkage_matrix = None
        self.branches = None

    def fit(self, properties_df):
        """
        Fit VAMPIRE model to dataset.

        Samples, registers, and aligns contour coordinates.
        PCA contour coordinates, then K-Means analysis.
        Hierarchical clustering determines cluster order.

        Parameters
        ----------
        properties_df : DataFrame
            DataFrame containing contour properties and raw contours
            (read from its ``'raw_contour'`` column).
            Generated by `extraction.extract_properties`.

        Returns
        -------
        self : vampire.model.Vampire
            VAMPIRE model.
        """
        # process contours
        contours = list(properties_df['raw_contour'])
        contours = processing.sample_contours(contours, n_points=self.n_points)
        contours = processing.register_contours(contours)
        self.mean_registered_contour = processing.get_mean_registered_contour(contours)
        self.contours = processing.align_contours(contours, self.mean_registered_contour)
        self.mean_aligned_contour = processing.get_mean_aligned_contour(self.contours)

        # pca contours
        self.principal_directions, principal_components, self.explained_variance = amath.pca(self.contours, 'eig')
        self.explained_variance_ratio = analysis.get_explained_variance_ratio(self.explained_variance)
        self.cum_explained_variance_ratio = analysis.get_cum_explained_variance_ratio(self.explained_variance_ratio)
        if self.n_pcs is None:
            # no explicit choice was made: derive it from the cumulative variance
            self.n_pcs = analysis.get_optimal_n_pcs(self.cum_explained_variance_ratio)

        # cluster contours
        self.cluster_id_df, centroids, self.inertia = analysis.cluster_contours(
            principal_components,
            n_clusters=self.n_clusters,
            n_pcs=self.n_pcs,
            random_state=self.random_state
        )
        self.labeled_contours_df = analysis.get_labeled_contours_df(self.contours, self.cluster_id_df)
        self.pair_distance, self.linkage_matrix, self.branches = analysis.hierarchical_cluster_contour(
            self.labeled_contours_df
        )

        # reorder clusters and centroid according to dendrogram
        # to be consistent with dendrogram visualization
        object_index = analysis.get_cluster_order(self.branches)
        cluster_id_sorted = analysis.reorder_clusters(self.cluster_id_df['cluster_id'], object_index)
        self.cluster_id_df['cluster_id'] = cluster_id_sorted
        self.labeled_contours_df['cluster_id'] = cluster_id_sorted
        self.mean_cluster_contours = analysis.get_mean_cluster_contours(self.labeled_contours_df)
        self.centroids = analysis.reorder_centroids(centroids, object_index)
        return self

    @staticmethod
    def _arrays_close(first, second):
        """Return True if both are None, or both have equal shape and close values."""
        if first is None or second is None:
            return first is None and second is None
        first = np.asarray(first)
        second = np.asarray(second)
        # compare shapes first: np.allclose broadcasts, which would either
        # raise or report equality for arrays of different sizes
        return first.shape == second.shape and bool(np.allclose(first, second))

    @staticmethod
    def _frames_equal(first, second):
        """Return True if both are None, or `first.equals(second)` holds."""
        if first is None or second is None:
            return first is None and second is None
        return bool(first.equals(second))

    @classmethod
    def _branches_equal(cls, first, second):
        """Return True if both are None, or they agree on every key in `_BRANCH_KEYS`."""
        if first is None or second is None:
            return first is None and second is None
        return all(first[key] == second[key] for key in cls._BRANCH_KEYS)

    def __eq__(self, other):
        """
        Test equality with another Vampire object.

        Hyperparameters, DataFrames, inertia, and dendrogram entries are
        compared exactly; array attributes are compared with `np.allclose`
        after checking that their shapes match. Attributes that are still
        ``None`` (model not fitted) are equal only to ``None``.

        Parameters
        ----------
        other : vampire.model.Vampire
            Vampire object to be compared to `self`.

        Returns
        -------
        equal : bool
            ``NotImplemented`` is returned instead when `other` is not a
            `Vampire`, so ``model == something_else`` evaluates to False.
        """
        if not isinstance(other, Vampire):
            return NotImplemented

        # model hyperparameters and scalar results
        exact_attrs = (
            'model_name', 'n_points', 'n_coords', 'n_clusters',
            'n_pcs', 'random_state', 'inertia',
        )
        for name in exact_attrs:
            if not (getattr(self, name) == getattr(other, name)):
                return False

        # contour, pca, k-means, and hierarchical clustering arrays
        array_attrs = (
            'mean_registered_contour', 'mean_aligned_contour', 'contours',
            'principal_directions', 'explained_variance',
            'explained_variance_ratio', 'cum_explained_variance_ratio',
            'centroids', 'mean_cluster_contours',
            'pair_distance', 'linkage_matrix',
        )
        for name in array_attrs:
            if not self._arrays_close(getattr(self, name), getattr(other, name)):
                return False

        # k-means clustering tables
        for name in ('cluster_id_df', 'labeled_contours_df'):
            if not self._frames_equal(getattr(self, name), getattr(other, name)):
                return False

        # dendrogram rendering data
        return self._branches_equal(self.branches, other.branches)