# Source code for vampire.extraction

import os
import re

import cv2
import numpy as np
import pandas as pd
from skimage import io
from skimage.measure import regionprops_table

from . import util


def check_property_csv_existence(img_set_path, filter_info):
    """
    Report whether the properties csv for an image set is already on disk.

    The csv location is resolved by ``util.get_properties_csv_path`` from
    the image set path and the filename filter(s). A notice is printed when
    the file is found.

    Parameters
    ----------
    img_set_path : str
        Path to the directory of images to be analyzed.
    filter_info : ndarray
        Regex filter(s) of image filenames to be analyzed.
        Empty if no filter needed.

    Returns
    -------
    bool
        ``True`` if the properties csv exists, ``False`` otherwise.

    """
    csv_path = util.get_properties_csv_path(img_set_path, filter_info)
    if not os.path.exists(csv_path):
        return False
    print(f'Contour and properties data already exist in path: {img_set_path}')
    return True
def get_filtered_filenames(img_set_path, filter_info=None):
    """
    Get image filenames in a directory, filtered with keywords.

    A file is kept when its name *ends* with a supported image extension
    (``.tif``, ``.tiff``, ``.jpeg``, ``.jpg``, ``.png``, ``.bmp``, ``.gif``;
    case-insensitive) and matches every regex in `filter_info`.

    Parameters
    ----------
    img_set_path : str
        Path to the directory of images to be analyzed.
    filter_info : ndarray, optional
        Regex filter(s) of image filenames to be analyzed. Every filter must
        be found somewhere in the filename (``re.search`` semantics).
        Empty or ``None`` if no filter needed.

    Returns
    -------
    filtered_filenames : ndarray
        Filtered filenames (object dtype), sorted alphabetically. Empty if
        the directory holds no matching image.

    """
    if filter_info is None:
        filter_info = np.array([], dtype=str)

    # Anchor the extension at the end of the name: an unanchored pattern
    # would also accept non-image files such as ``cell.png.csv``.
    extension_pattern = re.compile(
        r'\.(?:tiff?|jpe?g|png|bmp|gif)$', flags=re.IGNORECASE
    )
    # os.listdir returns entries in arbitrary order; sort so the result
    # (and any image index derived from it) is reproducible across runs.
    filenames = [
        name for name in sorted(os.listdir(img_set_path))
        if extension_pattern.search(name)
    ]

    # filter by user constraints; all of them must match
    for constraint in filter_info:
        constraint_pattern = re.compile(constraint)
        filenames = [
            name for name in filenames if constraint_pattern.search(name)
        ]

    return np.array(filenames, dtype=object)
def get_img_set(img_set_path, filenames):
    """
    Load every listed image from the image set directory.

    Parameters
    ----------
    img_set_path : str
        Path to the directory of images to be analyzed.
    filenames : ndarray
        Filtered filenames, relative to `img_set_path`.

    Returns
    -------
    img_set: list[ndarray]
        A list of images to be analyzed, in the same order as `filenames`.

    """
    # each image is read with skimage's io.imread from <img_set_path>/<name>
    return [
        io.imread(os.path.join(img_set_path, name))
        for name in filenames
    ]
def extract_contour_from_object(object_img):
    """
    Returns x and y coordinates of the object contour.

    Only the first contour reported by ``cv2.findContours`` is used. Its
    points are arranged as a ``(2, n)`` array and their order along the
    point axis is reversed.

    Parameters
    ----------
    object_img : ndarray
        2D binary image with only one object.

    Returns
    -------
    contour : ndarray or float
        x and y coordinates of n contour sample points, with shape (2, n).
        ``np.nan`` when the contour has 3 or fewer points and therefore
        could not be sampled.

    """
    mask = object_img.astype('uint8')
    found = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    first_contour = found[0][0]

    # (n, 1, 2) -> (2, n), then reverse the order of the points
    points = np.flip(first_contour.reshape(-1, 2).T, axis=1)

    # size counts both coordinates, so > 6 means at least 4 points
    if points.size > 6:
        return points
    return np.nan
def extract_properties_from_img(img, filename=None, img_id=None):
    """
    Get object properties of all objects in an image.

    Region properties are measured with ``regionprops_table``; the raw
    contour of each object is added through `extract_contour_from_object`
    and stored in the ``raw_contour`` column. ``circularity`` and
    ``aspect_ratio`` are derived from the measured columns. Objects whose
    contour could not be extracted (``raw_contour`` is NaN) are dropped.

    Parameters
    ----------
    img : ndarray
        Image to be analyzed
    filename : str, optional
        Filename of the image. Added as leading ``filename`` column if given.
    img_id : int, optional
        ID/index of the image. Added as ``image_id`` column if given.

    Returns
    -------
    properties_df : DataFrame
        Dataframe of object properties.

    """
    # measure every object in the image
    measured = regionprops_table(
        img,
        properties=(
            'label', 'centroid', 'area', 'bbox_area', 'convex_area',
            'filled_area', 'perimeter', 'equivalent_diameter',
            'major_axis_length', 'minor_axis_length', 'orientation',
            'euler_number', 'eccentricity', 'solidity', 'extent'
        ),
        extra_properties=(extract_contour_from_object,)
    )
    column_names = {
        'centroid-0': 'centroid-y',
        'centroid-1': 'centroid-x',
        'extract_contour_from_object': 'raw_contour'
    }
    table = pd.DataFrame(measured).rename(columns=column_names)

    # derived shape descriptors
    area, perimeter = table['area'], table['perimeter']
    table['circularity'] = 4 * np.pi * area / perimeter ** 2
    axis_ratio = np.divide(
        table['major_axis_length'], table['minor_axis_length']
    )
    table['aspect_ratio'] = np.nan_to_num(axis_ratio)

    # keep only objects whose contour has enough points to be sampled
    has_contour = pd.notna(table['raw_contour'])
    table = table[has_contour]

    # label each object; filename is inserted last so it ends up first
    for column, value in (('image_id', img_id), ('filename', filename)):
        if value is not None:
            table.insert(0, column, value)
    return table
def extract_properties_from_img_set(img_set, filenames=None):
    """
    Get object properties of all objects in an image set.

    Each image is processed by `extract_properties_from_img` with its
    position in `img_set` as image ID, and the per-image tables are
    concatenated into one table with a fresh index.

    Parameters
    ----------
    img_set: list[ndarray]
        A list of images to be analyzed. Must not be empty.
    filenames : ndarray, optional
        Filenames of the images, in the same order as `img_set`.

    Returns
    -------
    properties_from_img_set_df : DataFrame
        Dataframe of object properties.

    Raises
    ------
    ValueError
        If `filenames` is given and its length differs from `img_set`, or
        if `img_set` is empty.

    """
    if filenames is not None and len(img_set) != len(filenames):
        raise ValueError('Length of img_set and filenames does not match.')
    # pd.concat fails on an empty list with an unrelated-looking message
    # ("No objects to concatenate"); fail early with a clear one instead.
    # len() is used rather than truthiness so array-like inputs work too.
    if len(img_set) == 0:
        raise ValueError(
            'img_set is empty; there are no images to extract properties from.'
        )

    properties_from_img_set = [
        extract_properties_from_img(
            img,
            filename=None if filenames is None else filenames[img_i],
            img_id=img_i
        )
        for img_i, img in enumerate(img_set)
    ]
    return pd.concat(properties_from_img_set, ignore_index=True)
def read_properties(img_set_path, filter_info):
    """
    Load previously saved object properties from their ``pickle`` file.

    The file location is resolved by ``util.get_properties_pickle_path``
    and the content is loaded with ``util.read_pickle``.

    Parameters
    ----------
    img_set_path : str
        Path to the directory of images to be analyzed.
    filter_info : ndarray
        Regex filter(s) of image filenames to be analyzed.
        Empty if no filter needed.

    Returns
    -------
    properties_df : DataFrame
        Dataframe of object properties.

    """
    # NOTE(review): presumably unpickles the file; only point this at
    # property files produced by this package - confirm in util.read_pickle.
    pickle_path = util.get_properties_pickle_path(img_set_path, filter_info)
    return util.read_pickle(pickle_path)
def write_properties(properties_df, img_set_path, filter_info, write_contour=False):
    """
    Save object properties as a ``csv`` file and as a ``pickle`` file.

    The pickle always receives the full table. The csv omits the
    ``raw_contour`` column unless `write_contour` is set.

    Parameters
    ----------
    properties_df : DataFrame
        DataFrame of object properties.
    img_set_path : str
        Path to the directory of images to be analyzed.
    filter_info : ndarray
        Regex filter(s) of image filenames to be analyzed.
        Empty if no filter needed.
    write_contour : bool, optional
        Whether write and save raw contour coordinates in the csv.

    """
    csv_path = util.get_properties_csv_path(img_set_path, filter_info)
    pickle_path = util.get_properties_pickle_path(img_set_path, filter_info)

    # choose what goes into the csv, then write it once
    if write_contour:
        csv_table = properties_df
    else:
        csv_table = properties_df.drop('raw_contour', axis=1)
    csv_table.to_csv(csv_path, index=False)

    util.write_pickle(pickle_path, properties_df)
def extract_properties(img_set_path, filter_info=None, write=True, write_contour=False):
    """
    Extracts object properties from image set path.

    Saved results are reused when available:

    1. If a properties csv exists for this exact filter, the saved
       properties are read back and returned (nothing is written).
    2. Otherwise, if a properties csv exists for the unfiltered image set,
       the saved full table is read and narrowed to the filtered filenames.
    3. Otherwise the images are loaded and analyzed from scratch.

    Parameters
    ----------
    img_set_path : str
        Path to the directory of images to be analyzed.
    filter_info : ndarray, optional
        Regex filter(s) of image filenames to be analyzed.
        Empty if no filter needed.
    write : bool, optional
        Write properties into ``csv`` and ``pickle`` file (cases 2 and 3).
    write_contour : bool, optional
        Whether write and save raw contour coordinates.

    Returns
    -------
    properties_df : Dataframe
        Dataframe of object properties.

    """
    no_filter = np.array([], dtype=str)
    if filter_info is None:
        filter_info = no_filter

    has_full_set = check_property_csv_existence(img_set_path, no_filter)
    has_filtered_set = check_property_csv_existence(img_set_path, filter_info)

    # case 1: results for this exact filter were saved before
    if has_filtered_set:
        return read_properties(img_set_path, filter_info)

    selected_filenames = get_filtered_filenames(img_set_path, filter_info)
    if has_full_set:
        # case 2: extract specific set info from full set
        full_table = read_properties(img_set_path, no_filter)
        keep_rows = np.isin(full_table['filename'], selected_filenames)
        properties_df = full_table[keep_rows].reset_index(drop=True)
    else:
        # case 3: nothing saved yet, analyze the images
        images = get_img_set(img_set_path, selected_filenames)
        properties_df = extract_properties_from_img_set(
            images, filenames=selected_filenames
        )

    if write:
        write_properties(
            properties_df, img_set_path, filter_info,
            write_contour=write_contour
        )
    return properties_df