# Source code for vampire.extraction
import os
import re
import cv2
import numpy as np
import pandas as pd
from skimage import io
from skimage.measure import regionprops_table
from . import util
def check_property_csv_existence(img_set_path, filter_info):
    """
    Report whether the property csv for an image set is already on disk.

    Parameters
    ----------
    img_set_path : str
        Path to the directory of images to be analyzed.
    filter_info : ndarray
        Regex filter(s) of image filenames to be analyzed.
        Empty if no filter needed.

    Returns
    -------
    bool
        ``True`` if the csv path resolved by
        ``util.get_properties_csv_path`` exists, ``False`` otherwise.

    """
    csv_path = util.get_properties_csv_path(img_set_path, filter_info)
    csv_found = os.path.exists(csv_path)
    if csv_found:
        # let the user know that cached results were found
        print(f'Contour and properties data already exist in path: {img_set_path}')
    return csv_found
def get_filtered_filenames(img_set_path, filter_info=None):
    """
    Get image filenames in a directory, filtered with keywords.

    Only files whose name *ends* with a supported image extension
    (tif, tiff, jpeg, jpg, png, bmp, gif; case-insensitive) are kept.
    Every regex in `filter_info` must then match the filename.

    Parameters
    ----------
    img_set_path : str
        Path to the directory of images to be analyzed.
    filter_info : ndarray, optional
        Regex filter(s) of image filenames to be analyzed.
        Empty if no filter needed.

    Returns
    -------
    filtered_filenames : ndarray
        Filtered filenames, in sorted order.

    """
    if filter_info is None:
        filter_info = np.array([], dtype=str)
    # os.listdir returns entries in arbitrary order; sort so that the
    # image order (and any id derived from it) is reproducible.
    # An explicit dtype keeps the ``.str`` accessor usable when the
    # directory is empty.
    filenames = pd.Series(sorted(os.listdir(img_set_path)), dtype=str)
    # filter by img extension; anchored at the end of the name so that
    # files such as ``mask.png.csv`` are not mistaken for images
    extensions_regex = r'\.(?:tiff?|jpe?g|png|bmp|gif)$'
    extension_filter = filenames.str.contains(extensions_regex,
                                              flags=re.IGNORECASE)
    filenames = filenames[extension_filter]
    # filter by user constraints (all of them must match)
    for constraint in filter_info:
        constraint_filter = filenames.str.contains(constraint, regex=True)
        filenames = filenames[constraint_filter]
    return np.array(filenames)
def get_img_set(img_set_path, filenames):
    """
    Load every listed image from the image set directory.

    Parameters
    ----------
    img_set_path : str
        Path to the directory of images to be analyzed.
    filenames : ndarray
        Filtered filenames.

    Returns
    -------
    img_set: list[ndarray]
        A list of images to be analyzed, in the same order as `filenames`.

    """
    # each image is read with skimage.io.imread from <img_set_path>/<filename>
    return [
        io.imread(os.path.join(img_set_path, img_name))
        for img_name in filenames
    ]
def extract_contour_from_object(object_img):
    """
    Returns x and y coordinates of the object contour.

    Parameters
    ----------
    object_img : ndarray
        2D binary image with only one object.

    Returns
    -------
    contour : ndarray or float
        x and y coordinates of n contour sample points, with shape (2, n).
        ``np.nan`` if no contour is found or the contour has <= 3 points.

    """
    # cv2.findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in OpenCV 3.x; the contour list is the
    # second-to-last element in both layouts.
    contours = cv2.findContours(
        object_img.astype('uint8'),
        cv2.RETR_TREE,
        cv2.CHAIN_APPROX_NONE
    )[-2]
    if len(contours) == 0:  # nothing to trace, treat like an unusable contour
        return np.nan
    # first contour, flattened to (n, 2) and transposed to (2, n)
    contour = contours[0].reshape(-1, 2).T
    # reverse the order of the sample points along the contour
    contour = np.flip(contour, axis=1)
    if contour.size <= 6:  # contour has <= 3 points, could not be sampled
        return np.nan
    return contour
def extract_properties_from_img(img, filename=None, img_id=None):
    """
    Measure every object in one image and tabulate the results.

    Parameters
    ----------
    img : ndarray
        Image to be analyzed
    filename : str, optional
        Filename of the image. Added as the ``filename`` column if given.
    img_id : int, optional
        ID/index of the image. Added as the ``image_id`` column if given.

    Returns
    -------
    properties_df : DataFrame
        Dataframe of object properties, one row per object whose raw
        contour could be extracted.

    """
    # region properties requested from skimage
    region_props = (
        'label',
        'centroid',
        'area',
        'bbox_area',
        'convex_area',
        'filled_area',
        'perimeter',
        'equivalent_diameter',
        'major_axis_length',
        'minor_axis_length',
        'orientation',
        'euler_number',
        'eccentricity',
        'solidity',
        'extent'
    )
    props_table = regionprops_table(
        img,
        properties=region_props,
        extra_properties=(extract_contour_from_object,)
    )
    # give the centroid axes and the contour column descriptive names
    column_renames = {
        'centroid-0': 'centroid-y',
        'centroid-1': 'centroid-x',
        'extract_contour_from_object': 'raw_contour'
    }
    properties_df = pd.DataFrame(props_table).rename(columns=column_renames)

    # derived shape descriptors
    area = properties_df['area']
    perimeter = properties_df['perimeter']
    properties_df['circularity'] = 4 * np.pi * area / perimeter ** 2
    axis_ratio = np.divide(
        properties_df['major_axis_length'],
        properties_df['minor_axis_length']
    )
    properties_df['aspect_ratio'] = np.nan_to_num(axis_ratio)

    # keep only objects with a usable contour (NaN marks <= 3 points)
    has_contour = pd.notna(properties_df['raw_contour'])
    properties_df = properties_df[has_contour]

    # tag each object; filename is inserted last so it ends up first
    if img_id is not None:
        properties_df.insert(0, 'image_id', img_id)
    if filename is not None:
        properties_df.insert(0, 'filename', filename)
    return properties_df
def extract_properties_from_img_set(img_set, filenames=None):
    """
    Measure every object in every image of a set and stack the results.

    Parameters
    ----------
    img_set: list[ndarray]
        A list of images to be analyzed.
    filenames : ndarray, optional
        Filenames of the images.

    Returns
    -------
    properties_from_img_set_df : DataFrame
        Dataframe of object properties, with a fresh running index.

    Raises
    ------
    ValueError
        If `filenames` is given and its length differs from `img_set`.
        An empty `img_set` hands ``pd.concat`` an empty list, which
        pandas also rejects with a ``ValueError``.

    """
    has_names = filenames is not None
    if has_names and len(img_set) != len(filenames):
        raise ValueError('Length of img_set and filenames does not match.')
    # the position of an image in the set doubles as its image id
    per_image_frames = [
        extract_properties_from_img(
            image,
            filename=filenames[idx] if has_names else None,
            img_id=idx
        )
        for idx, image in enumerate(img_set)
    ]
    return pd.concat(per_image_frames, ignore_index=True)
def read_properties(img_set_path, filter_info):
    """
    Load previously saved object properties from the property ``pickle`` file.

    Parameters
    ----------
    img_set_path : str
        Path to the directory of images to be analyzed.
    filter_info : ndarray
        Regex filter(s) of image filenames to be analyzed.
        Empty if no filter needed.

    Returns
    -------
    properties_df : DataFrame
        Dataframe of object properties.

    """
    # the pickle location is derived from the image set path and filter
    pickle_path = util.get_properties_pickle_path(img_set_path, filter_info)
    return util.read_pickle(pickle_path)
def write_properties(properties_df, img_set_path, filter_info, write_contour=False):
    """
    Save object properties as a ``csv`` file and a ``pickle`` file.

    The pickle always receives the full dataframe; the csv contains the
    ``raw_contour`` column only when `write_contour` is set.

    Parameters
    ----------
    properties_df : DataFrame
        DataFrame of object properties.
    img_set_path : str
        Path to the directory of images to be analyzed.
    filter_info : ndarray
        Regex filter(s) of image filenames to be analyzed.
        Empty if no filter needed.
    write_contour : bool, optional
        Whether write and save raw contour coordinates.

    """
    csv_path = util.get_properties_csv_path(img_set_path, filter_info)
    pickle_path = util.get_properties_pickle_path(img_set_path, filter_info)
    # leave the contour coordinates out of the csv unless requested
    csv_df = properties_df if write_contour else properties_df.drop('raw_contour', axis=1)
    csv_df.to_csv(csv_path, index=False)
    util.write_pickle(pickle_path, properties_df)
def extract_properties(img_set_path, filter_info=None, write=True, write_contour=False):
    """
    Get object properties for an image set, reusing saved results if present.

    Saved results for exactly this filter are read back as they are.
    Otherwise, saved results for the unfiltered set are narrowed down to
    the filtered filenames. Only when neither exists are the images
    loaded and measured.

    Parameters
    ----------
    img_set_path : str
        Path to the directory of images to be analyzed.
    filter_info : ndarray, optional
        Regex filter(s) of image filenames to be analyzed.
        Empty if no filter needed.
    write : bool, optional
        Write properties into ``csv`` and ``pickle`` file.
        Not applied when saved results for this filter already exist.
    write_contour : bool, optional
        Whether write and save raw contour coordinates.

    Returns
    -------
    properties_df : Dataframe
        Dataframe of object properties.

    """
    no_filter = np.array([], dtype=str)
    if filter_info is None:
        filter_info = no_filter
    has_full_set = check_property_csv_existence(img_set_path, no_filter)
    has_specific_set = check_property_csv_existence(img_set_path, filter_info)

    # results for exactly this filter are already saved
    if has_specific_set:
        return read_properties(img_set_path, filter_info)

    filenames = get_filtered_filenames(img_set_path, filter_info)
    if has_full_set:
        # extract specific set info from full set
        full_df = read_properties(img_set_path, no_filter)
        wanted_rows = np.isin(full_df['filename'], filenames)
        properties_df = full_df[wanted_rows].reset_index(drop=True)
    else:
        # nothing saved yet: load the images and measure them
        images = get_img_set(img_set_path, filenames)
        properties_df = extract_properties_from_img_set(
            images,
            filenames=filenames
        )

    if write:
        write_properties(properties_df, img_set_path, filter_info, write_contour=write_contour)
    return properties_df