# Source code for vampire.extraction

import os

import cv2
import numpy as np
import pandas as pd
from skimage import io
from skimage.measure import regionprops_table

from . import util


[docs]def _is_filtered_img(filename, filter_info):
    """
    Checks if the file is a tagged image.

    Supports image extensions ``'.tiff', '.tif', '.jpeg', '.jpg', '.png',
    '.bmp', '.gif'``.

    Parameters
    ----------
    filename : str
        Filename of file to be checked.
    filter_info : ndarray
        Unique filter(s) of image filenames to be analyzed.
        Empty if no filter is needed.

    Returns
    -------
    bool

    """
    extensions = ('.tiff', '.tif', '.jpeg', '.jpg', '.png', '.bmp', '.gif')
    for filter_name in filter_info:
        if not (filter_name in filename):
            return False
    if not filename.endswith(extensions):
        return False
    return True


def get_contour_from_object(object_img):
    """
    Returns x and y coordinates of the object contour.

    Parameters
    ----------
    object_img : ndarray
        2D binary image with only one object.

    Returns
    -------
    contour : ndarray
        x and y coordinates of n contour sample points, with shape (2, n).
        Has shape (2, 0) if no contour is found in `object_img`.

    """
    # ``cv2.findContours`` returns ``(contours, hierarchy)`` in OpenCV 2.x and
    # 4.x but ``(image, contours, hierarchy)`` in OpenCV 3.x; the list of
    # contours is the second-to-last element in every version.
    found_contours = cv2.findContours(object_img.astype('uint8'), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if not found_contours:
        # nothing to trace (e.g. an all-background image)
        return np.empty((2, 0), dtype=np.int32)
    # only the first contour is used, reshaped to rows of x and y
    contour = found_contours[0].reshape(-1, 2).T
    # NOTE: the original implementation offset the coordinates by 1;
    # that offset is intentionally not applied here.
    # reverse the order of the sample points along the contour
    contour = np.flip(contour, axis=1)
    return contour


def get_contours_from_image(img, object_labels):
    """
    Collects the contour of every labeled object in an image.

    Each object is isolated as the mask ``img == label`` and traced with
    `get_contour_from_object`. Contours with fewer than 5 sample points
    are discarded because they resolve the shape too poorly.

    Parameters
    ----------
    img : ndarray
        2D image to be analyzed; pixels equal to a label belong to
        that object.
    object_labels : ndarray
        Labels of each object.

    Returns
    -------
    contours : list[ndarray]
        List of contour coordinates of the retained objects, in the order
        of `object_labels`.

    """
    # a (2, n) contour holds 2 * n values, so 10 values means 5 points
    min_contour_size = 10
    traced = (get_contour_from_object(img == label) for label in object_labels)
    return [outline for outline in traced if outline.size >= min_contour_size]


def get_properties_from_image(img, filename, image_index, object_labels):
    """
    Builds a table of shape properties for the objects in one image.

    The table is indexed by object label and holds, in order, the columns
    ``filename``, ``image_id``, ``object_id``, ``y``, ``x``, ``area``,
    ``perimeter``, ``major_axis_length``, ``minor_axis_length``,
    ``circularity`` and ``aspect_ratio``.

    Parameters
    ----------
    img : ndarray
        2D grayscale labeled image of objects.
    filename : str
        Filename of the image where the objects come from.
    image_index : int
        ID of the image.
    object_labels : ndarray
        Labels of the objects.

    Returns
    -------
    properties_df : DataFrame
        Properties of each object in the image with labels.

    """
    # measure the objects; centroid-0 is the row (y), centroid-1 the column (x)
    measured = ('label', 'centroid', 'area', 'perimeter', 'major_axis_length', 'minor_axis_length')
    measurements = regionprops_table(img, properties=measured)
    properties_df = pd.DataFrame(measurements).set_index('label')
    properties_df = properties_df.rename(columns={'centroid-0': 'y', 'centroid-1': 'x'})

    # derived shape descriptors
    area = properties_df['area']
    perimeter = properties_df['perimeter']
    properties_df['circularity'] = 4 * np.pi * area / perimeter ** 2
    axis_ratio = np.divide(properties_df['major_axis_length'], properties_df['minor_axis_length'])
    # NaN ratios are replaced by 0 through nan_to_num
    properties_df['aspect_ratio'] = np.nan_to_num(axis_ratio)

    # identification columns go first; object ids count from 1 and are
    # aligned to the table through the label index
    sequential_ids = pd.Series(np.arange(len(object_labels)) + 1, index=object_labels)
    properties_df.insert(0, 'filename', filename)
    properties_df.insert(1, 'image_id', image_index)
    properties_df.insert(2, 'object_id', sequential_ids)
    return properties_df


def get_info_from_folder(img_set_path, filter_info):
    """
    Returns contour and properties of objects from the image set folder.

    Files are visited in sorted name order, and images are numbered from 1
    in that order. Filenames are matched against `filter_info` and recorded
    in the properties in lowercase, while the files themselves are read
    using their actual names on disk.

    Parameters
    ----------
    img_set_path : str
        Path of folder that contains images to be analyzed.
    filter_info : ndarray
        Unique filter(s) of image filenames to be analyzed.
        Empty if no filter is needed.

    Returns
    -------
    contours_from_folder : list[ndarray]
        List of ndarray of contour coordinates of objects
    properties_from_folder : list[DataFrame]
        List of DataFrames of properties of objects, one per image.
        Both lists are empty if the folder holds no matching image.

    """
    contours_from_folder = []
    properties_from_folder = []
    img_i = 1
    # os.listdir returns names in arbitrary order; sort so that the image
    # numbering is reproducible between runs and machines
    for name_on_disk in sorted(os.listdir(img_set_path)):
        filename = name_on_disk.lower()
        # only images containing filter info proceed to calculations below
        if not _is_filtered_img(filename, filter_info):
            continue
        # read with the real name: the lowercased one may not exist on
        # case-sensitive file systems
        img = io.imread(os.path.join(img_set_path, name_on_disk))
        # every non-zero value is an object label; 0 is background and may
        # be absent, so it is removed by value rather than by position
        object_labels = np.unique(img)
        object_labels = object_labels[object_labels != 0]
        # NOTE(review): get_contours_from_image discards contours with fewer
        # than 5 points while the properties keep every object, so the
        # number of contours and of property rows may differ.
        contours_from_img = get_contours_from_image(img, object_labels)
        properties_from_img = get_properties_from_image(img, filename, img_i, object_labels)
        contours_from_folder.extend(contours_from_img)
        properties_from_folder.append(properties_from_img)
        img_i += 1
    return contours_from_folder, properties_from_folder


def write_contours(img_set_path, filter_info):
    """
    Finds contour coordinates of objects in the images from the image set
    and writes them, together with the object properties, to disk.

    The contours are written with ``util.write_pickle`` and the properties
    are written as a ``csv`` file, at the paths given by
    ``util.generate_file_paths``.

    Parameters
    ----------
    img_set_path : str
        Path to the directory of images to be analyzed.
    filter_info : ndarray
        Unique filter(s) of image filenames to be analyzed. Empty if no filter
        needed.

    Returns
    -------
    contours : list[ndarray]
        List of ndarray of contour coordinates.

    Raises
    ------
    ValueError
        If no image in `img_set_path` matches `filter_info`. No file is
        written in that case.

    """
    # calculations
    contours, properties = get_info_from_folder(img_set_path, filter_info)
    contours_pickle_path, properties_csv_path = util.generate_file_paths(img_set_path, filter_info)
    # pd.concat raises ValueError on an empty list; fail before anything is
    # written so that a contour file is never left without its datasheet
    if not properties:
        raise ValueError(f'No images matching the filter(s) were found in path: {img_set_path}')
    properties_df = pd.concat(properties)
    # write the files
    util.write_pickle(contours_pickle_path, contours)
    properties_df.to_csv(properties_csv_path)
    return contours


def read_contours(contours_pickle_path):
    """
    Loads previously saved contour coordinates from a ``pickle`` file.

    Thin wrapper around ``util.read_pickle``.

    NOTE(review): presumably the file is unpickled, which can execute
    arbitrary code - only load files from trusted sources; confirm against
    ``util.read_pickle``.

    Parameters
    ----------
    contours_pickle_path : str
        Path to contour ``pickle`` file.

    Returns
    -------
    contours : list[ndarray]
        List of ndarray of contour coordinates.

    """
    return util.read_pickle(contours_pickle_path)


# main
def extract_contours(img_set_path, filter_info):
    """
    Returns contour coordinates of objects in the images from the image set.

    If both the contour ``pickle`` file and the properties ``csv`` file for
    this image set and filter already exist, the contours are read back
    from the ``pickle`` file. Otherwise they are computed from the images
    and written to disk.

    Parameters
    ----------
    img_set_path : str
        Path to the directory of images to be analyzed.
    filter_info : ndarray
        Unique filter(s) of image filenames to be analyzed. Empty if no filter
        needed.

    Returns
    -------
    contours : list[ndarray]
        List of ndarray of contour coordinates.

    """
    pickle_path, csv_path = util.generate_file_paths(img_set_path, filter_info)
    already_extracted = os.path.exists(pickle_path) and os.path.exists(csv_path)
    if not already_extracted:
        # nothing usable on disk: compute and save
        return write_contours(img_set_path, filter_info)
    print(f'Contour and properties data already exist in path: {img_set_path}')
    return read_contours(pickle_path)


# def collect_contours(csv_path):
#     """
#     Collect contour coordinates from pickle.
#
#     Originally `collect_seleced_bstack()`.
#
#     Parameters
#     ----------
#     csv_path : str
#         Path of the `csv` file that stores information about image set
#         used to build model.
#
#     Returns
#     -------
#     contours : list
#
#     """
#     img_df = pd.read_csv(csv_path)
#     folder_paths = img_df['set location']
#     tags = img_df['tag']
#     contours = []
#     for path_i, folder_path in enumerate(folder_paths):
#         pickles = [pickle_file
#                    for pickle_file in os.listdir(folder_path)
#                    if is_tagged_pickle(pickle_file, tags[path_i])]
#         contours = []
#         for pickle_i in pickles:
#             if tags[path_i] in pickle_i:
#                 contours.append(read_pickle(os.path.join(folder_path, pickle_i)))
#     return contours


# def generate_contour_properties_files(img_set_path, filter_info):
#     """
#     Generates a pickle file containing a list of contour coordinates of objects
#     in the image set.
#
#     Originally `getboundary()`
#
#     Parameters
#     ----------
#     img_set_path : str
#         Path to the directory of images to be analyzed.
#     filter_info : ndarray
#         Unique filter(s) of image filenames to be analyzed. Empty if no filter
#         needed.
#
#     """
#     filter_tag = '_'.join(filter_info)
#     contours_pickle_path = os.path.join(img_set_path, f'boundary_coordinates__{filter_tag}.pickle')
#     properties_csv_path = os.path.join(img_set_path, f'vampire_datasheet__{filter_tag}.csv')
#
#     # check existence of previous calculation
#     if os.path.exists(contours_pickle_path) and os.path.exists(properties_csv_path):
#         print(f'Contour and properties data already exist in path: {img_set_path}')
#         return
#
#     # calculations
#     contours, properties = get_contours_and_properties_from_folder(img_set_path, filter_info)
#     # generates the files
#     util.write_pickle(contours_pickle_path, contours)
#     pd.concat(properties).to_csv(properties_csv_path)
#     return