Source code for arcade_collection.convert.convert_to_simularium_shapes

from __future__ import annotations

import random
from math import cos, isnan, pi, sin, sqrt
from typing import TYPE_CHECKING

import numpy as np
import pandas as pd

from arcade_collection.convert.convert_to_simularium import convert_to_simularium
from arcade_collection.output.extract_tick_json import extract_tick_json
from arcade_collection.output.get_location_voxels import get_location_voxels

if TYPE_CHECKING:
    import tarfile

CELL_STATES: list[str] = [
    "UNDEFINED",
    "APOPTOTIC",
    "QUIESCENT",
    "MIGRATORY",
    "PROLIFERATIVE",
    "SENESCENT",
    "NECROTIC",
]
"""Indexed cell states."""

EDGE_TYPES: list[str] = [
    "ARTERIOLE",
    "ARTERY",
    "CAPILLARY",
    "VEIN",
    "VENULE",
    "UNDEFINED",
]
"""Indexed graph edge types."""


[docs]def convert_to_simularium_shapes(
    series_key: str,
    simulation_type: str,
    data_tars: dict[str, tarfile.TarFile],
    frame_spec: tuple[int, int, int],
    box: tuple[int, int, int],
    ds: tuple[float, float, float],
    dt: float,
    colors: dict[str, str],
    resolution: int = 0,
    jitter: float = 1.0,
) -> str:
    """
    Convert data to Simularium trajectory using shapes.

    Parameters
    ----------
    series_key
        Simulation series key.
    simulation_type : {'patch', 'potts'}
        Simulation type.
    data_tars
        Map of simulation data archives.
    frame_spec
        Specification for simulation ticks.
    box
        Size of bounding box.
    ds
        Spatial scaling in um/voxel.
    dt
        Temporal scaling in hours/tick.
    colors
        Map of category to colors.
    resolution
        Number of voxels represented by a sphere (0 for single sphere per cell).
    jitter
        Relative jitter applied to colors (set to 0 for exact colors).

    Returns
    -------
    :
        Simularium trajectory.
    """

    # Throw exception if invalid simulation type.
    if simulation_type not in ("patch", "potts"):
        message = f"invalid simulation type {simulation_type}"
        raise ValueError(message)

    if simulation_type == "patch":
        # Simulation type must have either or both "cells" and "graph" data
        if not ("cells" in data_tars or "graph" in data_tars):
            return ""

        frames = list(map(float, np.arange(*frame_spec)))
        radius, margin, height = box
        bounds, length, width = calculate_patch_size(radius, margin)
        data = format_patch_for_shapes(
            series_key, data_tars.get("cells"), data_tars.get("graph"), frames, bounds
        )
    elif simulation_type == "potts":
        # Simulation type must have both "cells" and "locations" data
        if not ("cells" in data_tars and "locations" in data_tars):
            return ""

        frames = list(map(int, np.arange(*frame_spec)))
        length, width, height = box
        data = format_potts_for_shapes(
            series_key, data_tars["cells"], data_tars["locations"], frames, resolution
        )

    return convert_to_simularium(
        series_key, simulation_type, data, length, width, height, ds, dt, colors, jitter=jitter
    )


[docs]def format_patch_for_shapes(
    series_key: str,
    cells_tar: tarfile.TarFile | None,
    graph_tar: tarfile.TarFile | None,
    frames: list[float],
    bounds: int,
) -> pd.DataFrame:
    """
    Format ``patch`` simulation data for shape-based Simularium trajectory.

    Parameters
    ----------
    series_key
        Simulation series key.
    cells_tar
        Archive of cell agent data.
    graph_tar
        Archive of vascular graph data.
    frames
        List of frames.
    bounds
        Simulation bounds size (radius + margin).

    Returns
    -------
    :
        Data formatted for trajectory.
    """

    data: list[list[int | str | float | list]] = []

    for frame in frames:
        if cells_tar is not None:
            cell_timepoint = extract_tick_json(cells_tar, series_key, frame, field="cells")

            for location, cells in cell_timepoint:
                u, v, w, z = location
                rotation = random.randint(0, 5)  # noqa: S311

                for cell in cells:
                    _, population, state, position, volume, _ = cell
                    cell_id = f"{u}{v}{w}{z}{position}"

                    name = f"POPULATION{population}#{CELL_STATES[state]}#{cell_id}"
                    radius = float("%.2g" % ((volume ** (1.0 / 3)) / 1.5))  # round to 2 sig figs

                    offset = (position + rotation) % 6
                    x, y = convert_hexagonal_to_rectangular_coordinates((u, v, w), bounds, offset)
                    center = [x, y, z]

                    data = [*data, [name, frame, radius, *center, [], "SPHERE"]]

        if graph_tar is not None:
            graph_timepoint = extract_tick_json(
                graph_tar, series_key, frame, "GRAPH", field="graph"
            )

            for from_node, to_node, edge in graph_timepoint:
                edge_type, radius, _, _, _, _, flow = edge

                name = f"VASCULATURE#{'UNDEFINED' if isnan(flow) else EDGE_TYPES[edge_type + 2]}"

                subpoints = [
                    from_node[0] / sqrt(3),
                    from_node[1],
                    from_node[2],
                    to_node[0] / sqrt(3),
                    to_node[1],
                    to_node[2],
                ]

                data = [*data, [name, frame, radius, 0, 0, 0, subpoints, "FIBER"]]

    return pd.DataFrame(
        data, columns=["name", "frame", "radius", "x", "y", "z", "points", "display"]
    )


[docs]def convert_hexagonal_to_rectangular_coordinates(
    uvw: tuple[int, int, int], bounds: int, offset: int
) -> tuple[float, float]:
    """
    Convert hexagonal (u, v, w) coordinates to rectangular (x, y) coordinates.

    Conversion is based on the bounds of the simulation,

    Parameters
    ----------
    uvw
        Hexagonal (u, v, w) coordinates.
    bounds
        Simulation bounds size (radius + margin).
    offset
        Index of hexagonal offset.

    Returns
    -------
    :
        Rectangular (x, y) coordinates.
    """

    u, v, w = uvw
    theta = [pi * (60 * i) / 180.0 for i in range(6)]
    dx = [cos(t) / sqrt(3) for t in theta]
    dy = [sin(t) / sqrt(3) for t in theta]

    x = (3 * (u + bounds) - 1) / sqrt(3)
    y = (v - w) + 2 * bounds - 1

    return x + dx[offset], y + dy[offset]


[docs]def calculate_patch_size(radius: int, margin: int) -> tuple[int, float, float]:
    """
    Calculate hexagonal patch simulation sizes.

    Parameters
    ----------
    radius
        Number of hexagonal patches from the center patch.
    margin
        Number of hexagonal patches in the margin.

    Returns
    -------
    :
        Bounds, length, and width of the simulation bounding box.
    """

    bounds = radius + margin
    length = (2 / sqrt(3)) * (3 * bounds - 1)
    width = 4 * bounds - 2

    return bounds, length, width


[docs]def format_potts_for_shapes(
    series_key: str,
    cells_tar: tarfile.TarFile,
    locations_tar: tarfile.TarFile,
    frames: list[float],
    resolution: int,
) -> pd.DataFrame:
    """
    Format `potts` simulation data for shape-based Simularium trajectory.

    The resolution parameter can be used to tune how many spheres are used to
    represent each cell. Resolution = 0 displays each cell as a single sphere
    centered on the average voxel position. Resolution = 1 displays each
    individual voxel of each cell as a single sphere.

    Resolution = N will aggregate voxels by dividing the voxels into NxNxN
    cubes, and replacing cubes with at least 50% of those voxels occupied with a
    single sphere centered at the center of the cube.

    For resolution > 0, interior voxels (fully surrounded voxels) are not
    removed.

    Parameters
    ----------
    series_key
        Simulation series key.
    cells_tar
        Archive of cell data.
    locations_tar
        Archive of location data.
    frames
        List of frames.
    resolution
        Number of voxels represented by a sphere (0 for single sphere per cell).

    Returns
    -------
    :
        Data formatted for trajectory.
    """

    data: list[list[object]] = []

    for frame in frames:
        cells = extract_tick_json(cells_tar, series_key, frame, "CELLS")
        locations = extract_tick_json(locations_tar, series_key, frame, "LOCATIONS")

        for cell, location in zip(cells, locations):
            regions = [loc["region"] for loc in location["location"]]

            for region in regions:
                name = f"{region}#{cell['phase']}#{cell['id']}"
                all_voxels = get_location_voxels(location, region if region != "DEFAULT" else None)

                if resolution == 0:
                    radius = approximate_radius_from_voxels(len(all_voxels))
                    center = list(np.array(all_voxels).mean(axis=0))
                    data = [*data, [name, int(frame), radius, *center, [], "SPHERE"]]
                else:
                    radius = resolution / 2
                    center_offset = (resolution - 1) / 2

                    resolution_voxels = get_resolution_voxels(all_voxels, resolution)
                    border_voxels = filter_border_voxels(set(resolution_voxels), resolution)
                    center_voxels = [
                        [x + center_offset, y + center_offset, z + center_offset]
                        for x, y, z in border_voxels
                    ]

                    data = data + [
                        [name, int(frame), radius, *voxel, [], "SPHERE"] for voxel in center_voxels
                    ]

    return pd.DataFrame(
        data, columns=["name", "frame", "radius", "x", "y", "z", "points", "display"]
    )


[docs]def approximate_radius_from_voxels(voxels: int) -> float:
    """
    Approximate display sphere radius from number of voxels.

    Parameters
    ----------
    voxels
        Number of voxels.

    Returns
    -------
    :
        Approximate radius.
    """

    return (voxels ** (1.0 / 3)) / 1.5


[docs]def get_resolution_voxels(
    voxels: list[tuple[int, int, int]], resolution: int
) -> list[tuple[int, int, int]]:
    """
    Get voxels at specified resolution.

    Parameters
    ----------
    voxels
        List of voxels.
    resolution
        Resolution of voxels.

    Returns
    -------
    :
        List of voxels at specified resolution.
    """

    voxel_df = pd.DataFrame(voxels, columns=["x", "y", "z"])

    min_x, min_y, min_z = voxel_df.min()
    max_x, max_y, max_z = voxel_df.max()

    samples = [
        (sx, sy, sz)
        for sx in np.arange(min_x, max_x + 1, resolution)
        for sy in np.arange(min_y, max_y + 1, resolution)
        for sz in np.arange(min_z, max_z + 1, resolution)
    ]

    offsets = [
        (dx, dy, dz)
        for dx in range(resolution)
        for dy in range(resolution)
        for dz in range(resolution)
    ]

    resolution_voxels = []

    for sx, sy, sz in samples:
        sample_voxels = [(sx + dx, sy + dy, sz + dz) for dx, dy, dz in offsets]

        if len(set(sample_voxels) - set(voxels)) < len(offsets) / 2:
            resolution_voxels.append((sx, sy, sz))

    return resolution_voxels


[docs]def filter_border_voxels(
    voxels: set[tuple[int, int, int]], resolution: int
) -> list[tuple[int, int, int]]:
    """
    Filter voxels to only include the border voxels.

    Parameters
    ----------
    voxels
        List of voxels.
    resolution
        Resolution of voxels.

    Returns
    -------
    :
        List of filtered voxels.
    """

    offsets = [
        (resolution, 0, 0),
        (-resolution, 0, 0),
        (0, resolution, 0),
        (0, -resolution, 0),
        (0, 0, resolution),
        (0, 0, -resolution),
    ]
    filtered_voxels = []

    for x, y, z in voxels:
        neighbors = [(x + dx, y + dy, z + dz) for dx, dy, dz in offsets]
        if len(set(neighbors) - set(voxels)) != 0:
            filtered_voxels.append((x, y, z))

    return sorted(filtered_voxels)