# Source code for arcade_collection.input.convert_to_cells_file

import pandas as pd


def convert_to_cells_file(
    samples: pd.DataFrame,
    reference: pd.DataFrame,
    volume_distributions: dict[str, tuple[float, float]],
    height_distributions: dict[str, tuple[float, float]],
    critical_volume_distributions: dict[str, tuple[float, float]],
    critical_height_distributions: dict[str, tuple[float, float]],
    state_thresholds: dict[str, float],
) -> list[dict]:
    """
    Convert all samples to cell objects.

    Samples are grouped by the "id" column and each group is converted into a
    single cell object. The reference rows matching the original cell id are
    looked up and passed along so that reference volume and/or height, when
    available, take precedence over values measured from the voxel samples.

    Cell object ids are reindexed starting with cell id 1, following the order
    in which the grouped ids are iterated.

    Parameters
    ----------
    samples
        Sample cell ids and coordinates.
    reference
        Reference values for volumes and heights.
    volume_distributions
        Map of volume means and standard deviations.
    height_distributions
        Map of height means and standard deviations.
    critical_volume_distributions
        Map of critical volume means and standard deviations.
    critical_height_distributions
        Map of critical height means and standard deviations.
    state_thresholds
        Critical volume fractions defining threshold between states.

    Returns
    -------
    :
        List of cell objects formatted for ARCADE.
    """

    return [
        convert_to_cell(
            new_id,
            cell_samples,
            # Reference lookup uses the original id, not the reindexed one.
            filter_cell_reference(original_id, reference),
            volume_distributions,
            height_distributions,
            critical_volume_distributions,
            critical_height_distributions,
            state_thresholds,
        )
        for new_id, (original_id, cell_samples) in enumerate(samples.groupby("id"), start=1)
    ]
def convert_to_cell(
    cell_id: int,
    samples: pd.DataFrame,
    reference: dict,
    volume_distributions: dict[str, tuple[float, float]],
    height_distributions: dict[str, tuple[float, float]],
    critical_volume_distributions: dict[str, tuple[float, float]],
    critical_height_distributions: dict[str, tuple[float, float]],
    state_thresholds: dict[str, float],
) -> dict:
    """
    Convert samples to cell object.

    The measured volume is the number of voxel samples and the measured height
    is the range of voxel coordinates along the z axis. If the reference
    contains a "volume" and/or "height" entry, that value is used in place of
    the measured one when rescaling. Values are rescaled to critical volume and
    critical height using the "DEFAULT" entries of the distribution maps.

    Initial cell state and cell state phase are estimated from the state
    thresholds, the measured voxel count, and the critical cell volume. The
    state is the part of the phase name before the first underscore.

    If the samples have a "region" column with at least one non-missing value,
    a "regions" entry with one region object per region is added to the cell.

    Parameters
    ----------
    cell_id
        Unique cell id.
    samples
        Sample coordinates for a single object.
    reference
        Reference data for cell.
    volume_distributions
        Map of volume means and standard deviations.
    height_distributions
        Map of height means and standard deviations.
    critical_volume_distributions
        Map of critical volume means and standard deviations.
    critical_height_distributions
        Map of critical height means and standard deviations.
    state_thresholds
        Critical volume fractions defining threshold between states.

    Returns
    -------
    :
        Cell object formatted for ARCADE.
    """

    voxel_count = len(samples)
    z_values = samples.z
    z_extent = z_values.max() - z_values.min()

    # Reference values, when present, override the measured ones for rescaling.
    criticals = [
        convert_value_distribution(
            reference.get("volume", voxel_count),
            volume_distributions["DEFAULT"],
            critical_volume_distributions["DEFAULT"],
        ),
        convert_value_distribution(
            reference.get("height", z_extent),
            height_distributions["DEFAULT"],
            critical_height_distributions["DEFAULT"],
        ),
    ]

    # The phase is estimated from the measured voxel count, not the reference volume.
    phase = get_cell_state(voxel_count, criticals[0], state_thresholds)

    cell = {
        "id": cell_id,
        "parent": 0,
        "pop": 1,
        "age": 0,
        "divisions": 0,
        "state": phase.partition("_")[0],
        "phase": phase,
        "voxels": voxel_count,
        "criticals": criticals,
    }

    has_regions = "region" in samples.columns and samples["region"].notna().any()
    if not has_regions:
        return cell

    cell["regions"] = [
        convert_to_cell_region(
            region_name,
            region_group,
            reference,
            volume_distributions,
            height_distributions,
            critical_volume_distributions,
            critical_height_distributions,
        )
        for region_name, region_group in samples.groupby("region")
    ]
    return cell
def convert_to_cell_region(
    region: str,
    region_samples: pd.DataFrame,
    reference: dict,
    volume_distributions: dict[str, tuple[float, float]],
    height_distributions: dict[str, tuple[float, float]],
    critical_volume_distributions: dict[str, tuple[float, float]],
    critical_height_distributions: dict[str, tuple[float, float]],
) -> dict:
    """
    Convert region samples to cell region object.

    The measured region volume is the number of voxel samples and the measured
    region height is the range of voxel coordinates along the z axis. If the
    reference contains a "volume.<region>" and/or "height.<region>" entry, that
    value is used in place of the measured one when rescaling. Values are
    rescaled to critical volume and critical height using the distribution
    entries keyed by the region name.

    Parameters
    ----------
    region
        Region name.
    region_samples
        Sample coordinates for region of a single object.
    reference
        Reference data for cell region.
    volume_distributions
        Map of volume means and standard deviations.
    height_distributions
        Map of height means and standard deviations.
    critical_volume_distributions
        Map of critical volume means and standard deviations.
    critical_height_distributions
        Map of critical height means and standard deviations.

    Returns
    -------
    :
        Cell region object formatted for ARCADE.
    """

    voxel_count = len(region_samples)
    z_values = region_samples.z
    z_extent = z_values.max() - z_values.min()

    # Reference values, when present, override the measured ones for rescaling.
    critical_volume = convert_value_distribution(
        reference.get(f"volume.{region}", voxel_count),
        volume_distributions[region],
        critical_volume_distributions[region],
    )
    critical_height = convert_value_distribution(
        reference.get(f"height.{region}", z_extent),
        height_distributions[region],
        critical_height_distributions[region],
    )

    # The reported voxel count is always the measured one.
    return {
        "region": region,
        "voxels": voxel_count,
        "criticals": [critical_volume, critical_height],
    }
def get_cell_state(
    volume: float,
    critical_volume: float,
    threshold_fractions: dict[str, float],
) -> str:
    """
    Estimate cell state based on cell volume.

    Each threshold fraction is multiplied by the critical volume to give a
    volume threshold. States are checked in the order they appear in the
    threshold fractions dictionary, and the first state whose threshold is
    strictly greater than the given volume is returned. The fractions are
    expected to be monotonic, so that for states X1, X2, ..., XN with fractions
    f1, f2, ..., fN and critical volume V, a cell with volume v is assigned
    state Xi such that [f(i - 1) * V] <= v < [fi * V].

    Cells with v < f1 * V are assigned state X1. Cells with v >= fN * V are
    assigned the last state XN.

    Parameters
    ----------
    volume
        Current cell volume.
    critical_volume
        Critical cell volume.
    threshold_fractions
        Critical volume fractions defining threshold between states.

    Returns
    -------
    :
        Cell state.
    """

    volume_thresholds = {
        state: fraction * critical_volume for state, fraction in threshold_fractions.items()
    }

    for state, threshold in volume_thresholds.items():
        if threshold > volume:
            return state

    # No threshold exceeds the volume: fall back to the last listed state.
    return list(volume_thresholds)[-1]
def convert_value_distribution(
    value: float,
    source_distribution: tuple[float, float],
    target_distribution: tuple[float, float],
) -> float:
    """
    Estimate target value based on source value and source and target distributions.

    The source value is standardized against the source distribution (subtract
    the average, divide by the standard deviation) and the resulting score is
    mapped onto the target distribution (multiply by the standard deviation,
    add the average).

    Parameters
    ----------
    value
        Source value.
    source_distribution
        Average and standard deviation of source value distribution.
    target_distribution
        Average and standard deviation of target value distribution.

    Returns
    -------
    :
        Estimated critical value.
    """

    (source_mean, source_spread), (target_mean, target_spread) = (
        source_distribution,
        target_distribution,
    )
    return (value - source_mean) / source_spread * target_spread + target_mean
def filter_cell_reference(cell_id: int, reference: pd.DataFrame) -> dict:
    """
    Filter reference data for given cell id.

    Rows of the reference whose "ID" column equals the given cell id are
    selected. The first matching row is returned as a flat dictionary keyed by
    column name (including the "ID" column). If more than one row matches, only
    the first is used. If no row matches, an empty dictionary is returned.

    Parameters
    ----------
    cell_id
        Unique cell id.
    reference
        Reference data for conversion.

    Returns
    -------
    :
        Reference data for given cell id.
    """

    matches = reference[reference["ID"] == cell_id]

    if matches.empty:
        return {}

    # Select the row explicitly rather than squeezing: an unqualified squeeze
    # collapses a 1x1 frame to a scalar and leaves multi-row matches as a
    # frame, neither of which converts to a flat column-to-value mapping.
    return matches.iloc[0].to_dict()