import pandas as pd
[docs]def convert_to_cells_file(
samples: pd.DataFrame,
reference: pd.DataFrame,
volume_distributions: dict[str, tuple[float, float]],
height_distributions: dict[str, tuple[float, float]],
critical_volume_distributions: dict[str, tuple[float, float]],
critical_height_distributions: dict[str, tuple[float, float]],
state_thresholds: dict[str, float],
) -> list[dict]:
"""
Convert all samples to cell objects.
For each cell id in samples, current volume and height are rescaled to
critical volume and critical height based on distribution means and standard
deviations. If reference volume and/or height exist for the cell id, those
values are used as the current values to be rescaled. Otherwise, current
volume is calculated from the number of voxel samples and current height is
calculated from the range of voxel coordinates along the z axis.
Initial cell state and cell state phase for each cell are estimated based on
state thresholds, the current cell volume, and the critical cell volume.
Cell object ids are reindexed starting with cell id 1.
Parameters
----------
samples
Sample cell ids and coordinates.
reference
Reference values for volumes and heights.
volume_distributions
Map of volume means and standard deviations.
height_distributions
Map of height means and standard deviations.
critical_volume_distributions
Map of critical volume means and standard deviations.
critical_height_distributions
Map of critical height means and standard deviations.
state_thresholds
Critical volume fractions defining threshold between states.
Returns
-------
:
List of cell objects formatted for ARCADE.
"""
cells: list[dict] = []
samples_by_id = samples.groupby("id")
for i, (cell_id, group) in enumerate(samples_by_id):
cell_reference = filter_cell_reference(cell_id, reference)
cells.append(
convert_to_cell(
i + 1,
group,
cell_reference,
volume_distributions,
height_distributions,
critical_volume_distributions,
critical_height_distributions,
state_thresholds,
)
)
return cells
[docs]def convert_to_cell(
cell_id: int,
samples: pd.DataFrame,
reference: dict,
volume_distributions: dict[str, tuple[float, float]],
height_distributions: dict[str, tuple[float, float]],
critical_volume_distributions: dict[str, tuple[float, float]],
critical_height_distributions: dict[str, tuple[float, float]],
state_thresholds: dict[str, float],
) -> dict:
"""
Convert samples to cell object.
Current volume and height are rescaled to critical volume and critical
height based on distribution means and standard deviations. If reference
volume and/or height are provided (under the "DEFAULT" key), those values
are used as the current values to be rescaled. Otherwise, current volume is
calculated from the number of voxel samples and current height is calculated
from the range of voxel coordinates along the z axis.
Initial cell state and cell state phase are estimated based on state
thresholds, the current cell volume, and the critical cell volume.
Parameters
----------
cell_id
Unique cell id.
samples
Sample coordinates for a single object.
reference
Reference data for cell.
volume_distributions
Map of volume means and standard deviations.
height_distributions
Map of height means and standard deviations.
critical_volume_distributions
Map of critical volume means and standard deviations.
critical_height_distributions
Map of critical height means and standard deviations.
state_thresholds
Critical volume fractions defining threshold between states.
Returns
-------
:
Cell object formatted for ARCADE.
"""
volume = len(samples)
height = samples.z.max() - samples.z.min()
critical_volume = convert_value_distribution(
reference.get("volume", volume),
volume_distributions["DEFAULT"],
critical_volume_distributions["DEFAULT"],
)
critical_height = convert_value_distribution(
reference.get("height", height),
height_distributions["DEFAULT"],
critical_height_distributions["DEFAULT"],
)
state = get_cell_state(volume, critical_volume, state_thresholds)
cell = {
"id": cell_id,
"parent": 0,
"pop": 1,
"age": 0,
"divisions": 0,
"state": state.split("_")[0],
"phase": state,
"voxels": volume,
"criticals": [critical_volume, critical_height],
}
if "region" in samples.columns and not samples["region"].isna().all():
regions = [
convert_to_cell_region(
region,
region_samples,
reference,
volume_distributions,
height_distributions,
critical_volume_distributions,
critical_height_distributions,
)
for region, region_samples in samples.groupby("region")
]
cell.update({"regions": regions})
return cell
[docs]def convert_to_cell_region(
region: str,
region_samples: pd.DataFrame,
reference: dict,
volume_distributions: dict[str, tuple[float, float]],
height_distributions: dict[str, tuple[float, float]],
critical_volume_distributions: dict[str, tuple[float, float]],
critical_height_distributions: dict[str, tuple[float, float]],
) -> dict:
"""
Convert region samples to cell region object.
Current region volume and height are rescaled to critical volume and
critical height based on distribution means and standard deviations. If
reference region volume and/or height are provided, those values are used as
the current values to be rescaled. Otherwise, current region volume is
calculated from the number of voxel samples and current region height is
calculated from the range of voxel coordinates along the z axis.
Parameters
----------
region
Region name.
region_samples
Sample coordinates for region of a single object.
reference
Reference data for cell region.
volume_distributions
Map of volume means and standard deviations.
height_distributions
Map of height means and standard deviations.
critical_volume_distributions
Map of critical volume means and standard deviations.
critical_height_distributions
Map of critical height means and standard deviations.
Returns
-------
:
Cell region object formatted for ARCADE.
"""
region_volume = len(region_samples)
region_height = region_samples.z.max() - region_samples.z.min()
region_critical_volume = convert_value_distribution(
reference.get(f"volume.{region}", region_volume),
volume_distributions[region],
critical_volume_distributions[region],
)
region_critical_height = convert_value_distribution(
reference.get(f"height.{region}", region_height),
height_distributions[region],
critical_height_distributions[region],
)
return {
"region": region,
"voxels": len(region_samples),
"criticals": [region_critical_volume, region_critical_height],
}
[docs]def get_cell_state(
volume: float,
critical_volume: float,
threshold_fractions: dict[str, float],
) -> str:
"""
Estimate cell state based on cell volume.
The threshold fractions dictionary defines the monotonic thresholds between
different cell states. For a given volume v, critical volume V, and states
X1, X2, ..., XN with corresponding, monotonic threshold fractions f1, f2,
..., fN, a cell is assigned state Xi such that [f(i - 1) * V] <= v < [fi *
V].
Cells with v < f1 * V are assigned state X1.
Cells with v > fN * V are assigned state XN.
Parameters
----------
volume
Current cell volume.
critical_volume
Critical cell volume.
threshold_fractions
Critical volume fractions defining threshold between states.
Returns
-------
:
Cell state.
"""
thresholds = [fraction * critical_volume for fraction in threshold_fractions.values()]
states = list(threshold_fractions.keys())
index = next((ind for ind, thresh in enumerate(thresholds) if thresh > volume), -1)
return states[index]
[docs]def convert_value_distribution(
value: float,
source_distribution: tuple[float, float],
target_distribution: tuple[float, float],
) -> float:
"""
Estimate target value based on source value and source and target distributions.
Parameters
----------
value
Source value.
source_distribution
Average and standard deviation of source value distribution.
target_distribution
Average and standard deviation of target value distribution.
Returns
-------
:
Estimated critical value.
"""
source_avg, source_std = source_distribution
target_avg, target_std = target_distribution
z_scored_value = (value - source_avg) / source_std
return z_scored_value * target_std + target_avg
[docs]def filter_cell_reference(cell_id: int, reference: pd.DataFrame) -> dict:
"""
Filter reference data for given cell id.
Parameters
----------
cell_id
Unique cell id.
reference
Reference data for conversion.
Returns
-------
:
Reference data for given cell id.
"""
cell_reference = reference[reference["ID"] == cell_id].squeeze()
return cell_reference.to_dict() if not cell_reference.empty else {}