Coverage for src/arcade_collection/input/convert_to_cells_file.py: 100%
38 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-12-09 19:07 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2024-12-09 19:07 +0000
1import pandas as pd
def convert_to_cells_file(
    samples: pd.DataFrame,
    reference: pd.DataFrame,
    volume_distributions: dict[str, tuple[float, float]],
    height_distributions: dict[str, tuple[float, float]],
    critical_volume_distributions: dict[str, tuple[float, float]],
    critical_height_distributions: dict[str, tuple[float, float]],
    state_thresholds: dict[str, float],
) -> list[dict]:
    """
    Build the list of cell objects for every cell id found in the samples.

    Samples are grouped by their "id" column. For each group, the matching
    reference data is looked up with ``filter_cell_reference`` and the group is
    converted into a cell object with ``convert_to_cell``, which handles the
    rescaling to critical volume and height and the estimation of the initial
    state and phase.

    Cell objects are numbered consecutively from 1 in group iteration order; the
    original sample ids are only used to look up reference data.

    Parameters
    ----------
    samples
        Sample cell ids and coordinates.
    reference
        Reference values for volumes and heights.
    volume_distributions
        Map of volume means and standard deviations.
    height_distributions
        Map of height means and standard deviations.
    critical_volume_distributions
        Map of critical volume means and standard deviations.
    critical_height_distributions
        Map of critical height means and standard deviations.
    state_thresholds
        Critical volume fractions defining threshold between states.

    Returns
    -------
    :
        List of cell objects formatted for ARCADE.
    """

    return [
        convert_to_cell(
            new_id,
            cell_samples,
            filter_cell_reference(sample_id, reference),
            volume_distributions,
            height_distributions,
            critical_volume_distributions,
            critical_height_distributions,
            state_thresholds,
        )
        for new_id, (sample_id, cell_samples) in enumerate(samples.groupby("id"), start=1)
    ]
def convert_to_cell(
    cell_id: int,
    samples: pd.DataFrame,
    reference: dict,
    volume_distributions: dict[str, tuple[float, float]],
    height_distributions: dict[str, tuple[float, float]],
    critical_volume_distributions: dict[str, tuple[float, float]],
    critical_height_distributions: dict[str, tuple[float, float]],
    state_thresholds: dict[str, float],
) -> dict:
    """
    Build a single cell object from its voxel samples.

    The voxel count is the number of sample rows and the sampled height is the
    difference between the largest and smallest z coordinate. The values that
    are rescaled to the critical volume and critical height are taken from the
    "volume" and "height" entries of the reference data when present, and from
    the voxel count and sampled height otherwise. Rescaling uses the "DEFAULT"
    entries of the distribution maps.

    The cell phase is estimated from the voxel count, the critical volume, and
    the state thresholds; the cell state is the part of the phase name before
    the first underscore.

    If the samples have a "region" column that is not entirely missing, a
    "regions" entry with one region object per region group is added.

    Parameters
    ----------
    cell_id
        Unique cell id.
    samples
        Sample coordinates for a single object.
    reference
        Reference data for cell.
    volume_distributions
        Map of volume means and standard deviations.
    height_distributions
        Map of height means and standard deviations.
    critical_volume_distributions
        Map of critical volume means and standard deviations.
    critical_height_distributions
        Map of critical height means and standard deviations.
    state_thresholds
        Critical volume fractions defining threshold between states.

    Returns
    -------
    :
        Cell object formatted for ARCADE.
    """

    voxel_count = len(samples)
    z_extent = samples.z.max() - samples.z.min()

    # Reference values, when available, take precedence over sampled values.
    current_volume = reference.get("volume", voxel_count)
    critical_volume = convert_value_distribution(
        current_volume,
        volume_distributions["DEFAULT"],
        critical_volume_distributions["DEFAULT"],
    )

    current_height = reference.get("height", z_extent)
    critical_height = convert_value_distribution(
        current_height,
        height_distributions["DEFAULT"],
        critical_height_distributions["DEFAULT"],
    )

    # The phase is always estimated from the voxel count, not the reference.
    phase = get_cell_state(voxel_count, critical_volume, state_thresholds)

    cell = {
        "id": cell_id,
        "parent": 0,
        "pop": 1,
        "age": 0,
        "divisions": 0,
        "state": phase.split("_")[0],
        "phase": phase,
        "voxels": voxel_count,
        "criticals": [critical_volume, critical_height],
    }

    # Without usable region labels, the cell object has no "regions" entry.
    if "region" not in samples.columns or samples["region"].isna().all():
        return cell

    cell["regions"] = [
        convert_to_cell_region(
            region_name,
            region_group,
            reference,
            volume_distributions,
            height_distributions,
            critical_volume_distributions,
            critical_height_distributions,
        )
        for region_name, region_group in samples.groupby("region")
    ]

    return cell
def convert_to_cell_region(
    region: str,
    region_samples: pd.DataFrame,
    reference: dict,
    volume_distributions: dict[str, tuple[float, float]],
    height_distributions: dict[str, tuple[float, float]],
    critical_volume_distributions: dict[str, tuple[float, float]],
    critical_height_distributions: dict[str, tuple[float, float]],
) -> dict:
    """
    Build a cell region object from the voxel samples of one region.

    The region voxel count is the number of sample rows and the sampled region
    height is the difference between the largest and smallest z coordinate. The
    values that are rescaled to the critical region volume and height are taken
    from the "volume.<region>" and "height.<region>" entries of the reference
    data when present, and from the voxel count and sampled height otherwise.
    Rescaling uses the distribution map entries keyed by the region name.

    Parameters
    ----------
    region
        Region name.
    region_samples
        Sample coordinates for region of a single object.
    reference
        Reference data for cell region.
    volume_distributions
        Map of volume means and standard deviations.
    height_distributions
        Map of height means and standard deviations.
    critical_volume_distributions
        Map of critical volume means and standard deviations.
    critical_height_distributions
        Map of critical height means and standard deviations.

    Returns
    -------
    :
        Cell region object formatted for ARCADE.
    """

    voxel_count = len(region_samples)
    z_extent = region_samples.z.max() - region_samples.z.min()

    # Reference values, when available, take precedence over sampled values.
    current_volume = reference.get(f"volume.{region}", voxel_count)
    critical_volume = convert_value_distribution(
        current_volume,
        volume_distributions[region],
        critical_volume_distributions[region],
    )

    current_height = reference.get(f"height.{region}", z_extent)
    critical_height = convert_value_distribution(
        current_height,
        height_distributions[region],
        critical_height_distributions[region],
    )

    return {
        "region": region,
        "voxels": voxel_count,
        "criticals": [critical_volume, critical_height],
    }
def get_cell_state(
    volume: float,
    critical_volume: float,
    threshold_fractions: dict[str, float],
) -> str:
    """
    Estimate cell state based on cell volume.

    Each threshold fraction is multiplied by the critical volume to give a
    volume threshold for its state. States are checked in the iteration order of
    the threshold fractions dictionary, which is expected to list monotonic
    thresholds. The returned state is the first one whose volume threshold is
    strictly greater than the cell volume.

    For a volume v, critical volume V, and states X1, X2, ..., XN with threshold
    fractions f1, f2, ..., fN:

    - Cells with v < f1 * V are assigned state X1.
    - Cells with f(i - 1) * V <= v < fi * V are assigned state Xi.
    - Cells with v >= fN * V are assigned state XN.

    Parameters
    ----------
    volume
        Current cell volume.
    critical_volume
        Critical cell volume.
    threshold_fractions
        Critical volume fractions defining threshold between states.

    Returns
    -------
    :
        Cell state.
    """

    state_bounds = [
        (state, fraction * critical_volume) for state, fraction in threshold_fractions.items()
    ]

    for state, upper_bound in state_bounds:
        if upper_bound > volume:
            return state

    # No threshold exceeds the volume, so fall back to the last listed state.
    return state_bounds[-1][0]
def convert_value_distribution(
    value: float,
    source_distribution: tuple[float, float],
    target_distribution: tuple[float, float],
) -> float:
    """
    Map a value from the source distribution onto the target distribution.

    The value is standardized against the source distribution (subtract the
    source average, divide by the source standard deviation) and then expressed
    in the target distribution (multiply by the target standard deviation, add
    the target average).

    Parameters
    ----------
    value
        Source value.
    source_distribution
        Average and standard deviation of source value distribution.
    target_distribution
        Average and standard deviation of target value distribution.

    Returns
    -------
    :
        Estimated critical value.
    """

    (source_mean, source_spread), (target_mean, target_spread) = (
        source_distribution,
        target_distribution,
    )

    standardized = (value - source_mean) / source_spread
    return standardized * target_spread + target_mean
def filter_cell_reference(cell_id: int, reference: pd.DataFrame) -> dict:
    """
    Filter reference data for given cell id.

    Rows of the reference data whose "ID" column equals the cell id are
    selected. If at least one row matches, the first matching row is returned as
    a flat dictionary keyed by column name. If no row matches, an empty
    dictionary is returned.

    Parameters
    ----------
    cell_id
        Unique cell id.
    reference
        Reference data for conversion.

    Returns
    -------
    :
        Reference data for given cell id.
    """

    matches = reference[reference["ID"] == cell_id]

    if matches.empty:
        return {}

    # Select the row explicitly rather than squeezing the selection: squeeze()
    # collapses a one-row, one-column selection to a scalar and leaves a
    # multi-row selection as a table, neither of which converts to a flat
    # column-to-value dictionary.
    return matches.iloc[0].to_dict()