Coverage for src/arcade_collection/input/convert_to_cells_file.py

1import pandas as pd

def convert_to_cells_file(
    samples: pd.DataFrame,
    reference: pd.DataFrame,
    volume_distributions: dict[str, tuple[float, float]],
    height_distributions: dict[str, tuple[float, float]],
    critical_volume_distributions: dict[str, tuple[float, float]],
    critical_height_distributions: dict[str, tuple[float, float]],
    state_thresholds: dict[str, float],
) -> list[dict]:
    """
    Build the list of cell objects for every cell id found in the samples.

    Samples are grouped by their "id" column and each group is converted into
    one cell object. Reference data for a cell is looked up using the original
    sample id, while the emitted cell objects are numbered consecutively from 1
    in group iteration order.

    The distribution maps and state thresholds are passed through unchanged to
    the per-cell conversion, which rescales current volume and height to
    critical volume and critical height and estimates the initial cell state
    and phase.

    Parameters
    ----------
    samples
        Sample cell ids and coordinates.
    reference
        Reference values for volumes and heights.
    volume_distributions
        Map of volume means and standard deviations.
    height_distributions
        Map of height means and standard deviations.
    critical_volume_distributions
        Map of critical volume means and standard deviations.
    critical_height_distributions
        Map of critical height means and standard deviations.
    state_thresholds
        Critical volume fractions defining threshold between states.

    Returns
    -------
    :
        List of cell objects formatted for ARCADE.
    """

    grouped_samples = samples.groupby("id")

    # The new id is the 1-based position of the group; the original id is only
    # used to select the matching reference data.
    return [
        convert_to_cell(
            new_id,
            cell_samples,
            filter_cell_reference(original_id, reference),
            volume_distributions,
            height_distributions,
            critical_volume_distributions,
            critical_height_distributions,
            state_thresholds,
        )
        for new_id, (original_id, cell_samples) in enumerate(grouped_samples, start=1)
    ]

def convert_to_cell(
    cell_id: int,
    samples: pd.DataFrame,
    reference: dict,
    volume_distributions: dict[str, tuple[float, float]],
    height_distributions: dict[str, tuple[float, float]],
    critical_volume_distributions: dict[str, tuple[float, float]],
    critical_height_distributions: dict[str, tuple[float, float]],
    state_thresholds: dict[str, float],
) -> dict:
    """
    Build a single cell object from the voxel samples of one cell.

    The sampled volume is the number of sample rows and the sampled height is
    the difference between the largest and smallest z coordinate. Critical
    volume and critical height are obtained by rescaling the current value from
    the "DEFAULT" source distribution to the "DEFAULT" critical distribution. A
    "volume" or "height" entry in the reference data replaces the sampled value
    as the input to that rescaling.

    The cell phase is estimated from the sampled volume, the critical volume,
    and the state thresholds. The cell state is the portion of the phase name
    before the first underscore.

    If the samples have a "region" column that is not entirely missing, the
    samples are grouped by that column and one region object per group is
    stored under the "regions" key.

    Parameters
    ----------
    cell_id
        Unique cell id.
    samples
        Sample coordinates for a single object.
    reference
        Reference data for cell.
    volume_distributions
        Map of volume means and standard deviations.
    height_distributions
        Map of height means and standard deviations.
    critical_volume_distributions
        Map of critical volume means and standard deviations.
    critical_height_distributions
        Map of critical height means and standard deviations.
    state_thresholds
        Critical volume fractions defining threshold between states.

    Returns
    -------
    :
        Cell object formatted for ARCADE.
    """

    voxel_count = len(samples)
    z_extent = samples.z.max() - samples.z.min()

    # Reference values, when present, take precedence over sampled values as
    # the input to the distribution rescaling.
    critical_volume = convert_value_distribution(
        reference.get("volume", voxel_count),
        volume_distributions["DEFAULT"],
        critical_volume_distributions["DEFAULT"],
    )
    critical_height = convert_value_distribution(
        reference.get("height", z_extent),
        height_distributions["DEFAULT"],
        critical_height_distributions["DEFAULT"],
    )

    # The phase is estimated from the sampled voxel count, not the reference volume.
    phase = get_cell_state(voxel_count, critical_volume, state_thresholds)

    cell = {
        "id": cell_id,
        "parent": 0,
        "pop": 1,
        "age": 0,
        "divisions": 0,
        "state": phase.split("_")[0],
        "phase": phase,
        "voxels": voxel_count,
        "criticals": [critical_volume, critical_height],
    }

    # No usable region information: the cell object is already complete.
    if "region" not in samples.columns or samples["region"].isna().all():
        return cell

    cell["regions"] = [
        convert_to_cell_region(
            region_name,
            region_group,
            reference,
            volume_distributions,
            height_distributions,
            critical_volume_distributions,
            critical_height_distributions,
        )
        for region_name, region_group in samples.groupby("region")
    ]

    return cell

165

166

def convert_to_cell_region(
    region: str,
    region_samples: pd.DataFrame,
    reference: dict,
    volume_distributions: dict[str, tuple[float, float]],
    height_distributions: dict[str, tuple[float, float]],
    critical_volume_distributions: dict[str, tuple[float, float]],
    critical_height_distributions: dict[str, tuple[float, float]],
) -> dict:
    """
    Build a cell region object from the voxel samples of one region.

    The sampled region volume is the number of sample rows and the sampled
    region height is the difference between the largest and smallest z
    coordinate. Critical volume and critical height are obtained by rescaling
    the current value from the source distribution for the region to the
    critical distribution for the region. Reference entries keyed as
    "volume.<region>" and "height.<region>" replace the sampled values as the
    input to that rescaling.

    Parameters
    ----------
    region
        Region name.
    region_samples
        Sample coordinates for region of a single object.
    reference
        Reference data for cell region.
    volume_distributions
        Map of volume means and standard deviations.
    height_distributions
        Map of height means and standard deviations.
    critical_volume_distributions
        Map of critical volume means and standard deviations.
    critical_height_distributions
        Map of critical height means and standard deviations.

    Returns
    -------
    :
        Cell region object formatted for ARCADE.
    """

    voxel_count = len(region_samples)
    z_values = region_samples.z
    z_extent = z_values.max() - z_values.min()

    critical_volume = convert_value_distribution(
        reference.get(f"volume.{region}", voxel_count),
        volume_distributions[region],
        critical_volume_distributions[region],
    )
    critical_height = convert_value_distribution(
        reference.get(f"height.{region}", z_extent),
        height_distributions[region],
        critical_height_distributions[region],
    )

    # The reported voxel count is always the sampled count, even when a
    # reference volume was used for the critical volume.
    return {
        "region": region,
        "voxels": voxel_count,
        "criticals": [critical_volume, critical_height],
    }

229

230

def get_cell_state(
    volume: float,
    critical_volume: float,
    threshold_fractions: dict[str, float],
) -> str:
    """
    Pick the cell state whose volume threshold the cell has not yet reached.

    Each threshold fraction is multiplied by the critical volume to give the
    volume threshold of its state. States are examined in the order they appear
    in the mapping, and the first state whose threshold is strictly greater than
    the current volume is selected. The fractions are therefore expected to be
    monotonically increasing in mapping order.

    If no threshold exceeds the current volume, the last state in the mapping
    is selected.

    Parameters
    ----------
    volume
        Current cell volume.
    critical_volume
        Critical cell volume.
    threshold_fractions
        Critical volume fractions defining threshold between states.

    Returns
    -------
    :
        Cell state.
    """

    volume_thresholds = {
        state: fraction * critical_volume for state, fraction in threshold_fractions.items()
    }

    for state, volume_threshold in volume_thresholds.items():
        if volume_threshold > volume:
            return state

    # Volume is at or beyond every threshold: fall back to the final state.
    return list(volume_thresholds)[-1]

269

270

def convert_value_distribution(
    value: float,
    source_distribution: tuple[float, float],
    target_distribution: tuple[float, float],
) -> float:
    """
    Map a value from a source distribution onto a target distribution.

    The value is expressed as a number of source standard deviations away from
    the source average, and the result is the value that lies the same number
    of target standard deviations away from the target average.

    Parameters
    ----------
    value
        Source value.
    source_distribution
        Average and standard deviation of source value distribution.
    target_distribution
        Average and standard deviation of target value distribution.

    Returns
    -------
    :
        Value at the equivalent position in the target distribution.
    """

    source_mean, source_spread = source_distribution
    target_mean, target_spread = target_distribution

    deviations_from_mean = (value - source_mean) / source_spread

    return deviations_from_mean * target_spread + target_mean

298

299

def filter_cell_reference(cell_id: int, reference: pd.DataFrame) -> dict:
    """
    Filter reference data for given cell id.

    Rows of the reference data whose "ID" column equals the cell id are
    selected. A single matching row is returned as a map of column name to
    value. If no row matches, an empty map is returned.

    If more than one row matches, the selection is not collapsed to a single
    row and the result is whatever ``DataFrame.to_dict`` produces for the
    matching rows.

    Parameters
    ----------
    cell_id
        Unique cell id.
    reference
        Reference data for conversion.

    Returns
    -------
    :
        Reference data for given cell id.
    """

    matches = reference[reference["ID"] == cell_id]

    if matches.empty:
        return {}

    # Squeeze the row axis only. Squeezing every axis would collapse a
    # one-row, one-column selection into a bare scalar, which has neither
    # `.empty` nor `.to_dict`.
    return matches.squeeze(axis=0).to_dict()

Coverage for src/arcade_collection/input/convert_to_cells_file.py: 100%

38 statements