Coverage for src/arcade_collection/input/convert_to_cells_file.py: 100%

38 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2024-12-09 19:07 +0000

1import pandas as pd 

2 

3 

def convert_to_cells_file(
    samples: pd.DataFrame,
    reference: pd.DataFrame,
    volume_distributions: dict[str, tuple[float, float]],
    height_distributions: dict[str, tuple[float, float]],
    critical_volume_distributions: dict[str, tuple[float, float]],
    critical_height_distributions: dict[str, tuple[float, float]],
    state_thresholds: dict[str, float],
) -> list[dict]:
    """
    Build the list of ARCADE cell objects for every cell id in the samples.

    Samples are grouped by the "id" column. Each group is converted to a cell
    object together with the reference entry matching that original id (an
    empty reference is used when there is no match). The distribution maps and
    state thresholds are forwarded unchanged to the per-cell conversion.

    Cell objects are numbered consecutively starting at 1, in the order the
    groups are produced, regardless of the original sample ids.

    Parameters
    ----------
    samples
        Sample cell ids and coordinates.
    reference
        Reference values for volumes and heights.
    volume_distributions
        Map of volume means and standard deviations.
    height_distributions
        Map of height means and standard deviations.
    critical_volume_distributions
        Map of critical volume means and standard deviations.
    critical_height_distributions
        Map of critical height means and standard deviations.
    state_thresholds
        Critical volume fractions defining threshold between states.

    Returns
    -------
    :
        List of cell objects formatted for ARCADE.
    """

    return [
        convert_to_cell(
            new_id,
            cell_samples,
            # Reference rows are looked up by the original sample id, not the
            # reindexed id assigned to the output cell.
            filter_cell_reference(sample_id, reference),
            volume_distributions,
            height_distributions,
            critical_volume_distributions,
            critical_height_distributions,
            state_thresholds,
        )
        for new_id, (sample_id, cell_samples) in enumerate(
            samples.groupby("id"), start=1
        )
    ]

70 

71 

def convert_to_cell(
    cell_id: int,
    samples: pd.DataFrame,
    reference: dict,
    volume_distributions: dict[str, tuple[float, float]],
    height_distributions: dict[str, tuple[float, float]],
    critical_volume_distributions: dict[str, tuple[float, float]],
    critical_height_distributions: dict[str, tuple[float, float]],
    state_thresholds: dict[str, float],
) -> dict:
    """
    Build a single ARCADE cell object from its voxel samples.

    The critical volume and critical height are obtained by rescaling the
    current volume and height using the "DEFAULT" entries of the source and
    critical distribution maps. The current values come from the "volume" and
    "height" keys of the reference when present; otherwise the volume is the
    number of voxel samples and the height is the range of the z coordinates.

    The cell state phase is estimated from the state thresholds, the number of
    voxel samples, and the critical volume. The cell state is the part of the
    phase name before the first underscore.

    If the samples have a "region" column that is not entirely missing, one
    region object per region is attached under the "regions" key.

    Parameters
    ----------
    cell_id
        Unique cell id.
    samples
        Sample coordinates for a single object.
    reference
        Reference data for cell.
    volume_distributions
        Map of volume means and standard deviations.
    height_distributions
        Map of height means and standard deviations.
    critical_volume_distributions
        Map of critical volume means and standard deviations.
    critical_height_distributions
        Map of critical height means and standard deviations.
    state_thresholds
        Critical volume fractions defining threshold between states.

    Returns
    -------
    :
        Cell object formatted for ARCADE.
    """

    voxel_count = len(samples)
    z_extent = samples.z.max() - samples.z.min()

    # Reference values, when available, take precedence over sampled values
    # as the input to the rescaling.
    current_volume = reference.get("volume", voxel_count)
    critical_volume = convert_value_distribution(
        current_volume,
        volume_distributions["DEFAULT"],
        critical_volume_distributions["DEFAULT"],
    )

    current_height = reference.get("height", z_extent)
    critical_height = convert_value_distribution(
        current_height,
        height_distributions["DEFAULT"],
        critical_height_distributions["DEFAULT"],
    )

    # The state estimate always uses the sampled voxel count, even when a
    # reference volume was used for the rescaling above.
    phase = get_cell_state(voxel_count, critical_volume, state_thresholds)

    cell = {
        "id": cell_id,
        "parent": 0,
        "pop": 1,
        "age": 0,
        "divisions": 0,
        "state": phase.split("_")[0],
        "phase": phase,
        "voxels": voxel_count,
        "criticals": [critical_volume, critical_height],
    }

    has_regions = "region" in samples.columns and not samples["region"].isna().all()
    if not has_regions:
        return cell

    cell["regions"] = [
        convert_to_cell_region(
            region_name,
            region_group,
            reference,
            volume_distributions,
            height_distributions,
            critical_volume_distributions,
            critical_height_distributions,
        )
        for region_name, region_group in samples.groupby("region")
    ]
    return cell

165 

166 

def convert_to_cell_region(
    region: str,
    region_samples: pd.DataFrame,
    reference: dict,
    volume_distributions: dict[str, tuple[float, float]],
    height_distributions: dict[str, tuple[float, float]],
    critical_volume_distributions: dict[str, tuple[float, float]],
    critical_height_distributions: dict[str, tuple[float, float]],
) -> dict:
    """
    Build a single ARCADE cell region object from its voxel samples.

    The critical volume and critical height of the region are obtained by
    rescaling the current region volume and height using the distribution
    entries keyed by the region name. The current values come from the
    "volume.<region>" and "height.<region>" keys of the reference when
    present; otherwise the volume is the number of region voxel samples and
    the height is the range of the region z coordinates.

    Parameters
    ----------
    region
        Region name.
    region_samples
        Sample coordinates for region of a single object.
    reference
        Reference data for cell region.
    volume_distributions
        Map of volume means and standard deviations.
    height_distributions
        Map of height means and standard deviations.
    critical_volume_distributions
        Map of critical volume means and standard deviations.
    critical_height_distributions
        Map of critical height means and standard deviations.

    Returns
    -------
    :
        Cell region object formatted for ARCADE.
    """

    voxel_count = len(region_samples)
    z_extent = region_samples.z.max() - region_samples.z.min()

    critical_volume = convert_value_distribution(
        reference.get(f"volume.{region}", voxel_count),
        volume_distributions[region],
        critical_volume_distributions[region],
    )
    critical_height = convert_value_distribution(
        reference.get(f"height.{region}", z_extent),
        height_distributions[region],
        critical_height_distributions[region],
    )

    # "voxels" is always the sampled voxel count, never the reference volume.
    return {
        "region": region,
        "voxels": voxel_count,
        "criticals": [critical_volume, critical_height],
    }

229 

230 

def get_cell_state(
    volume: float,
    critical_volume: float,
    threshold_fractions: dict[str, float],
) -> str:
    """
    Estimate cell state based on cell volume.

    The threshold fractions dictionary defines the monotonic thresholds between
    different cell states, in insertion order. For a given volume v, critical
    volume V, and states X1, X2, ..., XN with corresponding, monotonic
    threshold fractions f1, f2, ..., fN, a cell is assigned state Xi such that
    [f(i - 1) * V] <= v < [fi * V].

    Cells with v < f1 * V are assigned state X1.

    Cells with v >= fN * V are assigned state XN.

    Parameters
    ----------
    volume
        Current cell volume.
    critical_volume
        Critical cell volume.
    threshold_fractions
        Critical volume fractions defining threshold between states.

    Returns
    -------
    :
        Cell state.

    Raises
    ------
    ValueError
        If no threshold fractions are given, so no state can be assigned.
    """

    if not threshold_fractions:
        # Without this guard the fallback below would fail with an opaque
        # "list index out of range" error.
        message = "threshold_fractions must define at least one state"
        raise ValueError(message)

    # The first state whose scaled threshold exceeds the volume wins.
    for state, fraction in threshold_fractions.items():
        if fraction * critical_volume > volume:
            return state

    # Volume is at or above every threshold: fall back to the last state.
    return list(threshold_fractions)[-1]

269 

270 

def convert_value_distribution(
    value: float,
    source_distribution: tuple[float, float],
    target_distribution: tuple[float, float],
) -> float:
    """
    Map a value from the source distribution onto the target distribution.

    The value is expressed as a z-score relative to the source distribution
    (distance from the source average in units of the source standard
    deviation) and the value with the same z-score in the target distribution
    is returned.

    Parameters
    ----------
    value
        Source value.
    source_distribution
        Average and standard deviation of source value distribution.
    target_distribution
        Average and standard deviation of target value distribution.

    Returns
    -------
    :
        Estimated critical value.
    """

    (source_mean, source_spread), (target_mean, target_spread) = (
        source_distribution,
        target_distribution,
    )
    standardized = (value - source_mean) / source_spread
    return target_mean + target_spread * standardized

298 

299 

def filter_cell_reference(cell_id: int, reference: pd.DataFrame) -> dict:
    """
    Filter reference data for given cell id.

    Rows of the reference whose "ID" column equals the cell id are selected.
    If there is no matching row, an empty dictionary is returned. If more than
    one row matches, the first matching row is used.

    Parameters
    ----------
    cell_id
        Unique cell id.
    reference
        Reference data for conversion.

    Returns
    -------
    :
        Reference data for given cell id, as a map of column name to value.
    """

    matches = reference[reference["ID"] == cell_id]

    if matches.empty:
        return {}

    # Select the row explicitly instead of squeezing: an unqualified squeeze
    # collapses a one-row, one-column frame to a scalar and leaves a frame
    # with duplicate ids unsqueezed, neither of which yields a flat record.
    return matches.iloc[0].to_dict()