Coverage for src/arcade_collection/output/parse_cells_file.py: 100%
25 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-12-09 19:07 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2024-12-09 19:07 +0000
1from __future__ import annotations
3import json
4from typing import TYPE_CHECKING
6import pandas as pd
8if TYPE_CHECKING:
9 import tarfile
# The order mirrors the row layout built by ``parse_cell_tick``: the cell id
# and tick come first, followed by the per-cell features in the same order
# they are read from the cell record.
CELLS_COLUMNS = [
    "ID", "TICK",
    "PARENT", "POPULATION", "AGE", "DIVISIONS",
    "STATE", "PHASE",
    "NUM_VOXELS",
]
"""Column names for cells data parsed into tidy data format."""
def parse_cells_file(tar: tarfile.TarFile, regions: list[str]) -> pd.DataFrame:
    """
    Parse simulation cells data into tidy data format.

    Every member of the archive that can be opened as a file object is decoded
    as UTF-8 JSON and treated as a list of cell records for a single tick. The
    tick is read from the member name: the ``.CELLS.json`` suffix is removed
    and the text after the last underscore is converted to an integer.

    Parameters
    ----------
    tar
        Tar archive containing cells data.
    regions
        List of regions.

    Returns
    -------
    :
        Parsed cells data, with one row per cell per tick. Columns are
        ``CELLS_COLUMNS`` followed by one ``NUM_VOXELS.<region>`` column for
        each entry in ``regions``.
    """

    all_cells: list[list[str | int]] = []

    for member in tar.getmembers():
        extracted_member = tar.extractfile(member)

        # Members without readable file content (e.g. directories) yield None.
        if extracted_member is None:
            continue

        tick = int(member.name.replace(".CELLS.json", "").split("_")[-1])
        cells_json = json.loads(extracted_member.read().decode("utf-8"))

        # Extend in place; rebuilding the list with ``+`` for every member
        # would copy all previously parsed rows on each iteration.
        all_cells.extend(parse_cell_tick(tick, cell, regions) for cell in cells_json)

    columns = CELLS_COLUMNS + [f"NUM_VOXELS.{region}" for region in regions]
    return pd.DataFrame(all_cells, columns=columns)
def parse_cell_tick(tick: int, cell: dict, regions: list[str]) -> list:
    """
    Flatten one cell record from a single simulation tick into a row.

    The input cell record has the form:

    .. code-block:: python

        {
            "id": cell_id,
            "parent": parent_id,
            "pop": population,
            "age": age,
            "divisions": divisions,
            "state": state,
            "phase": phase,
            "voxels": voxels,
            "criticals": [critical_volume, critical_height],
            "regions": [
                {
                    "region": region_name,
                    "voxels": region_voxels,
                    "criticals": [critical_region_volume, critical_region_height]
                },
                ...
            ]
        }

    The returned row has the form:

    .. code-block:: python

        [ cell_id, tick, parent_id, population, age, divisions, state, phase, voxels ]

    If regions are requested and the cell record has a ``"regions"`` entry,
    the voxel count of each matching region is appended to the row, following
    the order of the requested regions.

    Parameters
    ----------
    tick
        Simulation tick.
    cell
        Original cell data.
    regions
        List of regions.

    Returns
    -------
    :
        Parsed cell data.
    """

    row = [cell["id"], tick]
    for key in ("parent", "pop", "age", "divisions", "state", "phase", "voxels"):
        row.append(cell[key])

    # Nothing more to add when no regions were requested or none are recorded.
    if not regions or "regions" not in cell:
        return row

    # Outer loop over the requested regions keeps the output in caller order.
    for requested in regions:
        for entry in cell["regions"]:
            if entry["region"] == requested:
                row.append(entry["voxels"])

    return row