Coverage for src/arcade_collection/output/parse_locations_file.py: 100%
36 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-12-09 19:07 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2024-12-09 19:07 +0000
1from __future__ import annotations
3import json
4from typing import TYPE_CHECKING
6import numpy as np
7import pandas as pd
9if TYPE_CHECKING:
10 import tarfile
LOCATIONS_COLUMNS = [
    "ID",
    "TICK",
    # One column per (measure, axis) pair, grouped by measure: the three
    # CENTER_* columns first, then MIN_*, then MAX_*.
    *(
        f"{measure}_{axis}"
        for measure in ("CENTER", "MIN", "MAX")
        for axis in ("X", "Y", "Z")
    ),
]
"""Column names for locations data parsed into tidy data format."""
def parse_locations_file(tar: tarfile.TarFile, regions: list[str]) -> pd.DataFrame:
    """
    Parse simulation locations data into tidy data format.

    Every readable member of the archive is decoded as UTF-8 JSON holding a
    list of cell locations. The simulation tick is taken from the member name:
    the ``.LOCATIONS.json`` suffix is removed and the last ``_``-separated
    token is converted to an integer.

    Parameters
    ----------
    tar
        Tar archive containing locations data.
    regions
        List of regions.

    Returns
    -------
    :
        Parsed locations data, with one row per cell per tick. Columns are
        ``LOCATIONS_COLUMNS`` followed by ``<column>.<region>`` for every
        region and every column after ``ID`` and ``TICK``.
    """

    all_locations: list[list[str | int]] = []

    for member in tar.getmembers():
        extracted_member = tar.extractfile(member)

        # Members without readable contents (extractfile returns None) are skipped.
        if extracted_member is None:
            continue

        # Release the extracted file object as soon as its contents are parsed.
        with extracted_member:
            tick = int(member.name.replace(".LOCATIONS.json", "").split("_")[-1])
            locations_json = json.loads(extracted_member.read().decode("utf-8"))

        # Extend in place; rebuilding the list with "+" for every member would
        # copy all previously collected rows each time.
        all_locations.extend(parse_location_tick(tick, cell, regions) for cell in locations_json)

    columns = LOCATIONS_COLUMNS + [
        f"{column}.{region}" for region in regions for column in LOCATIONS_COLUMNS[2:]
    ]
    return pd.DataFrame(all_locations, columns=columns)
def parse_location_tick(tick: int, location: dict, regions: list[str]) -> list:
    """
    Parse location data for a single simulation tick.

    Original data is formatted as:

    .. code-block:: python

        {
            "id": cell_id,
            "center": [center_x, center_y, center_z],
            "location": [
                {
                    "region": region,
                    "voxels": [
                        [x, y, z],
                        [x, y, z],
                        ...
                    ]
                },
                {
                    "region": region,
                    "voxels": [
                        [x, y, z],
                        [x, y, z],
                        ...
                    ]
                },
                ...
            ]
        }

    Parsed data is formatted as:

    .. code-block:: python

        [ cell_id, tick, center_x, center_y, center_z, min_x, min_y, min_z, max_x, max_y, max_z ]

    When regions are specified, each list also contains centers, minimums, and
    maximums for the corresponding regions. Region centers are the voxel means
    rounded half up to the nearest integer.

    Values that cannot be computed are reported as -1: all nine values when the
    location has no ``"center"`` entry, the six bounds when a location has a
    center but no voxels, and all nine values of a region without voxels.

    Parameters
    ----------
    tick
        Simulation tick.
    location
        Original location data.
    regions
        List of regions.

    Returns
    -------
    :
        Parsed location data.
    """

    # A missing "location" entry is treated the same as an empty one.
    entries = location.get("location", [])

    if "center" in location:
        voxels = np.array([voxel for entry in entries for voxel in entry["voxels"]])

        if voxels.size == 0:
            # np.min / np.max raise on an empty array, so fall back to the
            # same -1 placeholders used for regions without voxels.
            bounds = [-1] * 6
        else:
            bounds = [*np.min(voxels, axis=0), *np.max(voxels, axis=0)]

        parsed = [location["id"], tick, *location["center"], *bounds]
    else:
        parsed = [location["id"], tick, -1, -1, -1, -1, -1, -1, -1, -1, -1]

    # Group voxels by region once, rather than rescanning every voxel for each
    # requested region.
    voxels_by_region: dict[str, list] = {}
    if regions:
        for entry in entries:
            voxels_by_region.setdefault(entry["region"], []).extend(entry["voxels"])

    for reg in regions:
        region_voxels = np.array(voxels_by_region.get(reg, []))

        if region_voxels.size == 0:
            parsed.extend([-1] * 9)
            continue

        # floor(value + 0.5) rounds half up for negative means as well;
        # int(value + 0.5) would truncate toward zero (e.g. -0.5 -> 0).
        center = [int(np.floor(value + 0.5)) for value in region_voxels.mean(axis=0)]
        mins = np.min(region_voxels, axis=0)
        maxs = np.max(region_voxels, axis=0)
        parsed.extend([*center, *mins, *maxs])

    return parsed