Coverage for src/arcade_collection/output/parse_locations_file.py: 100%

36 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2024-12-09 19:07 +0000

1from __future__ import annotations 

2 

3import json 

4from typing import TYPE_CHECKING 

5 

6import numpy as np 

7import pandas as pd 

8 

9if TYPE_CHECKING: 

10 import tarfile 

11 

12 

LOCATIONS_COLUMNS = [
    "ID",
    "TICK",
    # One column per (statistic, axis) pair, grouped by statistic: CENTER_*, MIN_*, MAX_*.
    *(f"{stat}_{axis}" for stat in ("CENTER", "MIN", "MAX") for axis in ("X", "Y", "Z")),
]
"""Column names for locations data parsed into tidy data format."""

27 

28 

def parse_locations_file(tar: tarfile.TarFile, regions: list[str]) -> pd.DataFrame:
    """
    Parse simulation locations data into tidy data format.

    Every member of the archive that yields file content is decoded as UTF-8
    JSON and treated as a list of per-cell location entries. The tick for a
    member is taken from its name: the ``.LOCATIONS.json`` text is removed and
    the part after the last underscore is converted to an integer. Each cell
    entry becomes one row of the returned data frame.

    The columns are the base ``LOCATIONS_COLUMNS`` followed, for each region in
    ``regions`` (in the given order), by the center / min / max columns named
    ``<COLUMN>.<region>``.

    Parameters
    ----------
    tar
        Tar archive containing locations data.
    regions
        List of regions.

    Returns
    -------
    :
        Parsed locations data.
    """

    all_locations: list[list[str | int]] = []

    for member in tar.getmembers():
        extracted_member = tar.extractfile(member)

        # Members that carry no file content (e.g. directories) are skipped.
        if extracted_member is None:
            continue

        tick = int(member.name.replace(".LOCATIONS.json", "").split("_")[-1])
        locations_json = json.loads(extracted_member.read().decode("utf-8"))

        # Extend in place: concatenating with ``+`` would copy every previously
        # collected row again for each archive member.
        all_locations.extend(parse_location_tick(tick, cell, regions) for cell in locations_json)

    # Region columns reuse the base names without the leading ID and TICK columns.
    columns = LOCATIONS_COLUMNS + [
        f"{column}.{region}" for region in regions for column in LOCATIONS_COLUMNS[2:]
    ]
    return pd.DataFrame(all_locations, columns=columns)

64 

65 

def parse_location_tick(tick: int, location: dict, regions: list[str]) -> list:
    """
    Flatten the location entry of a single cell at a single tick into one row.

    The incoming entry is formatted as:

    .. code-block:: python

        {
            "id": cell_id,
            "center": [center_x, center_y, center_z],
            "location": [
                {
                    "region": region,
                    "voxels": [
                        [x, y, z],
                        [x, y, z],
                        ...
                    ]
                },
                ...
            ]
        }

    The resulting row is formatted as:

    .. code-block:: python

        [ cell_id, tick, center_x, center_y, center_z, min_x, min_y, min_z, max_x, max_y, max_z ]

    The minimums and maximums are taken over the voxels of all regions
    together. If the entry has no ``"center"`` key, all nine center / minimum /
    maximum values are ``-1``.

    For every name in ``regions``, nine more values are appended in the same
    center / minimum / maximum order, computed only from voxels of that region.
    The region center is the voxel mean, with 0.5 added and then truncated to
    an integer. A region with no voxels contributes nine ``-1`` values.

    Parameters
    ----------
    tick
        Simulation tick.
    location
        Original location data.
    regions
        List of regions.

    Returns
    -------
    :
        Parsed location data.
    """

    if "center" not in location:
        row = [location["id"], tick] + [-1] * 9
    else:
        every_voxel = np.array(
            [voxel for entry in location["location"] for voxel in entry["voxels"]]
        )
        lower = every_voxel.min(axis=0)
        upper = every_voxel.max(axis=0)
        row = [location["id"], tick]
        row.extend(location["center"])
        row.extend(lower)
        row.extend(upper)

    for name in regions:
        # Collect the voxels belonging to this region, keeping their original order.
        matching = []
        for entry in location["location"]:
            for voxel in entry["voxels"]:
                if entry["region"] == name:
                    matching.append(voxel)

        region_voxels = np.array(matching)

        if not matching:
            row.extend([-1] * 9)
            continue

        # Round half up via truncation of (mean + 0.5).
        rounded_center = [int(component + 0.5) for component in region_voxels.mean(axis=0)]
        row.extend(rounded_center)
        row.extend(region_voxels.min(axis=0))
        row.extend(region_voxels.max(axis=0))

    return row