Coverage for src/arcade_collection/output/parse_cells_file.py: 100%

25 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2024-12-09 19:07 +0000

1from __future__ import annotations 

2 

3import json 

4from typing import TYPE_CHECKING 

5 

6import pandas as pd 

7 

8if TYPE_CHECKING: 

9 import tarfile 

10 

CELLS_COLUMNS = [
    "ID", "TICK", "PARENT", "POPULATION",  # cell id, tick, parent id, population
    "AGE", "DIVISIONS", "STATE", "PHASE",  # per-cell status fields
    "NUM_VOXELS",  # voxel count for the whole cell
]
"""Base column names, in order, for cells data parsed into tidy data format."""

23 

24 

def parse_cells_file(tar: tarfile.TarFile, regions: list[str]) -> pd.DataFrame:
    """
    Parse simulation cells data into tidy data format.

    Every member of the archive that can be extracted as a file is read as a
    UTF-8 encoded JSON list of cells. The simulation tick is taken from the
    member name: the ``.CELLS.json`` suffix is removed and the text after the
    last underscore is converted to an integer.

    Parameters
    ----------
    tar
        Tar archive containing cells data.
    regions
        List of regions.

    Returns
    -------
    :
        Parsed cells data, one row per cell per tick. Columns are
        ``CELLS_COLUMNS`` followed by one ``NUM_VOXELS.<region>`` column for
        each entry in ``regions``.

    Raises
    ------
    ValueError
        If the tick portion of a member name is not an integer.
    """

    all_cells: list[list[str | int]] = []

    for member in tar.getmembers():
        extracted_member = tar.extractfile(member)

        # Members without file contents cannot be read; skip them.
        if extracted_member is None:
            continue

        # Close the extracted file object as soon as its contents are parsed.
        with extracted_member:
            tick = int(member.name.replace(".CELLS.json", "").split("_")[-1])
            cells_json = json.loads(extracted_member.read().decode("utf-8"))

        # Extend in place rather than rebuilding the accumulated list for
        # every archive member.
        all_cells.extend(parse_cell_tick(tick, cell, regions) for cell in cells_json)

    columns = CELLS_COLUMNS + [f"NUM_VOXELS.{region}" for region in regions]
    return pd.DataFrame(all_cells, columns=columns)

58 

59 

def parse_cell_tick(tick: int, cell: dict, regions: list[str]) -> list:
    """
    Parse cell data for a single simulation tick.

    Original data is formatted as:

    .. code-block:: python

        {
            "id": cell_id,
            "parent": parent_id,
            "pop": population,
            "age": age,
            "divisions": divisions,
            "state": state,
            "phase": phase,
            "voxels": voxels,
            "criticals": [critical_volume, critical_height],
            "regions": [
                {
                    "region": region_name,
                    "voxels": region_voxels,
                    "criticals": [critical_region_volume, critical_region_height]
                },
                ...
            ]
        }

    Parsed data is formatted as:

    .. code-block:: python

        [ cell_id, tick, parent_id, population, age, divisions, state, phase, voxels ]

    When regions are specified and the cell has a "regions" entry, the list
    also contains exactly one value per requested region, in the order given by
    ``regions``: the number of voxels for that region, or ``None`` if the cell
    does not list the region. Keeping one slot per region keeps the values
    aligned with their positions in ``regions``. If a cell lists the same
    region more than once, the last entry is used.

    Parameters
    ----------
    tick
        Simulation tick.
    cell
        Original cell data.
    regions
        List of regions.

    Returns
    -------
    :
        Parsed cell data.

    Raises
    ------
    KeyError
        If the cell is missing "id" or any of the base features.
    """

    features = ["parent", "pop", "age", "divisions", "state", "phase", "voxels"]
    parsed = [cell["id"], tick] + [cell[feature] for feature in features]

    # Without requested regions, or without region data on the cell, only the
    # base fields are returned.
    if not regions or "regions" not in cell:
        return parsed

    # Index the requested regions by name so each one fills its own slot.
    voxels_by_region = {
        cell_region["region"]: cell_region["voxels"]
        for cell_region in cell["regions"]
        if cell_region["region"] in regions
    }
    parsed.extend(voxels_by_region.get(region) for region in regions)

    return parsed