Coverage for src/arcade_collection/convert/convert_to_tfe.py: 100%

30 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2024-12-09 19:07 +0000

1from __future__ import annotations 

2 

3from typing import TYPE_CHECKING 

4 

5import numpy as np 

6 

7if TYPE_CHECKING: 

8 import pandas as pd 

9 

10 

def convert_to_tfe(
    all_data: pd.DataFrame, features: list[tuple[str, str, str]], frame_spec: tuple[int, int, int]
) -> dict:
    """
    Generate TFE manifest and feature data for simulation.

    Only rows whose TICK matches one of the selected frames are exported.
    Categories of categorical features are collected from every row of
    ``all_data`` rather than only from the selected frames.

    Parameters
    ----------
    all_data
        Simulation data containing ID, TICK, and time.
    features
        List of feature keys, names, and data types.
    frame_spec
        Specification for frames, unpacked into ``np.arange`` as
        (start, stop, step).

    Returns
    -------
    :
        TFE manifest and feature data
    """

    frames = list(np.arange(*frame_spec))
    manifest = get_manifest_data(features, frames)

    # Restrict the export to rows recorded at one of the selected frames.
    frame_data = all_data[all_data["TICK"].isin(frames)]

    tfe_json = {
        "manifest": manifest,
        "tracks": get_tracks_from_data(frame_data),
        "times": get_times_from_data(frame_data),
        "features": {},
    }

    for index, (key, _, dtype) in enumerate(features):
        categories = None

        if dtype == "categorical":
            # Iterating a Series yields plain Python scalars, whereas iterating
            # the array returned by ``unique()`` yields NumPy scalars (e.g.
            # ``np.int64``) that ``json.dumps`` cannot serialize. Order of
            # first appearance is kept.
            categories = list(all_data[key].drop_duplicates())
            manifest["features"][index]["categories"] = categories

        tfe_json["features"][key] = get_feature_from_data(frame_data, key, categories)

    return tfe_json

52 

53 

def get_manifest_data(features: list[tuple[str, str, str]], frames: list[int]) -> dict:
    """
    Build manifest for TFE.

    Frame image paths are numbered by position in ``frames`` (0, 1, 2, ...),
    not by the frame values themselves.

    Parameters
    ----------
    features
        List of feature keys, names, and data types.
    frames
        List of frames.

    Returns
    -------
    :
        Manifest in TFE format.
    """

    frame_paths = [f"frames/frame_{position}.png" for position, _ in enumerate(frames)]

    feature_entries = []
    for key, name, dtype in features:
        entry = {
            "key": key,
            "name": name,
            "data": f"features/{key}.json",
            "type": dtype,
        }
        feature_entries.append(entry)

    manifest = {
        "frames": frame_paths,
        "features": feature_entries,
        "tracks": "tracks.json",
        "times": "times.json",
    }
    return manifest

80 

81 

def get_tracks_from_data(data: pd.DataFrame) -> dict:
    """
    Extract track ids from data and format for TFE.

    Parameters
    ----------
    data
        Simulation data for selected frames.

    Returns
    -------
    :
        Track data in TFE format: the ID column, in row order, preceded by a
        single 0 entry.
    """

    track_ids = [0]
    track_ids.extend(data["ID"])
    return {"data": track_ids}

98 

99 

def get_times_from_data(data: pd.DataFrame) -> dict:
    """
    Extract time points from data and format for TFE.

    Parameters
    ----------
    data
        Simulation data for selected frames.

    Returns
    -------
    :
        Time data in TFE format: the time column, in row order, preceded by a
        single 0 entry.
    """

    time_points = [0]
    time_points.extend(data["time"])
    return {"data": time_points}

116 

117 

def get_feature_from_data(data: pd.DataFrame, feature: str, categories: list | None = None) -> dict:
    """
    Extract specified feature from data and format for TFE.

    When ``categories`` is given, each value is replaced by its position in
    that list; a value missing from the list raises ``ValueError``. The
    minimum and maximum ignore NaN values.

    Parameters
    ----------
    data
        Simulation data for selected frames.
    feature
        Feature key.
    categories
        List of data categories (if data is categorical).

    Returns
    -------
    :
        Feature data in TFE format.
    """

    values = data[feature]

    if categories is not None:
        # Encode each categorical value as its index in the category list.
        values = values.apply(categories.index)

    lower = float(np.nanmin(values))
    upper = float(np.nanmax(values))

    # The exported values are preceded by a single 0 entry.
    return {"data": [0, *values], "min": lower, "max": upper}

143 feature_max = float(np.nanmax(feature_values)) 

144 

145 return {"data": [0, *list(feature_values)], "min": feature_min, "max": feature_max}