Coverage for src/arcade_collection/output/parse_growth_file.py: 100%

28 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2024-12-09 19:07 +0000

1import json 

2import tarfile 

3 

4import numpy as np 

5import pandas as pd 

6 

# Order matches the rows built by ``parse_growth_timepoint``:
# time, seed, location (u, v, w, z), position, population, state, volume, cycle.
GROWTH_COLUMNS = [
    "TICK",
    "SEED",
    "U",
    "V",
    "W",
    "Z",
    "POSITION",
    "POPULATION",
    "STATE",
    "VOLUME",
    "CYCLE",
]
"""Column names for growth data parsed into tidy data format."""

21 

# Indexed by the integer ``state`` code of a cell record to obtain its name.
CELL_STATES = [
    "NEUTRAL",
    "APOPTOTIC",
    "QUIESCENT",
    "MIGRATORY",
    "PROLIFERATIVE",
    "SENESCENT",
    "NECROTIC",
]
"""Cell state names."""

32 

33 

def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame:
    """
    Parse simulation growth data into tidy data format.

    Each member of the archive is read as a UTF-8 encoded JSON document with a
    ``seed`` entry and a ``timepoints`` list. Every timepoint is flattened into
    rows by ``parse_growth_timepoint`` and the rows from all members are
    collected into a single dataframe. Members that cannot be extracted as a
    file object are skipped.

    Parameters
    ----------
    tar
        Tar archive containing growth data.

    Returns
    -------
    :
        Parsed growth data.
    """

    all_timepoints = []

    for member in tar.getmembers():
        extracted_member = tar.extractfile(member)

        # extractfile returns None for members with no readable file content.
        if extracted_member is None:
            continue

        # Close the member stream as soon as its contents have been read.
        with extracted_member:
            extracted_json = json.loads(extracted_member.read().decode("utf-8"))

        seed = extracted_json["seed"]

        for timepoint in extracted_json["timepoints"]:
            all_timepoints.extend(parse_growth_timepoint(timepoint, seed))

    return pd.DataFrame(all_timepoints, columns=GROWTH_COLUMNS)

69 

70 

def parse_growth_timepoint(data: dict, seed: int) -> list:
    """
    Parse growth data for a single simulation timepoint.

    Original data is formatted as:

    .. code-block:: python

        {
            "time": time,
            "cells": [
                [
                    [u, v, w, z],
                    [
                        [
                            type,
                            population,
                            state,
                            position,
                            volume,
                            [cell, cycle, lengths, ...]
                        ],
                        ...
                    ]
                ],
                ...
            ]
        }

    Parsed data is formatted as:

    .. code-block:: python

        [
            [time, seed, u, v, w, z, position, population, state, volume, cell_cycle],
            [time, seed, u, v, w, z, position, population, state, volume, cell_cycle],
            ...
        ]

    The integer state code is replaced by its name from ``CELL_STATES``. Cell
    cycle length is ``None`` if the cell has no recorded cycle lengths (it has
    not yet divided). Otherwise, cell cycle is the average of all cell cycle
    lengths.

    Parameters
    ----------
    data
        Original simulation data.
    seed
        Random seed.

    Returns
    -------
    :
        Parsed simulation data.
    """

    parsed_data = []
    time = data["time"]

    for location, cells in data["cells"]:
        for cell in cells:
            # The leading "type" entry of each cell record is not used.
            _, population, state, position, volume, cycles = cell
            cycle = np.mean(cycles) if cycles else None

            parsed_data.append(
                [
                    time,
                    seed,
                    *location,
                    position,
                    population,
                    CELL_STATES[state],
                    volume,
                    cycle,
                ]
            )

    return parsed_data