Coverage for src/arcade_collection/output/parse_growth_file.py: 100%
28 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-12-09 19:07 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2024-12-09 19:07 +0000
1import json
2import tarfile
4import numpy as np
5import pandas as pd
GROWTH_COLUMNS = [
    # Simulation tick and random seed of the run.
    "TICK", "SEED",
    # Location coordinates.
    "U", "V", "W", "Z",
    # Per-cell attributes.
    "POSITION", "POPULATION", "STATE", "VOLUME", "CYCLE",
]
"""Ordered column names of the tidy-format table built from parsed growth data."""
# The position of a name in this list is the integer state code it stands for.
CELL_STATES = [
    "NEUTRAL", "APOPTOTIC", "QUIESCENT", "MIGRATORY",
    "PROLIFERATIVE", "SENESCENT", "NECROTIC",
]
"""Names of the cell states, listed in state-index order."""
def parse_growth_file(tar: tarfile.TarFile) -> pd.DataFrame:
    """
    Parse simulation growth data into tidy data format.

    Every member of the archive that can be extracted as a file is read as a
    UTF-8 encoded JSON document containing a ``seed`` entry and a list of
    ``timepoints``. Each timepoint is flattened into one row per cell and the
    rows from all members are collected into a single data frame.

    Parameters
    ----------
    tar
        Tar archive containing growth data.

    Returns
    -------
    :
        Parsed growth data.
    """

    all_timepoints = []

    for member in tar.getmembers():
        extracted_member = tar.extractfile(member)

        # No file object is returned for members without readable contents,
        # so there is nothing to parse for them.
        if extracted_member is None:
            continue

        # Release the extracted file object as soon as its contents are read.
        with extracted_member:
            extracted_json = json.loads(extracted_member.read().decode("utf-8"))

        seed = extracted_json["seed"]
        all_timepoints.extend(
            data
            for timepoint in extracted_json["timepoints"]
            for data in parse_growth_timepoint(timepoint, seed)
        )

    return pd.DataFrame(all_timepoints, columns=GROWTH_COLUMNS)
def parse_growth_timepoint(data: dict, seed: int) -> list:
    """
    Flatten the growth data of one simulation timepoint into rows.

    The input timepoint has the structure:

    .. code-block:: python

        {
            "time": time,
            "cells": [
                [
                    [u, v, w, z],
                    [
                        [
                            type,
                            population,
                            state,
                            position,
                            volume,
                            [cell, cycle, lengths, ...]
                        ],
                        ...
                    ]
                ],
                ...
            ]
        }

    The output contains one row per cell:

    .. code-block:: python

        [
            [time, seed, u, v, w, z, position, population, state, volume, cell_cycle],
            [time, seed, u, v, w, z, position, population, state, volume, cell_cycle],
            ...
        ]

    The state index of each cell is replaced by its name from ``CELL_STATES``.
    The cell cycle entry is ``None`` when the cell has no recorded cycle
    lengths (it has not yet divided); otherwise it is the mean of all of its
    cell cycle lengths.

    Parameters
    ----------
    data
        Original simulation data.
    seed
        Random seed.

    Returns
    -------
    :
        Parsed simulation data.
    """

    tick = data["time"]
    rows = []

    for coordinates, occupants in data["cells"]:
        # The leading entry of each cell record (its type) is not carried over.
        for _, population, state, position, volume, cycles in occupants:
            if len(cycles) == 0:
                mean_cycle = None
            else:
                mean_cycle = np.mean(cycles)

            rows.append(
                [
                    tick,
                    seed,
                    *coordinates,
                    position,
                    population,
                    CELL_STATES[state],
                    volume,
                    mean_cycle,
                ]
            )

    return rows