Source code for arcade_collection.output.parse_cells_file

from __future__ import annotations

import json
from typing import TYPE_CHECKING

import pandas as pd

if TYPE_CHECKING:
    import tarfile

CELLS_COLUMNS = [
    "ID",
    "TICK",
    "PARENT",
    "POPULATION",
    "AGE",
    "DIVISIONS",
    "STATE",
    "PHASE",
    "NUM_VOXELS",
]
"""Column names for cells data parsed into tidy data format."""


[docs]def parse_cells_file(tar: tarfile.TarFile, regions: list[str]) -> pd.DataFrame: """ Parse simulation cells data into tidy data format. Parameters ---------- tar Tar archive containing locations data. regions List of regions. Returns ------- : Parsed cells data. """ all_cells: list[list[str | int]] = [] for member in tar.getmembers(): extracted_member = tar.extractfile(member) if extracted_member is None: continue tick = int(member.name.replace(".CELLS.json", "").split("_")[-1]) cells_json = json.loads(extracted_member.read().decode("utf-8")) cells = [parse_cell_tick(tick, cell, regions) for cell in cells_json] all_cells = all_cells + cells columns = CELLS_COLUMNS + [f"NUM_VOXELS.{region}" for region in regions] return pd.DataFrame(all_cells, columns=columns)
[docs]def parse_cell_tick(tick: int, cell: dict, regions: list[str]) -> list: """ Parse cell data for a single simulation tick. Original data is formatted as: .. code-block:: python { "id": cell_id, "parent": parent_id, "pop": population, "age": age, "divisions": divisions, "state": state, "phase": phase, "voxels": voxels, "criticals": [critical_volume, critical_height], "regions": [ { "region": region_name, "voxels": region_voxels, "criticals": [critical_region_volume, critical_region_height] }, ... ] } Parsed data is formatted as: .. code-block:: python [ cell_id, tick, parent_id, population, age, divisions, state, phase, voxels ] When regions are specified, each list also contains the number of voxels for the corresponding regions. Parameters ---------- tick Simulation tick. cell Original cell data. regions List of regions. Returns ------- : Parsed cell data. """ features = ["parent", "pop", "age", "divisions", "state", "phase", "voxels"] parsed = [cell["id"], tick] + [cell[feature] for feature in features] if regions and "regions" in cell: region_voxels = [ cell_region["voxels"] for region in regions for cell_region in cell["regions"] if cell_region["region"] == region ] parsed = parsed + region_voxels return parsed