Source code for arcade_collection.output.parse_locations_file

from __future__ import annotations

import json
from typing import TYPE_CHECKING

import numpy as np
import pandas as pd

if TYPE_CHECKING:
    import tarfile


LOCATIONS_COLUMNS = [
    "ID",
    "TICK",
    "CENTER_X",
    "CENTER_Y",
    "CENTER_Z",
    "MIN_X",
    "MIN_Y",
    "MIN_Z",
    "MAX_X",
    "MAX_Y",
    "MAX_Z",
]
"""Column names for locations data parsed into tidy data format."""


[docs]def parse_locations_file(tar: tarfile.TarFile, regions: list[str]) -> pd.DataFrame: """ Parse simulation locations data into tidy data format. Parameters ---------- tar Tar archive containing locations data. regions List of regions. Returns ------- : Parsed locations data. """ all_locations: list[list[str | int]] = [] for member in tar.getmembers(): extracted_member = tar.extractfile(member) if extracted_member is None: continue tick = int(member.name.replace(".LOCATIONS.json", "").split("_")[-1]) locations_json = json.loads(extracted_member.read().decode("utf-8")) locations = [parse_location_tick(tick, cell, regions) for cell in locations_json] all_locations = all_locations + locations columns = LOCATIONS_COLUMNS + [ f"{column}.{region}" for region in regions for column in LOCATIONS_COLUMNS[2:] ] return pd.DataFrame(all_locations, columns=columns)
[docs]def parse_location_tick(tick: int, location: dict, regions: list[str]) -> list: """ Parse location data for a single simulation tick. Original data is formatted as: .. code-block:: python { "id": cell_id, "center": [center_x, center_y, center_z], "location": [ { "region": region, "voxels": [ [x, y, z], [x, y, z], ... ] }, { "region": region, "voxels": [ [x, y, z], [x, y, z], ... ] }, ... ] } Parsed data is formatted as: .. code-block:: python [ cell_id, tick, center_x, center_y, center_z, min_x, min_y, min_z, max_x, max_y, max_z ] When regions are specified, each list also contains centers, minimums, and maximums for the corresponding regions. Parameters ---------- tick Simulation tick. location Original location data. regions List of regions. Returns ------- : Parsed location data. """ if "center" in location: voxels = np.array([voxel for region in location["location"] for voxel in region["voxels"]]) mins = np.min(voxels, axis=0) maxs = np.max(voxels, axis=0) parsed = [location["id"], tick, *location["center"], *mins, *maxs] else: parsed = [location["id"], tick, -1, -1, -1, -1, -1, -1, -1, -1, -1] for reg in regions: region_voxels = np.array( [ voxel for region in location["location"] for voxel in region["voxels"] if region["region"] == reg ] ) if len(region_voxels) == 0: parsed = [*parsed, -1, -1, -1, -1, -1, -1, -1, -1, -1] continue center = [int(value + 0.5) for value in region_voxels.mean(axis=0)] mins = np.min(region_voxels, axis=0) maxs = np.max(region_voxels, axis=0) parsed = [*parsed, *center, *mins, *maxs] return parsed