Source code for container_collection.manifest.summarize_manifest_files
import re
import pandas as pd
from tabulate import tabulate
def summarize_manifest_files(
    manifest: pd.DataFrame, name: str, conditions: list[dict], seeds: list[int]
) -> str:
    """
    Summarize manifest files into a table.

    Summary table is formatted as:

    .. code-block:: bash

        ┍━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┯━━━━━┯━━━━━━━━━━━━━┑
        │                 │ extension.a   │ ... │ extension.n │
        ┝━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━┿━━━━━┿━━━━━━━━━━━━━┥
        │ condition_key_a │ #/# (##.## %) │ ... │ ✓           │
        │ condition_key_b │ #/# (##.## %) │ ... │ ✓           │
        │ ...             │ ...           │ ... │ ...         │
        │ condition_key_n │ #/# (##.## %) │ ... │ ✓           │
        ┕━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┷━━━━━┷━━━━━━━━━━━━━┙

    For file extensions that have files for each random seed, the summary table
    reports the number and percentage of random seeds. For file extensions with
    only one file per condition, a checkmark (✓) is used to indicate that the
    file exists; the cell is left empty otherwise.

    Parameters
    ----------
    manifest
        Manifest of file keys, extensions, and locations. Must provide the
        "KEY" and "EXTENSION" columns.
    name
        Name of the simulation series.
    conditions
        List of series condition dictionaries (must include unique condition
        "key").
    seeds
        List of series random seeds.

    Returns
    -------
    :
        Manifest summary table.
    """

    condition_keys = [f"{name}_{condition['key']}" for condition in conditions]

    # Hoisted loop invariants: set membership is O(1), and the denominator
    # deliberately stays len(seeds) so the reported totals match the input list.
    known_keys = set(condition_keys)
    known_seeds = set(seeds)
    total_seeds = len(seeds)

    # The series name is matched literally as a key prefix; escaping keeps any
    # regex metacharacters in the name from changing which keys are selected.
    manifest_keys = (
        manifest.set_index("KEY")
        .filter(regex=f"^{re.escape(name)}", axis="index")
        .reset_index()
    )
    extensions = manifest_keys["EXTENSION"].unique()

    # Per (condition key, extension) tally of seeded files. A plain dict avoids
    # chained DataFrame assignment, which does not write through under pandas
    # copy-on-write.
    counts: dict[tuple[str, str], int] = {}
    summary = pd.DataFrame(index=condition_keys, columns=extensions)

    # NOTE: the first run of four digits anywhere in the key is treated as the
    # seed, and every "_<seed>" occurrence is stripped to recover the condition
    # key.
    seed_pattern = re.compile(r"[0-9]{4}")

    for entry in manifest_keys.to_dict("records"):
        key = entry["KEY"]
        extension = entry["EXTENSION"]
        match = seed_pattern.search(key)

        # Keys without a seed are single files per condition.
        if match is None:
            if key in known_keys:
                summary.loc[key, extension] = "✓"
            continue

        seed = match.group(0)
        key = key.replace(f"_{seed}", "")

        # Ignore files for conditions or seeds that are not part of the series.
        if key not in known_keys or int(seed) not in known_seeds:
            continue

        count = counts.get((key, extension), 0) + 1
        counts[(key, extension)] = count
        percent = count / total_seeds * 100
        summary.loc[key, extension] = f"{count}/{total_seeds} ({percent:.2f} %)"

    # Drop extensions with no files for any listed condition, then blank out
    # the remaining missing cells for display.
    summary = summary.dropna(axis=1, how="all")
    summary = summary.fillna("")

    return tabulate(summary, headers="keys", tablefmt="mixed_outline")