Coverage for src/container_collection/manifest/summarize_manifest_files.py: 100%
25 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-09-25 18:23 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2024-09-25 18:23 +0000
1import re
3import pandas as pd
4from tabulate import tabulate
def summarize_manifest_files(
    manifest: pd.DataFrame, name: str, conditions: list[dict], seeds: list[int]
) -> str:
    """
    Summarize manifest files into a table.

    Summary table is formatted as:

    .. code-block:: bash

        ┍━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┯━━━━━┯━━━━━━━━━━━━━┑
        │                 │ extension.a   │ ... │ extension.n │
        ┝━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━┿━━━━━┿━━━━━━━━━━━━━┥
        │ condition_key_a │ #/# (##.## %) │ ... │      ✓      │
        │ condition_key_b │ #/# (##.## %) │ ... │      ✓      │
        │ ...             │ ...           │ ... │     ...     │
        │ condition_key_n │ #/# (##.## %) │ ... │      ✓      │
        ┕━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┷━━━━━┷━━━━━━━━━━━━━┙

    For file extensions that have files for each random seed, the summary table
    reports the number and percentage of random seeds. For file extensions with
    only one file per condition, a checkmark (✓) is used to indicate that the
    file exists; the cell is left empty otherwise.

    Only manifest entries whose key starts with the series name are considered.
    The first run of four digits found in a key is treated as the random seed;
    the ``_<seed>`` part is removed from the key before it is compared against
    the ``<name>_<condition key>`` row labels.

    Parameters
    ----------
    manifest
        Manifest of file keys, extensions, and locations.
    name
        Name of the simulation series.
    conditions
        List of series condition dictionaries (must include unique condition
        "key").
    seeds
        List of series random seeds.

    Returns
    -------
    :
        Manifest summary table.
    """

    condition_keys = [f"{name}_{condition['key']}" for condition in conditions]

    # The series name is a literal prefix, not a pattern: escape it so that
    # regex metacharacters in the name cannot change which keys are selected.
    manifest_keys = (
        manifest.set_index("KEY")
        .filter(regex=f"^{re.escape(name)}", axis="index")
        .reset_index()
    )
    extensions = manifest_keys["EXTENSION"].unique()

    summary = pd.DataFrame(index=condition_keys, columns=extensions)

    # Loop invariants: compiled seed pattern, constant-time membership sets,
    # and the denominator used for the percentage.
    seed_pattern = re.compile(r"[0-9]{4}")
    valid_keys = set(condition_keys)
    valid_seeds = set(seeds)
    num_seeds = len(seeds)

    # Running tally of seeded files per (condition key, extension). A plain
    # dict avoids chained-indexing assignment on a DataFrame, which does not
    # reliably write back to the original frame.
    counts: dict[tuple[str, str], int] = {}

    for entry in manifest_keys.to_dict("records"):
        key = entry["KEY"]
        extension = entry["EXTENSION"]

        match = seed_pattern.search(key)

        if match:
            seed = match.group(0)
            key = key.replace(f"_{seed}", "")

            # Seeded entries that belong to an unknown condition or seed are
            # ignored entirely (they never fall through to the checkmark).
            if key in valid_keys and int(seed) in valid_seeds:
                count = counts.get((key, extension), 0) + 1
                counts[(key, extension)] = count
                percent = count / num_seeds * 100
                summary.loc[key, extension] = f"{count}/{num_seeds} ({percent:.2f} %)"
        elif key in valid_keys:
            summary.loc[key, extension] = "✓"

    # Drop extensions that had no entry for any condition, then blank out the
    # remaining gaps so missing files render as empty cells.
    summary = summary.dropna(axis=1, how="all")
    summary = summary.fillna("")

    return tabulate(summary, headers="keys", tablefmt="mixed_outline")