Coverage for src/container_collection/manifest/find_missing_conditions.py: 100%
13 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-09-25 18:23 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2024-09-25 18:23 +0000
1import pandas as pd
def find_missing_conditions(
    manifest: pd.DataFrame,
    name: str,
    conditions: list[dict],
    seeds: list[int],
    extensions: list[str],
) -> list[dict]:
    """
    Find simulations missing or incomplete from list of conditions.

    A simulation is identified by the key ``{name}_{condition key}_{seed:04}``.
    It is reported as missing when that key does not appear in the manifest
    "KEY" column, and as incomplete when the key appears but at least one of
    the given extensions is absent from its "EXTENSION" entries.

    Results are returned in a deterministic order: conditions in the order
    given and, within each condition, seeds in the order given.

    Parameters
    ----------
    manifest
        Manifest of all files for simulation series (read via its "KEY" and
        "EXTENSION" columns).
    name
        Name of the simulation series.
    conditions
        List of series condition dictionaries (must include unique condition
        "key").
    seeds
        List of series random seeds.
    extensions
        List of single simulation output extensions.

    Returns
    -------
    :
        List of missing conditions, including random seed. Each entry is a
        shallow copy of the input condition with an added "seed" item; the
        input dictionaries are not modified.
    """

    # Loop invariant: the extensions every complete simulation must provide.
    required_extensions = set(extensions)
    missing_conditions = []

    for condition in conditions:
        # Map each expected simulation key to its seed. Dict insertion order
        # follows the order of seeds, which fixes the order of the results.
        all_keys = {f"{name}_{condition['key']}_{seed:04}": seed for seed in seeds}
        manifest_keys = manifest[manifest["KEY"].isin(list(all_keys))]

        present_keys = set(manifest_keys["KEY"])
        incomplete_keys = set(
            manifest_keys.groupby("KEY").filter(
                lambda group: not required_extensions.issubset(group["EXTENSION"])
            )["KEY"]
        )

        # Iterate the mapping rather than the sets: set iteration order for
        # strings varies between interpreter runs (hash randomization).
        for key, seed in all_keys.items():
            if key in present_keys and key not in incomplete_keys:
                continue

            missing_condition = condition.copy()
            missing_condition["seed"] = seed
            missing_conditions.append(missing_condition)

    return missing_conditions