Coverage for src/container_collection/manifest/find_missing_conditions.py: 100%

13 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2024-09-25 18:23 +0000

1import pandas as pd 

2 

3 

def find_missing_conditions(
    manifest: pd.DataFrame,
    name: str,
    conditions: list[dict],
    seeds: list[int],
    extensions: list[str],
) -> list[dict]:
    """
    Find simulations missing or incomplete from list of conditions.

    For each condition and seed, the expected manifest key is
    ``{name}_{condition["key"]}_{seed:04}``. A simulation is reported when its
    key does not appear in the manifest "KEY" column (missing) or when the
    manifest "EXTENSION" entries for that key do not cover every entry of
    ``extensions`` (incomplete).

    Parameters
    ----------
    manifest
        Manifest of all files for simulation series (must include "KEY" and
        "EXTENSION" columns).
    name
        Name of the simulation series.
    conditions
        List of series condition dictionaries (must include unique condition
        "key").
    seeds
        List of series random seeds.
    extensions
        List of single simulation output extensions.

    Returns
    -------
    :
        List of missing conditions, including random seed. Entries are shallow
        copies of the input conditions with an added "seed" item, ordered by
        condition and then by the order of ``seeds``.
    """

    # Loop-invariant: build the required extension set once.
    required_extensions = set(extensions)
    missing_conditions = []

    for condition in conditions:
        # Maps each expected key to its seed; dict order follows ``seeds``.
        all_keys = {f"{name}_{condition['key']}_{seed:04}": seed for seed in seeds}
        manifest_keys = manifest[manifest["KEY"].isin(list(all_keys))]

        # Collect the extensions present in the manifest for each expected key.
        found_extensions: dict[str, set] = {}
        for key, extension in zip(manifest_keys["KEY"], manifest_keys["EXTENSION"]):
            found_extensions.setdefault(key, set()).add(extension)

        # Iterate expected keys (not sets of keys) so output order is stable.
        for key, seed in all_keys.items():
            is_missing = key not in found_extensions
            if is_missing or required_extensions - found_extensions[key]:
                missing_condition = condition.copy()
                missing_condition["seed"] = seed
                missing_conditions.append(missing_condition)

    return missing_conditions