Coverage for src/container_collection/manifest/update_manifest_contents.py: 100%

19 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2024-09-25 18:23 +0000

1import os 

2 

3import pandas as pd 

4 

5 

def update_manifest_contents(location_keys: dict) -> pd.DataFrame:
    """
    Update manifest using files at given keys at specified locations.

    One manifest is built per location with ``make_file_manifest``; the
    per-location manifests are then concatenated, sorted by extension and
    then by key, and re-indexed from zero.

    Parameters
    ----------
    location_keys
        Map of locations to list of file keys.

    Returns
    -------
    :
        Combined manifest of file keys, extensions, and locations. If no
        locations are given, an empty manifest with the same columns.
    """

    all_manifests = [
        make_file_manifest(location, keys) for location, keys in location_keys.items()
    ]

    # pd.concat cannot combine an empty list of frames, so hand back an
    # empty manifest with the expected columns instead.
    if not all_manifests:
        return pd.DataFrame(columns=["KEY", "EXTENSION", "LOCATION", "FULL_KEY"])

    manifest = pd.concat(all_manifests)
    manifest = manifest.sort_values(by=["EXTENSION", "KEY"])

    # Discard the index labels carried over from concatenation and sorting.
    return manifest.reset_index(drop=True)

33 

34 

def make_file_manifest(location: str, keys: list[str]) -> pd.DataFrame:
    """
    Create manifest for location with given list of file keys.

    Each key's file name (the part after the last path separator) is split at
    its first dot: the text before the dot becomes ``KEY`` and everything
    after it becomes ``EXTENSION``. Multi-part extensions are therefore kept
    whole (``name.tar.gz`` gives key ``name`` and extension ``tar.gz``), a
    name without a dot has an empty extension, and a name starting with a dot
    has an empty key.

    Parameters
    ----------
    location
        File location (local path or S3 bucket).
    keys
        List of file keys.

    Returns
    -------
    :
        Manifest of file keys, extensions, and locations.
    """

    contents = []

    for key in keys:
        # Extract the file name once and split it at the first dot only, so
        # any later dots stay inside the extension.
        short_key, _, extension = os.path.basename(key).partition(".")
        contents.append((short_key, extension, location, key))

    return pd.DataFrame(contents, columns=["KEY", "EXTENSION", "LOCATION", "FULL_KEY"])