Coverage for src/container_collection/manifest/update_manifest_contents.py: 100%
19 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-09-25 18:23 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2024-09-25 18:23 +0000
1import os
3import pandas as pd
def update_manifest_contents(location_keys: dict) -> pd.DataFrame:
    """
    Update manifest using files at given keys at specified locations.

    One manifest is built per location with ``make_file_manifest``. The
    per-location manifests are concatenated, sorted by extension and then by
    key, and the index is renumbered from zero. An empty mapping produces an
    empty manifest that still carries the manifest columns.

    Parameters
    ----------
    location_keys
        Map of locations to list of file keys.

    Returns
    -------
    :
        Combined manifest of file keys, extensions, and locations.
    """

    all_manifests = [
        make_file_manifest(location, keys)
        for location, keys in location_keys.items()
    ]

    # pd.concat refuses an empty list of frames, so the no-location case is
    # answered directly with an empty, correctly-labelled manifest.
    if not all_manifests:
        return pd.DataFrame(columns=["KEY", "EXTENSION", "LOCATION", "FULL_KEY"])

    manifest = pd.concat(all_manifests)
    manifest = manifest.sort_values(by=["EXTENSION", "KEY"])

    # Drop the per-location row labels left over from concatenation.
    return manifest.reset_index(drop=True)
def make_file_manifest(location: str, keys: list[str]) -> pd.DataFrame:
    """
    Create manifest for location with given list of file keys.

    Each key contributes one row. The short key is the part of the file name
    before its first ".", and the extension is everything after that first
    "." (so "file.tar.gz" gives key "file" and extension "tar.gz"). A file
    name without any "." has an empty extension.

    Parameters
    ----------
    location
        File location (local path or S3 bucket).
    keys
        List of file keys.

    Returns
    -------
    :
        Manifest of file keys, extensions, and locations.
    """

    def _split_name(key: str) -> tuple[str, str]:
        # Split the base name once at its first "." into (short key, extension).
        short_key, _, extension = os.path.basename(key).partition(".")
        return short_key, extension

    contents = [(*_split_name(key), location, key) for key in keys]

    return pd.DataFrame(contents, columns=["KEY", "EXTENSION", "LOCATION", "FULL_KEY"])