Source code for container_collection.manifest.update_manifest_contents

import os

import pandas as pd


def update_manifest_contents(location_keys: dict) -> pd.DataFrame:
    """
    Update manifest using files at given keys at specified locations.

    One manifest is built per location and the results are concatenated into a
    single table sorted by extension and then by key, with a fresh 0-based
    index.

    Parameters
    ----------
    location_keys
        Map of locations to list of file keys.

    Returns
    -------
    :
        Combined manifest of file keys, extensions, and locations. If no
        locations are given, an empty manifest with the expected columns is
        returned.
    """

    all_manifests = [
        make_file_manifest(location, keys) for location, keys in location_keys.items()
    ]

    # pd.concat raises ValueError when given nothing to concatenate, so return
    # an empty manifest that still carries the expected columns.
    if not all_manifests:
        return pd.DataFrame(columns=["KEY", "EXTENSION", "LOCATION", "FULL_KEY"])

    manifest = pd.concat(all_manifests)
    manifest = manifest.sort_values(by=["EXTENSION", "KEY"])

    # Drop the per-location indices carried over from the individual manifests.
    return manifest.reset_index(drop=True)
def make_file_manifest(location: str, keys: list[str]) -> pd.DataFrame:
    """
    Create manifest for location with given list of file keys.

    For each key, only the final path component is inspected. The text before
    its first period becomes the short key and everything after that period
    becomes the extension (so ``name.tar.xz`` yields ``name`` and ``tar.xz``).
    A file name without a period gets an empty extension.

    Parameters
    ----------
    location
        File location (local path or S3 bucket).
    keys
        List of file keys.

    Returns
    -------
    :
        Manifest of file keys, extensions, and locations.
    """

    contents = []

    for key in keys:
        # Directory components never contribute to the short key or extension.
        file_name = os.path.basename(key)

        # Split at the first period so multi-part extensions stay intact.
        short_key, _, extension = file_name.partition(".")

        contents.append((short_key, extension, location, key))

    return pd.DataFrame(contents, columns=["KEY", "EXTENSION", "LOCATION", "FULL_KEY"])