Source code for container_collection.batch.check_batch_job

from __future__ import annotations

from time import sleep

import boto3
from prefect.context import TaskRunContext
from prefect.states import Failed, State

RETRIES_EXCEEDED_EXIT_CODE = 80
"""Exit code used when task run retries exceed the maximum retries."""


[docs]def check_batch_job(job_arn: str, max_retries: int) -> int | State | bool: """ Check for exit code of an AWS Batch job. If this task is running within a Prefect flow, it will use the task run context to get the current run count. While the run count is below the maximum number of retries, the task will continue to attempt to get the exit code, and can be called with a retry delay to periodically check the status of jobs. If this task is not running within a Prefect flow, the ``max_retries`` parameters is ignored. Jobs that are still running will throw an exception. Parameters ---------- job_arn Job ARN. max_retries Maximum number of retries. Returns ------- : Exit code if the job is complete, otherwise throws an exception. """ context = TaskRunContext.get() if context is not None and context.task_run.run_count > max_retries: return RETRIES_EXCEEDED_EXIT_CODE client = boto3.client("batch") response = client.describe_jobs(jobs=[job_arn])["jobs"] # Job responses are not immediately available. Wait until available. while len(response) != 1: sleep(10) response = client.describe_jobs(jobs=[job_arn])["jobs"] status = response[0]["status"] # Wait until job is running or completed. while status not in ("RUNNING", "SUCCEEDED", "FAILED"): sleep(10) response = client.describe_jobs(jobs=[job_arn])["jobs"] status = response[0]["status"] # For jobs that are running, throw the appropriate exception. if context is not None and status == "RUNNING": return Failed() if status == "RUNNING": message = "Job is in RUNNING state and does not have exit code." raise RuntimeError(message) return response[0]["attempts"][0]["container"]["exitCode"]