| 1 | """Optional: enforce the same GPU mutex from inside a Python dispatcher. |
| 2 | |
| 3 | If your GPU jobs are fired over SSH to a remote box (or from a long-running Python |
| 4 | loop rather than a shell), you can hold the *same* flock in-process so the wrapper |
| 5 | and the dispatcher share one lock. The reentrancy trick is identical to gpu_run.sh: |
| 6 | honor an inherited GPU_LOCK_HELD and skip re-locking, and respect a skip flag for |
| 7 | non-GPU work (builds, uploads) that happens to run on the same box. |
| 8 | |
| 9 | This is a generic skeleton - replace the dispatch body with your own call. |
| 10 | """ |
| 11 | import fcntl |
| 12 | import os |
| 13 | import subprocess |
| 14 | import sys |
| 15 | |
# Path of the lock file shared with the shell wrapper; override via the
# GPU_LOCK_PATH environment variable.
GPU_LOCK_PATH = os.environ.get("GPU_LOCK_PATH", "/tmp/gpu.lock")


def gpu_dispatch(argv, is_gpu_job=True, timeout=600):
    """Run ``argv`` as a subprocess, holding the GPU lock for its duration.

    The exclusive flock on ``GPU_LOCK_PATH`` is taken only when all of the
    following hold: ``is_gpu_job`` is true, ``GPU_LOCK_HELD`` is unset/empty
    in this process's environment (no enclosing caller already holds the
    lock), and ``GPU_SKIP_LOCK`` is unset/empty. Otherwise the command runs
    immediately without locking.

    Args:
        argv: Command to execute, passed straight to ``subprocess.run``.
        is_gpu_job: Pass False for non-GPU work that must not wait on the lock.
        timeout: Seconds to allow the command to run, forwarded to
            ``subprocess.run``. Time spent waiting for the lock is not counted.

    Returns:
        The command's exit code.

    Raises:
        subprocess.TimeoutExpired: The command exceeded ``timeout``.
        OSError: The lock file could not be opened or locked, or the command
            could not be started.
    """
    need_lock = (
        is_gpu_job
        and not os.environ.get("GPU_LOCK_HELD")
        and not os.environ.get("GPU_SKIP_LOCK")
    )
    if not need_lock:
        return subprocess.run(argv, text=True, timeout=timeout).returncode

    # Append mode creates the lock file when missing but never truncates it,
    # so anything another lock holder stored in the file is left intact.
    with open(GPU_LOCK_PATH, "a") as lockf:
        sys.stderr.write("[dispatch] waiting for GPU lock...\n")
        fcntl.flock(lockf, fcntl.LOCK_EX)
        try:
            sys.stderr.write("[dispatch] acquired GPU lock\n")
            # Mark the lock as held only in the child's environment. Writing
            # the flag into os.environ would let a concurrent gpu_dispatch
            # call in this process see it and skip the lock entirely.
            child_env = dict(os.environ, GPU_LOCK_HELD="1")
            completed = subprocess.run(
                argv, text=True, timeout=timeout, env=child_env
            )
            return completed.returncode
        finally:
            # Release explicitly on every exit path (success, timeout, error).
            fcntl.flock(lockf, fcntl.LOCK_UN)
| 39 | |
| 40 | |
if __name__ == "__main__":
    # Script entry point: run the given command line under the GPU lock and
    # exit with the command's exit code.
    if len(sys.argv) < 2:
        # Fail fast with a usage hint instead of waiting for the GPU lock and
        # then raising from subprocess.run on an empty command.
        sys.stderr.write(f"usage: {sys.argv[0]} COMMAND [ARG...]\n")
        sys.exit(2)
    sys.exit(gpu_dispatch(sys.argv[1:]))