examples/remote_dispatch.py

42 lines · python

"""Optional: enforce the same GPU mutex from inside a Python dispatcher.
 
If your GPU jobs are fired over SSH to a remote box (or from a long-running Python
loop rather than a shell), you can hold the *same* flock in-process so the wrapper
and the dispatcher share one lock. The reentrancy trick is identical to gpu_run.sh:
honor an inherited GPU_LOCK_HELD and skip re-locking, and respect a skip flag
(GPU_SKIP_LOCK) for non-GPU work (builds, uploads) that happens to run on the same box.
 
This is a generic skeleton - replace the dispatch body with your own call.
"""
import fcntl
import os
import subprocess
import sys
 
GPU_LOCK_PATH = os.environ.get("GPU_LOCK_PATH", "/tmp/gpu.lock")
 
 
def gpu_dispatch(argv, is_gpu_job=True, timeout=600):
    """Run ``argv`` as a subprocess, holding the GPU flock when required.

    The exclusive lock on ``GPU_LOCK_PATH`` is taken only when all of the
    following hold: ``is_gpu_job`` is true, ``GPU_LOCK_HELD`` is unset/empty
    in this process's environment (i.e. no ancestor already holds the lock),
    and ``GPU_SKIP_LOCK`` is unset/empty.

    While the lock is held, the child is started with ``GPU_LOCK_HELD=1`` in
    its environment so nested wrappers skip re-locking. The marker is passed
    only to the child; ``os.environ`` of this process is never modified, so
    other callers in the same process still contend for the lock themselves.

    Args:
        argv: Command and arguments, passed to ``subprocess.run`` as a list.
        is_gpu_job: Set to False for work that must not wait on the GPU lock.
        timeout: Seconds to wait for the command before giving up.

    Returns:
        The command's return code.

    Raises:
        subprocess.TimeoutExpired: The command did not finish within
            ``timeout``. The lock is released before this propagates.
        OSError: The lock file could not be opened, or the command could
            not be started.
    """
    need_lock = (
        is_gpu_job
        and not os.environ.get("GPU_LOCK_HELD")
        and not os.environ.get("GPU_SKIP_LOCK")
    )
    if not need_lock:
        return subprocess.run(argv, text=True, timeout=timeout).returncode

    # Append mode creates the file if it is missing but never truncates it:
    # we do not own the lock yet, so we must not wipe whatever the current
    # holder may have stored in the file.
    with open(GPU_LOCK_PATH, "a") as lockf:
        sys.stderr.write("[dispatch] waiting for GPU lock...\n")
        fcntl.flock(lockf, fcntl.LOCK_EX)  # blocks until the lock is free
        try:
            sys.stderr.write("[dispatch] acquired GPU lock\n")
            # Hand the reentrancy marker to the child only. Writing it into
            # os.environ would make every other caller in this process
            # believe the lock is already held and skip locking.
            child_env = dict(os.environ, GPU_LOCK_HELD="1")
            completed = subprocess.run(
                argv, text=True, timeout=timeout, env=child_env
            )
            return completed.returncode
        finally:
            # Release explicitly even on timeout or error; the enclosing
            # ``with`` then closes the file.
            fcntl.flock(lockf, fcntl.LOCK_UN)
 
 
if __name__ == "__main__":
    # CLI entry point: everything after the script name is the command to run,
    # and the command's return code becomes this process's exit status.
    if len(sys.argv) < 2:
        # Without a command, subprocess.run([]) would fail with a bare
        # IndexError traceback; report the usage error instead.
        sys.stderr.write(f"usage: {sys.argv[0]} COMMAND [ARG...]\n")
        sys.exit(2)
    sys.exit(gpu_dispatch(sys.argv[1:]))

1	"""Optional: enforce the same GPU mutex from inside a Python dispatcher.
2
3	If your GPU jobs are fired over SSH to a remote box (or from a long-running Python
4	loop rather than a shell), you can hold the same flock in-process so the wrapper
5	and the dispatcher share one lock. The reentrancy trick is identical to gpu_run.sh:
6	honor an inherited GPU_LOCK_HELD and skip re-locking, and respect a skip flag for
7	non-GPU work (builds, uploads) that happens to run on the same box.
8
9	This is a generic skeleton - replace the dispatch body with your own call.
10	"""
11	import fcntl
12	import os
13	import subprocess
14	import sys
15
16	GPU_LOCK_PATH = os.environ.get("GPU_LOCK_PATH", "/tmp/gpu.lock")
17
18
19	def gpu_dispatch(argv, is_gpu_job=True, timeout=600):
20	need_lock = (
21	is_gpu_job
22	and not os.environ.get("GPU_LOCK_HELD")
23	and not os.environ.get("GPU_SKIP_LOCK")
24	)
25	lockf = None
26	try:
27	if need_lock:
28	lockf = open(GPU_LOCK_PATH, "w")
29	sys.stderr.write("[dispatch] waiting for GPU lock...\n")
30	fcntl.flock(lockf, fcntl.LOCK_EX)
31	sys.stderr.write("[dispatch] acquired GPU lock\n")
32	os.environ["GPU_LOCK_HELD"] = "1"
33	return subprocess.run(argv, text=True, timeout=timeout).returncode
34	finally:
35	if lockf is not None:
36	fcntl.flock(lockf, fcntl.LOCK_UN)
37	lockf.close()
38	os.environ.pop("GPU_LOCK_HELD", None)
39
40
41	if __name__ == "__main__":
42	sys.exit(gpu_dispatch(sys.argv[1:]))