forked from p04798526/LLaMA-Factory-Mirror
add DISABLE_TORCHRUN option
parent 55c18c49b0
commit 45d8be8f93
@@ -72,7 +72,12 @@ def main():
     elif command == Command.EXPORT:
         export_model()
     elif command == Command.TRAIN:
-        if get_device_count() > 0:
+        disable_torchrun = os.environ.get("DISABLE_TORCHRUN", "0").lower() in ["true", "1"]
+        if disable_torchrun and get_device_count() > 1:
+            logger.warning("`torchrun` cannot be disabled when device count > 1.")
+            disable_torchrun = False
+
+        if (not disable_torchrun) and (get_device_count() > 0):
             master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1")
             master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999)))
             logger.info("Initializing distributed tasks at: {}:{}".format(master_addr, master_port))
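The new control flow can be read in isolation. Below is a minimal sketch of the same logic, assuming a stand-in get_device_count() in place of the project's real device helper:

import os

def get_device_count() -> int:
    # Hypothetical stand-in for the project's real helper, which
    # reports the number of available accelerator devices.
    return 1

# Mirror the commit's parsing: only "true" or "1" (case-insensitive)
# disables torchrun; any other value keeps the launcher enabled.
disable_torchrun = os.environ.get("DISABLE_TORCHRUN", "0").lower() in ["true", "1"]

if disable_torchrun and get_device_count() > 1:
    # Multi-device training still requires torchrun, so the flag is ignored.
    print("`torchrun` cannot be disabled when device count > 1.")
    disable_torchrun = False

if (not disable_torchrun) and (get_device_count() > 0):
    print("would launch training via torchrun")
else:
    print("would run the trainer in the current process")

In effect, setting DISABLE_TORCHRUN=1 (or "true", case-insensitive) before launching skips the torchrun launcher on single-device machines, while multi-device runs ignore the flag and emit a warning.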