diff --git a/FM_9G/apps/fm9g_2b/pretrain_dragonfly.sh b/FM_9G/apps/fm9g_2b/pretrain_dragonfly.sh
index 3e88379..d52e175 100644
--- a/FM_9G/apps/fm9g_2b/pretrain_dragonfly.sh
+++ b/FM_9G/apps/fm9g_2b/pretrain_dragonfly.sh
@@ -219,10 +219,10 @@ else
 fi
 
 
-GPUS_PER_NODE=1
+GPUS_PER_NODE=2
 NNODES=1
 RANK=0
-MASTER_ENDPOINT=g3006
+MASTER_ENDPOINT=ubuntu
 MASTER_PORT=23456
 #CMD="torchrun --nnodes=${NNODES} --nproc_per_node=${GPUS_PER_NODE} --node_rank=${RANK} --master_addr=${MASTER_ENDPOINT} --master_port=${MASTER_PORT} ${PRETRAIN_ENTRY} ${OPTS}"
 CMD="torchrun --nnodes=${NNODES} --nproc_per_node=${GPUS_PER_NODE} --node_rank=${RANK} --rdzv_id=1 --rdzv_backend=c10d --rdzv_endpoint=${MASTER_ENDPOINT}:${MASTER_PORT} ${PRETRAIN_ENTRY} ${OPTS}"