LLaMA-Factory-310P3/mindie/examples/models/qwen/run_fa.sh

#!/bin/bash
# Comma-separated list of visible devices (its entry count becomes the
# torchrun process count further down) and the port passed to torchrun
# as --master_port.
export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 MASTER_PORT=20030

# The environment variables below relate to performance and memory
# optimization; under normal circumstances they do not need to be changed.
export \
    ATB_LAYER_INTERNAL_TENSOR_REUSE=1 \
    INF_NAN_MODE_ENABLE=0 \
    ATB_OPERATION_EXECUTE_ASYNC=1 \
    TASK_QUEUE_ENABLE=1 \
    ATB_CONVERT_NCHW_TO_ND=1 \
    HCCL_BUFFSIZE=120 \
    ATB_WORKSPACE_MEM_ALLOC_GLOBAL=1 \
    ATB_CONTEXT_WORKSPACE_SIZE=0

# Process count for the distributed launch: the number of commas in
# ASCEND_RT_VISIBLE_DEVICES plus one (an empty list therefore yields 1).
world_size=$(( $(tr -cd ',' <<<"${ASCEND_RT_VISIBLE_DEVICES}" | wc -c) + 1 ))
# Model path; stays empty until -m/--model-path is parsed.
model_path=""

#######################################
# Print a hint about the required -m/--model-path option and abort.
# Arguments: none
# Outputs:   the hint, written to stderr because this is only reached on
#            invalid invocations.
# Exits:     255 - the status bash reports for the out-of-range
#            `exit -1`, spelled out explicitly so it stays within the
#            valid 0-255 range.
#######################################
function usage(){
    printf '%s\n' "$0 pls. use '-m|--model-path' input model path" >&2
    exit 255
}

# Command-line parsing: the only recognised option is -m/--model-path,
# whose argument is stored in model_path. Every invalid invocation ends
# in usage(), which terminates the script.
if [[ $# -eq 0 ]]; then
    usage
fi

# getopt returns non-zero for an unknown option or a missing option
# argument; stop here instead of carrying on with a partial argument list.
if ! GETOP_ARGS=$(getopt -o m: -al model-path: -- "$@"); then
    usage
fi
# getopt emits a shell-quoted, normalised argument list terminated by "--";
# eval is required to re-split it into positional parameters.
eval set -- "${GETOP_ARGS}"
while [ -n "$1" ]
do
    case "$1" in
        -m|--model-path)
            model_path=$2
            shift 2
            ;;
        --)
            shift
            break
            ;;
        *)
            usage
            ;;
    esac
done

# Arguments were given but none of them supplied a model path (for example
# only positional arguments, or an empty value).
if [[ -z "${model_path}" ]]; then
    usage
fi

# Start examples.run_fa on the selected model.
# TP_WORLD_SIZE is not assigned anywhere in the visible script, so it is
# taken from the caller's environment: the value "1" selects a plain
# single-process python run, anything else (including unset) launches
# world_size processes through torchrun on MASTER_PORT.
# All expansions are quoted so that a model path containing spaces or glob
# characters reaches the program as a single argument.
if [[ "${TP_WORLD_SIZE:-}" == "1" ]]; then
    python -m examples.run_fa --model_path "${model_path}"
else
    torchrun --nproc_per_node "${world_size}" --master_port "${MASTER_PORT}" \
        -m examples.run_fa --model_path "${model_path}"
fi