#!/bin/bash
#
# Benchmark launcher: locates a model's YAML file and its
# <model>/<framework>/scripts/run.sh below this script's directory, then
# runs atlas_benchmark-master/utils/shell/start.sh with them.
# Run with --help for the list of options.

# Parent of the caller's working directory (not of the script location).
# No use of it is visible in this file; kept for compatibility.
parentDir=$(dirname "$PWD")

# Absolute directory that contains this script. Abort if it cannot be
# resolved: later steps run a recursive chmod and find below this path, so
# silently falling back to the caller's working directory would be unsafe.
currentDir=$(cd "$(dirname "$0")" && pwd) || {
  echo "ERROR: cannot resolve the script directory from '$0'" >&2
  exit 1
}

# Defaults; each can be overridden on the command line.
execModel=AlexNet     # -e  / --execmodel
mode=host             # -host / -docker
framework=tensorflow  # -f  / --framework
hardware=1p           # -hw / --hardware

# Space-separated lists of the values accepted for --framework / --hardware.
framework_group="tensorflow pytorch mindspore"
hardware_group="1p 2p 4p 8p cluster ct"
# Print the usage text to stdout and terminate the script with status 0.
# Takes no arguments.
# The default model named in the text must match the execModel default set
# at the top of the script (AlexNet).
echo_help() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'HELP_TEXT'

 --execmodel, -e (选填) 需要执行的模型名称, 默认 AlexNet
 --hardware, -hw (选填) 选择 1p, 2p, 4p, 8p, cluster|ct(集群), 默认 1p
 --yamlpath, -y (选填) yaml 文件的路径, 默认为 yaml 路径下的 {execmodel}.yaml
 --framework, -f (选填) 模型训练框架, 默认 tensorflow
 -docker, -host (选填) 选择 docker 或 host 下执行, 默认使用 host

 --help, -h 显示帮助信息
 --list, -l 显示支持的框架与模型

 示例1,docker 环境下启动 MobileNet 多卡(8p)训练:
 ./benckmark.sh -e MobileNet -hw 8p -y ./yaml/MobileNet.yaml -docker
 示例2,host 环境下启动 MobileNet 单卡(1p)训练,yaml 使用默认文件:
 ./benckmark.sh -e MobileNet

HELP_TEXT
  exit 0
}
# Report an error on stderr and terminate the script.
# Arguments: $1 - message text; backslash escapes such as \n are expanded,
#                 which callers use to build multi-line messages.
# Exits:     always, with status 1.
error_log() {
  # %b expands the escapes in the message the way `echo -e` did, without
  # the message ever being parsed as echo options.
  printf '\nERROR:\n\n%b\n' "$1" >&2
  exit 1
}
# Return 0 when $1 is exactly one of the whitespace-separated words in $2.
_in_word_list() {
  local candidate=$1 word
  # $2 is split on purpose: it is a space-separated list of plain words.
  for word in $2; do
    [ "$word" = "$candidate" ] && return 0
  done
  return 1
}

# Validate the selected options, locate the model's run.sh and launch
# start.sh with: mode, hardware, YAML path, model directory, framework.
# Globals read:    currentDir execModel framework mode
#                  hardware_group framework_group
# Globals written: yamlPath (filled in when empty), hardware (ct -> cluster)
# Exits with status 1 through error_log when the YAML file is missing, when
# hardware/framework is not an accepted value, or when there is not exactly
# one matching run.sh.
exec_train() {
  local error_msg="" exec_train_file file_count start_file modelDir

  # No YAML path given: look it up by model name (case-insensitively)
  # under <script dir>/yaml/.
  if [ -z "${yamlPath}" ]; then
    yamlPath=$(find "${currentDir}/yaml/" -iregex ".*${execModel}.yaml$" 2>/dev/null)
  fi

  # The quotes matter: with an empty lookup result an unquoted `[ -f ]`
  # is true, which would let a missing YAML file through.
  [ -f "${yamlPath}" ] || error_log "No such file or directory: ${yamlPath}\n"

  # Exact word comparison, so user input is never interpreted as a regex.
  _in_word_list "$hardware" "$hardware_group" \
    || error_msg+="hardware: $hardware not in '$hardware_group'\n"
  _in_word_list "$framework" "$framework_group" \
    || error_msg+="framework: $framework not in '$framework_group'\n"
  if [ -n "$error_msg" ]; then
    error_log "$error_msg"
  fi

  chmod -R u+x "${currentDir}"/*

  exec_train_file=$(find "${currentDir}" -iregex ".*${execModel}/${framework}/scripts/run.sh$" 2>/dev/null)
  # Count matches by line rather than by word so that a path containing
  # spaces still counts as a single match; empty output counts as 0.
  file_count=$(printf '%s' "${exec_train_file}" | grep -c '')
  start_file=$currentDir/atlas_benchmark-master/utils/shell/start.sh

  # "ct" is accepted as shorthand; start.sh is given the long name.
  if [ "$hardware" = "ct" ]; then
    hardware="cluster"
  fi

  if [ "${file_count}" -eq 1 ] && [ -e "${exec_train_file}" ]; then
    # The model directory is the parent of run.sh's scripts/ directory.
    modelDir=$(cd "$(dirname "$exec_train_file")/.." && pwd)
    echo "find script path success"
    echo "run train script file path is ${exec_train_file}"
    bash "${start_file}" "${mode}" "${hardware}" "${yamlPath}" "${modelDir}" "${framework}"
  else
    error_log "The model($execModel) does not support the framework($framework) temporarily.\nplease use --list\n"
  fi
}
# Print, for every framework in framework_group, the models that ship a
# <model>/<framework>/scripts/run.sh somewhere below currentDir.
# Globals read: framework_group currentDir
# Output: one "<framework>:" heading per framework followed by one indented
#         model name per run.sh found, then a closing blank line.
list_model() {
  local fw
  # framework_group is split on purpose: it is a space-separated word list.
  for fw in $framework_group; do
    printf '\n%s:\n\n' "$fw"
    # Stream the paths straight into awk, one per line, so directories with
    # spaces are not split into several words. The model name is the path
    # component three levels above run.sh.
    find "${currentDir}" -iregex ".*/${fw}/scripts/run.sh$" 2>/dev/null \
      | awk -F / '{print " "$(NF-3)}'
  done
  echo ""
}
# Abort with a usage hint unless the option in $1 is followed by a value.
# $2 is the number of arguments still to be parsed, the option included.
require_value() {
  if [ "$2" -lt 2 ]; then
    echo "$1 requires a value, please use --help" >&2
    exit 1
  fi
}

# Parse the command line, overriding the defaults, then start the training.
# Parsing stops at the first empty argument.
while [ -n "$1" ]; do
  case "$1" in
    -e | --execmodel)
      require_value "$1" "$#"
      execModel=$2
      shift
      ;;
    -host | -docker)
      # The mode is the flag itself without its leading dash.
      mode=${1#-}
      ;;
    -y | --yamlpath)
      require_value "$1" "$#"
      yamlPath=$2
      shift
      ;;
    -f | --framework)
      require_value "$1" "$#"
      framework=$2
      shift
      ;;
    -hw | --hardware)
      require_value "$1" "$#"
      hardware=$2
      shift
      ;;
    -l | --list)
      list_model
      exit
      ;;
    -h | --help)
      echo_help
      exit
      ;;
    *)
      echo "$1 is not an option, please use --help" >&2
      exit 1
      ;;
  esac
  shift
done

exec_train