Files
ascend-tools/train/benchmark.sh
T
2020-10-19 20:22:23 +08:00

118 lines
3.4 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# Launcher defaults. The option parser at the bottom of this script may
# overwrite execModel, mode, framework and hardware.

# Directory that contains this script; models, yaml files and start.sh are
# looked up relative to it.
currentDir=$(
  cd "$(dirname "$0")"
  pwd
)
# Parent of the directory the script was invoked from.
parentDir=$(dirname "$PWD")

# Accepted values for --framework and --hardware (space-separated lists).
framework_group="tensorflow pytorch mindspore"
hardware_group="1p 2p 4p 8p cluster ct"

# Default selections used when the matching option is not given.
framework=tensorflow
hardware=1p
mode=host
execModel=AlexNet
# Print the usage text to stdout and terminate the script with status 0.
#
# The text is kept in a quoted here-document so it is emitted verbatim.
# The advertised default model matches the script default (AlexNet), and the
# examples use the real script name (benchmark.sh).
echo_help() {
  cat <<'EOF'

 --execmodel, -e (选填) 需要执行的模型名称, 默认 AlexNet
 --hardware, -hw (选填) 选择 1p, 2p, 4p, 8p, cluster|ct(集群), 默认 1p
 --yamlpath, -y (选填) yaml 文件的路径, 默认为 yaml 路径下的 {execmodel}.yaml
 --framework, -f (选填) 模型训练框架, 默认 tensorflow
 -docker, -host (选填) 选择 docker 或 host 下执行, 默认使用 host

 --help, -h 显示帮助信息
 --list, -l 显示支持的框架与模型

 示例1: docker 环境下启动 MobileNet 多卡(8p)训练:
 ./benchmark.sh -e MobileNet -hw 8p -y ./yaml/MobileNet.yaml -docker
 示例2: host 环境下启动 MobileNet 单卡(1p)训练,yaml 使用默认文件:
 ./benchmark.sh -e MobileNet

EOF
  exit 0
}
# Print an error banner followed by the message in $1 to stdout, then
# terminate the script with status 1.
# Backslash escapes inside the message (for example "\n") are expanded.
error_log() {
  local message=$1
  printf '%b\n' "\nERROR:\n\n${message}"
  exit 1
}
# Return 0 when $1 is exactly equal to one of the remaining arguments.
_is_listed() {
  local wanted=$1 candidate
  shift
  for candidate in "$@"; do
    [ "$candidate" == "$wanted" ] && return 0
  done
  return 1
}

# Validate the selected options, locate the model's run.sh and hand over to
# start.sh.
#
# Globals read:    currentDir, execModel, framework, mode,
#                  framework_group, hardware_group
# Globals written: yamlPath (filled in by lookup when empty),
#                  hardware ("ct" is rewritten to "cluster")
# Exits through error_log (status 1) when the yaml file is missing, when
# hardware/framework are not in their allowed lists, or when there is not
# exactly one matching <model>/<framework>/scripts/run.sh under currentDir.
# Otherwise the status of start.sh is the status of this function.
exec_train() {
  local error_msg="" exec_train_file start_file modelDir

  # No explicit yaml given: search the yaml directory (case-insensitively).
  if [ -z "${yamlPath}" ]; then
    yamlPath=$(find "${currentDir}/yaml/" -iregex ".*${execModel}.yaml$" 2>/dev/null)
  fi
  # The expansion must be quoted: an empty value would otherwise collapse the
  # test to `[ -f ]`, which is true, and a missing yaml would go unnoticed.
  [ -f "${yamlPath}" ] || error_log "No such file or directory: ${yamlPath}\n"

  # Exact membership checks; the group variables are space-separated lists
  # and are split on purpose.
  # shellcheck disable=SC2086
  _is_listed "${hardware}" ${hardware_group} ||
    error_msg+="hardware: $hardware not in '$hardware_group'\n"
  # shellcheck disable=SC2086
  _is_listed "${framework}" ${framework_group} ||
    error_msg+="framework: $framework not in '$framework_group'\n"
  if [ -n "${error_msg}" ]; then
    error_log "${error_msg}"
  fi

  chmod -R u+x "${currentDir}"/*

  exec_train_file=$(find "${currentDir}" -iregex ".*${execModel}/${framework}/scripts/run.sh$" 2>/dev/null)
  start_file=${currentDir}/atlas_benchmark-master/utils/shell/start.sh

  # "ct" is accepted as an alias of "cluster".
  if [ "${hardware}" == "ct" ]; then
    hardware="cluster"
  fi

  # Exactly one match is required: non-empty, a single line, and existing.
  if [ -z "${exec_train_file}" ] ||
    [[ "${exec_train_file}" == *$'\n'* ]] ||
    [ ! -e "${exec_train_file}" ]; then
    error_log "The model($execModel) does not support the framework($framework) temporarily.\nplease use --list\n"
  fi

  # The model directory is the parent of the scripts/ directory.
  modelDir=$(cd "$(dirname "${exec_train_file}")/.." && pwd)
  echo "find script path success"
  echo "run train script file path is ${exec_train_file}"
  bash "${start_file}" "${mode}" "${hardware}" "${yamlPath}" "${modelDir}" "${framework}"
}
# Print, for every framework in framework_group, the models that ship a
# <model>/<framework>/scripts/run.sh below currentDir.
#
# Globals read: framework_group (space-separated list), currentDir
# Output: one "<framework>:" heading per framework followed by one indented
#         model name per run.sh found.
list_model() {
  local fw
  # framework_group is a space-separated list and is split on purpose.
  # shellcheck disable=SC2086
  for fw in ${framework_group}; do
    printf '\n%s:\n\n' "${fw}"
    # Stream find's output line by line into awk so that paths containing
    # whitespace are not split; the model name is the fourth path component
    # counted from the end (<model>/<framework>/scripts/run.sh).
    find "${currentDir}" -iregex ".*/${fw}/scripts/run.sh$" 2>/dev/null |
      awk -F / '{print " "$(NF-3)}'
  done
  echo ""
}
# Abort with a usage hint unless the option in $1 is followed by a value.
# Call as: require_value "$@"
require_value() {
  if [ "$#" -lt 2 ]; then
    echo "$1 requires a value, please use --help"
    exit 1
  fi
}

# Parse the command line, then start the training run.
# The loop is driven by the argument count rather than by "$1" being
# non-empty, so an empty argument is reported as an unknown option instead of
# silently ending the parse and discarding everything after it.
while [ "$#" -gt 0 ]; do
  case "$1" in
    -e | --execmodel)
      require_value "$@"
      execModel=$2
      shift
      ;;
    -host | -docker)
      # Strip the leading dash: "-host" -> "host", "-docker" -> "docker".
      mode=${1#-}
      ;;
    -y | --yamlpath)
      require_value "$@"
      yamlPath=$2
      shift
      ;;
    -f | --framework)
      require_value "$@"
      framework=$2
      shift
      ;;
    -hw | --hardware)
      require_value "$@"
      hardware=$2
      shift
      ;;
    -l | --list)
      list_model
      exit
      ;;
    -h | --help)
      echo_help
      exit
      ;;
    *)
      echo "$1 is not an option, please use --help"
      exit 1
      ;;
  esac
  shift
done

exec_train