[add]上传训练benchmark by z00560161

This commit is contained in:
liang_chaoming@huawei.com
2020-10-19 20:22:23 +08:00
parent 22b83024f5
commit 82522e2f61
1225 changed files with 345421 additions and 0 deletions
+117
View File
@@ -0,0 +1,117 @@
#!/bin/bash
# Launcher configuration: locations, accepted option values and defaults.
# Every value below can be overridden by the command-line options parsed
# at the bottom of this script.

# Directory holding this script, and the parent of the caller's working dir.
currentDir=$(cd "$(dirname "$0")"; pwd)
parentDir=$(dirname "$PWD")

# Whitespace-separated lists of values accepted for --hardware / --framework.
hardware_group="1p 2p 4p 8p cluster ct"
framework_group="tensorflow pytorch mindspore"

# Defaults used when the matching option is not given.
execModel="AlexNet"
framework="tensorflow"
hardware="1p"
mode="host"
# Print the usage text to stdout and terminate the script with status 0.
# Takes no arguments. The default model shown here must stay in sync with
# the execModel default assigned at the top of the script (AlexNet).
echo_help() {
  echo ""
  echo " --execmodel, -e (选填) 需要执行的模型名称, 默认 AlexNet"
  echo " --hardware, -hw (选填) 选择 1p, 2p, 4p, 8p, cluster|ct(集群), 默认 1p"
  echo " --yamlpath, -y (选填) yaml 文件的路径, 默认为 yaml 路径下的 {execmodel}.yaml"
  echo " --framework, -f (选填) 模型训练框架, 默认 tensorflow"
  echo " -docker, -host (选填) 选择 docker 或 host 下执行, 默认使用 host"
  echo ""
  echo " --help, -h 显示帮助信息"
  echo " --list, -l 显示支持的框架与模型"
  echo ""
  echo " 示例1docker 环境下启动 MobileNet 多卡(8p)训练:"
  echo " ./benckmark.sh -e MobileNet -hw 8p -y ./yaml/MobileNet.yaml -docker"
  echo " 示例2host 环境下启动 MobileNet 单卡(1p)训练,yaml 使用默认文件:"
  echo " ./benckmark.sh -e MobileNet"
  echo ""
  # Help is a terminal action: leave the script successfully.
  exit 0
}
# Print an error report to stdout and abort the script with status 1.
# Arguments: $1 - message text; backslash escapes such as \n are expanded.
error_log() {
  local report="\nERROR:\n\n${1}"
  echo -e "${report}"
  exit 1
}
# Return 0 when $1 is exactly one of the whitespace-separated words in $2.
_exec_train_in_group() {
  local candidate=$1 member
  for member in $2; do
    [[ "${member}" == "${candidate}" ]] && return 0
  done
  return 1
}

# Validate the selected options, locate the model's run.sh and hand over to
# the shared start.sh launcher.
# Globals read:    currentDir, execModel, framework, mode,
#                  hardware_group, framework_group
# Globals written: yamlPath (filled in from the yaml directory when empty),
#                  hardware ("ct" is normalised to "cluster")
# Exits through error_log (status 1) when the yaml file is missing, when the
# hardware/framework value is not supported, or when no unique run.sh exists
# for the model/framework pair.
exec_train() {
  local error_msg=""
  local exec_train_file="" start_file modelDir script
  local -a run_scripts=()

  # Fall back to <currentDir>/yaml/<execModel>.yaml (case-insensitive match).
  if [[ -z "${yamlPath}" ]]; then
    yamlPath=$(find "${currentDir}/yaml/" -iregex ".*${execModel}.yaml$" 2>/dev/null)
    # An empty lookup result must be reported explicitly; an unquoted
    # `[ -f ]` with no operand would evaluate to true and hide the problem.
    [[ -n "${yamlPath}" ]] \
      || error_log "No such file or directory: ${currentDir}/yaml/${execModel}.yaml\n"
  fi
  [[ -f "${yamlPath}" ]] || error_log "No such file or directory: ${yamlPath}\n"

  # Exact word membership: regex/substring matching would accept values
  # such as "" or ".p".
  _exec_train_in_group "${hardware}" "${hardware_group}" \
    || error_msg+="hardware: $hardware not in '$hardware_group'\n"
  _exec_train_in_group "${framework}" "${framework_group}" \
    || error_msg+="framework: $framework not in '$framework_group'\n"
  if [[ -n "${error_msg}" ]]; then
    error_log "${error_msg}"
  fi

  chmod -R u+x "${currentDir}"/*

  # Collect matching run.sh files line by line so paths with spaces survive.
  while IFS= read -r script; do
    run_scripts+=("${script}")
  done < <(find "${currentDir}" -iregex ".*${execModel}/${framework}/scripts/run.sh$" 2>/dev/null)

  start_file=${currentDir}/atlas_benchmark-master/utils/shell/start.sh
  if [[ "${hardware}" == "ct" ]]; then
    hardware="cluster"
  fi

  if (( ${#run_scripts[@]} == 1 )) && [[ -e "${run_scripts[0]}" ]]; then
    exec_train_file=${run_scripts[0]}
    # The model directory is the parent of the scripts/ directory.
    modelDir=$(cd "$(dirname "${exec_train_file}")/.."; pwd)
    echo "find script path success"
    echo "run train script file path is ${exec_train_file}"
    bash "${start_file}" "${mode}" "${hardware}" "${yamlPath}" "${modelDir}" "${framework}"
  else
    error_log "The model($execModel) does not support the framework($framework) temporarily.\nplease use --list\n"
  fi
}
# Print, for every framework in framework_group, the models that ship a
# <model>/<framework>/scripts/run.sh below currentDir.
# Globals read: framework_group, currentDir
# Outputs: one "<framework>:" heading per framework followed by the model
#          directory names, written to stdout.
list_model() {
  local fw
  for fw in $framework_group; do
    echo -e "\n${fw}:\n"
    # Stream find's output straight into awk: one path per line, so paths
    # containing spaces are not word-split. The model name is the path
    # component three levels above run.sh.
    find "${currentDir}" -iregex ".*/${fw}/scripts/run.sh$" 2>/dev/null \
      | awk -F / '{print " "$(NF-3)}'
  done
  echo ""
}
# Parse command-line options into the global settings, then start training.
# Parsing stops at the first empty argument (or when arguments run out).
while [ -n "$1" ]; do
  case "$1" in
    -e|--execmodel|-y|--yamlpath|-f|--framework|-hw|--hardware)
      # These options consume the following argument; refuse to continue
      # when it is absent instead of silently storing an empty value.
      if [ "$#" -lt 2 ]; then
        echo "$1 requires a value, please use --help"
        exit 1
      fi
      case "$1" in
        -e|--execmodel) execModel=$2 ;;
        -y|--yamlpath) yamlPath=$2 ;;
        -f|--framework) framework=$2 ;;
        -hw|--hardware) hardware=$2 ;;
      esac
      shift
      ;;
    -host|-docker)
      # Drop the leading dash: "-host" -> "host", "-docker" -> "docker".
      mode=${1#-}
      ;;
    -l|--list)
      list_model
      exit
      ;;
    -h|--help)
      echo_help
      exit
      ;;
    *)
      echo "$1 is not an option, please use --help"
      exit 1
      ;;
  esac
  shift
done
exec_train