[add]上传训练benchmark by z00560161

This commit is contained in:
liang_chaoming@huawei.com
2020-10-19 20:22:23 +08:00
parent 22b83024f5
commit 82522e2f61
1225 changed files with 345421 additions and 0 deletions
@@ -0,0 +1,42 @@
# main env
# Exports the Ascend toolchain environment for the training benchmark.
# Two install layouts are probed: when /usr/local/Ascend/nnae/latest exists its
# paths are used, otherwise the ascend-toolkit paths are used.
# NOTE(review): LD_LIBRARY_PATH is replaced, not extended, exactly as before;
# confirm that dropping any inherited value is intended.
# NOTE(review): the user-env values below look like template placeholders that
# an external tool substitutes; brace-style parameter expansions are avoided in
# this file in case that tool is brace-sensitive - TODO confirm.
if [ -d /usr/local/Ascend/nnae/latest ]; then
  export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib/:/usr/local/Ascend/nnae/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/local/Ascend/driver/tools/hccn_tool/:/usr/local/mpirun4.0/lib
  # The tfplugin site-packages directory used to be listed twice; once suffices.
  _ascend_pypath=/usr/local/Ascend/tfplugin/latest/tfplugin/python/site-packages:/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/
  export PATH=$PATH:/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin:/usr/local/mpirun4.0/bin
  export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp
else
  export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/local/mpirun4.0/lib
  _ascend_pypath=/usr/local/Ascend/tfplugin/latest/tfplugin/python/site-packages:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest//fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/tfplugin/python/site-packages
  # projectDir is appended only when set, so an unset value no longer leaves
  # a trailing empty entry in PYTHONPATH.
  if [ -n "$projectDir" ]; then
    _ascend_pypath=$_ascend_pypath:$projectDir
  fi
  export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:/usr/local/mpirun4.0/bin
  export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
fi
# Keep any inherited PYTHONPATH in front, but do not emit a leading empty
# entry when nothing was inherited.
if [ -n "$PYTHONPATH" ]; then
  export PYTHONPATH=$PYTHONPATH:$_ascend_pypath
else
  export PYTHONPATH=$_ascend_pypath
fi
# Scratch variable only; do not leak it into the calling shell.
unset _ascend_pypath
export SOC_VERSION=Ascend910
export HCCL_CONNECT_TIMEOUT=600
# user env
export JOB_ID={JOB_ID}
export RANK_TABLE_FILE={RANK_TABLE_FILE}
#export RANK_SIZE={RANK_SIZE}
#export RANK_INDEX={RANK_INDEX}
#export RANK_ID={RANK_ID}
# profiling env
export PROFILING_MODE=false
export PROFILING_OPTIONS=training_trace
export FP_POINT=resnet_model/conv2d/Conv2Dresnet_model/batch_normalization/FusedBatchNormV3_Reduce
export BP_POINT=gradients/AddN_70
export AICPU_PROFILING_MODE=false
# debug env
#export DUMP_GE_GRAPH=2
#export DUMP_OP=1
#export DUMP_OP_LESS=1
#export PRINT_MODEL=1
#export TE_PARALLEL_COMPILER=0
# system env
# Lift the core-dump size limit for this shell and its children.
ulimit -c unlimited
@@ -0,0 +1,18 @@
# Benchmark settings for this run. 'global_batch_size' is not stored here;
# resnet50_config() derives it from 'batch_size' and 'rank_size'.
config = {
    'batch_size': 32,
    'train_epochs': 2,
    'data_dir': '/home/imagenet_TF/',
    'epochs_between_evals': 1,
    'dynamic_loss_scale': True,
    'rank_size': 16,
    'max_train_steps': 1000,
    'iterations_per_loop': 100,
    'save_checkpoints_steps': 115200,
}


def resnet50_config():
    """Return the module-level ``config`` dict with 'global_batch_size' filled in.

    'global_batch_size' is computed as ``config['batch_size'] * config['rank_size']``.
    The shared dict is updated in place and that same object is returned (not a
    copy), so repeated calls are harmless and callers see later edits to it.
    """
    config['global_batch_size'] = config['batch_size'] * config['rank_size']
    return config
@@ -0,0 +1,18 @@
# Benchmark settings for this run. 'global_batch_size' is not stored here;
# resnet50_config() derives it from 'batch_size' and 'rank_size'.
config = {
    'batch_size': 32,
    'train_epochs': 1,
    'data_dir': '/home/data/imagenet_TF/',
    'epochs_between_evals': 1,
    'dynamic_loss_scale': True,
    'rank_size': 1,
    'max_train_steps': 1000,
    'iterations_per_loop': 1000,
    'save_checkpoints_steps': 115200,
}


def resnet50_config():
    """Return the module-level ``config`` dict with 'global_batch_size' filled in.

    'global_batch_size' is computed as ``config['batch_size'] * config['rank_size']``.
    The shared dict is updated in place and that same object is returned (not a
    copy), so repeated calls are harmless and callers see later edits to it.
    """
    config['global_batch_size'] = config['batch_size'] * config['rank_size']
    return config
@@ -0,0 +1,18 @@
# Benchmark settings for this run. 'global_batch_size' is not stored here;
# resnet50_config() derives it from 'batch_size' and 'rank_size'.
config = {
    'batch_size': 32,
    'train_epochs': 1,
    'data_dir': '/home/imagenet_TF/',
    'epochs_between_evals': 1,
    'dynamic_loss_scale': True,
    'rank_size': 2,
    'max_train_steps': 1000,
    'iterations_per_loop': 1000,
    'save_checkpoints_steps': 115200,
}


def resnet50_config():
    """Return the module-level ``config`` dict with 'global_batch_size' filled in.

    'global_batch_size' is computed as ``config['batch_size'] * config['rank_size']``.
    The shared dict is updated in place and that same object is returned (not a
    copy), so repeated calls are harmless and callers see later edits to it.
    """
    config['global_batch_size'] = config['batch_size'] * config['rank_size']
    return config
@@ -0,0 +1,18 @@
# Benchmark settings for this run. 'global_batch_size' is not stored here;
# resnet50_config() derives it from 'batch_size' and 'rank_size'.
config = {
    'batch_size': 32,
    'train_epochs': 1,
    'data_dir': '/home/imagenet_TF/',
    'epochs_between_evals': 1,
    'dynamic_loss_scale': True,
    'rank_size': 4,
    'max_train_steps': 1000,
    'iterations_per_loop': 1000,
    'save_checkpoints_steps': 115200,
}


def resnet50_config():
    """Return the module-level ``config`` dict with 'global_batch_size' filled in.

    'global_batch_size' is computed as ``config['batch_size'] * config['rank_size']``.
    The shared dict is updated in place and that same object is returned (not a
    copy), so repeated calls are harmless and callers see later edits to it.
    """
    config['global_batch_size'] = config['batch_size'] * config['rank_size']
    return config
@@ -0,0 +1,18 @@
# Benchmark settings for this run. 'global_batch_size' is not stored here;
# resnet50_config() derives it from 'batch_size' and 'rank_size'.
config = {
    'batch_size': 32,
    'train_epochs': 2,
    'data_dir': '/home/imagenet_TF/',
    'epochs_between_evals': 1,
    'dynamic_loss_scale': True,
    'rank_size': 8,
    'max_train_steps': 1000,
    'iterations_per_loop': 100,
    'save_checkpoints_steps': 115200,
}


def resnet50_config():
    """Return the module-level ``config`` dict with 'global_batch_size' filled in.

    'global_batch_size' is computed as ``config['batch_size'] * config['rank_size']``.
    The shared dict is updated in place and that same object is returned (not a
    copy), so repeated calls are harmless and callers see later edits to it.
    """
    config['global_batch_size'] = config['batch_size'] * config['rank_size']
    return config