[add]上传训练benchmark by z00560161
This commit is contained in:
@@ -0,0 +1,40 @@
|
||||
# Resnet50_HC_tensorflow训练说明
|
||||
|
||||
### 1. 模型训练参数配置
|
||||
|
||||
在 `train/yaml/Resnet50_HC.yaml` 中修改相应配置，各配置项含义如下:
|
||||
|
||||
```
|
||||
tensorflow_config:
|
||||
# 基本参数
|
||||
max_steps: 1000
|
||||
data_url: /home/imagenet_TF
|
||||
epoches: 1
|
||||
epochs_between_evals: 1
|
||||
mode: train
|
||||
# 仅在多机执行时需要配置, 格式为 ip1:卡数量1,ip2:卡数量2
|
||||
mpirun_ip: 90.90.176.154:8,90.90.176.54:8
|
||||
|
||||
# docker 镜像名称:版本号
|
||||
docker_image: mpirun3:latest
|
||||
|
||||
|
||||
# 1. 指定 device id, 多个 id 使用空格分隔, 数量需与 rank_size 相同
|
||||
# 2. 仅在小于 8p 时生效
|
||||
# 3. 若不使用该配置, 请使用在行首添加'#'注释的方法将其关闭
|
||||
# device_group: 0 1 2 3
|
||||
device_group_1p: 0
|
||||
device_group_2p: 0 1
|
||||
device_group_4p: 0 1 2 3
|
||||
|
||||
```
|
||||
|
||||
------
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
+1
@@ -0,0 +1 @@
|
||||
13650
|
||||
+111
@@ -0,0 +1,111 @@
|
||||
import tensorflow as tf
|
||||
|
||||
import os
|
||||
# Output directory handed to the trainer through config['log_dir'].
log_dir = 'ckpt/'

# 256
config = {
    # ============ for testing ============
    'accelerator': '1980',  # 'gpu' or '1980'
    'shuffle_enable': 'yes',
    'shuffle_buffer_size': 10000,
    'rank_size': 32,
    'shard': True,

    # ============ basic config ============
    'mode': 'train',  # "train", "evaluate" or "train_and_evaluate"
    'epochs_between_evals': 4,  # used if mode is "train_and_evaluate"
    'stop_threshold': 80.0,  # used if mode is "train_and_evaluate"
    # 'data_dir': '/opt/npu/resnet_data_new',
    # NOTE(review): '{DATA_URL}' looks like a placeholder that is filled in
    # before the file is used -- confirm against the launcher.
    'data_url': '{DATA_URL}',
    'data_type': 'TFRECORD',
    'model_name': 'resnet50',
    'num_classes': 1001,
    'num_epochs': None,
    'height': 224,
    'width': 224,
    'dtype': tf.float32,
    'data_format': 'channels_last',
    'use_nesterov': True,
    'eval_interval': 1,
    # Could be a float or a string. If float, static loss scaling is
    # applied. If string, the corresponding automatic loss scaling algorithm
    # is used; it must be one of 'Backoff' or 'LogMax' (case insensitive).
    'loss_scale': 1024,
    'use_lars': False,
    'label_smoothing': 0.1,  # if greater than 0 then smooth the labels
    'weight_decay': 0.0001,
    # Minibatch size per node;
    # total batch size = batch_size * hvd.size() * itersize.
    'batch_size': 256,

    'momentum': [0.9],

    # ============ data processing config ============
    'min_object_covered': 0.1,  # used for random crop
    'aspect_ratio_range': [3.0 / 4.0, 4.0 / 3.0],
    'area_range': [0.16, 1.0],
    'max_attempts': 100,

    # ============ data augment config ============
    'increased_aug': False,
    'brightness': 0.3,
    'saturation': 0.6,
    'contrast': 0.6,
    'hue': 0.13,
    'num_preproc_threads': 22,

    # ============ initialization config ============
    'conv_init': tf.variance_scaling_initializer(),
    # "conv_bn_init" or "adv_bn_init": selects how the gamma of each batch
    # norm layer is initialized.
    #   "adv_bn_init":  gamma is 0 in the last bn of every residual block
    #                   and 1 everywhere else.
    #   "conv_bn_init": every gamma is set to the constant given by
    #                   "bn_gamma_initial_value".
    'bn_init_mode': 'adv_bn_init',
    'bn_gamma_initial_value': 1.0,

    # ============ model architecture ============
    'resnet_version': 'v1.5',
    # ------ input -------
    #   C1,C2,C3: input block, stride in different layer
    # ------ shortcut ------
    #   D1: average_pooling + conv1*1 in shortcut in downsample block
    #   D2: conv3*3,stride=2 in shortcut in downsample block
    #   D3: conv1*1 + average_pooling in shortcut in downsample block
    # ------ mainstream ----
    #   E1: average_pooling + conv3*3 in mainstream in downsample block
    #   E2: conv3*3 + average_pooling in mainstream in downsample block
    'arch_type': 'original',

    # ============ logger config ============
    'display_every': 1,
    'log_name': 'resnet50.log',
    'log_dir': log_dir,

    # ============ learning rate config ============
    'lr_warmup_mode': 'linear',  # "linear" or "cosine"
    'warmup_lr': 0.0,
    'warmup_epochs': 10,
    'learning_rate_maximum': 3.2,

    # "steps", "poly", "poly_cycle", "cosine", "linear_cosine",
    # "linear_twice", "constant" for 1980 only
    'lr_decay_mode': 'cosine',
    'learning_rate_end': 0.00001,

    'decay_steps': '10,20,30',  # for "steps"
    'lr_decay_steps': '6.4,0.64,0.064',

    'ploy_power': 2.0,  # for "poly" and "poly_cycle"

    'cdr_first_decay_ratio': 0.33,  # for "cosine_decay_restarts"
    'cdr_t_mul': 2.0,
    'cdr_m_mul': 0.1,

    'lc_periods': 0.47,  # for "linear_cosine"
    'lc_beta': 0.00001,

    'lr_mid': 0.5,  # for "linear_twice"
    'epoch_mid': 80,

    'bn_lr_scale': 1.0,
}
|
||||
|
||||
def res50_config():
    """Finalize and return the module-level ``config`` dict.

    Adds ``global_batch_size`` (``batch_size`` multiplied by ``rank_size``)
    and sets ``do_checkpoint`` to True. The shared dict is updated in place
    and that same object is returned.
    """
    config.update(
        global_batch_size=config['batch_size'] * config['rank_size'],
        do_checkpoint=True,
    )
    return config
|
||||
+109
@@ -0,0 +1,109 @@
|
||||
import tensorflow as tf
|
||||
|
||||
import os
|
||||
# Output directory handed to the trainer through config['log_dir'].
log_dir = 'ckpt/'

# 256
config = {
    # ============ for testing ============
    'accelerator': '1980',  # 'gpu' or '1980'
    'shuffle_enable': 'yes',
    'shuffle_buffer_size': 10000,
    'rank_size': 1,
    'shard': False,

    # ============ basic config ============
    'mode': 'train',  # "train", "evaluate" or "train_and_evaluate"
    'epochs_between_evals': 5,  # used if mode is "train_and_evaluate"
    'stop_threshold': 80.0,  # used if mode is "train_and_evaluate"
    # 'data_dir': '/opt/npu/resnet_data_new',
    # NOTE(review): machine-specific absolute path -- confirm it is meant to
    # be committed as-is.
    'data_url': '/home/mencai/training_shop-master/02-e2e/e2e_function/e2e_func_node/data/resnet50/imagenet_TF',
    'data_type': 'TFRECORD',
    'model_name': 'resnet50',
    'num_classes': 1001,
    'num_epochs': 100,
    'height': 224,
    'width': 224,
    'dtype': tf.float32,
    'data_format': 'channels_last',
    'use_nesterov': True,
    'eval_interval': 1,
    # Could be a float or a string. If float, static loss scaling is
    # applied. If string, the corresponding automatic loss scaling algorithm
    # is used; it must be one of 'Backoff' or 'LogMax' (case insensitive).
    'loss_scale': 1024,
    'use_lars': False,
    'label_smoothing': 0.1,  # if greater than 0 then smooth the labels
    'weight_decay': 0.0001,
    # Minibatch size per node;
    # total batch size = batch_size * hvd.size() * itersize.
    'batch_size': 256,

    'momentum': [0.9],

    # ============ data processing config ============
    'min_object_covered': 0.1,  # used for random crop
    'aspect_ratio_range': [3.0 / 4.0, 4.0 / 3.0],
    'area_range': [0.16, 1.0],
    'max_attempts': 100,

    # ============ data augment config ============
    'increased_aug': False,
    'brightness': 0.3,
    'saturation': 0.6,
    'contrast': 0.6,
    'hue': 0.13,
    'num_preproc_threads': 22,

    # ============ initialization config ============
    'conv_init': tf.variance_scaling_initializer(),
    # "conv_bn_init" or "adv_bn_init":
    #   "adv_bn_init":  gamma is 0 in the last bn of every residual block
    #                   and 1 everywhere else.
    #   "conv_bn_init": every gamma is set to the constant given by
    #                   "bn_gamma_initial_value".
    'bn_init_mode': 'adv_bn_init',
    'bn_gamma_initial_value': 1.0,

    # ============ model architecture ============
    'resnet_version': 'v1.5',
    # ------ input -------
    #   C1,C2,C3: input block, stride in different layer
    # ------ shortcut ------
    #   D1: average_pooling + conv1*1 in shortcut in downsample block
    #   D2: conv3*3,stride=2 in shortcut in downsample block
    #   D3: conv1*1 + average_pooling in shortcut in downsample block
    # ------ mainstream ----
    #   E1: average_pooling + conv3*3 in mainstream in downsample block
    #   E2: conv3*3 + average_pooling in mainstream in downsample block
    'arch_type': 'original',

    # ============ logger config ============
    'display_every': 1,
    'log_name': 'resnet50.log',
    'log_dir': log_dir,

    # ============ learning rate config ============
    # Was 'train', which is not a warmup mode: a blanket "mode -> 'train'"
    # text substitution had overwritten this value.
    'lr_warmup_mode': 'linear',  # "linear" or "cosine"
    'warmup_lr': 0.0,
    'warmup_epochs': 10,
    'learning_rate_maximum': 0.1,

    # Was 'train' for the same reason; valid values are "steps", "poly",
    # "poly_cycle", "cosine", "linear_cosine", "linear_twice" and
    # "constant" (for 1980 only).
    'lr_decay_mode': 'cosine',
    'learning_rate_end': 0.00001,

    'decay_steps': '10,20,30',  # for "steps"
    'lr_decay_steps': '6.4,0.64,0.064',

    'ploy_power': 2.0,  # for "poly" and "poly_cycle"

    'cdr_first_decay_ratio': 0.33,  # for "cosine_decay_restarts"
    'cdr_t_mul': 2.0,
    'cdr_m_mul': 0.1,

    'lc_periods': 0.47,  # for "linear_cosine"
    'lc_beta': 0.00001,

    'lr_mid': 0.5,  # for "linear_twice"
    'epoch_mid': 80,

    'bn_lr_scale': 1.0,
}
|
||||
|
||||
def res50_config():
    """Finalize and return the module-level ``config`` dict.

    Adds ``global_batch_size`` (``batch_size`` multiplied by ``rank_size``)
    and sets ``do_checkpoint`` to True. The shared dict is updated in place
    and that same object is returned.
    """
    config.update(
        global_batch_size=config['batch_size'] * config['rank_size'],
        do_checkpoint=True,
    )
    return config
|
||||
+109
@@ -0,0 +1,109 @@
|
||||
import tensorflow as tf
|
||||
|
||||
import os
|
||||
# Output directory handed to the trainer through config['log_dir'].
log_dir = 'ckpt/'

# 256
config = {
    # ============ for testing ============
    'accelerator': '1980',  # 'gpu' or '1980'
    'shuffle_enable': 'yes',
    'shuffle_buffer_size': 10000,
    'rank_size': 2,
    'shard': True,

    # ============ basic config ============
    'mode': 'train',  # "train", "evaluate" or "train_and_evaluate"
    'epochs_between_evals': 5,  # used if mode is "train_and_evaluate"
    'stop_threshold': 80.0,  # used if mode is "train_and_evaluate"
    # 'data_dir': '/opt/npu/resnet_data_new',
    # NOTE(review): machine-specific absolute path -- confirm it is meant to
    # be committed as-is.
    'data_url': '/home/mencai/training_shop-master/02-e2e/e2e_function/e2e_func_node/data/resnet50/imagenet_TF',
    'data_type': 'TFRECORD',
    'model_name': 'resnet50',
    'num_classes': 1001,
    'num_epochs': 100,
    'height': 224,
    'width': 224,
    'dtype': tf.float32,
    'data_format': 'channels_last',
    'use_nesterov': True,
    'eval_interval': 1,
    # Could be a float or a string. If float, static loss scaling is
    # applied. If string, the corresponding automatic loss scaling algorithm
    # is used; it must be one of 'Backoff' or 'LogMax' (case insensitive).
    'loss_scale': 1024,
    'use_lars': False,
    'label_smoothing': 0.1,  # if greater than 0 then smooth the labels
    'weight_decay': 0.0001,
    # Minibatch size per node;
    # total batch size = batch_size * hvd.size() * itersize.
    'batch_size': 256,

    'momentum': [0.9],

    # ============ data processing config ============
    'min_object_covered': 0.1,  # used for random crop
    'aspect_ratio_range': [3.0 / 4.0, 4.0 / 3.0],
    'area_range': [0.16, 1.0],
    'max_attempts': 100,

    # ============ data augment config ============
    'increased_aug': False,
    'brightness': 0.3,
    'saturation': 0.6,
    'contrast': 0.6,
    'hue': 0.13,
    'num_preproc_threads': 22,

    # ============ initialization config ============
    'conv_init': tf.variance_scaling_initializer(),
    # "conv_bn_init" or "adv_bn_init":
    #   "adv_bn_init":  gamma is 0 in the last bn of every residual block
    #                   and 1 everywhere else.
    #   "conv_bn_init": every gamma is set to the constant given by
    #                   "bn_gamma_initial_value".
    'bn_init_mode': 'adv_bn_init',
    'bn_gamma_initial_value': 1.0,

    # ============ model architecture ============
    'resnet_version': 'v1.5',
    # ------ input -------
    #   C1,C2,C3: input block, stride in different layer
    # ------ shortcut ------
    #   D1: average_pooling + conv1*1 in shortcut in downsample block
    #   D2: conv3*3,stride=2 in shortcut in downsample block
    #   D3: conv1*1 + average_pooling in shortcut in downsample block
    # ------ mainstream ----
    #   E1: average_pooling + conv3*3 in mainstream in downsample block
    #   E2: conv3*3 + average_pooling in mainstream in downsample block
    'arch_type': 'original',

    # ============ logger config ============
    'display_every': 1,
    'log_name': 'resnet50.log',
    'log_dir': log_dir,

    # ============ learning rate config ============
    # Was 'train', which is not a warmup mode: a blanket "mode -> 'train'"
    # text substitution had overwritten this value.
    'lr_warmup_mode': 'linear',  # "linear" or "cosine"
    'warmup_lr': 0.0,
    'warmup_epochs': 10,
    'learning_rate_maximum': 0.1,

    # Was 'train' for the same reason; valid values are "steps", "poly",
    # "poly_cycle", "cosine", "linear_cosine", "linear_twice" and
    # "constant" (for 1980 only).
    'lr_decay_mode': 'cosine',
    'learning_rate_end': 0.00001,

    'decay_steps': '10,20,30',  # for "steps"
    'lr_decay_steps': '6.4,0.64,0.064',

    'ploy_power': 2.0,  # for "poly" and "poly_cycle"

    'cdr_first_decay_ratio': 0.33,  # for "cosine_decay_restarts"
    'cdr_t_mul': 2.0,
    'cdr_m_mul': 0.1,

    'lc_periods': 0.47,  # for "linear_cosine"
    'lc_beta': 0.00001,

    'lr_mid': 0.5,  # for "linear_twice"
    'epoch_mid': 80,

    'bn_lr_scale': 1.0,
}
|
||||
|
||||
def res50_config():
    """Finalize and return the module-level ``config`` dict.

    Adds ``global_batch_size`` (``batch_size`` multiplied by ``rank_size``)
    and sets ``do_checkpoint`` to True. The shared dict is updated in place
    and that same object is returned.
    """
    config.update(
        global_batch_size=config['batch_size'] * config['rank_size'],
        do_checkpoint=True,
    )
    return config
|
||||
+109
@@ -0,0 +1,109 @@
|
||||
import tensorflow as tf
|
||||
|
||||
import os
|
||||
# Output directory handed to the trainer through config['log_dir'].
log_dir = 'ckpt/'

# 256
config = {
    # ============ for testing ============
    'accelerator': '1980',  # 'gpu' or '1980'
    'shuffle_enable': 'yes',
    'shuffle_buffer_size': 10000,
    'rank_size': 4,
    'shard': True,

    # ============ basic config ============
    'mode': 'train',  # "train", "evaluate" or "train_and_evaluate"
    'epochs_between_evals': 5,  # used if mode is "train_and_evaluate"
    'stop_threshold': 80.0,  # used if mode is "train_and_evaluate"
    # 'data_dir': '/opt/npu/resnet_data_new',
    # NOTE(review): machine-specific absolute path -- confirm it is meant to
    # be committed as-is.
    'data_url': '/home/mencai/training_shop-master/02-e2e/e2e_function/e2e_func_node/data/resnet50/imagenet_TF',
    'data_type': 'TFRECORD',
    'model_name': 'resnet50',
    'num_classes': 1001,
    'num_epochs': 100,
    'height': 224,
    'width': 224,
    'dtype': tf.float32,
    'data_format': 'channels_last',
    'use_nesterov': True,
    'eval_interval': 1,
    # Could be a float or a string. If float, static loss scaling is
    # applied. If string, the corresponding automatic loss scaling algorithm
    # is used; it must be one of 'Backoff' or 'LogMax' (case insensitive).
    'loss_scale': 1024,
    'use_lars': False,
    'label_smoothing': 0.1,  # if greater than 0 then smooth the labels
    'weight_decay': 0.0001,
    # Minibatch size per node;
    # total batch size = batch_size * hvd.size() * itersize.
    'batch_size': 256,

    'momentum': [0.9],

    # ============ data processing config ============
    'min_object_covered': 0.1,  # used for random crop
    'aspect_ratio_range': [3.0 / 4.0, 4.0 / 3.0],
    'area_range': [0.16, 1.0],
    'max_attempts': 100,

    # ============ data augment config ============
    'increased_aug': False,
    'brightness': 0.3,
    'saturation': 0.6,
    'contrast': 0.6,
    'hue': 0.13,
    'num_preproc_threads': 22,

    # ============ initialization config ============
    'conv_init': tf.variance_scaling_initializer(),
    # "conv_bn_init" or "adv_bn_init":
    #   "adv_bn_init":  gamma is 0 in the last bn of every residual block
    #                   and 1 everywhere else.
    #   "conv_bn_init": every gamma is set to the constant given by
    #                   "bn_gamma_initial_value".
    'bn_init_mode': 'adv_bn_init',
    'bn_gamma_initial_value': 1.0,

    # ============ model architecture ============
    'resnet_version': 'v1.5',
    # ------ input -------
    #   C1,C2,C3: input block, stride in different layer
    # ------ shortcut ------
    #   D1: average_pooling + conv1*1 in shortcut in downsample block
    #   D2: conv3*3,stride=2 in shortcut in downsample block
    #   D3: conv1*1 + average_pooling in shortcut in downsample block
    # ------ mainstream ----
    #   E1: average_pooling + conv3*3 in mainstream in downsample block
    #   E2: conv3*3 + average_pooling in mainstream in downsample block
    'arch_type': 'original',

    # ============ logger config ============
    'display_every': 1,
    'log_name': 'resnet50.log',
    'log_dir': log_dir,

    # ============ learning rate config ============
    # Was 'train', which is not a warmup mode: a blanket "mode -> 'train'"
    # text substitution had overwritten this value.
    'lr_warmup_mode': 'linear',  # "linear" or "cosine"
    'warmup_lr': 0.0,
    'warmup_epochs': 10,
    'learning_rate_maximum': 0.1,

    # Was 'train' for the same reason; valid values are "steps", "poly",
    # "poly_cycle", "cosine", "linear_cosine", "linear_twice" and
    # "constant" (for 1980 only).
    'lr_decay_mode': 'cosine',
    'learning_rate_end': 0.00001,

    'decay_steps': '10,20,30',  # for "steps"
    'lr_decay_steps': '6.4,0.64,0.064',

    'ploy_power': 2.0,  # for "poly" and "poly_cycle"

    'cdr_first_decay_ratio': 0.33,  # for "cosine_decay_restarts"
    'cdr_t_mul': 2.0,
    'cdr_m_mul': 0.1,

    'lc_periods': 0.47,  # for "linear_cosine"
    'lc_beta': 0.00001,

    'lr_mid': 0.5,  # for "linear_twice"
    'epoch_mid': 80,

    'bn_lr_scale': 1.0,
}
|
||||
|
||||
def res50_config():
    """Finalize and return the module-level ``config`` dict.

    Adds ``global_batch_size`` (``batch_size`` multiplied by ``rank_size``)
    and sets ``do_checkpoint`` to True. The shared dict is updated in place
    and that same object is returned.
    """
    config.update(
        global_batch_size=config['batch_size'] * config['rank_size'],
        do_checkpoint=True,
    )
    return config
|
||||
+111
@@ -0,0 +1,111 @@
|
||||
import tensorflow as tf
|
||||
|
||||
import os
|
||||
# Output directory handed to the trainer through config['log_dir'].
log_dir = 'ckpt/'

# 256
config = {
    # ============ for testing ============
    'accelerator': '1980',  # 'gpu' or '1980'
    'shuffle_enable': 'yes',
    'shuffle_buffer_size': 10000,
    'rank_size': 8,
    'shard': True,

    # ============ basic config ============
    'mode': 'train',  # "train", "evaluate" or "train_and_evaluate"
    'epochs_between_evals': 4,  # used if mode is "train_and_evaluate"
    'stop_threshold': 80.0,  # used if mode is "train_and_evaluate"
    # 'data_dir': '/opt/npu/resnet_data_new',
    # NOTE(review): '{DATA_URL}' looks like a placeholder that is filled in
    # before the file is used -- confirm against the launcher.
    'data_url': '{DATA_URL}',
    'data_type': 'TFRECORD',
    'model_name': 'resnet50',
    'num_classes': 1001,
    'num_epochs': None,
    'height': 224,
    'width': 224,
    'dtype': tf.float32,
    'data_format': 'channels_last',
    'use_nesterov': True,
    'eval_interval': 1,
    # Could be a float or a string. If float, static loss scaling is
    # applied. If string, the corresponding automatic loss scaling algorithm
    # is used; it must be one of 'Backoff' or 'LogMax' (case insensitive).
    'loss_scale': 1024,
    'use_lars': False,
    'label_smoothing': 0.1,  # if greater than 0 then smooth the labels
    'weight_decay': 0.0001,
    # Minibatch size per node;
    # total batch size = batch_size * hvd.size() * itersize.
    'batch_size': 256,

    'momentum': [0.9],

    # ============ data processing config ============
    'min_object_covered': 0.1,  # used for random crop
    'aspect_ratio_range': [3.0 / 4.0, 4.0 / 3.0],
    'area_range': [0.16, 1.0],
    'max_attempts': 100,

    # ============ data augment config ============
    'increased_aug': False,
    'brightness': 0.3,
    'saturation': 0.6,
    'contrast': 0.6,
    'hue': 0.13,
    'num_preproc_threads': 22,

    # ============ initialization config ============
    'conv_init': tf.variance_scaling_initializer(),
    # "conv_bn_init" or "adv_bn_init": selects how the gamma of each batch
    # norm layer is initialized.
    #   "adv_bn_init":  gamma is 0 in the last bn of every residual block
    #                   and 1 everywhere else.
    #   "conv_bn_init": every gamma is set to the constant given by
    #                   "bn_gamma_initial_value".
    'bn_init_mode': 'adv_bn_init',
    'bn_gamma_initial_value': 1.0,

    # ============ model architecture ============
    'resnet_version': 'v1.5',
    # ------ input -------
    #   C1,C2,C3: input block, stride in different layer
    # ------ shortcut ------
    #   D1: average_pooling + conv1*1 in shortcut in downsample block
    #   D2: conv3*3,stride=2 in shortcut in downsample block
    #   D3: conv1*1 + average_pooling in shortcut in downsample block
    # ------ mainstream ----
    #   E1: average_pooling + conv3*3 in mainstream in downsample block
    #   E2: conv3*3 + average_pooling in mainstream in downsample block
    'arch_type': 'original',

    # ============ logger config ============
    'display_every': 1,
    'log_name': 'resnet50.log',
    'log_dir': log_dir,

    # ============ learning rate config ============
    'lr_warmup_mode': 'linear',  # "linear" or "cosine"
    'warmup_lr': 0.0,
    'warmup_epochs': 10,
    'learning_rate_maximum': 0.8,

    # "steps", "poly", "poly_cycle", "cosine", "linear_cosine",
    # "linear_twice", "constant" for 1980 only
    'lr_decay_mode': 'cosine',
    'learning_rate_end': 0.00001,

    'decay_steps': '10,20,30',  # for "steps"
    'lr_decay_steps': '6.4,0.64,0.064',

    'ploy_power': 2.0,  # for "poly" and "poly_cycle"

    'cdr_first_decay_ratio': 0.33,  # for "cosine_decay_restarts"
    'cdr_t_mul': 2.0,
    'cdr_m_mul': 0.1,

    'lc_periods': 0.47,  # for "linear_cosine"
    'lc_beta': 0.00001,

    'lr_mid': 0.5,  # for "linear_twice"
    'epoch_mid': 80,

    'bn_lr_scale': 1.0,
}
|
||||
|
||||
def res50_config():
    """Finalize and return the module-level ``config`` dict.

    Adds ``global_batch_size`` (``batch_size`` multiplied by ``rank_size``)
    and sets ``do_checkpoint`` to True. The shared dict is updated in place
    and that same object is returned.
    """
    config.update(
        global_batch_size=config['batch_size'] * config['rank_size'],
        do_checkpoint=True,
    )
    return config
|
||||
+237
@@ -0,0 +1,237 @@
|
||||
import numpy as np
|
||||
from . import preprocessing
|
||||
import tensorflow as tf
|
||||
from tensorflow.python.util import nest
|
||||
import os,sys
|
||||
import numpy as np
|
||||
sys.path.append("..")
|
||||
from trainers.train_helper import stage
|
||||
|
||||
class DataLoader:
    """Builds the input datasets and input-side ops for a training run.

    The loader keeps a reference to the run configuration dict and offers
    helpers that create ``tf.data`` datasets through ``make_dataset``, stage
    tensors through ``stage`` and normalise image batches.
    """

    def __init__(self, config):
        """Keep ``config`` and write the fixed dataset sizes into it.

        The sample counts are hard-coded (1281167 training and 50000
        evaluation samples) rather than counted from the record files.
        """
        train_total = 1281167
        self.config = config
        self.config['num_training_samples'] = train_total
        self.config['num_evaluating_samples'] = 50000
        print('total num_training_sampels: %d' % train_total)

        self.training_samples_per_rank = train_total

    def get_train_input_fn_synthetic(self):
        """Return an endlessly repeating dataset of constant fake batches.

        Each image tensor is filled with 0.5 and each label is 1; elements
        are grouped into batches of ``config['batch_size']``.
        """
        per_step = self.config['batch_size']
        image_shape = [self.config['height'], self.config['width'], 3]
        fake_image = nest.map_structure(
            lambda shape: tf.constant(0.5, tf.float32, shape),
            tf.TensorShape(image_shape))
        fake_label = nest.map_structure(
            lambda shape: tf.constant(1, tf.int32, shape),
            tf.TensorShape([1]))
        endless = tf.data.Dataset.from_tensors((fake_image, fake_label)).repeat()
        return endless.batch(per_step)

    def get_train_input_fn(self):
        """Return the training dataset built by ``make_dataset``.

        No file list is passed (``filenames`` is None); all other settings
        are read from the stored configuration.
        """
        cfg = self.config
        sample_budget = self.training_samples_per_rank
        return make_dataset(
            cfg, None, sample_budget,
            cfg['batch_size'], cfg['height'], cfg['width'],
            cfg['brightness'], cfg['contrast'], cfg['saturation'], cfg['hue'],
            training=True,
            num_threads=cfg['num_preproc_threads'],
            nsummary=10,
            increased_aug=cfg['increased_aug'],
            shard=cfg['shard'],
            synthetic=False)

    def get_eval_input_fn(self):
        """Return the evaluation dataset built by ``make_dataset``.

        The take count is fixed at 50000 and the increased augmentation is
        always disabled.
        """
        cfg = self.config
        return make_dataset(
            cfg, None, 50000,
            cfg['batch_size'], cfg['height'], cfg['width'],
            cfg['brightness'], cfg['contrast'], cfg['saturation'], cfg['hue'],
            training=False,
            num_threads=cfg['num_preproc_threads'],
            nsummary=10,
            shard=cfg['shard'],
            synthetic=False,
            increased_aug=False)

    def get_input_pipeline_op(self, inputs, labels, mode):
        """Stage ``inputs``/``labels`` on '/cpu:0' and then on '/gpu:0'.

        Returns ``(preload_op, gpucopy_op, inputs, labels)`` where the
        tensors are the outputs of the second staging step. ``mode`` is
        accepted but not used.
        """
        with tf.device('/cpu:0'):
            preload_op, host_tensors = stage([inputs, labels])
        inputs, labels = host_tensors

        with tf.device('/gpu:0'):
            gpucopy_op, device_tensors = stage([inputs, labels])
        inputs, labels = device_tensors
        return preload_op, gpucopy_op, inputs, labels

    def normalize_and_format(self, inputs, data_format):
        """Normalise ``inputs`` per channel and apply ``data_format``.

        Subtracts the mean [121, 115, 100] and divides by the std
        [70, 68, 71]; when ``data_format`` is 'channels_first' the result is
        transposed with the permutation [0, 3, 1, 2].
        """
        channel_mean = np.array([121, 115, 100], dtype=np.float32)
        channel_std = np.array([70, 68, 71], dtype=np.float32)
        centered = tf.subtract(inputs, channel_mean)
        scaled = tf.multiply(centered, 1. / channel_std)
        if data_format != 'channels_first':
            return scaled
        return tf.transpose(scaled, [0, 3, 1, 2])
|
||||
|
||||
|
||||
|
||||
|
||||
#-------------------------------- Funcs -----------------------------------
|
||||
def get_num_records(filenames):
    """Estimate the total number of records in a list of TFRecord files.

    Only the first and the last file are read: every file except the last
    one is taken to contain as many records as the first.
    """
    def _records_in(path):
        # Walk the whole file once and count its records.
        return sum(1 for _ in tf.python_io.tf_record_iterator(path))

    file_total = len(filenames)
    leading = _records_in(filenames[0]) * (file_total - 1)
    return leading + _records_in(filenames[-1])
|
||||
|
||||
def _parse_example_proto(example_serialized):
    """Parse one serialized Example into image bytes, label and boxes.

    Returns ``(encoded_image, label, bbox)``: the raw 'image/encoded'
    string, the class label cast to int32 and the bounding boxes arranged
    as ``[1, num_boxes, coords]`` with coords ordered (ymin, xmin, ymax,
    xmax).
    """
    bbox_prefix = 'image/object/bbox/'

    feature_map = {
        'image/encoded': tf.FixedLenFeature(
            [], dtype=tf.string, default_value=''),
        'image/class/label': tf.FixedLenFeature(
            [], dtype=tf.int64, default_value=-1),
        'image/class/text': tf.FixedLenFeature(
            [], dtype=tf.string, default_value=''),
    }
    # The box coordinates are sparse features in the Example proto; all
    # four keys share one VarLenFeature spec.
    sparse_float32 = tf.VarLenFeature(dtype=tf.float32)
    for side in ('xmin', 'ymin', 'xmax', 'ymax'):
        feature_map[bbox_prefix + side] = sparse_float32

    parsed = tf.parse_single_example(example_serialized, feature_map)
    label = tf.cast(parsed['image/class/label'], dtype=tf.int32)

    edges = {
        side: tf.expand_dims(parsed[bbox_prefix + side].values, 0)
        for side in ('xmin', 'ymin', 'xmax', 'ymax')
    }

    # The coordinates are concatenated in (y, x) order.
    stacked = tf.concat(
        [edges['ymin'], edges['xmin'], edges['ymax'], edges['xmax']], 0)

    # Force the variable number of bounding boxes into the shape
    # [1, num_boxes, coords].
    bbox = tf.transpose(tf.expand_dims(stacked, 0), [0, 2, 1])

    return parsed['image/encoded'], label, bbox
|
||||
|
||||
def parse_record(raw_record):
    """Decode one serialized record into a preprocessed image and label.

    The crop settings and augmentation strengths are fixed inside this
    function (marked "for 1980 only" by the original author); the parsed
    bounding boxes are not used.
    """
    encoded_image, label, _unused_bbox = _parse_example_proto(raw_record)
    # for 1980 only
    crop_settings = {
        'min_object_covered': 0.1,
        'aspect_ratio_range': [3. / 4., 4. / 3.],
        'area_range': [0.16, 1.0],
        'max_attempts': 100,
    }
    processed = preprocessing.parse_and_preprocess_image_record(
        crop_settings, encoded_image,
        height=224, width=224,
        brightness=0.3, contrast=0.6, saturation=0.6, hue=0.13,
        distort=True, nsummary=10,
        increased_aug=False, random_search_aug=False)
    return processed, label
|
||||
|
||||
def read_rawdata(file_path_tensor):
    """Return a string tensor holding the raw bytes of a file.

    Args:
        file_path_tensor: tensor with the path of the file to read.

    Returns:
        The output of ``tf.py_func`` wrapping the Python-side read, typed
        as ``tf.string``.
    """
    def _read_file(file_path):
        # Read inside a context manager so the handle is closed right after
        # the read instead of lingering until garbage collection.
        with tf.gfile.GFile(file_path, 'rb') as handle:
            return handle.read()

    return tf.py_func(_read_file, inp=[file_path_tensor], Tout=tf.string)
|
||||
|
||||
def parse_function(filename, label):
    """Load a JPEG file, resize it to 224x224 and cast its label to int32.

    Returns ``(image_resized, label)``.
    """
    raw_bytes = read_rawdata(filename)
    resized = tf.image.resize_images(
        tf.image.decode_jpeg(raw_bytes, channels=3), [224, 224])
    # 7.3: labels of raw data default to int64, while resnet50 currently
    # only supports int32. Per the original note this has no effect before
    # sinking; after sinking no cast operator is added (performance
    # consideration), so the cast is done here.
    int32_label = tf.cast(label, dtype=tf.int32)
    return resized, int32_label
|
||||
|
||||
def parse_record1(image, label):
    """Run ``preprocessing.split_device`` on ``image``; pass ``label`` through."""
    return preprocessing.split_device(image), label
|
||||
|
||||
def make_dataset(config, filenames, take_count, batch_size, height, width,
                 brightness, contrast, saturation, hue,
                 training=False, num_threads=10, nsummary=10, shard=False, synthetic=False,
                 increased_aug=False, random_search_aug=False):
    """Build the ``tf.data`` input pipeline for training or evaluation.

    Args:
        config: run configuration; ``data_type`` and ``data_url`` are read,
            plus ``label_index_url`` when ``data_type`` is 'RAW DATA'.
        filenames: ignored; the file list is always derived from
            ``config['data_url']``.
        take_count: number of source elements kept when ``training`` is
            False.
        batch_size: batch size; incomplete final batches are dropped on the
            real-data path.
        height, width: image size of the synthetic elements.
        brightness, contrast, saturation, hue, num_threads, nsummary,
            increased_aug, random_search_aug: accepted for interface
            compatibility; not used by this function.
        training: selects the training pipeline (shuffle and repeat) and the
            training shards.
        shard: when True the source dataset is sharded by rank.
        synthetic: when True together with ``training``, constant fake data
            is returned instead of real data.

    Returns:
        A batched ``tf.data.Dataset``.

    Raises:
        TypeError: on the real-data path when the ``RANK_SIZE`` or
            ``DEVICE_INDEX`` environment variable is not set.
    """
    if synthetic and training:
        input_shape = [height, width, 3]
        input_element = nest.map_structure(lambda s: tf.constant(0.5, tf.float32, s), tf.TensorShape(input_shape))
        label_element = nest.map_structure(lambda s: tf.constant(1, tf.int32, s), tf.TensorShape([1]))
        element = (input_element, label_element)
        ds = tf.data.Dataset.from_tensors(element).repeat()
        ds = ds.batch(batch_size)
        return ds

    shuffle_buffer_size = 10000
    num_readers = 10
    rank_size = int(os.getenv('RANK_SIZE'))
    rank_id = int(os.getenv('DEVICE_INDEX'))

    if config['data_type'] == 'RAW DATA':
        images = []
        labels = []
        with tf.gfile.GFile(config['label_index_url'], 'r') as f:
            for line in f.readlines():
                stripped = line.strip()
                if not stripped:
                    # Tolerate blank lines (e.g. a trailing newline) rather
                    # than failing on int('').
                    continue
                tmp_list = stripped.split(" ")
                # Only the path is stored here; the image bytes are read
                # later inside the map step.
                images.append(os.path.join(config['data_url'], tmp_list[0]))
                labels.append(int(tmp_list[-1]))

        ds = tf.data.Dataset.from_tensor_slices((images, labels))
    else:
        # Evaluation must read the validation shards; globbing 'train-*'
        # unconditionally made evaluation run on training data.
        split = 'train' if training else 'validation'
        filename_pattern = os.path.join(config['data_url'], '%s-*')
        filenames = sorted(tf.gfile.Glob(filename_pattern % split))
        ds = tf.data.Dataset.from_tensor_slices(filenames)

    if shard:
        # split the dataset into parts for each GPU
        ds = ds.shard(rank_size, rank_id)

    if not training:
        ds = ds.take(take_count)  # make sure all ranks have the same amount

    if training:
        ds = ds.shuffle(1000, seed=7 * (1 + rank_id))

    if config['data_type'] == 'TFRECORD':
        ds = ds.interleave(tf.data.TFRecordDataset, cycle_length=num_readers, block_length=1)
        # Pair every record with a running counter so the map step below
        # receives two arguments on both data paths.
        counter = tf.data.Dataset.range(sys.maxsize)
        ds = tf.data.Dataset.zip((ds, counter))

    if training:
        ds = ds.apply(tf.data.experimental.shuffle_and_repeat(shuffle_buffer_size, seed=5 * (1 + rank_id)))

    if config['data_type'] == 'RAW DATA':
        ds = ds.map(lambda image, label: parse_function(image, label), num_parallel_calls=14)
    else:
        # The second element is the counter and is dropped here;
        # parse_record returns the real (image, label) pair.
        ds = ds.map(lambda image, label: parse_record(image), num_parallel_calls=192)

    ds = ds.batch(batch_size, drop_remainder=True)
    ds = ds.prefetch(buffer_size=tf.contrib.data.AUTOTUNE)
    return ds
|
||||
|
||||
|
||||
+160
@@ -0,0 +1,160 @@
|
||||
import tensorflow as tf
|
||||
#import horovod.tensorflow as hvd
|
||||
from tensorflow.contrib.image.python.ops import distort_image_ops
|
||||
import math
|
||||
#from .data_aug_search import random_aug_search
|
||||
|
||||
|
||||
|
||||
def deserialize_image_record(record):
    """Parse one serialized ``tf.Example`` holding an image and its labels.

    Args:
        record: serialized ``tf.Example`` passed to ``tf.parse_single_example``.

    Returns:
        Tuple ``(imgdata, label, bbox, text)``: the encoded image string, the
        class label cast to int32, the bounding boxes stacked in
        ``ymin, xmin, ymax, xmax`` order and rearranged to ``[1, N, 4]``, and
        the class text.
    """
    bbox_feature = tf.VarLenFeature(dtype=tf.float32)
    feature_map = {
        'image/encoded': tf.FixedLenFeature([], tf.string, ''),
        'image/class/label': tf.FixedLenFeature([1], tf.int64, -1),
        'image/class/text': tf.FixedLenFeature([], tf.string, ''),
        'image/object/bbox/xmin': bbox_feature,
        'image/object/bbox/ymin': bbox_feature,
        'image/object/bbox/xmax': bbox_feature,
        'image/object/bbox/ymax': bbox_feature,
    }
    with tf.name_scope('deserialize_image_record'):
        parsed = tf.parse_single_example(record, feature_map)
        imgdata = parsed['image/encoded']
        label = tf.cast(parsed['image/class/label'], tf.int32)
        # Stack the four coordinate vectors to [4, N], then reorder to [1, N, 4].
        coords = [parsed['image/object/bbox/%s' % side].values
                  for side in ['ymin', 'xmin', 'ymax', 'xmax']]
        bbox = tf.transpose(tf.expand_dims(tf.stack(coords), 0), [0, 2, 1])
        text = parsed['image/class/text']
    return imgdata, label, bbox, text
|
||||
|
||||
def decode_jpeg(imgdata, channels=3):
    """Decode JPEG bytes using the 'INTEGER_FAST' DCT and no fancy upscaling.

    Args:
        imgdata: encoded JPEG string tensor.
        channels: number of color channels requested from the decoder.

    Returns:
        The decoded image tensor.
    """
    decoder_options = {
        'channels': channels,
        'fancy_upscaling': False,
        'dct_method': 'INTEGER_FAST',
    }
    return tf.image.decode_jpeg(imgdata, **decoder_options)
|
||||
|
||||
|
||||
def crop_and_resize_image(config, image, height, width,
                          distort=False, nsummary=10):
    """Decode an encoded JPEG into a ``height`` x ``width`` crop.

    With ``distort`` set, a random box is drawn with
    ``tf.image.sample_distorted_bounding_box`` (constraints read from
    ``config``), scaled by the real JPEG dimensions, decoded with
    ``tf.image.decode_and_crop_jpeg`` and resized. Otherwise the image is
    fully decoded and its central 80% region is cropped and resized.

    Args:
        config: dict providing 'min_object_covered', 'aspect_ratio_range',
            'area_range' and 'max_attempts' for the random crop.
        image: encoded JPEG string tensor.
        height: output height in pixels.
        width: output width in pixels.
        distort: use the random crop instead of the central crop.
        nsummary: unused.

    Returns:
        The cropped and resized image tensor.
    """
    # Evaluation is done on a center-crop of this ratio
    eval_crop_ratio = 0.8

    def to_pixels(fraction, extent):
        # Scale a box coordinate by an image extent and truncate to int32.
        return tf.cast(fraction * (tf.cast(extent, tf.float32)), tf.int32)

    with tf.name_scope('crop_and_resize'):
        if distort:
            # Nominal size handed to the sampler; the resulting box is applied
            # to the actual JPEG dimensions below.
            initial_shape = [int(round(height / eval_crop_ratio)),
                             int(round(width / eval_crop_ratio)),
                             3]
            jpeg_shape = tf.image.extract_jpeg_shape(image)

            whole_image_box = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4])
            _, _, sampled_box = tf.image.sample_distorted_bounding_box(
                initial_shape,
                bounding_boxes=whole_image_box,
                min_object_covered=config['min_object_covered'],
                aspect_ratio_range=config['aspect_ratio_range'],
                area_range=config['area_range'],
                max_attempts=config['max_attempts'],
                # seed=11 would be needed here for deterministic results
                use_image_if_no_bounding_boxes=True)
            box = sampled_box[0, 0]  # Remove batch, box_idx dims

            top = to_pixels(box[0], jpeg_shape[0])
            left = to_pixels(box[1], jpeg_shape[1])
            bottom = to_pixels(box[2], jpeg_shape[0])
            right = to_pixels(box[3], jpeg_shape[1])

            # decode_and_crop_jpeg takes [offset_y, offset_x, height, width].
            crop_window = tf.stack([top, left, bottom - top, right - left])
            image = tf.image.decode_and_crop_jpeg(image, crop_window, channels=3)
            image = tf.image.resize_images(image, [height, width])
        else:
            # Central crop
            image = decode_jpeg(image, channels=3)
            lower = 0.5 * (1 - eval_crop_ratio)
            upper = 0.5 * (1 + eval_crop_ratio)
            center_box = tf.constant([lower, lower, upper, upper])
            image = tf.image.crop_and_resize(
                image[None, :, :, :], center_box[None, :], [0], [height, width])[0]

    return image
|
||||
|
||||
|
||||
def parse_and_preprocess_image_record(config, record, height, width,
                                      brightness, contrast, saturation, hue,
                                      distort, nsummary=10, increased_aug=False, random_search_aug=False):
    """Crop/resize an encoded image and optionally apply training augmentation.

    Args:
        config: dict forwarded to ``crop_and_resize_image``.
        record: encoded image, forwarded to ``crop_and_resize_image``.
        height: output height in pixels.
        width: output width in pixels.
        brightness: ``max_delta`` for ``tf.image.random_brightness``.
        contrast: lower contrast factor; the upper factor is ``2.0 - contrast``.
        saturation: lower saturation factor; the upper is ``2.0 - saturation``.
        hue: hue delta as a fraction of pi.
        distort: enable the random crop and the random horizontal flip.
        nsummary: unused.
        increased_aug: additionally apply brightness, HSV and contrast jitter
            (only when ``distort`` is set).
        random_search_aug: unused.

    Returns:
        The preprocessed image tensor (not clipped, normalized or cast here).
    """
    with tf.name_scope('preprocess_train'):
        image = crop_and_resize_image(config, record, height, width, distort)
        if not distort:
            return image

        image = tf.image.random_flip_left_right(image)
        if not increased_aug:
            return image

        image = tf.image.random_brightness(image, max_delta=brightness)
        image = distort_image_ops.random_hsv_in_yiq(
            image,
            lower_saturation=saturation,
            upper_saturation=2.0 - saturation,
            max_delta_hue=hue * math.pi)
        image = tf.image.random_contrast(image, lower=contrast, upper=2.0 - contrast)
        # NOTE(review): the summary is assumed to belong to the increased_aug
        # branch; confirm against the upstream file.
        tf.summary.image('distorted_color_image', tf.expand_dims(image, 0))
    return image
|
||||
def normalize(inputs):
    """Standardize ``inputs`` with fixed per-channel mean and std constants.

    The three-element mean and std lists are expanded to shape ``[1, 1, 3]``
    so they broadcast against the last axis of ``inputs``.

    Args:
        inputs: image tensor whose last axis has three channels.

    Returns:
        ``(inputs - mean) * (1 / std)``.
    """
    channel_mean = tf.expand_dims(tf.expand_dims([121.0, 115.0, 100.0], 0), 0)
    channel_std = tf.expand_dims(tf.expand_dims([70.0, 68.0, 71.0], 0), 0)
    centered = inputs - channel_mean
    return centered * (1.0 / channel_std)
|
||||
|
||||
def split_device(image):
    """Clip ``image`` to [0, 255], normalize it, and cast it to float16."""
    clipped = tf.clip_by_value(image, 0., 255.)
    return tf.cast(normalize(clipped), tf.float16)
|
||||
+50
@@ -0,0 +1,50 @@
|
||||
import tensorflow as tf
|
||||
from .lr_schedule import warmup_decay, get_lr, get_1980_lr
|
||||
|
||||
|
||||
class HyperParams:
    """Derive step counts from the training config and build the LR tensor."""

    def __init__(self, config):
        """Store ``config`` and write the derived step counts back into it.

        Reads 'num_training_samples', 'global_batch_size', 'num_epochs',
        'max_train_steps' and 'iterations_per_loop'; writes
        'nsteps_per_epoch', 'nstep', 'total_steps_include_iterations',
        'save_summary_steps' and 'save_checkpoints_steps'.

        Args:
            config: mutable training configuration dict (modified in place).
        """
        self.config = config
        nsteps_per_epoch = self.config['num_training_samples'] // self.config['global_batch_size']
        self.config['nsteps_per_epoch'] = nsteps_per_epoch

        # A falsy num_epochs (None or 0) means the run length is given
        # directly in steps.
        if self.config['num_epochs']:
            nstep = nsteps_per_epoch * self.config['num_epochs']
        else:
            nstep = self.config['max_train_steps']
        self.config['nstep'] = nstep

        self.config['total_steps_include_iterations'] = int(self.config['nstep'] + self.config['iterations_per_loop'])
        # Summaries and checkpoints are written once per epoch.
        self.config['save_summary_steps'] = nsteps_per_epoch
        self.config['save_checkpoints_steps'] = nsteps_per_epoch

    def get_hyper_params(self):
        """Return a dict with the single entry 'learning_rate'."""
        hyper_params = {}
        hyper_params['learning_rate'] = self.get_learning_rate()
        return hyper_params

    def get_learning_rate(self):
        """Build the learning-rate tensor, named 'learning_rate'.

        A constant ``learning_rate_maximum`` is used when 'lr_decay_mode' is
        'constant' or 'num_epochs' is None; otherwise the schedule comes from
        ``get_1980_lr``.

        Returns:
            The learning-rate tensor.
        """
        global_step = tf.train.get_global_step()
        nsteps_per_epoch = self.config['nsteps_per_epoch']

        warmup_lr = self.config['warmup_lr']
        lr = self.config['learning_rate_maximum']
        lr_end = self.config['learning_rate_end']
        lr_decay_mode = self.config['lr_decay_mode']

        with tf.device('/cpu:0'):  # Allow fallback to CPU if no GPU support for these ops
            # Identity comparison: None is a singleton.
            if lr_decay_mode == 'constant' or self.config['num_epochs'] is None:
                learning_rate = tf.constant(lr, tf.float32)
            else:
                learning_rate = get_1980_lr(self.config, global_step, warmup_lr, lr_end, lr,
                                            self.config['warmup_epochs'], nsteps_per_epoch,
                                            self.config['nstep'], lr_decay_mode)

            learning_rate = tf.identity(learning_rate, 'learning_rate')
        return learning_rate
|
||||
|
||||
|
||||
+168
@@ -0,0 +1,168 @@
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
|
||||
def get_lr(lr, lr_end, lr_decay_mode, warmup_it, decay_steps, global_step, steps, lr_steps, ploy_power,
           cdr_first_decay_ratio, cdr_t_mul, cdr_m_mul, cdr_alpha, cd_alpha, lc_periods, lc_alpha, lc_beta, lr_mid, it_mid):
    """Build the post-warm-up learning-rate tensor for ``lr_decay_mode``.

    Every mode except 'steps' shifts ``global_step`` and ``decay_steps`` by
    ``warmup_it`` so the decay starts where warm-up ends.

    Args:
        lr: initial (peak) learning rate of the decay.
        lr_end: final learning rate ('poly', 'poly_cycle', 'linear_twice').
        lr_decay_mode: 'steps', 'poly', 'poly_cycle', 'cosine_decay_restarts',
            'cosine', 'linear_cosine' or 'linear_twice'.
        warmup_it: number of warm-up iterations.
        decay_steps: step at which the decay ends.
        global_step: current step tensor.
        steps: boundaries for 'steps' mode.
        lr_steps: values for 'steps' mode.
        ploy_power: polynomial power ('poly' modes).
        cdr_first_decay_ratio, cdr_t_mul, cdr_m_mul, cdr_alpha: parameters
            for 'cosine_decay_restarts'.
        cd_alpha: ``alpha`` for 'cosine'.
        lc_periods, lc_alpha, lc_beta: parameters for 'linear_cosine'.
        lr_mid, it_mid: middle rate and iteration for 'linear_twice'.

    Returns:
        The learning-rate tensor.

    Raises:
        ValueError: if ``lr_decay_mode`` is not one of the modes above.
    """
    if lr_decay_mode == 'steps':
        return tf.train.piecewise_constant(global_step, steps, lr_steps)

    if lr_decay_mode in ('poly', 'poly_cycle'):
        return tf.train.polynomial_decay(
            lr,
            global_step - warmup_it,
            decay_steps=decay_steps - warmup_it,
            end_learning_rate=lr_end,
            power=ploy_power,
            cycle=(lr_decay_mode == 'poly_cycle'))

    if lr_decay_mode == 'cosine_decay_restarts':
        return tf.train.cosine_decay_restarts(
            lr,
            global_step - warmup_it,
            (decay_steps - warmup_it) * cdr_first_decay_ratio,
            t_mul=cdr_t_mul,
            m_mul=cdr_m_mul,
            alpha=cdr_alpha)

    if lr_decay_mode == 'cosine':
        return tf.train.cosine_decay(
            lr,
            global_step - warmup_it,
            decay_steps=decay_steps - warmup_it,
            alpha=cd_alpha)

    if lr_decay_mode == 'linear_cosine':
        return tf.train.linear_cosine_decay(
            lr,
            global_step - warmup_it,
            decay_steps=decay_steps - warmup_it,
            num_periods=lc_periods,
            alpha=lc_alpha,
            beta=lc_beta)

    if lr_decay_mode == 'linear_twice':
        return decay_linear_twice(lr, lr_mid, lr_end, warmup_it, it_mid, decay_steps, global_step)

    raise ValueError('Invalid type of lr_decay_mode')
|
||||
|
||||
|
||||
def cos_warmup_1980( global_step, warmup_steps, max_lr ):
    """Return the cosine warm-up learning rate for one step (pure Python).

    The rate follows ``max_lr * 0.5 * (1 + cos(pi + pi * (step + 1) / warmup_steps))``,
    reaching ``max_lr`` at ``step + 1 == warmup_steps``.

    Args:
        global_step: integer step index.
        warmup_steps: length of the warm-up phase in steps.
        max_lr: learning rate at the end of warm-up.

    Returns:
        The learning rate for ``global_step``; ``max_lr`` itself when
        ``warmup_steps`` is 0.
    """
    # No warm-up phase: the ramp is already complete, and the ratio below
    # would otherwise divide by zero.
    if warmup_steps == 0:
        return max_lr
    PI = 3.14159265359
    ang = PI + PI * ( float(global_step+1) / float(warmup_steps) )
    offset = max_lr * 0.5*( 1.0 + np.cos( ang ) )
    return offset
|
||||
|
||||
def cos_decay_1980( global_step, warmup_steps, total_steps, max_lr ):
    """Return the cosine-decay learning rate for one step (pure Python).

    The decay progress is ``(step - warmup_steps + 1) / (total_steps - warmup_steps)``
    and the rate is ``max_lr * 0.5 * (1 + cos(pi * progress))``.

    Args:
        global_step: integer step index.
        warmup_steps: number of warm-up steps preceding the decay.
        total_steps: step count at which the decay ends.
        max_lr: learning rate at the start of the decay.

    Returns:
        The learning rate for ``global_step``.
    """
    PI = 3.14159265359
    decay_progress = float(global_step - warmup_steps+1) / float(total_steps - warmup_steps)
    return max_lr * 0.5*( 1.0 + np.cos( PI * ( decay_progress ) ) )
|
||||
|
||||
|
||||
def get_1980_lr(config, global_step, lr_init, lr_end, lr_max, warmup_epochs, steps_per_epoch, nsteps, lr_decay_mode):
    """Precompute a per-step learning-rate table and look up ``global_step``.

    The schedule is built as a Python list, converted to a constant tensor
    and indexed with ``tf.gather``.

    Args:
        config: dict; ``config['total_steps_include_iterations']`` is the
            table length for the 'poly', 'cosine' and 'linear_cosine' modes.
        global_step: step tensor used to index the table.
        lr_init: learning rate at step 0 of a linear warm-up.
        lr_end: final learning rate of the fallback linear schedule.
        lr_max: peak learning rate.
        warmup_epochs: warm-up length in epochs; 0 disables warm-up.
        steps_per_epoch: number of steps per epoch.
        nsteps: total number of training steps.
        lr_decay_mode: 'steps', 'poly', 'cosine' or 'linear_cosine'; any
            other value selects linear warm-up followed by linear decay to
            ``lr_end``.

    Returns:
        The table entry at ``global_step``.
    """
    lr_each_step = []
    total_steps = int(nsteps)

    # NOTE(review): the 'steps' and fallback tables hold only total_steps
    # entries, while the other modes are padded with 0.0 up to
    # config['total_steps_include_iterations']; confirm global_step never
    # exceeds the shorter tables.
    if lr_decay_mode == 'steps':
        decay_epoch_index = [30 * steps_per_epoch, 60 * steps_per_epoch, 80 * steps_per_epoch]
        for i in range(total_steps):
            if i < decay_epoch_index[0]:
                lr = lr_max
            elif i < decay_epoch_index[1]:
                lr = lr_max * 0.1
            elif i < decay_epoch_index[2]:
                lr = lr_max * 0.01
            else:
                lr = lr_max * 0.001
            lr_each_step.append(lr)
    elif lr_decay_mode == 'poly':
        warmup_steps = steps_per_epoch * warmup_epochs
        # Without a warm-up phase every step goes straight to the decay
        # branch; the warm-up formulas would divide by zero.
        has_warmup = warmup_steps > 0
        inc_each_step = ( float(lr_max) - float(lr_init) ) / float(warmup_steps) if has_warmup else 0.0
        for i in range( config['total_steps_include_iterations'] ):
            if has_warmup and i <= warmup_steps:
                lr = float(lr_init) + inc_each_step * float(i)
            elif i < total_steps:
                base = ( 1.0 - (float(i)-float(warmup_steps))/(float(total_steps)-float(warmup_steps)) )
                lr = float(lr_max) * base
            else:
                lr = 0.0
            lr_each_step.append(lr)
    elif lr_decay_mode == 'cosine':
        warmup_steps = steps_per_epoch * warmup_epochs
        has_warmup = warmup_steps > 0
        for i in range( config['total_steps_include_iterations'] ):
            if has_warmup and i <= warmup_steps:
                lr = cos_warmup_1980( i, warmup_steps, lr_max )
            elif i < total_steps:
                lr = cos_decay_1980( i, warmup_steps, total_steps, lr_max )
            else:
                lr = 0.0
            lr_each_step.append(lr)
    elif lr_decay_mode == 'linear_cosine':
        warmup_steps = steps_per_epoch * warmup_epochs
        has_warmup = warmup_steps > 0
        inc_each_step = ( float(lr_max) - float(lr_init) ) / float(warmup_steps) if has_warmup else 0.0
        for i in range( config['total_steps_include_iterations'] ):
            if has_warmup and i <= warmup_steps:
                lr = float(lr_init) + inc_each_step * float(i)
            elif i < total_steps:
                lr = cos_decay_1980( i, warmup_steps, total_steps, lr_max )
            else:
                lr = 0.0
            lr_each_step.append(lr)
    else:
        warmup_steps = steps_per_epoch * warmup_epochs
        has_warmup = warmup_steps > 0
        for i in range(total_steps):
            if has_warmup and i <= warmup_steps:
                lr = lr_init + (lr_max - lr_init) * i / warmup_steps
            else:
                lr = lr_max - ( lr_max - lr_end ) * (i - warmup_steps) / (total_steps - warmup_steps)
            lr_each_step.append( lr )

    current_step = global_step
    lr_each_step = tf.convert_to_tensor( lr_each_step )
    print (lr_each_step)
    learning_rate = tf.gather( lr_each_step, current_step )

    return learning_rate
|
||||
|
||||
def warmup_decay(lr_warmup_mode, warmup_lr, global_step, warmup_steps, warmup_end_lr):
    """Dispatch to the warm-up schedule named by ``lr_warmup_mode``.

    Args:
        lr_warmup_mode: 'linear' or 'cosine'.
        warmup_lr: learning rate at the start of warm-up.
        global_step: current step.
        warmup_steps: length of the warm-up phase in steps.
        warmup_end_lr: learning rate at the end of warm-up.

    Returns:
        The warm-up learning-rate tensor.

    Raises:
        ValueError: if ``lr_warmup_mode`` is neither 'linear' nor 'cosine'.
    """
    if lr_warmup_mode == 'linear':
        return linear_warmup(warmup_lr, global_step, warmup_steps, warmup_end_lr)
    if lr_warmup_mode == 'cosine':
        return cos_warmup(warmup_lr, global_step, warmup_steps, warmup_end_lr)
    raise ValueError('Invalid type of lr_warmup_mode')
|
||||
|
||||
|
||||
def linear_warmup(warmup_lr, global_step, warmup_steps, warmup_end_lr):
    """Interpolate linearly from ``warmup_lr`` to ``warmup_end_lr``.

    Returns ``warmup_lr + (warmup_end_lr - warmup_lr) * global_step / warmup_steps``
    as a tensor; the step ratio is computed in float32.
    """
    from tensorflow.python.ops import math_ops
    progress = tf.cast(global_step, tf.float32) / tf.cast(warmup_steps, tf.float32)
    lr_span = math_ops.subtract(warmup_end_lr, warmup_lr)
    ramp = math_ops.multiply(lr_span, progress)
    return math_ops.add(warmup_lr, ramp)
|
||||
|
||||
def cos_warmup( warmup_lr, global_step, warmup_steps, warmup_end_lr ):
    """Raise the rate from ``warmup_lr`` to ``warmup_end_lr`` on a half cosine.

    Returns ``warmup_lr + (warmup_end_lr - warmup_lr) * 0.5 * (1 + cos(pi + pi * global_step / warmup_steps))``
    as a tensor.
    """
    PI = 3.14159265359
    lr_span = tf.subtract( warmup_end_lr, warmup_lr )
    progress = tf.cast( global_step, tf.float32 ) / tf.cast( warmup_steps, tf.float32 )
    ang = PI + PI * ( progress )
    return tf.add( warmup_lr, lr_span * 0.5 * ( 1.0 + tf.math.cos( ang ) ) )
|
||||
|
||||
|
||||
def decay_linear( lr_start, lr_end, it_start, it_end, global_step ):
    """Decay linearly from ``lr_start`` at ``it_start`` to ``lr_end`` at ``it_end``.

    The per-step decrement is computed in Python; the result is
    ``lr_start - decrement * (global_step - it_start)`` as a float32 tensor.
    """
    down_per_step = float( lr_start - lr_end ) / float( it_end - it_start )
    start_lr = tf.cast(lr_start, tf.float32)
    slope = tf.cast(down_per_step, tf.float32)
    elapsed = tf.subtract( tf.cast(global_step, tf.float32), tf.cast(it_start, tf.float32) )
    return tf.subtract( start_lr, tf.multiply( slope, elapsed ) )
|
||||
|
||||
def decay_linear_twice(lr_start, lr_mid, lr_end, it_start, it_mid, it_end, global_step ):
    """Two-segment linear decay selected with ``tf.cond``.

    Before ``it_start`` the rate is ``lr_start``; up to ``it_mid`` it decays
    linearly to ``lr_mid``; after ``it_mid`` it decays linearly from
    ``lr_mid`` to ``lr_end`` (reached at ``it_end``).
    """
    def hold_start():
        return tf.cast(lr_start, tf.float32)

    def first_segment():
        return decay_linear(lr_start, lr_mid, it_start, it_mid, global_step)

    def second_segment():
        return decay_linear(lr_mid, lr_end, it_mid, it_end, global_step)

    first_phase = tf.cond( global_step < it_start, hold_start, first_segment )
    return tf.cond( global_step > it_mid, second_segment, lambda: first_phase )
|
||||
|
||||
|
||||
|
||||
+169
@@ -0,0 +1,169 @@
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
|
||||
def get_lr(lr, lr_end, lr_decay_mode, warmup_it, decay_steps, global_step, steps, lr_steps, ploy_power,
           cdr_first_decay_ratio, cdr_t_mul, cdr_m_mul, cdr_alpha, cd_alpha, lc_periods, lc_alpha, lc_beta, lr_mid, it_mid):
    """Build the post-warm-up learning-rate tensor for ``lr_decay_mode``.

    Every mode except 'steps' shifts ``global_step`` and ``decay_steps`` by
    ``warmup_it`` so the decay starts where warm-up ends.

    Args:
        lr: initial (peak) learning rate of the decay.
        lr_end: final learning rate ('poly', 'poly_cycle', 'linear_twice').
        lr_decay_mode: 'steps', 'poly', 'poly_cycle', 'cosine_decay_restarts',
            'cosine', 'linear_cosine' or 'linear_twice'.
        warmup_it: number of warm-up iterations.
        decay_steps: step at which the decay ends.
        global_step: current step tensor.
        steps: boundaries for 'steps' mode.
        lr_steps: values for 'steps' mode.
        ploy_power: polynomial power ('poly' modes).
        cdr_first_decay_ratio, cdr_t_mul, cdr_m_mul, cdr_alpha: parameters
            for 'cosine_decay_restarts'.
        cd_alpha: ``alpha`` for 'cosine'.
        lc_periods, lc_alpha, lc_beta: parameters for 'linear_cosine'.
        lr_mid, it_mid: middle rate and iteration for 'linear_twice'.

    Returns:
        The learning-rate tensor.

    Raises:
        ValueError: if ``lr_decay_mode`` is not one of the modes above.
    """
    if lr_decay_mode == 'steps':
        return tf.train.piecewise_constant(global_step, steps, lr_steps)

    if lr_decay_mode in ('poly', 'poly_cycle'):
        return tf.train.polynomial_decay(
            lr,
            global_step - warmup_it,
            decay_steps=decay_steps - warmup_it,
            end_learning_rate=lr_end,
            power=ploy_power,
            cycle=(lr_decay_mode == 'poly_cycle'))

    if lr_decay_mode == 'cosine_decay_restarts':
        return tf.train.cosine_decay_restarts(
            lr,
            global_step - warmup_it,
            (decay_steps - warmup_it) * cdr_first_decay_ratio,
            t_mul=cdr_t_mul,
            m_mul=cdr_m_mul,
            alpha=cdr_alpha)

    if lr_decay_mode == 'cosine':
        return tf.train.cosine_decay(
            lr,
            global_step - warmup_it,
            decay_steps=decay_steps - warmup_it,
            alpha=cd_alpha)

    if lr_decay_mode == 'linear_cosine':
        return tf.train.linear_cosine_decay(
            lr,
            global_step - warmup_it,
            decay_steps=decay_steps - warmup_it,
            num_periods=lc_periods,
            alpha=lc_alpha,
            beta=lc_beta)

    if lr_decay_mode == 'linear_twice':
        return decay_linear_twice(lr, lr_mid, lr_end, warmup_it, it_mid, decay_steps, global_step)

    raise ValueError('Invalid type of lr_decay_mode')
|
||||
|
||||
|
||||
def cos_warmup_1980( global_step, warmup_steps, max_lr ):
    """Return the cosine warm-up learning rate for one step (pure Python).

    The rate follows ``max_lr * 0.5 * (1 + cos(pi + pi * step / warmup_steps))``.

    Args:
        global_step: integer step index.
        warmup_steps: length of the warm-up phase in steps (non-zero).
        max_lr: learning rate at the end of warm-up.

    Returns:
        The learning rate for ``global_step``.
    """
    PI = 3.14159265359
    warmup_progress = float(global_step) / float(warmup_steps)
    return max_lr * 0.5*( 1.0 + np.cos( PI + PI * ( warmup_progress ) ) )
|
||||
|
||||
def cos_decay_1980( global_step, warmup_steps, total_steps, max_lr ):
    """Return the cosine-decay learning rate for one step (pure Python).

    The decay progress is ``(step - warmup_steps) / (total_steps - warmup_steps)``
    and the rate is ``max_lr * 0.5 * (1 + cos(pi * progress))``.

    Args:
        global_step: integer step index.
        warmup_steps: number of warm-up steps preceding the decay.
        total_steps: step count at which the decay ends (must differ from
            ``warmup_steps``).
        max_lr: learning rate at the start of the decay.

    Returns:
        The learning rate for ``global_step``.
    """
    PI = 3.14159265359
    decay_progress = float(global_step - warmup_steps) / float(total_steps - warmup_steps)
    return max_lr * 0.5*( 1.0 + np.cos( PI * ( decay_progress ) ) )
|
||||
|
||||
|
||||
def get_1980_lr(config, global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode):
    """Precompute a per-step learning-rate table and look up ``global_step``.

    The schedule is built as a Python list, converted to a constant tensor
    and indexed with ``tf.gather``.

    Args:
        config: dict; ``config['total_steps_include_iterations']`` is the
            table length for the 'poly' and 'cosine' modes.
        global_step: step tensor used to index the table.
        lr_init: learning rate at step 0 of a linear warm-up.
        lr_end: final learning rate of the fallback linear schedule.
        lr_max: peak learning rate.
        warmup_epochs: warm-up length in epochs; 0 disables warm-up.
        total_epochs: total training length in epochs.
        steps_per_epoch: number of steps per epoch.
        lr_decay_mode: 'steps', 'poly' or 'cosine'; any other value
            (including 'linear_cosine') selects linear warm-up followed by
            linear decay to ``lr_end``.

    Returns:
        The table entry at ``global_step``.
    """
    lr_each_step = []

    if lr_decay_mode == 'steps':
        decay_epoch_index = [30 * steps_per_epoch, 60 * steps_per_epoch, 80 * steps_per_epoch]
        total_steps = int(steps_per_epoch * total_epochs)
        for i in range(total_steps):
            if i < decay_epoch_index[0]:
                lr = lr_max
            elif i < decay_epoch_index[1]:
                lr = lr_max * 0.1
            elif i < decay_epoch_index[2]:
                lr = lr_max * 0.01
            else:
                lr = lr_max * 0.001
            lr_each_step.append(lr)
    elif lr_decay_mode == 'poly':
        total_steps = int(steps_per_epoch * total_epochs)
        warmup_steps = steps_per_epoch * warmup_epochs
        # The increment is only used while i < warmup_steps, so skip the
        # division when there is no warm-up phase.
        if warmup_steps > 0:
            inc_each_step = ( float(lr_max) - float(lr_init) ) / float(warmup_steps)
        else:
            inc_each_step = 0.0
        decay_span = float(total_steps) - float(warmup_steps)
        for i in range( config['total_steps_include_iterations'] ):
            if i < warmup_steps:
                lr = float(lr_init) + inc_each_step * float(i)
            elif i <= total_steps:
                if decay_span == 0:
                    # Zero-length decay (i == warmup_steps == total_steps):
                    # the step sits at the peak; avoid 0/0.
                    lr = float(lr_max)
                else:
                    base = ( 1.0 - (float(i)-float(warmup_steps))/decay_span )
                    lr = float(lr_max) * base
            else:
                lr = 0.0
            lr_each_step.append(lr)
    elif lr_decay_mode == 'cosine':
        total_steps = int(steps_per_epoch * total_epochs)
        warmup_steps = steps_per_epoch * warmup_epochs
        for i in range( config['total_steps_include_iterations'] ):
            if i < warmup_steps:
                lr = cos_warmup_1980( i, warmup_steps, lr_max )
            elif i <= total_steps:
                if total_steps == warmup_steps:
                    # Zero-length decay: the step sits at the peak; avoid 0/0.
                    lr = lr_max
                else:
                    lr = cos_decay_1980( i, warmup_steps, total_steps, lr_max )
            else:
                lr = 0.0
            lr_each_step.append(lr)
    else:
        # int() matches the other branches so range() also accepts a
        # fractional total_epochs.
        total_steps = int(steps_per_epoch * total_epochs)
        warmup_steps = steps_per_epoch * warmup_epochs
        for i in range(total_steps):
            if i < warmup_steps:
                lr = lr_init + (lr_max - lr_init) * i / warmup_steps
            else:
                lr = lr_max - ( lr_max - lr_end ) * (i - warmup_steps) / (total_steps - warmup_steps)
            lr_each_step.append( lr )

    current_step = global_step
    lr_each_step = tf.convert_to_tensor( lr_each_step )
    print (lr_each_step)
    learning_rate = tf.gather( lr_each_step, current_step )

    return learning_rate
|
||||
|
||||
def warmup_decay(lr_warmup_mode, warmup_lr, global_step, warmup_steps, warmup_end_lr):
    """Dispatch to the warm-up schedule named by ``lr_warmup_mode``.

    Args:
        lr_warmup_mode: 'linear' or 'cosine'.
        warmup_lr: learning rate at the start of warm-up.
        global_step: current step.
        warmup_steps: length of the warm-up phase in steps.
        warmup_end_lr: learning rate at the end of warm-up.

    Returns:
        The warm-up learning-rate tensor.

    Raises:
        ValueError: if ``lr_warmup_mode`` is neither 'linear' nor 'cosine'.
    """
    if lr_warmup_mode == 'linear':
        return linear_warmup(warmup_lr, global_step, warmup_steps, warmup_end_lr)
    if lr_warmup_mode == 'cosine':
        return cos_warmup(warmup_lr, global_step, warmup_steps, warmup_end_lr)
    raise ValueError('Invalid type of lr_warmup_mode')
|
||||
|
||||
|
||||
def linear_warmup(warmup_lr, global_step, warmup_steps, warmup_end_lr):
    """Interpolate linearly from ``warmup_lr`` to ``warmup_end_lr``.

    Returns ``warmup_lr + (warmup_end_lr - warmup_lr) * global_step / warmup_steps``
    as a tensor; the step ratio is computed in float32.
    """
    from tensorflow.python.ops import math_ops
    progress = tf.cast(global_step, tf.float32) / tf.cast(warmup_steps, tf.float32)
    lr_span = math_ops.subtract(warmup_end_lr, warmup_lr)
    ramp = math_ops.multiply(lr_span, progress)
    return math_ops.add(warmup_lr, ramp)
|
||||
|
||||
def cos_warmup( warmup_lr, global_step, warmup_steps, warmup_end_lr ):
    """Raise the rate from ``warmup_lr`` to ``warmup_end_lr`` on a half cosine.

    Returns ``warmup_lr + (warmup_end_lr - warmup_lr) * 0.5 * (1 + cos(pi + pi * global_step / warmup_steps))``
    as a tensor.
    """
    PI = 3.14159265359
    lr_span = tf.subtract( warmup_end_lr, warmup_lr )
    progress = tf.cast( global_step, tf.float32 ) / tf.cast( warmup_steps, tf.float32 )
    ang = PI + PI * ( progress )
    return tf.add( warmup_lr, lr_span * 0.5 * ( 1.0 + tf.math.cos( ang ) ) )
|
||||
|
||||
|
||||
def decay_linear( lr_start, lr_end, it_start, it_end, global_step ):
    """Decay linearly from ``lr_start`` at ``it_start`` to ``lr_end`` at ``it_end``.

    The per-step decrement is computed in Python; the result is
    ``lr_start - decrement * (global_step - it_start)`` as a float32 tensor.
    """
    down_per_step = float( lr_start - lr_end ) / float( it_end - it_start )
    start_lr = tf.cast(lr_start, tf.float32)
    slope = tf.cast(down_per_step, tf.float32)
    elapsed = tf.subtract( tf.cast(global_step, tf.float32), tf.cast(it_start, tf.float32) )
    return tf.subtract( start_lr, tf.multiply( slope, elapsed ) )
|
||||
|
||||
def decay_linear_twice(lr_start, lr_mid, lr_end, it_start, it_mid, it_end, global_step ):
    """Two-segment linear decay selected with ``tf.cond``.

    Before ``it_start`` the rate is ``lr_start``; up to ``it_mid`` it decays
    linearly to ``lr_mid``; after ``it_mid`` it decays linearly from
    ``lr_mid`` to ``lr_end`` (reached at ``it_end``).
    """
    def hold_start():
        return tf.cast(lr_start, tf.float32)

    def first_segment():
        return decay_linear(lr_start, lr_mid, it_start, it_mid, global_step)

    def second_segment():
        return decay_linear(lr_mid, lr_end, it_mid, it_end, global_step)

    first_phase = tf.cond( global_step < it_start, hold_start, first_segment )
    return tf.cond( global_step > it_mid, second_segment, lambda: first_phase )
|
||||
|
||||
|
||||
|
||||
+23
@@ -0,0 +1,23 @@
|
||||
import tensorflow as tf
|
||||
#from tensorflow.contrib.hccl.python.ops import hccl_ops
|
||||
from npu_bridge.hccl import hccl_ops
|
||||
|
||||
class Layers:
    """Builds the evaluation metric ops."""

    def get_accuracy(self, labels, predicted_classes, logits, config):
        """Return top-1 and top-5 accuracy metrics keyed for evaluation.

        Each metric is a ``(value, update_op)`` pair. When
        ``config['rank_size']`` is not 1, the value tensor is summed across
        ranks with ``hccl_ops.allreduce`` and divided by the rank count; the
        update op is passed through unchanged.

        Args:
            labels: ground-truth class ids.
            predicted_classes: predicted class ids.
            logits: class scores used for the top-5 check.
            config: dict providing 'rank_size'.

        Returns:
            Dict with keys 'val-top1acc' and 'val-top5acc'.
        """
        top1 = tf.metrics.accuracy(
            labels=labels, predictions=predicted_classes)
        in_top5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
        top5 = tf.metrics.mean(in_top5)

        def across_ranks(metric):
            value, update_op = metric[0], metric[1]
            if config['rank_size'] == 1:
                return (value, update_op)
            return (hccl_ops.allreduce(value, "sum") / config['rank_size'], update_op)

        newaccuracy = across_ranks(top1)
        newtop5acc = across_ranks(top5)
        return {'val-top1acc': newaccuracy, 'val-top5acc': newtop5acc}
|
||||
|
||||
|
||||
|
||||
|
||||
+36
@@ -0,0 +1,36 @@
|
||||
import tensorflow as tf
|
||||
|
||||
class Loss:
    """Loss construction and gradient application for the classifier."""

    def __init__(self,config):
        """Keep a reference to the training configuration dict."""
        self.config = config

    def get_loss(self, logits, labels):
        """Return the label-smoothed softmax cross-entropy, named 'loss'.

        Labels are one-hot encoded over ``config['num_classes']`` and the
        smoothing factor is ``config['label_smoothing']``.
        """
        onehot = tf.one_hot(labels, self.config['num_classes'])
        cross_entropy = tf.losses.softmax_cross_entropy(
            logits=logits,
            onehot_labels=onehot,
            label_smoothing=self.config['label_smoothing'])
        return tf.identity(cross_entropy, name='loss')

    def get_total_loss(self, loss):
        """Add the collected regularization losses to ``loss`` ('total_loss')."""
        regularization_terms = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        return tf.add_n([loss] + regularization_terms, name='total_loss')

    def optimize_loss(self, total_loss, opt):
        """Compute ungated gradients of ``total_loss`` and apply them with ``opt``.

        No ``global_step`` is passed to ``apply_gradients``.

        Returns:
            The training op.
        """
        grads_and_vars = opt.compute_gradients(
            total_loss, gate_gradients=tf.train.Optimizer.GATE_NONE)
        return opt.apply_gradients(grads_and_vars)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
+7
@@ -0,0 +1,7 @@
|
||||
# Clean up after a training run: stop leftover TdtMain processes and remove
# dumped graphs, device logs, checkpoints and Python bytecode caches.

# '[T]dtMain' keeps grep from matching its own command line; 'xargs -r'
# skips kill entirely when no process matched.
ps -ef | grep '[T]dtMain' | awk '{print $2}' | xargs -r kill -9
rm -rf *.pbtxt
rm -rf /var/log/npu/slog/*.log
rm -rf ckpt*
# -exec passes paths straight to rm, so names with whitespace are safe.
find ./ -name "*.pyc" -exec rm -rf {} +
# -prune stops find from descending into directories it is about to delete.
find ./ -name __pycache__ -prune -exec rm -rf {} +
rm -rf /var/log/npu/dataset/*
|
||||
+120
@@ -0,0 +1,120 @@
|
||||
import tensorflow as tf
|
||||
import sys
|
||||
import ast
|
||||
#sys.path.append("..")
|
||||
#sys.path.append("../models")
|
||||
#sys.path.append("./resnet50_train/")
|
||||
#sys.path.append("./resnet50_train/models")
|
||||
import os
|
||||
# Make the shared utilities (six directories up from this file) importable.
sys.path.extend(
    os.path.join(os.path.abspath(os.path.dirname(__file__)), relative)
    for relative in ('../../../../../../utils', '../../../../../../utils/atlasboost')
)

# Directory containing this script, with symlinks resolved.
base_path = os.path.dirname(os.path.realpath(__file__))
print ("#########base_path:", base_path)

path_1 = base_path + "/.."
path_2 = base_path + "/../models"
path_3 = base_path + "/../../"
print (path_1, path_2, path_3, sep="\n")

# Expose the parent packages (and their models directories) to the imports below.
sys.path.extend([path_1, path_2, path_3, base_path + "/../../models"])
|
||||
|
||||
from utils import create_session as cs
|
||||
from utils import logger as lg
|
||||
from data_loader.resnet50 import data_loader as dl
|
||||
from models.resnet50 import res50_model as ml
|
||||
from optimizers import optimizer as op
|
||||
from losses import res50_loss as ls
|
||||
from trainers import gpu_base_trainer as tr
|
||||
# from configs import res50_config as cfg
|
||||
from hyper_param import hyper_param as hp
|
||||
from layers import layers as ly
|
||||
|
||||
import argparse
|
||||
from benchmark_log import hwlog
|
||||
from benchmark_log.basic_utils import get_environment_info
|
||||
from benchmark_log.basic_utils import get_model_parameter
|
||||
|
||||
def main():
    """Parse the command line, assemble the training components, and run.

    The config module named by ``--config_file`` is loaded from the
    ``configs`` package; its ``res50_config()`` dict is overridden with the
    command-line values and handed to every component. ``config['mode']``
    then selects train, evaluate, or train_and_evaluate.

    Raises:
        ValueError: on unknown command-line arguments, a missing
            ``--config_file``, or an unsupported ``config['mode']``.
    """
    #-------------------choose the config file in .sh file-----------
    cmdline = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cmdline.add_argument('--config_file', default="",
                         help="""name of the module in the configs package that provides res50_config().""")
    cmdline.add_argument('--iterations_per_loop', default=1, type=int,
                         help="""value stored in config['iterations_per_loop'].""")
    cmdline.add_argument('--max_train_steps', default=200, type=int,
                         help="""total number of training steps, used when the config sets no num_epochs.""")
    cmdline.add_argument('--debug', default=True, type=ast.literal_eval,
                         help="""Python literal (True/False) stored in config['debug'].""")
    cmdline.add_argument('--eval', default=False, type=ast.literal_eval,
                         help="""Python literal (True/False); when true, evaluate after training in train mode.""")
    cmdline.add_argument('--model_dir', default="./model_dir",
                         help="""value stored in config['model_dir'].""")
    FLAGS, unknown_args = cmdline.parse_known_args()
    if unknown_args:
        for bad_arg in unknown_args:
            print("ERROR: Unknown command line arg: %s" % bad_arg)
        raise ValueError("Invalid command line arg(s)")

    cfg_file = FLAGS.config_file
    if not cfg_file:
        # An empty name can never resolve to a config module; fail with a
        # clear message instead of an obscure attribute lookup error.
        raise ValueError("--config_file is required (module name inside the configs package)")
    configs = 'configs'
    cfg = getattr(__import__(configs, fromlist=[cfg_file]), cfg_file)
    #------------------------------------------------------------------

    config = cfg.res50_config()
    config['iterations_per_loop'] = int(FLAGS.iterations_per_loop)
    config['max_train_steps'] = int(FLAGS.max_train_steps)
    config['debug'] = FLAGS.debug
    config['eval'] = FLAGS.eval
    config['model_dir'] = FLAGS.model_dir
    print("iterations_per_loop:%d" %(config['iterations_per_loop']))
    print("max_train_steps    :%d" %(config['max_train_steps']))
    print("debug              :%s" %(config['debug']))
    print("eval               :%s" %(config['eval']))
    print("model_dir          :%s" %(config['model_dir']))

    Session = cs.CreateSession(config)
    data = dl.DataLoader(config)
    hyper_param = hp.HyperParams(config)
    layers = ly.Layers()
    optimizer = op.Optimizer(config)
    loss = ls.Loss(config)
    logger = lg.LogSessionRunHook(config)   # add tensorboard summary

    model = ml.Model(config, data, hyper_param, layers, optimizer, loss, logger)   # get the model
    trainer = tr.GPUBaseTrain(Session, config, data, model, logger)   # use Estimator to build training process

    if config['mode'] == 'train':
        trainer.train()
        if config['eval']:
            trainer.evaluate()
    elif config['mode'] == 'evaluate':
        trainer.evaluate()
    elif config['mode'] == 'train_and_evaluate':
        trainer.train_and_evaluate()
    else:
        raise ValueError('Invalid type of mode')
|
||||
|
||||
if __name__ == '__main__':
    # Benchmark log instrumentation (added by zwx5326390): record the
    # environment and the fixed run parameters before training starts.
    hwlog.ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
    cpu_info, npu_info, framework_info, os_info, benchmark_version = get_environment_info("tensorflow")
    config_info = get_model_parameter("tensorflow_config")
    initinal_data = {"base_lr": 0.1, "dataset": "imagenet1024", "optimizer": "SGD", "loss_scale": 512,
                     "batchsize": 256}

    # Emitted in this exact order.
    remarks = (
        (hwlog.CPU_INFO, cpu_info),
        (hwlog.NPU_INFO, npu_info),
        (hwlog.OS_INFO, os_info),
        (hwlog.FRAMEWORK_INFO, framework_info),
        (hwlog.BENCHMARK_VERSION, benchmark_version),
        (hwlog.CONFIG_INFO, config_info),
        (hwlog.BASE_LR, initinal_data.get("base_lr")),
        (hwlog.DATASET, initinal_data.get("dataset")),
        (hwlog.OPT_NAME, initinal_data.get("optimizer")),
        (hwlog.LOSS_SCALE, initinal_data.get("loss_scale")),
        (hwlog.INPUT_BATCH_SIZE, initinal_data.get("batchsize")),
    )
    for remark_key, remark_value in remarks:
        hwlog.remark_print(key=remark_key, value=remark_value)

    main()
|
||||
+21
@@ -0,0 +1,21 @@
|
||||
#!/bin/bash
# Single-device launcher (RANK_SIZE=1): exports the job environment, starts
# TdtMain in the background, waits 5 seconds, then runs res50.py with the
# res50_baseline config. Commented-out exports are optional toggles.
#export CUDA_VISIBLE_DEVICES=0
dir=$(pwd)

#cp -rf ./config /tmp/
export JOB_ID=10086
#export PROFILING_DIR=/var/log/npu/profiling/container/0
export DEVICE_ID=0
#export PROFILING_MODE=true
export PRINT_MODEL=1
#export ENABLE_DATA_PRE_PROC=1
export RANK_ID=0
export RANK_SIZE=1
export RANK_TABLE_FILE=/home/lxh/config/new_rank_table_1p.json
export FUSION_TENSOR_SIZE=1000000000
export PYTHONPATH="${dir}"
export LD_LIBRARY_PATH=/usr/local/HiAI/runtime/lib64/

/usr/local/HiAI/runtime/bin/TdtMain --configfile="/home/lxh/test/config/job_tdt_2p_${DEVICE_ID}.json" &
sleep 5

python3.6 res50.py --config_file res50_baseline
|
||||
+4
@@ -0,0 +1,4 @@
|
||||
#!/bin/bash
# GPU launcher: make only CUDA device 7 visible, then run res50.py with the
# res50_baseline_gpu config.
CUDA_VISIBLE_DEVICES=7
export CUDA_VISIBLE_DEVICES

python3.5 res50.py --config_file res50_baseline_gpu
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
import tensorflow as tf
|
||||
|
||||
def _fp32_trainvar_getter(getter, name, shape=None, dtype=None,
|
||||
trainable=True, regularizer=None,
|
||||
*args, **kwargs):
|
||||
storage_dtype = dtype
|
||||
variable = getter(name, shape, dtype=storage_dtype,
|
||||
trainable=trainable,
|
||||
regularizer=regularizer if trainable and 'BatchNorm' not in name and 'batchnorm' not in name and 'batch_norm' not in name and 'Batch_Norm' not in name else None,
|
||||
*args, **kwargs)
|
||||
|
||||
return variable
|
||||
|
||||
|
||||
def fp32_trainable_vars(name='fp32_vars', *args, **kwargs):
    """Open a variable scope that installs ``_fp32_trainvar_getter``.

    Despite the name, the getter visible in this file does not change the
    storage dtype; it only withholds the regularizer from non-trainable and
    batch-norm variables.

    Args:
        name: scope name passed to ``tf.variable_scope``.
        *args, **kwargs: forwarded to ``tf.variable_scope`` unchanged.

    Returns:
        The ``tf.variable_scope`` context manager.
    """
    scope = tf.variable_scope(
        name, *args, custom_getter=_fp32_trainvar_getter, **kwargs)
    return scope
|
||||
|
||||
def custom_getter_with_fp16_and_weight_decay(dtype, weight_decay):
    """Return the ``fp32_trainable_vars`` scope with an L2 regularizer attached.

    Args:
        dtype: default dtype handed to the variable scope.
        weight_decay: scale passed to ``tf.contrib.layers.l2_regularizer``.

    Returns:
        The variable-scope context manager built by ``fp32_trainable_vars``.
    """
    l2_regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
    return fp32_trainable_vars(dtype=dtype, regularizer=l2_regularizer)
|
||||
|
||||
+222
@@ -0,0 +1,222 @@
|
||||
|
||||
import tensorflow as tf
|
||||
from . import resnet, res50_helper
|
||||
from trainers.train_helper import stage
|
||||
#from tensorflow.contrib.offline_train.python.npu.npu_optimizer import NPUDistributedOptimizer
|
||||
from npu_bridge.estimator.npu.npu_optimizer import NPUDistributedOptimizer
|
||||
#from tensorflow.contrib.offline_train.python import npu_ops
|
||||
from npu_bridge.estimator import npu_ops
|
||||
_NUM_EXAMPLES_NAME="num_examples"
|
||||
|
||||
|
||||
class Model(object):
    """Provider of the ``model_fn`` used to train / evaluate ResNet.

    The constructor only stores its collaborators; all graph construction
    happens in ``get_estimator_model_func``.

    NOTE(review): this block was reconstructed from a copy whose indentation
    was lost. The extent of the ``tf.device`` and ``tf.name_scope`` regions is
    the most plausible reading - confirm against the upstream file.
    """

    def __init__(self, config, data, hyper_param, layers, optimizer, loss, logger):
        """Store the configuration dict and the helper objects unchanged."""
        self.config = config
        self.data = data
        self.hyper_param = hyper_param
        self.layers = layers
        self.optimizer = optimizer
        self.loss = loss
        self.logger = logger

    def get_estimator_model_func(self, features, labels, mode, params=None):
        """Build the graph for one estimator mode and return its spec.

        Args:
            features: input batch; cast to ``config['dtype']`` before use.
            labels: class indices; flattened to 1-D, then one-hot encoded.
            mode: ``tf.estimator.ModeKeys.TRAIN`` or ``EVAL``.
            params: accepted for the estimator signature; not used.

        Returns:
            ``tf.estimator.EstimatorSpec``. For EVAL it carries the
            cross-entropy loss and the accuracy metrics; for TRAIN it carries
            ``total_loss`` and the training op.

        Raises:
            ValueError: if ``config['use_lars']`` is set and the accelerator is
                neither ``'gpu'`` nor ``'1980'``.
        """
        config = self.config
        # Squash the unnecessary unary dim (labels are one-hot encoded below).
        labels = tf.reshape(labels, (-1,))

        model_func = self.get_model_func()
        inputs = features  # TODO: Should be using feature columns?
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        with tf.device('/gpu:0'):
            # The original cast twice (once only for 'gpu'); a single
            # unconditional cast yields the same tensor.
            inputs = tf.cast(inputs, config['dtype'])

            # Variables created here get the L2 regularizer, except BN ones.
            with res50_helper.custom_getter_with_fp16_and_weight_decay(
                    dtype=config['dtype'], weight_decay=config['weight_decay']):
                top_layer = model_func(
                    inputs, data_format=config['data_format'], training=is_training,
                    conv_initializer=config['conv_init'],
                    bn_init_mode=config['bn_init_mode'],
                    bn_gamma_initial_value=config['bn_gamma_initial_value'])

            logits = top_layer
            predicted_classes = tf.argmax(logits, axis=1, output_type=tf.int32)
            logits = tf.cast(logits, tf.float32)

            # NOTE(review): the class count is hard-coded to 1001 here rather
            # than read from the config.
            labels_one_hot = tf.one_hot(labels, depth=1001)
            loss = tf.losses.softmax_cross_entropy(
                logits=logits, onehot_labels=labels_one_hot,
                label_smoothing=config['label_smoothing'])

            # Named so the logger can look the tensor up.
            base_loss = tf.identity(loss, name='loss')

            # Weight decay over every trainable variable whose name does not
            # contain 'BatchNorm'; computed in fp32 for numerical stability.
            l2_loss = config['weight_decay'] * tf.add_n(
                [tf.nn.l2_loss(tf.cast(v, tf.float32))
                 for v in tf.trainable_variables()
                 if 'BatchNorm' not in v.name])

            # With LARS the decay is applied on the gradients instead.
            if config['use_lars']:
                total_loss = base_loss
            else:
                total_loss = base_loss + l2_loss
            total_loss = tf.identity(total_loss, name='total_loss')

            if mode == tf.estimator.ModeKeys.EVAL:
                with tf.device(None):
                    metrics = self.layers.get_accuracy(
                        labels, predicted_classes, logits, config)
                return tf.estimator.EstimatorSpec(
                    mode, loss=loss, eval_metric_ops=metrics)

            assert (mode == tf.estimator.ModeKeys.TRAIN)

            batch_size = tf.shape(inputs)[0]  # kept: adds the same graph op as before

            global_step = tf.train.get_global_step()
            with tf.device('/cpu:0'):
                learning_rate = self.hyper_param.get_learning_rate()

            momentum = config['momentum'][0]

            opt = tf.train.MomentumOptimizer(
                learning_rate, momentum, use_nesterov=config['use_nesterov'])
            opt = NPUDistributedOptimizer(opt)
            if config['accelerator'] == 'gpu':
                opt = self.optimizer.get_lbs_optimizer(opt)

            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) or []
            with tf.control_dependencies(update_ops):
                if config['accelerator'] == 'gpu':
                    gate_gradients = (tf.train.Optimizer.GATE_NONE)
                    grads_and_vars = opt.compute_gradients(
                        total_loss, gate_gradients=gate_gradients)
                    train_op = opt.apply_gradients(
                        grads_and_vars, global_step=global_step)
                else:
                    # Static loss scaling: scale the loss, un-scale the grads.
                    with tf.name_scope('loss_scale'):
                        loss_scale = float(config['loss_scale'])
                        scaled_grads_and_vars = opt.compute_gradients(
                            total_loss * loss_scale)
                        unscaled_grads_and_vars = [
                            (g / loss_scale, v) for g, v in scaled_grads_and_vars]

                    with tf.name_scope('LARS'):
                        fp32_grads_and_vars = [
                            (tf.cast(g, tf.float32), v)
                            for g, v in unscaled_grads_and_vars]
                        if config['use_lars']:
                            grad_var_list = self._lars_grads_and_vars(
                                fp32_grads_and_vars)
                        else:
                            print('do not use lars111111111111111111')
                            grad_var_list = list(fp32_grads_and_vars)

                    train_op = opt.apply_gradients(
                        grad_var_list, global_step=global_step)

            train_op = tf.group(train_op)

            return tf.estimator.EstimatorSpec(
                mode, loss=total_loss, train_op=train_op)

    def _lars_grads_and_vars(self, fp32_grads_and_vars):
        """Return the (gradient, variable) pairs after LARS rescaling.

        Variables whose name contains 'BatchNorm' or 'bias' keep their
        gradient unchanged; all others are rescaled.
        """
        accelerator = self.config['accelerator']
        weight_decay = self.config['weight_decay']

        def uses_lars(var):
            return 'BatchNorm' not in var.name and 'bias' not in var.name

        if accelerator == 'gpu':
            # NOTE(review): the caller only reaches this helper on the
            # non-'gpu' path, so this branch looks unreachable - confirm.
            grad_var_list = []
            for g, var in fp32_grads_and_vars:
                if uses_lars(var):
                    grad_norm = tf.norm(g, ord='euclidean')
                    weight_norm = tf.norm(var, ord='euclidean')
                    grad_norm_wd = tf.add(
                        grad_norm, tf.multiply(weight_decay, weight_norm))
                    rescale_factor = tf.div(
                        tf.multiply(0.001, weight_norm),
                        tf.add(grad_norm_wd, tf.constant(1e-5, tf.float32)))
                    decayed_g = tf.add(g, tf.multiply(weight_decay, var))
                    with tf.name_scope('lars_grad'):
                        g = tf.multiply(rescale_factor, decayed_g)
                grad_var_list.append((g, var))
            return grad_var_list

        if accelerator == '1980':
            print('lars9999999999999999999999')
            g_list_bn_bias = []
            var_list_bn_bias = []
            g_list_else = []
            var_list_else = []
            for g, var in fp32_grads_and_vars:
                if uses_lars(var):
                    g_list_else.append(g)
                    var_list_else.append(var)
                else:
                    g_list_bn_bias.append(g)
                    var_list_bn_bias.append(var)

            g_list_else_lars = npu_ops.LARS(inputs_w=var_list_else,
                                            inputs_g=g_list_else,
                                            weight_decay=weight_decay,
                                            hyperpara=0.001,
                                            epsilon=1e-5)

            # BN/bias pairs come first, then the LARS-processed ones.
            g_list_lars = g_list_bn_bias + g_list_else_lars
            var_list = var_list_bn_bias + var_list_else
            return list(zip(g_list_lars, var_list))

        # Previously this case produced an empty list and failed later inside
        # apply_gradients with an unrelated message.
        raise ValueError(
            "use_lars is not supported for accelerator: %s" % accelerator)

    def get_model_func(self):
        """Return a callable that builds the network named in the config.

        ``config['model_name']`` must be ``'resnet'`` followed by the layer
        count (for example ``'resnet50'``).

        Raises:
            ValueError: if the model name does not start with ``'resnet'``.
        """
        model_name = self.config['model_name']
        if model_name.startswith('resnet'):
            nlayer = int(model_name[len('resnet'):])
            return lambda images, *args, **kwargs: \
                resnet.inference_resnet_v1(self.config, images, nlayer, *args, **kwargs)
        else:
            raise ValueError("Invalid model type: %s" % model_name)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
+143
@@ -0,0 +1,143 @@
|
||||
import tensorflow as tf
|
||||
from . import resnet, res50_helper
|
||||
from trainers.train_helper import stage
|
||||
from tensorflow.contrib.offline_train.python.npu.npu_optimizer import NPUDistributedOptimizer
|
||||
_NUM_EXAMPLES_NAME="num_examples"
|
||||
|
||||
|
||||
class Model(object):
    """Provider of the ``model_fn`` used to train / evaluate ResNet.

    The constructor only stores its collaborators; all graph construction
    happens in ``get_estimator_model_func``.

    NOTE(review): this block was reconstructed from a copy whose indentation
    was lost. The extent of the ``tf.device`` regions is the most plausible
    reading - confirm against the upstream file.
    """

    def __init__(self, config, data, hyper_param, layers, optimizer, loss, logger):
        """Store the configuration dict and the helper objects unchanged."""
        self.config = config
        self.data = data
        self.hyper_param = hyper_param
        self.layers = layers
        self.optimizer = optimizer
        self.loss = loss
        self.logger = logger

    def get_estimator_model_func(self, features, labels, mode, params=None):
        """Build the graph for one estimator mode and return its spec.

        Args:
            features: input batch; cast to ``config['dtype']`` only when the
                accelerator is ``'gpu'``.
            labels: sparse class indices; flattened to 1-D.
            mode: ``tf.estimator.ModeKeys.TRAIN`` or ``EVAL``.
            params: accepted for the estimator signature; not used.

        Returns:
            ``tf.estimator.EstimatorSpec``. For TRAIN a ``SummarySaverHook``
            is attached when ``config['do_checkpoint']`` is truthy.
        """
        config = self.config
        labels = tf.reshape(labels, (-1,))  # squash unnecessary unary dim

        model_func = self.get_model_func()
        inputs = features  # TODO: Should be using feature columns?
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        with tf.device('/gpu:0'):
            if config['accelerator'] == 'gpu':
                inputs = tf.cast(inputs, config['dtype'])

            # Variables created here get the L2 regularizer, except BN ones.
            with res50_helper.custom_getter_with_fp16_and_weight_decay(
                    dtype=config['dtype'], weight_decay=config['weight_decay']):
                top_layer = model_func(
                    inputs, data_format=config['data_format'], training=is_training,
                    conv_initializer=config['conv_init'],
                    bn_init_mode=config['bn_init_mode'],
                    bn_gamma_initial_value=config['bn_gamma_initial_value'])

            logits = top_layer
            predicted_classes = tf.argmax(logits, axis=1, output_type=tf.int32)
            logits = tf.cast(logits, tf.float32)

            loss = tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels)
            # Named so the logger can look the tensor up.
            base_loss = tf.identity(loss, name='loss')

            # No explicit weight-decay term is added to the loss in this version.
            total_loss = base_loss
            total_loss = tf.identity(total_loss, name='total_loss')

            if mode == tf.estimator.ModeKeys.EVAL:
                with tf.device(None):
                    metrics = self.layers.get_accuracy(labels, predicted_classes, logits)
                return tf.estimator.EstimatorSpec(
                    mode, loss=loss, eval_metric_ops=metrics)

            assert (mode == tf.estimator.ModeKeys.TRAIN)

            batch_size = tf.shape(inputs)[0]  # kept: adds the same graph op as before

            global_step = tf.train.get_global_step()
            with tf.device('/cpu:0'):
                learning_rate = self.hyper_param.get_learning_rate()

            momentum = config['momentum'][0]

            opt = tf.train.MomentumOptimizer(
                learning_rate, momentum, use_nesterov=False)
            # Fix: the wrapper used to be bound to an unused name
            # ('optimizer'), so the wrapped optimizer was never applied.
            opt = NPUDistributedOptimizer(opt)
            if config['accelerator'] == 'gpu':
                opt = self.optimizer.get_lbs_optimizer(opt)

            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) or []
            with tf.control_dependencies(update_ops):
                if config['accelerator'] == 'gpu':
                    gate_gradients = (tf.train.Optimizer.GATE_NONE)
                    grads_and_vars = opt.compute_gradients(
                        total_loss, gate_gradients=gate_gradients)
                    train_op = opt.apply_gradients(
                        grads_and_vars, global_step=global_step)
                else:
                    # Static loss scaling: scale the loss, un-scale the grads.
                    with tf.name_scope('loss_scale'):
                        loss_scale = float(config['loss_scale'])
                        scaled_grads_and_vars = opt.compute_gradients(
                            total_loss * loss_scale)
                        unscaled_grads_and_vars = [
                            (g / loss_scale, v) for g, v in scaled_grads_and_vars]
                        train_op = opt.apply_gradients(
                            unscaled_grads_and_vars, global_step=global_step)

            train_op = tf.group(train_op)

            with tf.device('/cpu:0'):
                tf.summary.scalar('base_loss', base_loss)
                tf.summary.scalar('learning_rate', learning_rate)
                tf.contrib.summary.flush()

            if config['do_checkpoint']:
                summary_hook = tf.train.SummarySaverHook(
                    save_steps=20,
                    output_dir=config['log_dir'] + '/train_summary',
                    summary_op=tf.summary.merge_all())
                return tf.estimator.EstimatorSpec(
                    mode, loss=total_loss, train_op=train_op,
                    training_hooks=[summary_hook])

            return tf.estimator.EstimatorSpec(
                mode, loss=total_loss, train_op=train_op)

    def get_model_func(self):
        """Return a callable that builds the network named in the config.

        ``config['model_name']`` must be ``'resnet'`` followed by the layer
        count (for example ``'resnet50'``).

        Raises:
            ValueError: if the model name does not start with ``'resnet'``.
        """
        model_name = self.config['model_name']
        if model_name.startswith('resnet'):
            nlayer = int(model_name[len('resnet'):])
            return lambda images, *args, **kwargs: \
                resnet.inference_resnet_v1(self.config, images, nlayer, *args, **kwargs)
        else:
            raise ValueError("Invalid model type: %s" % model_name)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
+436
@@ -0,0 +1,436 @@
|
||||
import tensorflow as tf
|
||||
|
||||
_BATCH_NORM_EPSILON = 1e-4
|
||||
_BATCH_NORM_DECAY = 0.9
|
||||
|
||||
|
||||
class LayerBuilder(object):
    """Small factory for the TensorFlow layers used by the ResNet builders.

    It carries the shared settings (activation, data format, training flag,
    batch-norm configuration, initializers) so the network code can create
    layers with short calls.
    """

    _BN_INIT_MODES = ('adv_bn_init', 'conv_bn_init')

    def __init__(self, activation=None, data_format='channels_last',
                 training=False, use_batch_norm=False, batch_norm_config=None,
                 conv_initializer=None, bn_init_mode='adv_bn_init', bn_gamma_initial_value=1.0 ):
        """Store the settings; fill in the default batch-norm config if none given."""
        self.activation = activation
        self.data_format = data_format
        self.training = training
        self.use_batch_norm = use_batch_norm
        self.batch_norm_config = batch_norm_config
        self.conv_initializer = conv_initializer
        self.bn_init_mode = bn_init_mode
        self.bn_gamma_initial_value = bn_gamma_initial_value
        if self.batch_norm_config is None:
            self.batch_norm_config = {
                'decay': _BATCH_NORM_DECAY,
                'epsilon': _BATCH_NORM_EPSILON,
                'scale': True,
                'zero_debias_moving_mean': False,
            }

    def _check_bn_init_mode(self):
        """Raise ValueError unless ``bn_init_mode`` is one of the two known modes."""
        if self.bn_init_mode not in self._BN_INIT_MODES:
            raise ValueError("--bn_init_mode must be 'conv_bn_init' or 'adv_bn_init' ")

    def _conv2d(self, inputs, activation, *args, **kwargs):
        """Bias-free conv2d, followed by batch norm + activation when BN is on.

        Without batch norm the activation is applied by the conv layer itself.
        """
        x = tf.layers.conv2d(
            inputs, *args,
            data_format=self.data_format,
            use_bias=False,
            kernel_initializer=self.conv_initializer,
            activation=None if self.use_batch_norm else activation,
            **kwargs)
        if self.use_batch_norm:
            self._check_bn_init_mode()
            # NOTE(review): the original built a param_initializers dict here
            # but never passed it to batch_norm, so this path uses batch_norm's
            # own default initializers. Kept that way to avoid changing
            # training results - confirm whether 'conv_bn_init' was meant to
            # apply bn_gamma_initial_value here.
            x = self.batch_norm(x)
            x = activation(x) if activation is not None else x
        return x

    def conv2d_linear_last_bn(self, inputs, *args, **kwargs):
        """Bias-free conv2d + batch norm (no activation) with an explicit gamma init.

        With 'adv_bn_init' gamma starts at zero; with 'conv_bn_init' it starts
        at ``bn_gamma_initial_value``.
        """
        x = tf.layers.conv2d(
            inputs, *args,
            data_format=self.data_format,
            use_bias=False,
            kernel_initializer=self.conv_initializer,
            activation=None,
            **kwargs)
        param_initializers = {
            'moving_mean': tf.zeros_initializer(),
            'moving_variance': tf.ones_initializer(),
            'beta': tf.zeros_initializer(),
        }
        self._check_bn_init_mode()
        if self.bn_init_mode == 'adv_bn_init':
            param_initializers['gamma'] = tf.zeros_initializer()
        else:  # 'conv_bn_init'
            param_initializers['gamma'] = tf.constant_initializer(self.bn_gamma_initial_value)

        x = self.batch_norm(x, param_initializers=param_initializers)
        return x

    def conv2d_linear(self, inputs, *args, **kwargs):
        """Convolution without activation (batch norm still applies if enabled)."""
        return self._conv2d(inputs, None, *args, **kwargs)

    def conv2d(self, inputs, *args, **kwargs):
        """Convolution followed by the builder's default activation."""
        return self._conv2d(inputs, self.activation, *args, **kwargs)

    def pad2d(self, inputs, begin, end=None):
        """Zero-pad the two spatial dimensions.

        ``begin`` / ``end`` may each be a scalar (same padding for both
        spatial dims) or an indexable pair; ``end`` defaults to ``begin``.
        """
        if end is None:
            end = begin
        try:
            _ = begin[1]
        except TypeError:
            begin = [begin, begin]
        try:
            _ = end[1]
        except TypeError:
            end = [end, end]
        if self.data_format == 'channels_last':
            padding = [[0, 0], [begin[0], end[0]], [begin[1], end[1]], [0, 0]]
        else:
            padding = [[0, 0], [0, 0], [begin[0], end[0]], [begin[1], end[1]]]
        return tf.pad(inputs, padding)

    def max_pooling2d(self, inputs, *args, **kwargs):
        """``tf.layers.max_pooling2d`` using the builder's data format."""
        return tf.layers.max_pooling2d(
            inputs, *args, data_format=self.data_format, **kwargs)

    def average_pooling2d_stride_1(self, inputs, *args, **kwargs):
        """Identity: returns ``inputs`` untouched; extra arguments are ignored."""
        return inputs

    def average_pooling2d(self, inputs, *args, **kwargs):
        """2x2, stride-2, VALID average pooling in NHWC layout.

        The extra arguments are ignored: window and stride are fixed and the
        builder's ``data_format`` is not consulted.
        """
        inputs = tf.nn.avg_pool(inputs, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                                padding="VALID", data_format="NHWC")
        return inputs

    def dense_linear(self, inputs, units, **kwargs):
        """Dense layer without activation. ``kwargs`` are accepted but ignored."""
        return tf.layers.dense(inputs, units, activation=None)

    def dense(self, inputs, units, **kwargs):
        """Dense layer with the default activation. ``kwargs`` are accepted but ignored."""
        return tf.layers.dense(inputs, units, activation=self.activation)

    def activate(self, inputs, activation=None):
        """Apply ``activation`` (or the builder default); pass through if neither is set."""
        activation = activation or self.activation
        return activation(inputs) if activation is not None else inputs

    def batch_norm(self, inputs, **kwargs):
        """Fused ``tf.contrib.layers.batch_norm``; ``kwargs`` override the stored config."""
        all_kwargs = dict(self.batch_norm_config)
        all_kwargs.update(kwargs)
        data_format = 'NHWC' if self.data_format == 'channels_last' else 'NCHW'
        outputs = tf.contrib.layers.batch_norm(
            inputs, is_training=self.training, data_format=data_format,
            fused=True, **all_kwargs)
        return outputs

    def spatial_average2d(self, inputs):
        """Average over the full spatial extent and reshape to ``[n, c]``."""
        shape = inputs.get_shape().as_list()
        if self.data_format == 'channels_last':
            n, h, w, c = shape
        else:
            n, c, h, w = shape
        n = -1 if n is None else n
        x = tf.layers.average_pooling2d(inputs, (h, w), (1, 1),
                                        data_format=self.data_format)
        return tf.reshape(x, [n, c])

    def flatten2d(self, inputs):
        """Flatten everything but the batch dimension.

        Channels-first inputs are transposed to channels-last first so the
        output order matches that of NHWC networks.
        """
        x = inputs
        # Fix: the original compared against the misspelt 'channel_last', so
        # channels-last inputs were transposed as well.
        if self.data_format != 'channels_last':
            x = tf.transpose(x, [0, 2, 3, 1])
        input_shape = x.get_shape().as_list()
        num_inputs = 1
        for dim in input_shape[1:]:
            num_inputs *= dim
        return tf.reshape(x, [-1, num_inputs], name='flatten')

    def residual2d(self, inputs, network, units=None, scale=1.0, activate=False):
        """Return ``inputs + scale * network(inputs)``.

        When the channel count or spatial size changes, the inputs are first
        projected with a 1x1 ``conv2d_linear`` of ``units`` filters.
        """
        outputs = network(inputs)
        c_axis = -1 if self.data_format == 'channels_last' else 1
        h_axis = 1 if self.data_format == 'channels_last' else 2
        w_axis = h_axis + 1
        ishape, oshape = [y.get_shape().as_list() for y in [inputs, outputs]]
        ichans, ochans = ishape[c_axis], oshape[c_axis]
        strides = ((ishape[h_axis] - 1) // oshape[h_axis] + 1,
                   (ishape[w_axis] - 1) // oshape[w_axis] + 1)
        with tf.name_scope('residual'):
            if (ochans != ichans or strides[0] != 1 or strides[1] != 1):
                inputs = self.conv2d_linear(inputs, units, 1, strides, 'SAME')
            x = inputs + scale * outputs
            if activate:
                x = self.activate(x)
        return x
|
||||
|
||||
|
||||
def resnet_bottleneck_v1(builder, inputs, depth, depth_bottleneck, stride, filters, arch_type,
                         basic=False):
    """Build one post-activation residual block and return its output.

    Args:
        builder: ``LayerBuilder`` used to create the layers.
        inputs: block input; the channel count is read from axis 3.
        depth: number of output channels of the block.
        depth_bottleneck: channel count of the inner convolutions.
        stride: spatial stride of the block.
        filters: accepted for call compatibility; not used.
        arch_type: string searched for the shortcut markers 'D1'/'D2'/'D3'
            and the main-path markers 'E1'/'E2'.
        basic: build the two-convolution basic block instead of the
            bottleneck.

    Returns:
        ``relu(main_path + shortcut)``.

    NOTE(review): reconstructed from a copy without indentation; in
    particular the final ``conv2d_linear_last_bn`` is assumed to belong to
    the bottleneck (non-basic) branch only - confirm.
    """
    num_inputs = inputs.get_shape().as_list()[3]
    x = inputs

    # ---- shortcut ----
    if depth == num_inputs:
        if stride == 1:  # v1.5
            shortcut = x
        else:  # v1
            shortcut = builder.max_pooling2d(x, 1, stride)
    else:  # the downsample (first) block in each layer
        if 'D1' in arch_type:
            # ResNet-D: pool first, then a stride-1 1x1 projection.
            if stride == 1:
                shortcut = builder.average_pooling2d_stride_1(x, stride, stride)
            else:
                shortcut = builder.average_pooling2d(x, stride, stride)
            shortcut = builder.conv2d_linear(shortcut, depth, 1, 1, 'SAME')
        elif 'D2' in arch_type:
            shortcut = builder.conv2d_linear(x, depth, 3, stride, 'SAME')
        elif 'D3' in arch_type:
            shortcut = builder.conv2d_linear(x, depth, 1, 1, 'SAME')
            shortcut = builder.average_pooling2d(shortcut, stride, stride)
        else:
            shortcut = builder.conv2d_linear(x, depth, 1, stride, 'SAME')

    # ---- main path ----
    if basic:
        x = builder.pad2d(x, 1)
        x = builder.conv2d(x, depth_bottleneck, 3, stride, 'VALID')
        x = builder.conv2d_linear(x, depth, 3, 1, 'SAME')
    else:
        x = builder.conv2d(x, depth_bottleneck, 1, 1, 'SAME')
        if stride != 1 and 'E1' in arch_type:
            x = builder.average_pooling2d(x, stride, stride)
            x = builder.conv2d(x, depth_bottleneck, 3, 1, 'SAME')
        elif stride != 1 and 'E2' in arch_type:
            x = builder.conv2d(x, depth_bottleneck, 3, 1, 'SAME')
            # The original re-tested stride == 1 here, which cannot hold.
            x = builder.average_pooling2d(x, stride, stride)
        else:  # stride 1, or E0
            x = builder.conv2d(x, depth_bottleneck, 3, stride, 'SAME')
        x = builder.conv2d_linear_last_bn(x, depth, 1, 1, 'SAME')

    x = tf.nn.relu(x + shortcut)
    return x
|
||||
|
||||
def resnet_bottleneck_v2(builder, inputs, depth, depth_bottleneck, stride, filters, arch_type,
                         basic=False):
    """Build one pre-activation residual block and return ``main + shortcut``.

    Args:
        builder: ``LayerBuilder`` used to create the layers.
        inputs: block input; the channel count is read from axis 1.
        depth: number of output channels of the block.
        depth_bottleneck: channel count of the inner convolutions.
        stride: spatial stride of the block.
        filters: accepted for call compatibility; not used.
        arch_type: string searched for 'D1'/'D2'/'D3' and 'E1'/'E2'.
        basic: build the two-convolution basic block instead of the
            bottleneck.

    NOTE(review): reconstructed from a copy without indentation; the final
    ``conv2d_linear`` is assumed to belong to the non-basic branch - confirm.
    """
    def bn_relu(tensor):
        # Batch norm followed by ReLU, the pre-activation pair used throughout.
        return tf.nn.relu(builder.batch_norm(tensor))

    num_inputs = inputs.get_shape().as_list()[1]
    x = inputs
    with tf.name_scope('resnet_v1'):
        # ------- shortcut ---------------
        if depth != num_inputs:
            # the downsample (first) block in each layer
            x = bn_relu(x)
            if 'D1' in arch_type:
                pooled = builder.average_pooling2d(x, stride, stride)
                shortcut = builder.conv2d_linear(pooled, depth, 1, 1, 'SAME')
            elif 'D2' in arch_type:
                shortcut = builder.conv2d_linear(x, depth, 3, stride, 'SAME')
            elif 'D3' in arch_type:
                projected = builder.conv2d_linear(x, depth, 1, 1, 'SAME')
                shortcut = builder.average_pooling2d(projected, stride, stride)
            else:
                shortcut = builder.conv2d_linear(x, depth, 1, stride, 'SAME')
        elif stride == 1:  # v1.5
            shortcut = x
            x = bn_relu(x)
        else:  # v1
            shortcut = builder.max_pooling2d(x, 1, stride)

        # -------- mainstream ----------------
        if basic:
            x = builder.pad2d(x, 1)
            x = builder.conv2d(x, depth_bottleneck, 3, stride, 'VALID')
            x = builder.conv2d_linear(x, depth, 3, 1, 'SAME')
        else:
            x = bn_relu(builder.conv2d(x, depth_bottleneck, 1, 1, 'SAME'))

            if stride == 1:
                x = bn_relu(builder.conv2d(x, depth_bottleneck, 3, stride, 'SAME'))
            elif 'E1' in arch_type:
                x = builder.average_pooling2d(x, stride, stride)
                x = bn_relu(builder.conv2d(x, depth_bottleneck, 3, 1, 'SAME'))
            elif 'E2' in arch_type:
                x = bn_relu(builder.conv2d(x, depth_bottleneck, 3, 1, 'SAME'))
                x = builder.average_pooling2d(x, stride, stride)
            else:  # E0
                x = bn_relu(builder.conv2d(x, depth_bottleneck, 3, stride, 'SAME'))

            x = builder.conv2d_linear(x, depth, 1, 1, 'SAME')

        return x + shortcut
|
||||
|
||||
def inference_resnet_v1_impl(builder, inputs, layer_counts, arch_type='C1+D', resnet_version='v1.5', basic=False):
    """Assemble the v1 network: stem, max-pool, four stages, classifier.

    Args:
        builder: ``LayerBuilder`` used to create the layers.
        inputs: image batch fed to the stem.
        layer_counts: number of residual blocks in each of the four stages.
        arch_type: string searched for the stem markers 'C1'/'C2'/'C3'; also
            forwarded to every residual block.
        resnet_version: accepted for call compatibility; not used here.
        basic: forwarded to every residual block.

    Returns:
        The output of the final 1001-unit dense layer.
    """
    # (marker, ((filters, kernel, stride, padding), ...)) - first match wins.
    stem_variants = (
        ('C1', ((32, 3, 2, 'SAME'), (32, 3, 1, 'SAME'), (64, 3, 1, 'SAME'))),
        ('C2', ((32, 3, 1, 'SAME'), (32, 3, 2, 'VALID'), (64, 3, 1, 'VALID'))),
        ('C3', ((32, 3, 1, 'VALID'), (32, 3, 1, 'VALID'), (64, 3, 2, 'VALID'))),
    )
    classic_stem = ((64, 7, 2, 'SAME'),)
    stem = next((convs for marker, convs in stem_variants if marker in arch_type),
                classic_stem)

    x = inputs
    for out_channels, kernel, conv_stride, padding in stem:
        x = builder.conv2d(x, out_channels, kernel, conv_stride, padding)

    num_filters = 64

    x, _ = tf.nn.max_pool_with_argmax(
        input=x, ksize=(1, 3, 3, 1), strides=(1, 2, 2, 1), padding='SAME')

    # (depth, depth_bottleneck, downsamples). In downsampling stages the first
    # block uses stride 2 and num_filters is doubled before every block.
    stages = ((256, 64, False), (512, 128, True), (1024, 256, True), (2048, 512, True))
    for stage_index, (depth, depth_bottleneck, downsamples) in enumerate(stages):
        for block_index in range(layer_counts[stage_index]):
            if downsamples:
                num_filters = num_filters * 2
            block_stride = 2 if (downsamples and block_index == 0) else 1
            x = resnet_bottleneck_v1(builder, x, depth, depth_bottleneck, block_stride,
                                     num_filters, arch_type, basic)

    # Global average over the two spatial axes, then the classifier.
    x = tf.reduce_mean(x, [1, 2], keepdims=True)
    x = tf.identity(x, 'final_reduce_mean')
    x = tf.reshape(x, [-1, 2048])
    x = tf.layers.dense(inputs=x, units=1001,
                        kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    x = tf.identity(x, 'final_dense')
    return x
|
||||
|
||||
def inference_resnet_v2_impl(builder, inputs, layer_counts, arch_type='C1+D', basic=False):
    """Assemble the v2 network: padded stem, max-pool, four stages, spatial average.

    Args:
        builder: ``LayerBuilder`` used to create the layers.
        inputs: image batch; padded by 3 on each spatial side before the stem.
        layer_counts: number of residual blocks in each of the four stages.
        arch_type: string searched for the stem markers 'C1'/'C2'/'C3'; also
            forwarded to every residual block.
        basic: forwarded to every residual block.

    Returns:
        The result of ``builder.spatial_average2d`` on the last stage output.
    """
    # (marker, ((filters, kernel, stride, padding), ...)) - first match wins.
    stem_variants = (
        ('C1', ((32, 3, 2, 'VALID'), (32, 3, 1, 'VALID'), (64, 3, 1, 'SAME'))),
        ('C2', ((32, 3, 1, 'SAME'), (32, 3, 2, 'VALID'), (64, 3, 1, 'VALID'))),
        ('C3', ((32, 3, 1, 'VALID'), (32, 3, 1, 'VALID'), (64, 3, 2, 'VALID'))),
    )
    classic_stem = ((64, 7, 2, 'VALID'),)
    stem = next((convs for marker, convs in stem_variants if marker in arch_type),
                classic_stem)

    x = builder.pad2d(inputs, 3)
    # Every stem convolution is followed by batch norm and ReLU.
    for out_channels, kernel, conv_stride, padding in stem:
        x = builder.conv2d(x, out_channels, kernel, conv_stride, padding)
        x = builder.batch_norm(x)
        x = tf.nn.relu(x)

    num_filters = 64

    x = builder.max_pooling2d(x, 3, 2, 'SAME')

    # (depth, depth_bottleneck, downsamples). In downsampling stages the first
    # block uses stride 2 and num_filters is doubled before every block.
    stages = ((256, 64, False), (512, 128, True), (1024, 256, True), (2048, 512, True))
    for stage_index, (depth, depth_bottleneck, downsamples) in enumerate(stages):
        for block_index in range(layer_counts[stage_index]):
            if downsamples:
                num_filters = num_filters * 2
            block_stride = 2 if (downsamples and block_index == 0) else 1
            x = resnet_bottleneck_v2(builder, x, depth, depth_bottleneck, block_stride,
                                     num_filters, arch_type, basic)

    return builder.spatial_average2d(x)
|
||||
|
||||
def inference_resnet_v1(config, inputs, nlayer, data_format='channels_last',
                        training=False, conv_initializer=None, bn_init_mode='adv_bn_init', bn_gamma_initial_value=1.0 ):
    """Build a ResNet of the requested version and depth.

    Deep Residual Networks family of models
    https://arxiv.org/abs/1512.03385

    Args:
        config: mapping read for 'resnet_version' ('v1.5' or 'v2') and
            'arch_type'.
        inputs: network input, forwarded unchanged to the implementation
            function.
        nlayer: network depth; one of 18, 34, 50, 101, 152.
        data_format, training, conv_initializer, bn_init_mode,
        bn_gamma_initial_value: forwarded to LayerBuilder.

    Returns:
        Whatever inference_resnet_v1_impl (for 'v1.5') or
        inference_resnet_v2_impl (for 'v2') returns.

    Raises:
        ValueError: if the version or the depth is not supported.
    """
    # Per-depth block counts, plus the extra keyword the shallow variants
    # need. Built per call so every invocation hands out fresh lists.
    depth_table = {
        18: ([2, 2, 2, 2], {'basic': True}),
        34: ([3, 4, 6, 3], {'basic': True}),
        50: ([3, 4, 6, 3], {}),
        101: ([3, 4, 23, 3], {}),
        152: ([3, 8, 36, 3], {}),
    }

    version = config['resnet_version']
    is_v15 = version == 'v1.5'
    if not is_v15 and version != 'v2':
        raise ValueError("Invalid resnet version")

    # The builder is created before the depth is validated, as before.
    if is_v15:
        builder = LayerBuilder(tf.nn.relu, data_format, training, use_batch_norm=True,
                               conv_initializer=conv_initializer, bn_init_mode=bn_init_mode,
                               bn_gamma_initial_value=bn_gamma_initial_value)
    else:
        builder = LayerBuilder( None, data_format, training, use_batch_norm=False,
                               conv_initializer=conv_initializer, bn_init_mode=bn_init_mode,
                               bn_gamma_initial_value=bn_gamma_initial_value)

    if nlayer not in depth_table:
        raise ValueError("Invalid nlayer (%i); must be one of: 18,34,50,101,152" %
                         nlayer)
    layer_counts, extra_kwargs = depth_table[nlayer]

    if is_v15:
        return inference_resnet_v1_impl(builder, inputs, layer_counts, config['arch_type'],
                                        config['resnet_version'], **extra_kwargs)
    return inference_resnet_v2_impl(builder, inputs, layer_counts, config['arch_type'],
                                    **extra_kwargs)
|
||||
|
||||
|
||||
|
||||
+228
@@ -0,0 +1,228 @@
|
||||
import six
|
||||
import tensorflow as tf
|
||||
|
||||
class Optimizer:
    """Factory that wraps a base optimizer using the training configuration."""

    def __init__(self, config):
        # Kept so the wrappers created later can read their settings from it.
        self.config = config

    def get_lbs_optimizer(self, opt): #TODO input is ( self, hyper_param )
        """Return ``opt`` wrapped in a MixedPrecisionOptimizer.

        Args:
            opt: the base optimizer to wrap.

        Returns:
            A MixedPrecisionOptimizer built from ``opt`` and ``self.config``.
        """
        # NOTE: an additional LargeBatchSizeOptimizer wrapping (weight decay,
        # LARS, bn_lr_scale) used to be applied here and is currently disabled.
        wrapped = MixedPrecisionOptimizer(opt, self.config)
        return wrapped
|
||||
|
||||
class MixedPrecisionOptimizer(tf.train.Optimizer):
    """An optimizer that updates trainable variables in fp32.

    Wraps another optimizer. The loss is multiplied by a static loss scale
    before gradients are computed; gradients of float16 variables are cast to
    float32 and paired with float32 master copies; after the wrapped optimizer
    has applied its update, the master copies are written back into the
    float16 variables.
    """

    def __init__(self, optimizer, config):
        """Create the wrapper and the float32 master copies.

        Args:
            optimizer: optimizer to wrap; its ``_use_locking`` and ``_name``
                are reused (the name gets a '-MP' suffix).
            config: mapping; ``config['loss_scale']`` is read and must be
                convertible with ``float()``.
        """
        super(MixedPrecisionOptimizer, self).__init__(
            optimizer._use_locking,
            optimizer._name + '-MP',
        )
        self._optimizer = optimizer
        self._config = config
        loss_scale = self._config['loss_scale']
        self._loss_scale = float(loss_scale)
        # Maps the name of a float32 master copy to its float16 variable.
        self._fp32_to_fp16 = {}

        var_list = (
            tf.trainable_variables() +
            tf.get_collection(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
        # One non-trainable float32 copy per trainable variable that exists
        # at construction time, in the same order as var_list.
        with tf.device('/gpu:0'):
            self.var_fp32_copy = [
                tf.Variable(tf.cast(v.initialized_value(), tf.float32),
                            dtype=tf.float32, trainable=False,
                            collections=[tf.GraphKeys.GLOBAL_VARIABLES, "FP32_MASTER_COPIES"])
                for v in var_list
            ]

    def compute_gradients(self, loss, var_list=None,
                          gate_gradients=tf.train.Optimizer.GATE_OP,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False,
                          grad_loss=None):
        """Compute loss-scaled gradients and return them un-scaled.

        Arguments have the same meaning as in
        ``tf.train.Optimizer.compute_gradients``. When ``var_list`` is None
        the trainable (resource) variables of the graph are used.

        Returns:
            A list of (gradient, variable) pairs. For float16 variables the
            pair holds the float32 gradient and the float32 master copy.
            Entries without a gradient are returned as (None, variable).
        """
        if var_list is None:
            var_list = (
                tf.trainable_variables() +
                tf.get_collection(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))

        if self._loss_scale != 1.0:
            loss = tf.scalar_mul(self._loss_scale, loss)

        grads_and_vars_fp16 = self._optimizer.compute_gradients(
            loss, var_list=var_list,
            gate_gradients=gate_gradients,
            aggregation_method=aggregation_method,
            colocate_gradients_with_ops=colocate_gradients_with_ops,
            grad_loss=grad_loss,
        )

        # Pair each float16 variable with its float32 master copy and
        # remember the mapping for the write-back in apply_gradients.
        grads_and_vars_fp32 = []
        with tf.variable_scope('FP32-master-copy'):
            for i, (grad, var) in enumerate(grads_and_vars_fp16):
                if grad is None:
                    grads_and_vars_fp32.append((None, var))
                elif var.dtype.base_dtype == tf.float16:
                    # NOTE(review): the positional lookup assumes var_list has
                    # the same order as the variable list captured in
                    # __init__ - confirm for callers passing a custom var_list.
                    fp32_var = self.var_fp32_copy[i]
                    self._fp32_to_fp16[fp32_var.name] = var
                    fp32_grad = tf.cast(grad, tf.float32)
                    grads_and_vars_fp32.append((fp32_grad, fp32_var))
                else:
                    grads_and_vars_fp32.append((grad, var))

        # Undo the loss scaling. Missing gradients stay None; dividing None
        # by the scale would raise a TypeError.
        grads_and_vars_fp32_rescaled = [
            (None if g is None else g / self._loss_scale, v)
            for g, v in grads_and_vars_fp32
        ]

        return grads_and_vars_fp32_rescaled

    def apply_gradients(self, grads_and_vars, *args, **kwargs):
        """Apply gradients, then refresh the float16 variables.

        Args:
            grads_and_vars: pairs as returned by ``compute_gradients``.
            *args, **kwargs: forwarded to the wrapped optimizer.

        Returns:
            A grouped op of the float16 write-backs when any master copy was
            updated, otherwise the wrapped optimizer's update op.
        """
        update_op = self._optimizer.apply_gradients(grads_and_vars, *args, **kwargs)
        apply_ops = []
        # The write-backs must observe the values produced by the update.
        with tf.control_dependencies([update_op]):
            for grad, var in grads_and_vars:
                if var.name in self._fp32_to_fp16:
                    dst_var = self._fp32_to_fp16[var.name]
                    apply_ops.append(
                        tf.assign(dst_var, tf.saturate_cast(var, tf.float16)))
        if apply_ops:
            return tf.group(apply_ops)
        return update_op
|
||||
|
||||
|
||||
class LargeBatchSizeOptimizer(tf.train.Optimizer):
    """Optimizer wrapper adding loss un-scaling, weight decay and optional LARS.

    -------------------
    Parameters:
      - optimizer: the optimizer being wrapped,
            example: tf.train.MomentumOptimizer
      - weight_decay: coefficient of the weight-decay term added to the
            gradient of every variable whose name contains neither
            'BatchNorm' nor 'bias'
      - clip: stored on the instance; not read by the code in this class
      - epsilon: stored on the instance as a float; not read by the code in
            this class
      - accum_dtype: dtype of the per-variable accumulation buffers created
            in the constructor
      - use_lars: if True, decayed gradients are additionally multiplied by
            a clipped layer-wise trust ratio
      - bn_lr_scale: multiplier applied to gradients of 'BatchNorm' and
            'bias' variables
      - name
      - use_locking
    """

    def __init__(self, optimizer, weight_decay, clip=True, epsilon=1., accum_dtype=tf.float16, use_lars=True, bn_lr_scale=1.0,
                 name="LarcOptimizer", use_locking=False):
        super(LargeBatchSizeOptimizer, self).__init__(
            name=name, use_locking=use_locking)
        self._optimizer = optimizer
        self._weight_decay = weight_decay
        self._clip = clip
        self._epsilon = float(epsilon)
        self._accum_dtype = accum_dtype
        self._use_lars = use_lars
        self._bn_lr_scale = bn_lr_scale

        var_list = (
            tf.trainable_variables() +
            tf.get_collection(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
        # One zero-initialised, non-trainable local variable per trainable
        # variable that exists at construction time.
        with tf.device('/gpu:0'):
            self._grads_accum = [
                tf.Variable(tf.cast(tf.zeros_like(v.initialized_value()), self._accum_dtype),
                            dtype=self._accum_dtype, trainable=False,
                            collections=[tf.GraphKeys.LOCAL_VARIABLES])
                for v in var_list
            ]

    def compute_gradients(self, *args, **kwargs):
        """Delegate to the wrapped optimizer unchanged."""
        return self._optimizer.compute_gradients(*args, **kwargs)

    def apply_gradients(self, gradvars, loss_scale, *args, **kwargs):
        """Post-process the gradients and apply them with the wrapped optimizer.

        Args:
            gradvars: iterable of (gradient, variable) pairs; pairs whose
                gradient is None are dropped.
            loss_scale: value every gradient is divided by before weight
                decay / LARS.
            *args, **kwargs: accepted for signature compatibility; not
                forwarded.

        Returns:
            The grouped update op. The global step from
            ``tf.train.get_global_step()`` is passed to the wrapped optimizer.

        The per-variable norm summaries that used to follow the return
        statement could never run and have been removed.
        """
        grads_and_vars_clean = [
            (grad, var) for grad, var in gradvars if grad is not None
        ]

        # post_process_grads includes Lars
        processed_grads_and_vars = self.post_process_grads(grads_and_vars_clean, loss_scale)

        red_grad_updates = self._optimizer.apply_gradients(
            processed_grads_and_vars, global_step=tf.train.get_global_step())
        return tf.group(red_grad_updates)

    def _lars_rescale(self, g, var):
        """Return the weight-decayed gradient scaled by the clipped LARS ratio."""
        grad_norm = tf.norm(g, ord='euclidean')
        weight_norm = tf.norm(var, ord='euclidean')

        grad_norm_wd = tf.add(grad_norm, tf.multiply(self._weight_decay, weight_norm))
        # 1e-5 keeps the denominator away from zero.
        rescale_factor = tf.div(tf.multiply(0.001, weight_norm),
                                tf.add(grad_norm_wd, tf.constant(1e-5, tf.float32)))

        coeffi = tf.clip_by_value(rescale_factor, 0.001, 50.0)
        decayed_g = tf.add(g, tf.multiply(self._weight_decay, var))

        return tf.multiply(coeffi, decayed_g)

    def post_process_grads(self, grads_and_vars, loss_scale):
        """Un-scale gradients, then apply weight decay and optional LARS.

        Args:
            grads_and_vars: (gradient, variable) pairs with no None gradients.
            loss_scale: value every gradient is divided by first.

        Returns:
            A new list of (gradient, variable) pairs. Variables whose name
            contains 'BatchNorm' or 'bias' only get their gradient multiplied
            by ``bn_lr_scale``; all others get weight decay and, if
            ``use_lars`` is set, the LARS rescaling.
        """
        g_and_v_scaled = [(g / loss_scale, v) for g, v in grads_and_vars]

        processed = []
        for g, var in g_and_v_scaled:
            if 'BatchNorm' not in var.name and 'bias' not in var.name:
                if self._use_lars:
                    g = self._lars_rescale(g, var)
                else:
                    g = tf.add(g, tf.multiply(self._weight_decay, var))
            else:
                g = self._bn_lr_scale * g
            processed.append((g, var))

        return processed
|
||||
+3
@@ -0,0 +1,3 @@
|
||||
PY3.6.1 (default, Jun 6 2019, 16:37:03)
|
||||
[GCC 4.8.5 20150623 (EulerOS 4.8.5-28)]TF1.12.0
|
||||
Step Epoch Speed Loss FinLoss LR
|
||||
+189
@@ -0,0 +1,189 @@
|
||||
import tensorflow as tf
|
||||
import math
|
||||
import time
|
||||
from . import train_helper
|
||||
from .train_helper import stage
|
||||
from utils.logger import rank0log
|
||||
from benchmark_log import hwlog
|
||||
#from tensorflow.contrib.offline_train.python.npu.npu_config import NPURunConfig
|
||||
from npu_bridge.estimator.npu.npu_config import NPURunConfig
|
||||
#from tensorflow.contrib.offline_train.python.npu.npu_estimator import NPUEstimator
|
||||
from npu_bridge.estimator.npu.npu_estimator import NPUEstimator
|
||||
#from tensorflow.contrib.offline_train.python.npu.npu_optimizer import NPUDistributedOptimizer
|
||||
from npu_bridge.estimator.npu.npu_optimizer import NPUDistributedOptimizer
|
||||
|
||||
class GPUBaseTrain(object):
    """Runs training and evaluation of the model through a TF Estimator.

    A plain ``tf.estimator.Estimator`` is used when
    ``config['accelerator'] == 'gpu'``; any other value selects an
    ``NPUEstimator``.
    """

    def __init__(self, session, config, data, model, logger):
        """Store the collaborators and build the estimator.

        Args:
            session: object providing ``get_config()`` (GPU path only).
            config: configuration mapping.
            data: object providing ``get_train_input_fn()`` and
                ``get_eval_input_fn()``.
            model: object providing ``get_estimator_model_func``.
            logger: session hook; its ``logger`` attribute is used for
                text logging.
        """
        self.sess = session
        self.config = config
        self.data = data
        self.model = model
        self.logger = logger
        self.print_logger = self.logger.logger
        self.all_preds = []
        self.all_targets = []
        # The NPU classes are already imported at module level, so no local
        # imports are needed here.
        if self.config['accelerator'] == 'gpu':
            self.classifier, self.training_hook = self.get_classifier()
        else:
            self.classifier, self.training_hook = self.get_npu_classifier()

    def get_classifier(self):
        """Build the GPU estimator and its training hooks.

        Returns:
            (classifier, training_hooks)
        """
        do_checkpoint = self.config['do_checkpoint']
        classifier = tf.estimator.Estimator(
            model_fn=self.model.get_estimator_model_func,
            model_dir=self.config['log_dir'],
            config = tf.estimator.RunConfig(
                session_config=self.sess.get_config(),
                save_summary_steps=self.config['save_summary_steps'] if do_checkpoint else None,
                save_checkpoints_steps=self.config['save_checkpoints_steps'] if do_checkpoint else None,
                keep_checkpoint_max=None
            )
        )

        training_hooks = [train_helper.PrefillStagingAreasHook()]
        training_hooks.append(self.logger)

        return classifier, training_hooks

    def get_npu_classifier(self):
        """Build the NPU estimator and its training hooks.

        Returns:
            (classifier, training_hooks)
        """
        session_config = tf.ConfigProto(
            inter_op_parallelism_threads=10,
            intra_op_parallelism_threads=10,
            allow_soft_placement=True,)

        debug = self.config['debug']
        # Settings shared by the debug and non-debug run configurations.
        common = dict(
            hcom_parallel=True,
            precision_mode="allow_mix_precision",
            enable_data_pre_proc=True,
            session_config=session_config,
            model_dir=self.config['model_dir'],
            iterations_per_loop=self.config['iterations_per_loop'],
        )
        if debug:
            run_config = NPURunConfig(save_checkpoints_steps=112590,
                                      keep_checkpoint_max=5, **common)
        else:
            # Summaries and step-count logging are switched off here.
            run_config = NPURunConfig(save_summary_steps=0,
                                      log_step_count_steps=None,
                                      save_checkpoints_secs=1e9, **common)

        classifier =NPUEstimator(
            model_fn= self.model.get_estimator_model_func,
            config= run_config
        )

        training_hooks = []
        if debug:
            training_hooks = [train_helper.PrefillStagingAreasHook()]
        # NOTE(review): the logger hook is attached in both modes here -
        # confirm this is the intended nesting.
        training_hooks.append(self.logger)

        return classifier, training_hooks

    def train(self):
        """Train until the global step reaches ``config['nstep']``."""
        hwlog.remark_print(key=hwlog.CURRENT_EPOCH, value=self.config['num_epochs'])
        print ('training steps: %d' % self.config['nstep'])
        self.classifier.train( input_fn=lambda:self.data.get_train_input_fn(),
                               max_steps = self.config['nstep'],
                               hooks = self.training_hook
                              )

    def _evaluate_checkpoint(self, ckpt):
        """Evaluate one checkpoint record and store its metrics in it.

        Adds 'epoch', 'top1', 'top5' and 'loss' to ``ckpt`` and returns the
        raw evaluation result.
        """
        eval_result = self.classifier.evaluate(
            input_fn=lambda: self.data.get_eval_input_fn(),
            checkpoint_path=ckpt['path'])

        hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP1, value=float(eval_result.get("val-top1acc")))
        hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP5, value=float(eval_result.get("val-top5acc")))

        steps_per_epoch = self.config['num_training_samples']/ (self.config['global_batch_size'])
        ckpt['epoch'] = math.ceil(ckpt['step'] / steps_per_epoch)
        ckpt['top1'] = eval_result['val-top1acc']
        ckpt['top5'] = eval_result['val-top5acc']
        ckpt['loss'] = eval_result['loss']
        return eval_result

    def _report_checkpoints(self, ckpts):
        """Log the table header and one row per evaluated checkpoint."""
        rank0log(self.print_logger, ' step epoch top1 top5 loss checkpoint_time(UTC)')
        for c in ckpts:
            if 'top1' not in c:
                continue
            rank0log(self.print_logger,'{:5d} {:5.1f} {:5.3f} {:6.2f} {:6.2f} {time}'
                     .format(c['step'],
                             c['epoch'],
                             c['top1'] * 100,
                             c['top5'] * 100,
                             c['loss'],
                             # gmtime so the value matches the UTC column label
                             time=time.strftime('%Y-%m-%d %H:%M:%S',
                                                time.gmtime(c['mtime']))))

    def evaluate(self):
        """Evaluate the checkpoints found in ``config['log_dir']``.

        Every ``config['eval_interval']``-th checkpoint is evaluated, and
        the last one always is. A KeyboardInterrupt is logged and swallowed.
        """
        rank0log(self.print_logger, "Evaluating")
        rank0log(self.print_logger, "Validation dataset size: {}".format(self.config['num_evaluating_samples'] ))
        time.sleep(5) # a little extra margin...
        try:
            ckpts = train_helper.sort_and_load_ckpts(self.config['log_dir'])
            print("=========ckpt==========")
            print(ckpts)
            print("=========ckpt==========")
            last_index = len(ckpts) - 1
            for i, c in enumerate(ckpts):
                if i < last_index and i % self.config['eval_interval'] != 0:
                    continue
                self._evaluate_checkpoint(c)

            self._report_checkpoints(ckpts)
            rank0log(self.print_logger, "Finished evaluation")
        except KeyboardInterrupt:
            self.print_logger.error("Keyboard interrupt")

    def train_and_evaluate(self):
        """Alternate training cycles with evaluation of the newest checkpoint.

        Runs ``num_epochs // epochs_between_evals`` cycles and stops early
        once top-1 accuracy (in percent) exceeds ``config['stop_threshold']``
        (default 74.9).

        Returns:
            True if the accuracy threshold was reached, otherwise False.
        """
        success = False
        epochs_between_evals = self.config.get('epochs_between_evals', 4)

        for _ in range(self.config['num_epochs'] // epochs_between_evals):
            rank0log(self.print_logger, "Starting a training cycle")
            # NOTE(review): the value reported is the total number of epochs
            # on every cycle (the original counter was reset each
            # iteration); confirm whether a running epoch count was intended.
            hwlog.remark_print(key=hwlog.CURRENT_EPOCH, value=self.config['num_epochs'])

            self.classifier.train(input_fn=lambda:self.data.get_train_input_fn(),
                                  steps = self.config['nsteps_per_epoch']*epochs_between_evals,
                                  hooks = self.training_hook )

            rank0log(self.print_logger, "Starting to evaluate")
            rank0log(self.print_logger, "Validation dataset size: {}".format(self.config['num_evaluating_samples'] ))
            time.sleep(5) # a little extra margin...

            ckpts = train_helper.sort_and_load_ckpts(self.config['log_dir'])
            c = ckpts[-1]
            eval_result = self._evaluate_checkpoint(c)
            self._report_checkpoints([c])

            if eval_result['val-top1acc']*100 > self.config.get('stop_threshold', 74.9):
                success = True
                break

        return success
|
||||
|
||||
|
||||
|
||||
+39
@@ -0,0 +1,39 @@
|
||||
import tensorflow as tf
|
||||
from tensorflow.python.ops import data_flow_ops
|
||||
import re
|
||||
import os
|
||||
from operator import itemgetter
|
||||
|
||||
class PrefillStagingAreasHook(tf.train.SessionRunHook):
    """Session hook that runs the registered staging-area put ops once the session exists."""

    def after_create_session(self, session, coord):
        """Run growing prefixes of the 'STAGING_AREA_PUTS' collection.

        The first op is run alone, then the first two together, and so on
        until every registered put op has been included.
        """
        put_ops = tf.get_collection('STAGING_AREA_PUTS')
        for prefix_len in range(1, len(put_ops) + 1):
            session.run(put_ops[:prefix_len])
|
||||
|
||||
def stage(tensors):
    """Route ``tensors`` through a StagingArea for asynchronous put/get.

    The put op is also added to the 'STAGING_AREA_PUTS' collection.

    Returns:
        (put_op, get_tensors): the op that stages ``tensors`` and the
        tensors read back from the staging area.
    """
    dtypes = [t.dtype for t in tensors]
    shapes = [t.get_shape() for t in tensors]
    area = data_flow_ops.StagingArea(dtypes=dtypes, shapes=shapes)

    put_op = area.put(tensors)
    get_tensors = area.get()
    tf.add_to_collection('STAGING_AREA_PUTS', put_op)
    return put_op, get_tensors
|
||||
|
||||
|
||||
def sort_and_load_ckpts(log_dir):
    """List the checkpoints stored in ``log_dir``, ordered by training step.

    Only files named exactly ``model.ckpt-<step>.index`` are considered.
    The dots are matched literally and the whole name must match, so
    look-alikes such as ``model.ckpt-7.index.tmp`` are ignored.

    Args:
        log_dir: directory to scan.

    Returns:
        A list of dicts sorted by ascending 'step', each holding
        'step' (int), 'path' (the index file path without its ``.index``
        suffix) and 'mtime' (modification time of the index file).
    """
    ckpts = []
    for f in os.listdir(log_dir):
        m = re.fullmatch(r'model\.ckpt-([0-9]+)\.index', f)
        if m is None:
            continue
        fullpath = os.path.join(log_dir, f)
        ckpts.append({'step': int(m.group(1)),
                      'path': os.path.splitext(fullpath)[0],
                      'mtime': os.stat(fullpath).st_mtime,
                      })
    ckpts.sort(key=itemgetter('step'))
    return ckpts
|
||||
|
||||
|
||||
+43
@@ -0,0 +1,43 @@
|
||||
import tensorflow as tf
|
||||
import os,sys
|
||||
|
||||
class CreateSession():
    """Builds the ``tf.ConfigProto`` used for the session and sets TF environment variables."""

    def __init__(self, config):
        """Create the session config for the accelerator named in ``config``.

        Soft placement is allowed only when ``config['accelerator']`` is
        '1980'; in that case the local devices are also listed once.
        """
        self.config = config
        on_npu = self.config['accelerator'] == '1980'

        if on_npu:
            from tensorflow.python.client import device_lib
            # Not referenced below; presumably imported for its import-time
            # side effects - TODO confirm.
            from npu_bridge.estimator import npu_ops

        self.estimator_config = tf.ConfigProto(allow_soft_placement=on_npu)
        self.estimator_config.gpu_options.allow_growth = True

        if on_npu:
            # The returned device list is not used.
            device_lib.list_local_devices(self.estimator_config)

        self.set_env()

    def set_env(self):
        """Export the TF_* environment variables used for the run."""
        # TODO, get env from config file
        gpu_thread_count = 2
        os.environ.update({
            'TF_GPU_THREAD_MODE': 'gpu_private',
            'TF_GPU_THREAD_COUNT': str(gpu_thread_count),
            'TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT': '1',
            'TF_ENABLE_WINOGRAD_NONFUSED': '1',
        })

    def get_config(self):
        """Finalize and return the session config.

        Restricts the visible devices to device 0 and sets the intra-op and
        inter-op thread counts to 1 and 5.
        """
        cfg = self.estimator_config
        cfg.gpu_options.visible_device_list = str(0)
        cfg.intra_op_parallelism_threads = 1 # Avoid pool of Eigen threads
        cfg.inter_op_parallelism_threads = 5
        return self.estimator_config
|
||||
|
||||
|
||||
+87
@@ -0,0 +1,87 @@
|
||||
from __future__ import print_function
|
||||
import tensorflow as tf
|
||||
import logging
|
||||
import numpy as np
|
||||
import time
|
||||
import sys,os
|
||||
from benchmark_log import hwlog
|
||||
class LogSessionRunHook(tf.train.SessionRunHook):
    """Session hook that logs step, epoch, throughput, losses and learning rate."""

    def __init__(self, config, warmup_steps=5):
        """Read the logging settings from ``config`` and open the logger.

        Args:
            config: mapping providing 'global_batch_size',
                'iterations_per_loop', 'num_training_samples',
                'display_every', 'log_name' and 'log_dir'.
            warmup_steps: number of leading timing samples ignored by
                ``get_average_speed``.
        """
        self.global_batch_size = config['global_batch_size']
        self.iterations_per_loop = config['iterations_per_loop']
        self.warmup_steps = warmup_steps
        self.iter_times = []
        self.num_records = config['num_training_samples']
        self.display_every = config['display_every']
        self.logger = get_logger(config['log_name'], config['log_dir'])
        rank0log(self.logger, 'PY' + str(sys.version) + 'TF' + str(tf.__version__))

    def after_create_session(self, session, coord):
        """Log the column header and reset the timing accumulators."""
        rank0log(self.logger, 'Step Epoch Speed Loss FinLoss LR')
        self.elapsed_secs = 0.
        self.count = 0

    def before_run(self, run_context):
        """Start the timer and request the tensors that get logged."""
        self.t0 = time.time()
        return tf.train.SessionRunArgs(
            fetches=[tf.train.get_global_step(), 'loss:0', 'total_loss:0', 'learning_rate:0'])

    def after_run(self, run_context, run_values):
        """Record the duration of the run and log progress when due.

        A line is logged at global step 1 and whenever the global step is a
        multiple of ``display_every``; the accumulators are reset afterwards.
        """
        batch_time = time.time() - self.t0
        self.iter_times.append(batch_time)
        self.elapsed_secs += batch_time
        self.count += 1
        global_step, loss, total_loss, lr = run_values.results
        if global_step == 1 or global_step % self.display_every == 0:
            dt = self.elapsed_secs / self.count
            img_per_sec = self.global_batch_size * self.iterations_per_loop / dt
            epoch = global_step * self.global_batch_size / self.num_records
            self.logger.info('step:%6i epoch:%5.1f FPS:%7.1f loss:%6.3f total_loss:%6.3f lr:%7.5f' %
                             (global_step, epoch, img_per_sec, loss, total_loss, lr))
            self.elapsed_secs = 0.
            self.count = 0
            hwlog.remark_print(key=hwlog.FPS, value='%7.1f' % img_per_sec)

    def get_average_speed(self):
        """Return the average throughput over the runs after warm-up.

        Uses the same formula as ``after_run``: each timing sample is scaled
        by ``global_batch_size * iterations_per_loop``.

        Returns:
            The average speed, or 0.0 when no sample was recorded after the
            warm-up steps.
        """
        samples = self.iter_times[self.warmup_steps:]
        if not samples:
            # np.mean of an empty list would give NaN plus a RuntimeWarning.
            return 0.0
        avg_time = np.mean(samples)
        speed = self.global_batch_size * self.iterations_per_loop / avg_time
        return speed
|
||||
|
||||
|
||||
|
||||
def rank0log(logger, *args, **kwargs):
    """Log a message through ``logger``, or print it when there is none.

    With a truthy ``logger`` the positional arguments are converted with
    ``str`` and concatenated without a separator into one INFO record;
    ``kwargs`` are ignored. Otherwise everything is passed to ``print``.
    """
    if not logger:
        print(*args, **kwargs)
        return
    logger.info(''.join(map(str, args)))
|
||||
|
||||
|
||||
def get_logger(log_name, log_dir):
    """Return a logger writing plain messages to the console and to a file.

    The file is ``<log_dir>/<log_name>``; ``log_dir`` is created if needed.
    Calling this again for the same name and directory returns the same
    logger without adding further handlers, so messages are not duplicated.

    Args:
        log_name: logger name, also used as the log file name.
        log_dir: directory that holds the log file.

    Returns:
        The configured ``logging.Logger`` (level INFO).
    """
    logger = logging.getLogger(log_name)
    logger.setLevel(logging.INFO) # INFO, ERROR

    # exist_ok: log_dir may be common to several ranks (e.g. on NFS) that
    # race to create it.
    os.makedirs(log_dir, exist_ok=True)

    # formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    formatter = logging.Formatter('%(message)s')
    # FileHandler stores the absolute path in baseFilename.
    log_path = os.path.abspath(os.path.join(log_dir, log_name))

    has_console = any(
        isinstance(h, logging.StreamHandler) and not isinstance(h, logging.FileHandler)
        for h in logger.handlers)
    has_file = any(
        isinstance(h, logging.FileHandler) and h.baseFilename == log_path
        for h in logger.handlers)

    if not has_console:
        # console handler
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    if not has_file:
        # file handler which logs debug messages
        fh = logging.FileHandler(log_path)
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(formatter)
        logger.addHandler(fh)

    return logger
|
||||
|
||||
+41
@@ -0,0 +1,41 @@
|
||||
# main env
# Environment template for NPU training. The {NAME} tokens below are
# placeholders; presumably a launcher substitutes them before this file is
# sourced - TODO confirm.
#
# Use the NNAE layout when /usr/local/Ascend/nnae/latest exists, otherwise
# fall back to the ascend-toolkit layout. Note that LD_LIBRARY_PATH is
# replaced, not extended, while PYTHONPATH and PATH are appended to.
if [ -d /usr/local/Ascend/nnae/latest ];then

    export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib/:/usr/local/Ascend/nnae/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/local/Ascend/driver/tools/hccn_tool/:/usr/local/mpirun4.0/lib
    export PYTHONPATH=$PYTHONPATH:/usr/local/Ascend/tfplugin/latest/tfplugin/python/site-packages:/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/tfplugin/latest/tfplugin/python/site-packages
    export PATH=$PATH:/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin:/usr/local/mpirun4.0/bin
    export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp
else
    export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/local/mpirun4.0/lib
    # NOTE(review): $projectDir is not set in this file and is only appended
    # in this branch; presumably exported by the caller - confirm.
    export PYTHONPATH=$PYTHONPATH:/usr/local/Ascend/tfplugin/latest/tfplugin/python/site-packages:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest//fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/tfplugin/python/site-packages:$projectDir
    export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:/usr/local/mpirun4.0/bin
    export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/

fi

export SOC_VERSION=Ascend910
export HCCL_CONNECT_TIMEOUT=600

# user env
# Job identity and rank table; the rank variables below are left disabled
# in this template.
export JOB_ID={JOB_ID}
export RANK_TABLE_FILE={RANK_TABLE_FILE}
#export RANK_SIZE={RANK_SIZE}
#export RANK_INDEX={RANK_INDEX}
#export RANK_ID={RANK_ID}

# profiling env
export PROFILING_MODE={PROFILING_MODE}
export AICPU_PROFILING_MODE={AICPU_PROFILING_MODE}
export PROFILING_OPTIONS={PROFILING_OPTIONS}
export FP_POINT={FP_POINT}
export BP_POINT={BP_POINT}

# debug env
# All debug switches are disabled by default; uncomment to enable.
#export DUMP_GE_GRAPH=2
#export DUMP_OP=1
#export DUMP_OP_LESS=1
#export PRINT_MODEL=1
#export TE_PARALLEL_COMPILER=0

# system env
# Remove the core-dump size limit for processes started from this shell.
ulimit -c unlimited
|
||||
+86
@@ -0,0 +1,86 @@
|
||||
#!/bin/bash
# Launcher for ResNet50 training.
#
# Arguments:
#   $1  rank_size  - number of devices to train on
#   $2  yamlPath   - yaml file holding the "tensorflow_config" section
#   $3  toolsPath  - directory containing get_params_for_yaml.sh
#   $4  CLUSTER    - read only inside a container; "True" selects the mpirun path
#   $5  MPIRUN_ALL_IP - read only inside a container; list of node IPs
#
# The script rewrites a Python config file in place with values from the
# yaml, then starts scripts/train.sh either through mpirun (cluster) or once
# per device in the background, and waits for all of them.

rank_size=$1
yamlPath=$2
toolsPath=$3
# Parent directory of the directory containing this script.
currentDir=$(cd "$(dirname "$0")/.."; pwd)
# /.dockerenv exists inside a Docker container.
if [ -f /.dockerenv ];then
    CLUSTER=$4
    MPIRUN_ALL_IP="$5"
    export CLUSTER=${CLUSTER}
fi
#export RANK_ID=npu${rank_size}p

# Load the configuration from the yaml file: the helper's output is eval'ed,
# which defines the shell variables used below (data_url, epoches, mode, ...).
eval $(${toolsPath}/get_params_for_yaml.sh ${yamlPath} "tensorflow_config")

# Escape the slashes in data_url so it can be used inside a sed replacement.
data_url_new=`echo ${data_url//\//\\\\/}`
echo ${data_url}
echo ${max_steps}
echo ${epoches}
# Pick the Python config file that matches the cluster mode / device count.
if [ x"${CLUSTER}" == x"True" ];then
    jsonFilePath=${currentDir}/code/resnet50_train/configs/res50_256bs_16p.py
elif [ ${rank_size} -le 1 ];then
    jsonFilePath=${currentDir}/code/resnet50_train/configs/res50_256bs_1p.py
elif [ ${rank_size} -le 2 ];then
    jsonFilePath=${currentDir}/code/resnet50_train/configs/res50_256bs_2p.py
elif [ ${rank_size} -le 4 ];then
    jsonFilePath=${currentDir}/code/resnet50_train/configs/res50_256bs_4p.py
else
    jsonFilePath=${currentDir}/code/resnet50_train/configs/res50_256bs_8p.py
fi

#echo "jsonfilepath is "${jsonFilePath}
# Patch the config file in place. Each expression replaces everything from
# the key to the end of the line. The "0,/re/" forms only touch the first
# matching line.
# NOTE(review): if a yaml value is unset (e.g. batch_size), the substitution
# writes an empty value into the Python file - confirm the yaml always
# defines every key used here.
sed -i "s/data_url.*$/data_url\': \'${data_url_new}\',/g" ${jsonFilePath}
#sed -i "s/max_train_steps.*$/max_train_steps\': ${max_steps},/g" ${jsonFilePath}
sed -i "s/num_epochs.*$/num_epochs\': ${epoches},/g" ${jsonFilePath}
sed -i "s/epochs_between_evals.*$/epochs_between_evals\': ${epochs_between_evals},/g" ${jsonFilePath}
sed -i "0,/mode.*$/s//mode\': \'${mode}\',/g" ${jsonFilePath}
# Strip carriage returns.
sed -i 's/\r//g' ${jsonFilePath}
sed -i "0,/batch_size.*$/s//batch_size\': ${batch_size},/" ${jsonFilePath}

# Per-run result directory, named after the start time.
currtime=`date +%Y%m%d%H%M%S`
mkdir -p ${currentDir%train*}/train/result/tf_resnet50_hc/training_job_${currtime}/
train_job_dir=${currentDir%train*}/train/result/tf_resnet50_hc/training_job_${currtime}/
echo "[`date +%Y%m%d-%H:%M:%S`] [INFO] ${train_job_dir} &"
# Device list: when no device group is configured for this rank_size, or for
# 8p and above, devices 0..rank_size-1 are selected in order.
eval device_group=\$device_group_${rank_size}p
if [ x"${device_group}" == x"" ] || [ ${rank_size} -ge 8 ];then
    device_group="$(seq 0 "$(expr $rank_size - 1)")"
fi

# First character of the device list with spaces removed.
# NOTE(review): first_device_id is not used anywhere below in this script.
device_group_str=`echo ${device_group} | sed 's/ //g'`
first_device_id=`echo ${device_group_str: 0:1}`

rank_id=0

if [ x"${CLUSTER}" == x"True" ];then
    # ln hw log
    # Link the log of sub-directory 0 into the job directory.
    ln -snf ${currentDir%train*}/train/result/tf_resnet50_hc/training_job_${currtime}/0/hw_resnet50_hc.log ${currentDir%train*}/train/result/tf_resnet50_hc/training_job_${currtime}/
    this_ip=$(hostname -I |awk '{print $1}')
    # Copy the yaml and the patched config to every other node.
    for ip in $MPIRUN_ALL_IP;do
        if [ x"$this_ip" != x"$ip" ];then
            scp $yamlPath root@$ip:$yamlPath
            scp $jsonFilePath root@$ip:$jsonFilePath
        fi
    done
    export PATH=$PATH:/usr/local/mpirun4.0/bin
    mpirun -H ${mpirun_ip} \
        --bind-to none -map-by slot\
        --allow-run-as-root \
        --mca btl_tcp_if_exclude lo,docker0,endvnic,virbr0,vethf40501b,docker_gwbridge,br-f42ac38052b4\
        --prefix /usr/local/mpirun4.0/ \
        ${currentDir}/scripts/train.sh 0 $rank_size $yamlPath $currtime ${toolsPath} ${CLUSTER}
else
    # ln hw log
    ln -snf ${currentDir%train*}/train/result/tf_resnet50_hc/training_job_${currtime}/0/hw_resnet50_hc.log ${currentDir%train*}/train/result/tf_resnet50_hc/training_job_${currtime}/
    # Start one background training process per device; rank_id counts up
    # from 0 in device order.
    for device_id in $device_group;do
        #echo "[`date +%Y%m%d-%H:%M:%S`] [INFO] start: train ${device_id} & " >> ${currentDir}/result/main.log
        ${currentDir}/scripts/train.sh $device_id $rank_size $yamlPath $currtime ${toolsPath} $rank_id&
        let rank_id++
    done
fi
# Block until every background training process has finished.
wait
|
||||
|
||||
|
||||
+109
@@ -0,0 +1,109 @@
|
||||
#!/usr/bin/env bash
#
# Launch one ResNet50_HC training process and record its result.
#
# Positional arguments:
#   $1  device id for this process (in cluster mode it is replaced by the id
#       parsed from the atlasboost output)
#   $2  rank size; selects the rank table <rank_size>p.json and, outside
#       cluster mode, the training config res50_256bs_{1,2,4,8}p
#   $3  path of the YAML configuration file
#   $4  timestamp naming the training_job_<timestamp> result directory
#   $5  directory that contains get_params_for_yaml.sh
#   $6  the literal string "True" for cluster (mpirun) mode, otherwise the
#       rank id of this process
#
# The exit status of the script is intentionally left as it was (status of
# the last echo); the training result is only reported through the
# ":::ABK" log lines.

device_id=$1
rank_size=$2
yamlPath=$3
currtime=$4
toolsPath=$5
currentDir=$(cd "$(dirname "$0")/.."; pwd)

# Result directory shared by every process of this training job.
export train_job_dir=${currentDir%train*}/train/result/tf_resnet50_hc/training_job_${currtime}/
mkdir -p "${train_job_dir}"

source "${currentDir}/config/npu_set_env.sh"

# Import the keys of the "tensorflow_config" section of the YAML file as
# shell variables (max_steps is used below).
eval $(${toolsPath}/get_params_for_yaml.sh ${yamlPath} "tensorflow_config")

# Name of the instrumentation log file; must be hw_ followed by the
# lowercase model name.
export REMARK_LOG_FILE=hw_resnet50_hc.log
# Add the path of the log instrumentation module.
benchmark_log_path=${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils
export PYTHONPATH=$PYTHONPATH:${benchmark_log_path}

# user env
export HCCL_CONNECT_TIMEOUT=600
export JOB_ID=9999001
export RANK_TABLE_FILE=${currentDir}/config/${rank_size}p.json
export RANK_SIZE=${rank_size}
export RANK_INDEX=0
export SLOG_PRINT_TO_STDOUT=0
export DEVICE_ID=$1
DEVICE_INDEX=$(( DEVICE_ID + RANK_INDEX * 8 ))
export DEVICE_INDEX=${DEVICE_INDEX}
export YAML_PATH=$3
# NOTE: DEVICE_INDEX and MODEL_CKPT_PATH are derived from the $1 device id,
# i.e. before the cluster branch below may replace device_id.
export MODEL_CKPT_PATH=${currentDir}/result/ckpt${device_id}

cd "${train_job_dir}"
curd_dir=${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils/atlasboost
export PYTHONPATH=$PYTHONPATH:${curd_dir}

if [ x"$6" != x"True" ];then
    # Non-cluster mode: the caller passes the rank id as $6.
    rank_id=$6
    export RANK_ID=$6
else
    # Cluster mode: initialise atlasboost and capture everything it prints.
    device_id_mo=$(python3.7 -c "import src.tensorflow.mpi_ops as atlasboost;atlasboost.init(); \
        device_id = atlasboost.local_rank();cluster_device_id = str(device_id); \
        atlasboost.set_device_id(device_id);print(atlasboost.rank())")
    # Re-echo unquoted to collapse newlines and repeated blanks into single
    # spaces so the parameter expansions below can parse the text.
    device_id_mo=$(echo $device_id_mo)
    # Last space-separated token; presumably the value printed by
    # print(atlasboost.rank()) -- TODO confirm against atlasboost output.
    rank_id=${device_id_mo##* }
    export RANK_ID=${rank_id}
    # Text between the last "deviceid = " and the first " phyid=" after it.
    device=${device_id_mo##*deviceid = }
    device_id=${device%% phyid=*}
    export DEVICE_ID=${device_id}
    # The glob is expanded by cp (it must stay unquoted); the *.json found in
    # the job directory becomes the rank table for this rank size.
    hccljson=${train_job_dir}/*.json
    cp ${hccljson} ${currentDir}/config/${rank_size}p.json
fi

# Per-device working directory; the relative --model_dir=./ckpt ends up here.
mkdir -p "${train_job_dir}/${device_id}"
cd "${train_job_dir}/${device_id}"

startTime_s=$(date +%s)

# Choose the training config and the --eval flag for cluster / single-device
# / multi-device runs. Both stay empty when no case matches.
train_config=""
train_eval=""
if [ x"$6" == x"True" ];then
    # Multiple devices on multiple machines.
    export CLUSTER=True
    rm -rf ${currentDir}/result/*.log
    rm -rf ${currentDir}/code/core.*
    train_config=res50_256bs_16p
    train_eval=True
elif [ "${rank_size}" -le 1 ];then
    # Single device.
    train_config=res50_256bs_1p
    train_eval=False
elif [ "${rank_size}" -le 2 ];then
    # Two devices on one machine.
    train_config=res50_256bs_2p
    train_eval=False
elif [ "${rank_size}" -le 4 ];then
    # Four devices on one machine.
    train_config=res50_256bs_4p
    train_eval=False
elif [ "${rank_size}" -le 8 ];then
    # Eight devices on one machine.
    train_config=res50_256bs_8p
    train_eval=True
fi

if [ -n "${train_config}" ];then
    python3.7 "${currentDir}/code/resnet50_train/mains/res50.py" --config_file=${train_config} --max_train_steps=${max_steps} --iterations_per_loop=1000 --debug=True --eval=${train_eval} --model_dir=./ckpt > "${train_job_dir}/train_${device_id}.log" 2>&1
    train_status=$?
else
    # Previously this case fell through the if/elif chain with $? == 0 and
    # was reported as a successful training run although nothing was started.
    echo "[ERROR] unsupported rank_size '${rank_size}': no training config selected" >&2
    train_status=1
fi

if [ ${train_status} -eq 0 ];then
    result_msg=":::ABK 1.0.0 resnet50_hc train success"
else
    result_msg=":::ABK 1.0.0 resnet50_hc train failed"
fi
# NOTE(review): the training output above is written to
# ${train_job_dir}/train_<id>.log, while the result line is appended to
# ${train_job_dir}/<id>/train_<id>.log -- kept as in the original; confirm
# which location the log collectors expect.
echo "${result_msg}"
echo "${result_msg}" >> "${train_job_dir}/${device_id}/train_${device_id}.log"
echo "${result_msg}" >> "${train_job_dir}/${device_id}/hw_resnet50_hc.log"

# Wall-clock duration as hours:minutes:seconds (no zero padding).
endTime_s=$(date +%s)
sumTime=$(( endTime_s - startTime_s ))
hour=$(( sumTime / 3600 ))
min=$(( (sumTime - hour * 3600) / 60 ))
sec=$(( sumTime - hour * 3600 - min * 60 ))
echo ${hour}:${min}:${sec}
echo ":::ABK 1.0.0 resnet50_hc train total time ${hour}:${min}:${sec}" >> "${train_job_dir}/${device_id}/hw_resnet50_hc.log"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user