[add]上传训练benchmark by z00560161

This commit is contained in:
liang_chaoming@huawei.com
2020-10-19 20:22:23 +08:00
parent 22b83024f5
commit 82522e2f61
1225 changed files with 345421 additions and 0 deletions
@@ -0,0 +1,89 @@
# SSD-Resnet34 TensorFlow训练说明
### 1. 运行环境
Python版本: 3.7.5
主要python三方库:
- tensorflow >= 1.15.0 (satisfied with NPU)
### 2. 参数配置
在train/yaml/SSD-Resnet34.yaml中修改相应配置, 配置项含义:
```
tensorflow_config: tensorflow框架下ssd-resnet34的配置项
train_batch_size: 训练时设置的batch size大小
training_file_pattern: 数据集中训练数据集文件标签类型, 数据集中有该类型的文件夹
resnet_checkpoint: ckpt路径
validation_file_pattern: 数据集中验证数据文件标签类型, 数据集中有该类型的文件夹
val_json_file: 数据集中验证数据json文件
eval_batch_size: 评测时设置的batch size大小
num_epochs: epochs数量
model_dir: 存放模型graph等数据的路径
max_steps: 最大步数
runmode: 运行模式 边训练边评测、只训练、只评测
device_group_1p: 跑1p时的device_id
device_group_2p: 跑2p时的device_id
device_group_4p: 跑4p时的device_id
mpirun_ip: 仅集群场景时需要配置, 格式ip1:卡数量1,ip2:卡数量2
docker_image: docker镜像名称:版本号
```
SSD-Resnet34.yaml中配置项示例:
```
tensorflow_config:
train_batch_size: 32
training_file_pattern: /home/data/raw_data/tfrecord/train2017*
resnet_checkpoint: /home/data/raw_data/resnet34_pretrain_model/model.ckpt-28152
validation_file_pattern: /home/data/raw_data/tfrecord/val2017*
val_json_file: /home/data/raw_data/annotations/instances_val2017.json
eval_batch_size: 32
num_epochs: 1
model_dir: result_npu
max_steps: 432000
runmode: train_and_eval
device_group_1p: 0
device_group_2p: 0 1
device_group_4p: 0 1 2 3
mpirun_ip: 90.90.176.152:8,90.90.176.154:8
docker_image: mpirun3:latest
```
SSD-Resnet34.yaml中配置注意事项:
当ssd-resnet34在docker侧进行训练时,resnet_checkpoint、validation_file_pattern和val_json_file的路径都必须规划在training_file_pattern字段路径中的raw_data下,因配置路径较多,脚本中统一只对training_file_pattern字段路径中的raw_data下文件做映射
### 3. 启动训练脚本
#### 3.1 训练脚本启动
当前路径为benchmark包的train文件夹下
```
bash benchmark.sh -e SSD-Resnet34 -hw 1p # host侧1p
bash benchmark.sh -e SSD-Resnet34 -hw 8p # host侧8p
bash benchmark.sh -e SSD-Resnet34 -hw 1p -docker # docker侧1p
bash benchmark.sh -e SSD-Resnet34 -hw 8p -docker # docker侧8p
bash benchmark.sh -e SSD-Resnet34 -ct # host侧集群
bash benchmark.sh -e SSD-Resnet34 -ct -docker # docker侧集群
```
#### 3.2 训练日志
日志位于benchmark包train路径下的result目录中, 在其中的ssd-resnet34文件夹里可以找到。
```
./result/tf_ssd-resnet34/TrainingJob-2020xxxxxxxxxx/train_${device_id}.log
./result/tf_ssd-resnet34/TrainingJob-2020xxxxxxxxxx/device_id/hw_ssd-resnet34.log
```
### 4. 模型评测
将train/yaml/SSD-Resnet34.yaml中resnet_checkpoint的值改为训练产生的日志的路径, runmode的值改为evaluate,如2中示例;
然后运行与训练时相同的脚本,结果参见train.log。
### 5. 训练结果参考
1p: 600
4p: 2000
8p: 4000
@@ -0,0 +1,281 @@
# Copyright 2018 Google. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""COCO-style evaluation metrics.
Implements the interface of COCO API and metric_fn in tf.TPUEstimator.
COCO API: github.com/cocodataset/cocoapi/
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import atexit
import tempfile
import time
from absl import flags
import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import six
#COCO = coco.COCO
#COCOeval = coco.COCOeval
import tensorflow as tf
import ssd_constants
# absl flag registry; compute_map() reads FLAGS.val_json_file from it.
FLAGS = flags.FLAGS
# Workaround for https://github.com/cocodataset/cocoapi/issues/49:
# on Python 3, define the name `unicode` inside the pycocotools.coco module
# namespace as an alias of `str`.
if six.PY3:
    import pycocotools.coco
    pycocotools.coco.unicode = str
def create_coco(val_json_file, use_cpp_extension=True):
    """Creates the COCO ground-truth helper object and returns it.

    Args:
      val_json_file: Path of the COCO annotation JSON file. A path starting
        with 'gs://' is first copied to a local temporary file; removal of that
        file is registered to run at interpreter exit.
      use_cpp_extension: If True, the object is built with
        `coco.COCO(path, False)`; otherwise with pycocotools' `COCO(path)`.

    Returns:
      The COCO object holding the ground-truth annotations.
    """
    if val_json_file.startswith('gs://'):
        # Local import: `os` is only needed to close the mkstemp descriptor.
        import os
        fd, local_val_json = tempfile.mkstemp(suffix='.json')
        # mkstemp hands back an already-open OS-level descriptor; close it so
        # it is not leaked (only the generated path is used below).
        os.close(fd)
        # Remove the empty placeholder before copying the remote file onto it.
        tf.gfile.Remove(local_val_json)
        tf.gfile.Copy(val_json_file, local_val_json)
        atexit.register(tf.gfile.Remove, local_val_json)
    else:
        local_val_json = val_json_file

    if use_cpp_extension:
        # NOTE(review): `coco` is not imported in the visible part of this
        # module (the alias lines near the imports are commented out), so this
        # branch presumably raises NameError unless `coco` is provided
        # elsewhere -- confirm before relying on use_cpp_extension=True.
        coco_gt = coco.COCO(local_val_json, False)
    else:
        coco_gt = COCO(local_val_json)
    return coco_gt
def compute_map(labels_and_predictions,
                coco_gt,
                use_cpp_extension=True,
                nms_on_tpu=True):
    """Computes COCO mAP statistics from model predictions.

    The evaluation code is largely copied from the MLPerf reference
    implementation. It runs outside of Estimator.evaluate() for simplicity.

    Args:
      labels_and_predictions: Model outputs. When `nms_on_tpu` is True, an
        iterable of iterables whose items are converted to float32 arrays and
        reshaped into rows of 7 values. Otherwise an iterable of per-example
        mappings holding the raw shape, 'pred_box', 'pred_scores', 'indices'
        and the source id (examples flagged as padded are skipped).
      coco_gt: Ground-truth COCO object; when None it is created from
        FLAGS.val_json_file.
      use_cpp_extension: Selects the C++-extension style evaluation API
        (`LoadRes`/`Evaluate`/...) instead of the pycocotools one.
      nms_on_tpu: Whether NMS was already applied to the predictions.

    Returns:
      A dict mapping 'COCO/<metric name>' to the corresponding statistic.
    """
    start_time = time.time()
    if nms_on_tpu:
        rows = [
            np.array(item, dtype=np.float32)
            for batch in labels_and_predictions
            for item in batch
        ]
        predictions = np.concatenate(rows).reshape((-1, 7))
    else:
        predictions = []
        kept = 0
        for example in labels_and_predictions:
            is_padded = (ssd_constants.IS_PADDED in example and
                         example[ssd_constants.IS_PADDED])
            if is_padded:
                continue
            print(kept)
            kept += 1

            htot, wtot, _ = example[ssd_constants.RAW_SHAPE]
            loc, label, prob = decode_single(
                example['pred_box'], example['pred_scores'], example['indices'],
                ssd_constants.OVERLAP_CRITERIA,
                ssd_constants.MAX_NUM_EVAL_BOXES,
                ssd_constants.MAX_NUM_EVAL_BOXES)
            # Ordering convention differs, hence [1], [0] rather than [0], [1]
            predictions.extend([
                [
                    int(example[ssd_constants.SOURCE_ID]),
                    box[1] * wtot,
                    box[0] * htot,
                    (box[3] - box[1]) * wtot,
                    (box[2] - box[0]) * htot,
                    score,
                    ssd_constants.CLASS_INV_MAP[cls],
                ]
                for box, cls, score in zip(loc, label, prob)
            ])
    prepared_time = time.time()
    tf.logging.info('Prepare predictions DONE (t={:0.2f}s).'.format(
        prepared_time - start_time))

    if coco_gt is None:
        coco_gt = create_coco(
            FLAGS.val_json_file, use_cpp_extension=use_cpp_extension)

    if use_cpp_extension:
        coco_dt = coco_gt.LoadRes(np.array(predictions, dtype=np.float32))
        coco_eval = COCOeval(coco_gt, coco_dt, iou_type='bbox')
        coco_eval.Evaluate()
        coco_eval.Accumulate()
        coco_eval.Summarize()
        stats = coco_eval.GetStats()
    else:
        coco_dt = coco_gt.loadRes(np.array(predictions))
        coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
        stats = coco_eval.stats

    print('Current AP: {:.5f}'.format(stats[0]))
    metric_names = ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1',
                    'ARmax10', 'ARmax100', 'ARs', 'ARm', 'ARl']
    finished_time = time.time()
    tf.logging.info('COCO eval DONE (t={:0.2f}s).'.format(
        finished_time - prepared_time))

    # Prefix with "COCO" to group in TensorBoard.
    return dict(
        ('COCO/' + name, stat) for name, stat in zip(metric_names, stats))
def calc_iou(target, candidates):
    """Computes the IoU between one box and every row of a box array.

    Each box is four coordinates: the first two are the minimum corner and the
    last two the maximum corner.

    Args:
      target: 1-D array with the 4 coordinates of the reference box.
      candidates: 2-D array of shape [N, 4] with the boxes to compare against.

    Returns:
      1-D array of length N with the IoU of `target` and each candidate.
    """
    # A leading axis lets NumPy broadcast the single box against all rows.
    target_row = target[np.newaxis, :]

    # Left Top & Right Bottom of the overlap region.
    top_left = np.maximum(target_row[:, :2], candidates[:, :2])
    bottom_right = np.minimum(target_row[:, 2:], candidates[:, 2:])
    overlap = np.maximum(bottom_right - top_left, 0)
    intersection = overlap[:, 0] * overlap[:, 1]

    target_extent = target_row[:, 2:] - target_row[:, :2]
    target_area = target_extent[:, 0] * target_extent[:, 1]
    candidate_extent = candidates[:, 2:] - candidates[:, :2]
    candidate_area = candidate_extent[:, 0] * candidate_extent[:, 1]

    union = target_area + candidate_area - intersection
    return intersection / union
def decode_single(bboxes_in,
                  scores_in,
                  indices,
                  criteria,
                  max_output,
                  max_num=200):
    """Implements per-class greedy non-maximum suppression.

    Reference to https://github.com/amdegroot/ssd.pytorch

    Args:
      bboxes_in: an array with shape [N, 4], which stacks box regression
        outputs on all feature levels. N is the number of total anchors on all
        levels.
      scores_in: an array with shape [ssd_constants.MAX_NUM_EVAL_BOXES,
        num_classes]. The top box scores for each class.
      indices: an array with shape [ssd_constants.MAX_NUM_EVAL_BOXES,
        num_classes]. Row indices into `bboxes_in` of those top boxes for each
        class.
      criteria: a float; a box is suppressed when its IoU with an already
        selected box is not below this threshold.
      max_output: maximum number of boxes returned.
      max_num: maximum number of boxes per class considered by NMS.

    Returns:
      (boxes, labels, scores) after NMS, limited to the `max_output` highest
      scores and ordered by ascending score. If no box of any non-background
      class passes ssd_constants.MIN_SCORE, a single dummy box with score
      ssd_constants.DUMMY_SCORE is returned.
    """
    # Small positive value used to repair negative coordinates and boxes whose
    # max corner does not exceed their min corner.
    epsilon = 0.00001

    bboxes_out = []
    scores_out = []
    labels_out = []
    for i, score in enumerate(np.split(scores_in, scores_in.shape[1], 1)):
        # skip background
        if i == 0:
            continue
        score = np.squeeze(score, 1)
        mask = score > ssd_constants.MIN_SCORE
        if not np.any(mask):
            continue

        # Fancy/boolean indexing copies, so the in-place repairs below never
        # modify `bboxes_in`.
        bboxes = bboxes_in[indices[:, i], :][mask, :]
        score = score[mask]

        # Repair invalid boxes instead of dropping them: negative coordinates
        # are replaced first, then collapsed/inverted extents are widened.
        bboxes[bboxes < 0] = epsilon
        collapsed = bboxes[:, 0] >= bboxes[:, 2]
        bboxes[collapsed, 2] = bboxes[collapsed, 0] + epsilon
        collapsed = bboxes[:, 1] >= bboxes[:, 3]
        bboxes[collapsed, 3] = bboxes[collapsed, 1] + epsilon

        # Ascending by score; keep only the `max_num` best for NMS.
        score_idx_sorted = np.argsort(score)[-max_num:]

        candidates = []
        # perform non-maximum suppression
        while len(score_idx_sorted):
            # Best remaining box is last; keep it and drop everything that
            # overlaps it too much (including the box itself).
            idx = score_idx_sorted[-1]
            iou = calc_iou(bboxes[idx, :], bboxes[score_idx_sorted, :])
            score_idx_sorted = score_idx_sorted[iou < criteria]
            candidates.append(idx)

        bboxes_out.append(bboxes[candidates, :])
        scores_out.append(score[candidates])
        labels_out.extend([i] * len(candidates))

    if not scores_out:
        tf.logging.info("No objects detected. Returning dummy values.")
        return (
            np.zeros(shape=(1, 4), dtype=np.float32),
            np.zeros(shape=(1,), dtype=np.int32),
            np.ones(shape=(1,), dtype=np.float32) * ssd_constants.DUMMY_SCORE,
        )

    bboxes_out = np.concatenate(bboxes_out, axis=0)
    scores_out = np.concatenate(scores_out, axis=0)
    labels_out = np.array(labels_out)

    max_ids = np.argsort(scores_out)[-max_output:]
    return bboxes_out[max_ids, :], labels_out[max_ids], scores_out[max_ids]
@@ -0,0 +1,369 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert raw COCO dataset to TFRecord for object_detection.
Example usage:
python create_coco_tf_record.py --logtostderr \
--image_dir="${TRAIN_IMAGE_DIR}" \
--object_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
--caption_annotations_file="${CAPTION_ANNOTATIONS_FILE}" \
--output_file_prefix="${OUTPUT_DIR/FILE_PREFIX}" \
--num_shards=32
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import hashlib
import io
import json
import logging
import multiprocessing
import os
from absl import app
from absl import flags
import numpy as np
import PIL.Image
from pycocotools import mask
from research.object_detection.utils import dataset_util
from research.object_detection.utils import label_map_util
import tensorflow.compat.v1 as tf
# Command-line flags (absl) consumed by main() below.
flags.DEFINE_boolean(
    'include_masks', False, 'Whether to include instance segmentations masks '
    '(PNG encoded) in the result. default: False.')
flags.DEFINE_string('image_dir', '', 'Directory containing images.')
flags.DEFINE_string(
    'image_info_file', '', 'File containing image information. '
    'Tf Examples in the output files correspond to the image '
    'info entries in this file. If this file is not provided '
    'object_annotations_file is used if present. Otherwise, '
    'caption_annotations_file is used to get image info.')
flags.DEFINE_string(
    'object_annotations_file', '', 'File containing object '
    'annotations - boxes and instance masks.')
flags.DEFINE_string('caption_annotations_file', '', 'File containing image '
                    'captions.')
flags.DEFINE_string('output_file_prefix', '/tmp/train', 'Path to output file')
flags.DEFINE_integer('num_shards', 32, 'Number of shards for output file.')
FLAGS = flags.FLAGS
# Set the TensorFlow logger to INFO level at import time.
logger = tf.get_logger()
logger.setLevel(logging.INFO)
def create_tf_example(image,
                      image_dir,
                      bbox_annotations=None,
                      category_index=None,
                      caption_annotations=None,
                      include_masks=False):
    """Builds a tf.Example proto from one COCO image record and its annotations.

    Args:
      image: dict describing the image; the keys 'height', 'width',
        'file_name' and 'id' are read.
      image_dir: directory containing the image files.
      bbox_annotations: list of COCO object annotation dicts; the keys 'bbox',
        'iscrowd', 'category_id', 'area' and (with masks) 'segmentation' are
        read. COCO boxes are [x, y, width, height] in absolute pixels; they are
        stored as xmin/xmax/ymin/ymax divided by the image width/height.
      category_index: dict keyed by category id whose values provide a 'name'
        entry. Required when `bbox_annotations` is given.
      caption_annotations: list of dicts providing a 'caption' entry.
      include_masks: Whether to include instance segmentation masks
        (PNG encoded) in the result. default: False.

    Returns:
      key: SHA-256 hex digest of the encoded image bytes.
      example: The converted tf.Example.
      num_annotations_skipped: Number of box annotations ignored because they
        have a non-positive size or extend past the image bounds.
    """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    with tf.gfile.GFile(os.path.join(image_dir, filename), 'rb') as fid:
        encoded_jpg = fid.read()
    # The opened image is not read afterwards; the call is kept so that the
    # image bytes are still handed to PIL exactly as before.
    opened_image = PIL.Image.open(io.BytesIO(encoded_jpg))
    key = hashlib.sha256(encoded_jpg).hexdigest()

    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
    }

    num_annotations_skipped = 0
    if bbox_annotations:
        xmin, xmax, ymin, ymax = [], [], [], []
        is_crowd, category_names, category_ids, area = [], [], [], []
        encoded_mask_png = []
        for annotation in bbox_annotations:
            x, y, width, height = tuple(annotation['bbox'])
            # Skip empty boxes and boxes reaching past the right/bottom edge.
            if (width <= 0 or height <= 0 or
                    x + width > image_width or y + height > image_height):
                num_annotations_skipped += 1
                continue

            xmin.append(float(x) / image_width)
            xmax.append(float(x + width) / image_width)
            ymin.append(float(y) / image_height)
            ymax.append(float(y + height) / image_height)
            is_crowd.append(annotation['iscrowd'])
            category_id = int(annotation['category_id'])
            category_ids.append(category_id)
            category_names.append(
                category_index[category_id]['name'].encode('utf8'))
            area.append(annotation['area'])

            if include_masks:
                run_len_encoding = mask.frPyObjects(
                    annotation['segmentation'], image_height, image_width)
                binary_mask = mask.decode(run_len_encoding)
                if not annotation['iscrowd']:
                    # Collapse the per-part masks into a single mask.
                    binary_mask = np.amax(binary_mask, axis=2)
                png_buffer = io.BytesIO()
                PIL.Image.fromarray(binary_mask).save(png_buffer, format='PNG')
                encoded_mask_png.append(png_buffer.getvalue())

        feature_dict.update({
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
            'image/object/class/text':
                dataset_util.bytes_list_feature(category_names),
            'image/object/class/label':
                dataset_util.int64_list_feature(category_ids),
            'image/object/is_crowd':
                dataset_util.int64_list_feature(is_crowd),
            'image/object/area': dataset_util.float_list_feature(area),
        })
        if include_masks:
            feature_dict['image/object/mask'] = (
                dataset_util.bytes_list_feature(encoded_mask_png))

    if caption_annotations:
        captions = [
            entry['caption'].encode('utf8') for entry in caption_annotations
        ]
        feature_dict.update(
            {'image/caption': dataset_util.bytes_list_feature(captions)})

    features = tf.train.Features(feature=feature_dict)
    example = tf.train.Example(features=features)
    return key, example, num_annotations_skipped
def _pool_create_tf_example(args):
    """Calls `create_tf_example` with the positional arguments packed in `args`.

    Exists so that a single-argument callable can be handed to `Pool.imap`.
    """
    example_outputs = create_tf_example(*args)
    return example_outputs
def _load_object_annotations(object_annotations_file):
    """Loads the object annotation JSON file and indexes it by image id.

    Args:
      object_annotations_file: path of a JSON file with 'images', 'categories'
        and 'annotations' entries.

    Returns:
      A (img_to_obj_annotation, category_index) pair: a defaultdict mapping an
      image id to the list of its annotations, and the category index built by
      label_map_util.create_category_index.
    """
    with tf.gfile.GFile(object_annotations_file, 'r') as fid:
        obj_annotations = json.load(fid)

    images = obj_annotations['images']
    category_index = label_map_util.create_category_index(
        obj_annotations['categories'])

    logging_message = 'Building bounding box index.'
    img_to_obj_annotation = collections.defaultdict(list)
    logging.info(logging_message)
    for annotation in obj_annotations['annotations']:
        img_to_obj_annotation[annotation['image_id']].append(annotation)

    # Membership tests do not insert keys into the defaultdict.
    missing_annotation_count = sum(
        1 for image in images if image['id'] not in img_to_obj_annotation)
    logging.info('%d images are missing bboxes.', missing_annotation_count)

    return img_to_obj_annotation, category_index
def _load_caption_annotations(caption_annotations_file):
    """Loads the caption annotation JSON file and indexes it by image id.

    Args:
      caption_annotations_file: path of a JSON file with 'annotations' and
        'images' entries.

    Returns:
      A defaultdict mapping an image id to the list of its caption annotations.
    """
    with tf.gfile.GFile(caption_annotations_file, 'r') as fid:
        caption_annotations = json.load(fid)

    img_to_caption_annotation = collections.defaultdict(list)
    logging.info('Building caption index.')
    for annotation in caption_annotations['annotations']:
        img_to_caption_annotation[annotation['image_id']].append(annotation)

    # Membership tests do not insert keys into the defaultdict.
    missing_annotation_count = sum(
        1 for image in caption_annotations['images']
        if image['id'] not in img_to_caption_annotation)
    logging.info('%d images are missing captions.', missing_annotation_count)

    return img_to_caption_annotation
def _load_images_info(images_info_file):
    """Returns the 'images' entry of the JSON file at `images_info_file`."""
    with tf.gfile.GFile(images_info_file, 'r') as fid:
        return json.load(fid)['images']
def _create_tf_record_from_coco_annotations(images_info_file,
                                            image_dir,
                                            output_path,
                                            num_shards,
                                            object_annotations_file=None,
                                            caption_annotations_file=None,
                                            include_masks=False):
    """Loads COCO annotation json files and converts to tf.Record format.

    One tf.Example is written per entry of the image info file; examples are
    distributed round-robin over `num_shards` output files named
    '<output_path>-XXXXX-of-YYYYY.tfrecord'.

    Args:
      images_info_file: JSON file containing image info. The number of
        tf.Examples in the output tf Record files is exactly equal to the
        number of image info entries in this file. This can be any of
        train/val/test annotation json files Eg.
        'image_info_test-dev2017.json', 'instance_annotations_train2017.json',
        'caption_annotations_train2017.json', etc.
      image_dir: Directory containing the image files.
      output_path: Path prefix of the output tf.Record files.
      num_shards: Number of output files to create.
      object_annotations_file: JSON file containing bounding box annotations.
      caption_annotations_file: JSON file containing caption annotations.
      include_masks: Whether to include instance segmentations masks
        (PNG encoded) in the result. default: False.
    """
    logging.info('writing to output path: %s', output_path)
    writers = [
        tf.python_io.TFRecordWriter(
            output_path + '-%05d-of-%05d.tfrecord' % (i, num_shards))
        for i in range(num_shards)
    ]
    # Everything after the writers exist runs under try/finally so that the
    # shard files are closed even when loading or conversion fails.
    try:
        images = _load_images_info(images_info_file)

        img_to_obj_annotation = None
        img_to_caption_annotation = None
        category_index = None
        if object_annotations_file:
            img_to_obj_annotation, category_index = (
                _load_object_annotations(object_annotations_file))
        if caption_annotations_file:
            img_to_caption_annotation = (
                _load_caption_annotations(caption_annotations_file))

        def _get_object_annotation(image_id):
            # None when no (or an empty) object annotation index is loaded.
            if img_to_obj_annotation:
                return img_to_obj_annotation[image_id]
            return None

        def _get_caption_annotation(image_id):
            # None when no (or an empty) caption index is loaded.
            if img_to_caption_annotation:
                return img_to_caption_annotation[image_id]
            return None

        pool_args = [(image, image_dir, _get_object_annotation(image['id']),
                      category_index, _get_caption_annotation(image['id']),
                      include_masks) for image in images]

        total_num_annotations_skipped = 0
        pool = multiprocessing.Pool()
        try:
            # imap preserves input order, so example idx matches images[idx].
            for idx, (_, tf_example, num_annotations_skipped) in enumerate(
                    pool.imap(_pool_create_tf_example, pool_args)):
                if idx % 100 == 0:
                    logging.info('On image %d of %d', idx, len(images))
                total_num_annotations_skipped += num_annotations_skipped
                writers[idx % num_shards].write(tf_example.SerializeToString())
        except BaseException:
            # Stop the workers right away instead of letting them finish the
            # remaining queued images; the error is re-raised unchanged.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()
    finally:
        for writer in writers:
            writer.close()

    logging.info('Finished writing, skipped %d annotations.',
                 total_num_annotations_skipped)
def main(_):
    """Validates the flags and runs the COCO to TFRecord conversion.

    The image info is taken from the first flag that is set among
    `image_info_file`, `object_annotations_file` and
    `caption_annotations_file`. The directory part of `output_file_prefix` is
    created when it does not exist yet.
    """
    assert FLAGS.image_dir, '`image_dir` missing.'
    assert (FLAGS.image_info_file or FLAGS.object_annotations_file or
            FLAGS.caption_annotations_file), ('All annotation files are '
                                              'missing.')

    # First non-empty candidate wins, in this order of preference.
    images_info_file = (FLAGS.image_info_file or
                        FLAGS.object_annotations_file or
                        FLAGS.caption_annotations_file)

    output_dir = os.path.dirname(FLAGS.output_file_prefix)
    if not tf.gfile.IsDirectory(output_dir):
        tf.gfile.MakeDirs(output_dir)

    _create_tf_record_from_coco_annotations(
        images_info_file,
        FLAGS.image_dir,
        FLAGS.output_file_prefix,
        FLAGS.num_shards,
        object_annotations_file=FLAGS.object_annotations_file,
        caption_annotations_file=FLAGS.caption_annotations_file,
        include_masks=FLAGS.include_masks)
# Script entry point: set the TensorFlow logger to INFO level, then let absl
# parse the command-line flags and invoke main().
if __name__ == '__main__':
    logger = tf.get_logger()
    logger.setLevel(logging.INFO)
    app.run(main)
@@ -0,0 +1,436 @@
# Copyright 2018 Google. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Data loader and processing."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import itertools as it
import math
import os
import numpy as np
import tensorflow as tf
from object_detection import argmax_matcher
from object_detection import box_list
from object_detection import faster_rcnn_box_coder
from object_detection import preprocessor
from object_detection import region_similarity_calculator
from object_detection import target_assigner
from object_detection import tf_example_decoder
import ssd_constants
def get_rank_size():
    """Returns the RANK_SIZE environment variable converted to int.

    Raises:
      KeyError: if RANK_SIZE is not set.
      ValueError: if its value is not an integer literal.
    """
    rank_size = os.environ['RANK_SIZE']
    return int(rank_size)
def get_rank_id():
    """Returns the DEVICE_ID environment variable converted to int.

    Raises:
      KeyError: if DEVICE_ID is not set.
      ValueError: if its value is not an integer literal.
    """
    device_id = os.environ['DEVICE_ID']
    return int(device_id)
class DefaultBoxes(object):
    """Default bounding boxes for 300x300 5 layer SSD.

    Default bounding boxes generation follows the order of (W, H, anchor_sizes).
    Therefore, the tensor converted from DefaultBoxes has a shape of
    [anchor_sizes, H, W, 4]. The last dimension is the box coordinates; 'ltrb'
    is [ymin, xmin, ymax, xmax] while 'xywh' is [cy, cx, h, w].
    """

    def __init__(self):
        """Generates all default boxes from the values in ssd_constants."""
        # Number of anchor cells per unit length for each feature level.
        fk = ssd_constants.IMAGE_SIZE / np.array(ssd_constants.STEPS)

        self.default_boxes = []
        # size of feature and number of feature
        for idx, feature_size in enumerate(ssd_constants.FEATURE_SIZES):
            sk1 = ssd_constants.SCALES[idx] / ssd_constants.IMAGE_SIZE
            sk2 = ssd_constants.SCALES[idx + 1] / ssd_constants.IMAGE_SIZE
            # Geometric mean of this level's scale and the next one.
            sk3 = math.sqrt(sk1 * sk2)
            all_sizes = [(sk1, sk1), (sk3, sk3)]

            # Each aspect ratio contributes a box and its transposed twin.
            for alpha in ssd_constants.ASPECT_RATIOS[idx]:
                w, h = sk1 * math.sqrt(alpha), sk1 / math.sqrt(alpha)
                all_sizes.append((w, h))
                all_sizes.append((h, w))

            assert len(all_sizes) == ssd_constants.NUM_DEFAULTS[idx]

            for i, j in it.product(range(feature_size), repeat=2):
                # The cell centre is the same for every box size at (i, j).
                cx, cy = (j + 0.5) / fk[idx], (i + 0.5) / fk[idx]
                for w, h in all_sizes:
                    box = tuple(np.clip(k, 0, 1) for k in (cy, cx, h, w))
                    self.default_boxes.append(box)

        assert len(self.default_boxes) == ssd_constants.NUM_SSD_BOXES

        def to_ltrb(cy, cx, h, w):
            """Converts a centre/size box into its corner representation."""
            return cy - h / 2, cx - w / 2, cy + h / 2, cx + w / 2

        # For IoU calculation
        self.default_boxes_ltrb = tuple(
            to_ltrb(*i) for i in self.default_boxes)

    def __call__(self, order='ltrb'):
        """Returns the default boxes in the requested coordinate order.

        Args:
          order: 'ltrb' for corner boxes or 'xywh' for centre/size boxes.

        Returns:
          The tuple of corner boxes for 'ltrb', or the list of centre/size
          boxes for 'xywh'.

        Raises:
          ValueError: if `order` is neither 'ltrb' nor 'xywh'.
        """
        if order == 'ltrb':
            return self.default_boxes_ltrb
        if order == 'xywh':
            return self.default_boxes
        # Fail loudly instead of implicitly returning None, which would only
        # surface later as an obscure error in the caller.
        raise ValueError(
            "Unsupported order %r; expected 'ltrb' or 'xywh'." % (order,))
def calc_iou_tensor(box1, box2):
    """Computes the pairwise IoU between two sets of boxes.

    Reference to https://github.com/kuangliu/pytorch-ssd

    Args:
      box1: tensor of shape (N, 4); the first two columns are the minimum
        corner and the last two the maximum corner of each box.
      box2: tensor of shape (M, 4) in the same layout.

    Returns:
      Tensor of shape (N, M) holding the IoU of every box1/box2 pair.
    """
    num_first = tf.shape(box1)[0]
    num_second = tf.shape(box2)[0]

    # Expand both sets to a common (N, M, 4) grid.
    first_grid = tf.tile(tf.expand_dims(box1, axis=1), (1, num_second, 1))
    second_grid = tf.tile(tf.expand_dims(box2, axis=0), (num_first, 1, 1))

    # Left Top & Right Bottom
    top_left = tf.maximum(first_grid[:, :, :2], second_grid[:, :, :2])
    bottom_right = tf.minimum(first_grid[:, :, 2:], second_grid[:, :, 2:])

    overlap = tf.maximum(bottom_right - top_left, 0)
    intersection = overlap[:, :, 0] * overlap[:, :, 1]

    first_extent = first_grid[:, :, 2:] - first_grid[:, :, :2]
    first_area = first_extent[:, :, 0] * first_extent[:, :, 1]
    second_extent = second_grid[:, :, 2:] - second_grid[:, :, :2]
    second_area = second_extent[:, :, 0] * second_extent[:, :, 1]

    union = first_area + second_area - intersection
    return intersection / union
def ssd_crop(image, boxes, classes):
    """IoU biased random crop.

    Draws crop proposals in a tf.while_loop until one is accepted, then keeps
    only the boxes selected by the accepted proposal, clips them to the crop,
    rescales them relative to the crop, and crops/resizes the image to
    IMAGE_SIZE x IMAGE_SIZE.

    Reference: https://github.com/chauhan-utk/ssd.DomainAdaptation

    Args:
      image: image tensor; a batch axis is prepended before it is passed to
        tf.image.crop_and_resize, so it is indexed here as a rank-3 tensor.
      boxes: box tensor indexed as [num_boxes, 4].
        NOTE(review): coordinates appear to be normalized to [0, 1] since the
        crops are sampled in that range -- confirm against the caller.
      classes: per-box tensor, filtered with the same mask as `boxes`.

    Returns:
      (cropped_image, cropped_boxes, cropped_classes).
    """
    num_boxes = tf.shape(boxes)[0]

    def no_crop_check():
        # True with probability P_NO_CROP_PER_PASS: keep the full image.
        return (tf.random_uniform(shape=(), minval=0, maxval=1, dtype=tf.float32)
                < ssd_constants.P_NO_CROP_PER_PASS)

    def no_crop_proposal():
        # Accepted proposal covering the whole image and keeping every box.
        return (
            tf.ones((), tf.bool),
            tf.convert_to_tensor([0, 0, 1, 1], dtype=tf.float32),
            tf.ones((num_boxes,), tf.bool),
        )

    def crop_proposal():
        # Samples NUM_CROP_PASSES candidate crops at once; each helper call
        # returns a [NUM_CROP_PASSES, 1] column of uniform random values.
        rand_vec = lambda minval, maxval: tf.random_uniform(
            shape=(ssd_constants.NUM_CROP_PASSES, 1), minval=minval, maxval=maxval,
            dtype=tf.float32)

        width, height = rand_vec(0.3, 1), rand_vec(0.3, 1)
        left, top = rand_vec(0, 1-width), rand_vec(0, 1-height)

        right = left + width
        bottom = top + height

        ltrb = tf.concat([left, top, right, bottom], axis=1)

        # One randomly chosen minimum-IoU requirement for this round.
        min_iou = tf.random_shuffle(ssd_constants.CROP_MIN_IOU_CHOICES)[0]
        ious = calc_iou_tensor(ltrb, boxes)

        # discard any bboxes whose center not in the cropped image
        # NOTE(review): `xc` is built from box columns 0 and 2 and compared
        # with left/right; whether column 0 is x or y depends on the box
        # layout used by the caller -- confirm.
        xc, yc = [tf.tile(0.5 * (boxes[:, i + 0] + boxes[:, i + 2])[tf.newaxis, :],
                          (ssd_constants.NUM_CROP_PASSES, 1)) for i in range(2)]

        # masks[c, b] is True when the center of box b lies strictly inside
        # candidate crop c.
        masks = tf.reduce_all(tf.stack([
            tf.greater(xc, tf.tile(left, (1, num_boxes))),
            tf.less(xc, tf.tile(right, (1, num_boxes))),
            tf.greater(yc, tf.tile(top, (1, num_boxes))),
            tf.less(yc, tf.tile(bottom, (1, num_boxes))),
        ], axis=2), axis=2)

        # Checks of whether a crop is valid.
        # Aspect ratio within (1/2, 2).
        valid_aspect = tf.logical_and(tf.less(height/width, 2),
                                      tf.less(width/height, 2))
        # Every box must overlap the crop with IoU above min_iou.
        valid_ious = tf.reduce_all(tf.greater(ious, min_iou), axis=1, keepdims=True)
        # At least one box center must fall inside the crop.
        valid_masks = tf.reduce_any(masks, axis=1, keepdims=True)

        valid_all = tf.cast(tf.reduce_all(tf.concat(
            [valid_aspect, valid_ious, valid_masks], axis=1), axis=1), tf.int32)

        # One indexed, as zero is needed for the case of no matches.
        index = tf.range(1, 1 + ssd_constants.NUM_CROP_PASSES, dtype=tf.int32)

        # Either one-hot, or zeros if there is no valid crop.
        # The valid candidate with the largest index is the one selected.
        selection = tf.equal(tf.reduce_max(index * valid_all), index)

        use_crop = tf.reduce_any(selection)
        # Pick the selected candidate's bounds / mask by masking and reducing.
        output_ltrb = tf.reduce_sum(tf.multiply(ltrb, tf.tile(tf.cast(
            selection, tf.float32)[:, tf.newaxis], (1, 4))), axis=0)
        output_masks = tf.reduce_any(tf.logical_and(masks, tf.tile(
            selection[:, tf.newaxis], (1, num_boxes))), axis=0)

        return use_crop, output_ltrb, output_masks

    def proposal(*args):
        # Loop body: ignores the previous loop state and draws a new proposal.
        return tf.cond(
            pred=no_crop_check(),
            true_fn=no_crop_proposal,
            false_fn=crop_proposal,
        )

    # Repeat until the first loop variable (proposal accepted) becomes True.
    _, crop_bounds, box_masks = tf.while_loop(
        cond=lambda x, *_: tf.logical_not(x),
        body=proposal,
        loop_vars=[tf.zeros((), tf.bool), tf.zeros((4,), tf.float32), tf.zeros((num_boxes,), tf.bool)],
    )

    filtered_boxes = tf.boolean_mask(boxes, box_masks, axis=0)

    # Clip boxes to the cropped region.
    filtered_boxes = tf.stack([
        tf.maximum(filtered_boxes[:, 0], crop_bounds[0]),
        tf.maximum(filtered_boxes[:, 1], crop_bounds[1]),
        tf.minimum(filtered_boxes[:, 2], crop_bounds[2]),
        tf.minimum(filtered_boxes[:, 3], crop_bounds[3]),
    ], axis=1)

    left = crop_bounds[0]
    top = crop_bounds[1]
    width = crop_bounds[2] - left
    height = crop_bounds[3] - top

    # Re-express the clipped boxes relative to the crop window.
    cropped_boxes = tf.stack([
        (filtered_boxes[:, 0] - left) / width,
        (filtered_boxes[:, 1] - top) / height,
        (filtered_boxes[:, 2] - left) / width,
        (filtered_boxes[:, 3] - top) / height,
    ], axis=1)

    # crop_and_resize works on batches, hence the added and removed batch axis.
    cropped_image = tf.image.crop_and_resize(
        image=image[tf.newaxis, :, :, :],
        boxes=crop_bounds[tf.newaxis, :],
        box_ind=tf.zeros((1,), tf.int32),
        crop_size=(ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE),
    )[0, :, :, :]

    cropped_classes = tf.boolean_mask(classes, box_masks, axis=0)

    return cropped_image, cropped_boxes, cropped_classes
def color_jitter(image, brightness=0, contrast=0, saturation=0, hue=0):
    """Randomly distorts the colors of an image.

    Each distortion is applied only when its strength is greater than zero, in
    the order brightness, contrast, saturation, hue.

    Args:
      image: The input image tensor.
      brightness: A float, max delta for the random brightness change.
      contrast: A float; the contrast factor is drawn from
        [1 - contrast, 1 + contrast].
      saturation: A float; the saturation factor is drawn from
        [1 - saturation, 1 + saturation].
      hue: A float, max delta for the random hue change.

    Returns:
      The distorted image tensor.
    """
    with tf.name_scope('distort_color'):
        jittered = image
        if brightness > 0:
            jittered = tf.image.random_brightness(
                jittered, max_delta=brightness)
        if contrast > 0:
            jittered = tf.image.random_contrast(
                jittered, lower=1-contrast, upper=1+contrast)
        if saturation > 0:
            jittered = tf.image.random_saturation(
                jittered, lower=1-saturation, upper=1+saturation)
        if hue > 0:
            jittered = tf.image.random_hue(jittered, max_delta=hue)
        return jittered
def encode_labels(gt_boxes, gt_labels):
    """Labels anchors with ground truth inputs.

    Args:
      gt_boxes: A float tensor with shape [N, 4] representing groundtruth
        boxes. For each row, it stores [y0, x0, y1, x1] for four corners of a
        box.
      gt_labels: A integer tensor with shape [N, 1] representing groundtruth
        classes.

    Returns:
      encoded_classes: a tensor with shape [num_anchors, 1].
      encoded_boxes: a tensor with shape [num_anchors, 4].
      num_matched_boxes: float32 scalar tensor counting the anchors whose match
        result is not -1.
    """
    iou_similarity = region_similarity_calculator.IouSimilarity()

    # The same threshold separates matched from unmatched anchors.
    anchor_matcher = argmax_matcher.ArgMaxMatcher(
        matched_threshold=ssd_constants.MATCH_THRESHOLD,
        unmatched_threshold=ssd_constants.MATCH_THRESHOLD,
        negatives_lower_than_unmatched=True,
        force_match_for_each_row=True)

    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=ssd_constants.BOX_CODER_SCALES)

    anchors = box_list.BoxList(
        tf.convert_to_tensor(DefaultBoxes()('ltrb')))
    groundtruth = box_list.BoxList(gt_boxes)

    assigner = target_assigner.TargetAssigner(
        iou_similarity, anchor_matcher, coder)

    encoded_classes, _, encoded_boxes, _, matches = assigner.assign(
        anchors, groundtruth, gt_labels)

    is_matched = tf.not_equal(matches.match_results, -1)
    num_matched_boxes = tf.reduce_sum(tf.cast(is_matched, tf.float32))
    return encoded_classes, encoded_boxes, num_matched_boxes
class SSDInputReader(object):
    """Input reader for dataset.

    Calling an instance with a `params` dict builds a `tf.data.Dataset` from
    the TFRecord files matching `file_pattern`. In training mode the dataset
    yields `(image, labels)` batches; otherwise it yields batches of sample
    dicts keyed by `ssd_constants` names.
    """

    def __init__(self,
                 file_pattern,
                 transpose_input=False,
                 is_training=False,
                 distributed_eval=False,
                 count=-1):
        """Stores the reader configuration.

        Args:
          file_pattern: glob pattern handed to `tf.data.Dataset.list_files`.
          transpose_input: stored on the instance; not read by the code
            visible in this class.
          is_training: selects the training pipeline (augmentation, label
            encoding, shuffling, repeat) instead of the eval pipeline.
          distributed_eval: if True, the file list is sharded across ranks
            even when not training.
          count: stored on the instance; not read by the code visible in
            this class.
        """
        self._file_pattern = file_pattern
        self._transpose_input = transpose_input
        self._is_training = is_training
        self._distributed_eval = distributed_eval
        self._count = count

    def __call__(self, params):
        """Builds the input pipeline.

        Args:
          params: dict of run parameters; only `params['batch_size']` is read
            here.

        Returns:
          A batched `tf.data.Dataset`. Incomplete final batches are dropped
          in both modes (`drop_remainder=True`).
        """
        example_decoder = tf_example_decoder.TfExampleDecoder()

        def _parse_example(data):
            """Converts one decoded example into model inputs.

            Returns `(image, labels)` when training, otherwise a dict with
            image, padded boxes/classes, source id and raw image shape.
            """
            with tf.name_scope('augmentation'):
                source_id = data['source_id']
                image = data['image']  # dtype uint8
                raw_shape = tf.shape(image)
                boxes = data['groundtruth_boxes']
                classes = tf.reshape(data['groundtruth_classes'], [-1, 1])
                # Only 80 of the 90 COCO classes are used.
                class_map = tf.convert_to_tensor(ssd_constants.CLASS_MAP)
                classes = tf.gather(class_map, classes)
                classes = tf.cast(classes, dtype=tf.float32)
                if self._is_training:
                    image, boxes, classes = ssd_crop(image, boxes, classes)
                    # ssd_crop resizes and returns image of dtype float32 and does not
                    # change its range (i.e., value in between 0--255). Divide by 255.
                    # converts it to [0, 1] range. Not doing this before cropping to
                    # avoid dtype cast (which incurs additional memory copy).
                    image /= 255.0
                    # random_horizontal_flip() is hard coded to flip with 50% chance.
                    image, boxes = preprocessor.random_horizontal_flip(
                        image=image, boxes=boxes)
                    # TODO(shibow): Investigate the parameters for color jitter.
                    image = color_jitter(
                        image, brightness=0.125, contrast=0.5, saturation=0.5, hue=0.05)
                    # Match the augmented boxes against the default boxes to
                    # obtain per-anchor training targets.
                    encoded_classes, encoded_boxes, num_matched_boxes = encode_labels(
                        boxes, classes)
                    # TODO(taylorrobie): Check that this cast is valid.
                    encoded_classes = tf.cast(encoded_classes, tf.int32)
                    labels = {
                        ssd_constants.NUM_MATCHED_BOXES: num_matched_boxes,
                        ssd_constants.BOXES: encoded_boxes,
                        ssd_constants.CLASSES: tf.squeeze(encoded_classes, axis=1),
                    }
                    return image, labels
                else:
                    image = tf.image.resize_images(
                        image, size=(ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE))
                    # resize_image returns image of dtype float32 and does not change its
                    # range. Divide by 255 to convert image to [0, 1] range.
                    image /= 255.

                    def trim_and_pad(inp_tensor, dim_1):
                        """Limit the number of boxes, and pad if necessary."""
                        inp_tensor = inp_tensor[:ssd_constants.MAX_NUM_EVAL_BOXES]
                        num_pad = ssd_constants.MAX_NUM_EVAL_BOXES - tf.shape(inp_tensor)[0]
                        # Zero-pad along the first axis only.
                        inp_tensor = tf.pad(inp_tensor, [[0, num_pad], [0, 0]])
                        # The reshape pins the static shape for batching.
                        return tf.reshape(
                            inp_tensor, [ssd_constants.MAX_NUM_EVAL_BOXES, dim_1])

                    boxes, classes = trim_and_pad(boxes, 4), trim_and_pad(classes, 1)
                    sample = {
                        ssd_constants.IMAGE: image,
                        ssd_constants.BOXES: boxes,
                        ssd_constants.CLASSES: classes,
                        ssd_constants.SOURCE_ID: tf.string_to_number(source_id, tf.int32),
                        ssd_constants.RAW_SHAPE: raw_shape,
                    }
                    return sample

        batch_size = params['batch_size']
        # File order is kept deterministic here; shuffling of file names
        # happens explicitly below for training only.
        dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False)
        if self._is_training or self._distributed_eval:
            # With a single rank every file is kept; otherwise each rank
            # reads its own slice of the file list.
            if get_rank_size() == 1:
                dataset = dataset.shard(1, 0)
            else:
                dataset = dataset.shard(get_rank_size(), get_rank_id())
        if self._is_training:
            # Shuffle the file names with a 256-element buffer.
            dataset = dataset.shuffle( tf.to_int64(256))

        # Prefetch data from files.
        def _prefetch_dataset(filename):
            """Opens one TFRecord file with a one-record prefetch."""
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        # Read up to 32 files concurrently; `sloppy` relaxes output ordering
        # and is enabled only for training.
        dataset = dataset.apply(
            tf.data.experimental.parallel_interleave(
                _prefetch_dataset, cycle_length=32, sloppy=self._is_training))
        # Parse the fetched records to input tensors for model function.
        dataset = dataset.map(example_decoder.decode, num_parallel_calls=64)
        if self._is_training:
            # Pair each example with a flag telling whether it has at least
            # one groundtruth box, drop the examples without boxes, then
            # strip the flag again after shuffling.
            dataset = dataset.map(
                # pylint: disable=g-long-lambda
                lambda data: (data,
                              tf.greater(tf.shape(data['groundtruth_boxes'])[0], 0)),
                num_parallel_calls=64)
            dataset = dataset.filter(lambda data, pred: pred)
            dataset = dataset.shuffle(64).repeat()
            dataset = dataset.map(lambda data, pred: data)  # use the first value
            dataset = dataset.map(_parse_example, num_parallel_calls=64)
            dataset = dataset.batch(batch_size=batch_size, drop_remainder=True)
        else:
            dataset = dataset.prefetch(batch_size * 64)
            dataset = dataset.map(_parse_example, num_parallel_calls=64)
            dataset = dataset.batch(batch_size=batch_size, drop_remainder=True)
        dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
        # Fixed threading configuration for the whole pipeline.
        options = tf.data.Options()
        options.experimental_threading.max_intra_op_parallelism = 1
        options.experimental_threading.private_threadpool_size = 48
        dataset = dataset.with_options(options)
        return dataset
@@ -0,0 +1,24 @@
#!/bin/bash
# Launch SSD-ResNet34 train_and_eval for one device, then collect NPU logs.
#
# Positional arguments:
#   $1  rank id of this process (also exported as DEVICE_ID / DEVICE_INDEX)
#   $2  total number of ranks
#   $3  directory that contains ssd_main.py
#
# NOTE(review): the dataset/checkpoint paths below look like placeholders
# (presumably substituted before the script is run) - they are left untouched.

export RANK_ID="$1"
export RANK_SIZE="$2"
export DEVICE_ID="$RANK_ID"
export DEVICE_INDEX="$RANK_ID"
export JOB_ID=990
export FUSION_TENSOR_SIZE=1000000000

# Quote the code directory so a path containing spaces is passed intact.
python3 "${3}/ssd_main.py" --mode=train_and_eval \
    --train_batch_size=32 \
    --training_file_pattern="train_tfrecord_path/train2017*" \
    --resnet_checkpoint=resnet34_path/model.ckpt-28152 \
    --validation_file_pattern="val_tfrecord_path/val2017*" \
    --val_json_file="annotations_patah/instances_val2017.json" \
    --eval_batch_size=32 \
    --model_dir=result_npu

sleep 2
echo "**************** train finished ***************"

# cp with several source files requires the destination directory to exist.
mkdir -p ./slog
cp /var/log/npu/slog/host-0/* ./slog
cp "/var/log/npu/slog/device-${DEVICE_ID}"/* ./slog
cp "/var/log/npu/slog/device-os-${DEVICE_ID}"/* ./slog
@@ -0,0 +1,14 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
@@ -0,0 +1,199 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Argmax matcher implementation.
This class takes a similarity matrix and matches columns to rows based on the
maximum value per column. One can specify matched_threshold
to prevent columns from matching to rows (generally resulting in a negative
training example) and unmatched_threshold to ignore the match (generally
resulting in neither a positive nor a negative training example).
This matcher is used in Fast(er)-RCNN.
Note: matchers are used in TargetAssigners. There is a create_target_assigner
factory function for popular implementations.
"""
import tensorflow as tf
from object_detection import matcher
from object_detection import shape_utils
class ArgMaxMatcher(matcher.Matcher):
    """Matcher based on highest value.

    This class computes matches from a similarity matrix. Each column is
    matched to a single row.

    To support object detection target assignment this class enables setting
    both matched_threshold (upper threshold) and unmatched_threshold (lower
    threshold), defining three categories of similarity which define whether
    examples are positive, negative, or ignored:
    (1) similarity >= matched_threshold: Highest similarity. Matched/Positive!
    (2) matched_threshold > similarity >= unmatched_threshold: Medium
        similarity. Depending on negatives_lower_than_unmatched, this is
        either Unmatched/Negative OR Ignore.
    (3) unmatched_threshold > similarity: Lowest similarity. Depending on flag
        negatives_lower_than_unmatched, either Unmatched/Negative OR Ignore.
    For ignored matches this class sets the values in the Match object to -2.
    """

    def __init__(self,
                 matched_threshold,
                 unmatched_threshold=None,
                 negatives_lower_than_unmatched=True,
                 force_match_for_each_row=False):
        """Construct ArgMaxMatcher.

        Args:
          matched_threshold: Threshold for positive matches. Positive if
            sim >= matched_threshold, where sim is the maximum value of the
            similarity matrix for a given column. Set to None for no
            threshold.
          unmatched_threshold: Threshold for negative matches. Negative if
            sim < unmatched_threshold. Defaults to matched_threshold
            when set to None.
          negatives_lower_than_unmatched: Boolean which defaults to True. If
            True then negative matches are the ones below the
            unmatched_threshold, whereas ignored matches are in between the
            matched and unmatched threshold. If False, then negative matches
            are in between the matched and unmatched threshold, and
            everything lower than unmatched is ignored.
          force_match_for_each_row: If True, ensures that each row is matched
            to at least one column (which is not guaranteed otherwise if the
            matched_threshold is high). Defaults to False.

        Raises:
          ValueError: if unmatched_threshold is set but matched_threshold is
            not set, if unmatched_threshold > matched_threshold, or if the
            two thresholds are equal while negatives_lower_than_unmatched is
            False.
        """
        if (matched_threshold is None) and (unmatched_threshold is not None):
            # The trailing space keeps the two literals from running together.
            raise ValueError('Need to also define matched_threshold when '
                             'unmatched_threshold is defined')
        self._matched_threshold = matched_threshold
        if unmatched_threshold is None:
            self._unmatched_threshold = matched_threshold
        else:
            if unmatched_threshold > matched_threshold:
                raise ValueError('unmatched_threshold needs to be smaller or '
                                 'equal to matched_threshold')
            self._unmatched_threshold = unmatched_threshold
        if not negatives_lower_than_unmatched:
            if self._unmatched_threshold == self._matched_threshold:
                # Format the thresholds into the message; passing them as
                # extra ValueError arguments would leave the %s unfilled.
                raise ValueError(
                    'When negatives are in between matched and '
                    'unmatched thresholds, these cannot be of equal '
                    'value. matched: %s, unmatched: %s' %
                    (self._matched_threshold, self._unmatched_threshold))
        self._force_match_for_each_row = force_match_for_each_row
        self._negatives_lower_than_unmatched = negatives_lower_than_unmatched

    def _match(self, similarity_matrix):
        """Tries to match each column of the similarity matrix to a row.

        Args:
          similarity_matrix: tensor of shape [N, M] representing any
            similarity metric.

        Returns:
          int32 tensor of shape [M] holding, for each column, the matched row
          index, -1 for unmatched or -2 for ignored.
        """

        def _match_when_rows_are_empty():
            """Performs matching when the rows of similarity matrix are empty.

            When the rows are empty, all detections are false positives. So
            we return a tensor of -1's to indicate that the columns do not
            match to any rows.

            Returns:
              matches: int32 tensor indicating the row each column matches to.
            """
            similarity_matrix_shape = (
                shape_utils.combined_static_and_dynamic_shape(
                    similarity_matrix))
            return -1 * tf.ones([similarity_matrix_shape[1]], dtype=tf.int32)

        def _match_when_rows_are_non_empty():
            """Performs matching when the rows of similarity matrix are non empty.

            Returns:
              matches: int32 tensor indicating the row each column matches to.
            """
            # Matches for each column
            matches = tf.argmax(similarity_matrix, 0, output_type=tf.int32)

            # Deal with matched and unmatched threshold
            if self._matched_threshold is not None:
                # Boolean indicators of the columns whose best similarity
                # falls below / between the two thresholds.
                matched_vals = tf.reduce_max(similarity_matrix, 0)
                below_unmatched_threshold = tf.greater(
                    self._unmatched_threshold, matched_vals)
                between_thresholds = tf.logical_and(
                    tf.greater_equal(matched_vals, self._unmatched_threshold),
                    tf.greater(self._matched_threshold, matched_vals))

                if self._negatives_lower_than_unmatched:
                    matches = self._set_values_using_indicator(
                        matches, below_unmatched_threshold, -1)
                    matches = self._set_values_using_indicator(
                        matches, between_thresholds, -2)
                else:
                    matches = self._set_values_using_indicator(
                        matches, below_unmatched_threshold, -2)
                    matches = self._set_values_using_indicator(
                        matches, between_thresholds, -1)

            if self._force_match_for_each_row:
                similarity_matrix_shape = (
                    shape_utils.combined_static_and_dynamic_shape(
                        similarity_matrix))
                # Best column for every row, expressed as a one-hot matrix of
                # shape [N, M].
                force_match_column_ids = tf.argmax(similarity_matrix, 1,
                                                   output_type=tf.int32)
                force_match_column_indicators = tf.one_hot(
                    force_match_column_ids, depth=similarity_matrix_shape[1])
                # For every column, a row that selected it (if any) ...
                force_match_row_ids = tf.argmax(
                    force_match_column_indicators, 0, output_type=tf.int32)
                # ... and whether any row selected it at all.
                force_match_column_mask = tf.cast(
                    tf.reduce_max(force_match_column_indicators, 0), tf.bool)
                # Forced matches override the threshold-based result.
                final_matches = tf.where(force_match_column_mask,
                                         force_match_row_ids, matches)
                return final_matches
            else:
                return matches

        # Decide at graph-construction time when the shape is static;
        # otherwise defer the empty-rows decision to run time.
        if similarity_matrix.shape.is_fully_defined():
            if similarity_matrix.shape[0].value == 0:
                return _match_when_rows_are_empty()
            else:
                return _match_when_rows_are_non_empty()
        else:
            return tf.cond(
                tf.greater(tf.shape(similarity_matrix)[0], 0),
                _match_when_rows_are_non_empty, _match_when_rows_are_empty)

    def _set_values_using_indicator(self, x, indicator, val):
        """Set the indicated fields of x to val.

        Args:
          x: tensor.
          indicator: boolean with same shape as x.
          val: scalar with value to set.

        Returns:
          modified tensor.
        """
        # Arithmetic select: keep x where indicator is 0, use val where 1.
        indicator = tf.cast(indicator, x.dtype)
        return tf.add(tf.multiply(x, 1 - indicator), val * indicator)
@@ -0,0 +1,151 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base box coder.
Box coders convert between coordinate frames, namely image-centric
(with (0,0) on the top left of image) and anchor-centric (with (0,0) being
defined by a specific anchor).
Users of a BoxCoder can call two methods:
encode: which encodes a box with respect to a given anchor
(or rather, a tensor of boxes wrt a corresponding tensor of anchors) and
decode: which inverts this encoding with a decode operation.
In both cases, the arguments are assumed to be in 1-1 correspondence already;
it is not the job of a BoxCoder to perform matching.
"""
from abc import ABCMeta
from abc import abstractmethod
from abc import abstractproperty
import tensorflow as tf
# Box coder types.
FASTER_RCNN = 'faster_rcnn'
KEYPOINT = 'keypoint'
MEAN_STDDEV = 'mean_stddev'
SQUARE = 'square'
class BoxCoder(object):
    """Interface shared by all box coders.

    Subclasses supply `code_size`, `_encode` and `_decode`; callers use the
    public `encode` / `decode` wrappers, which only add a name scope.
    """
    # NOTE(review): `__metaclass__` is the Python 2 spelling. Under Python 3
    # this attribute is not interpreted as a metaclass, so the abstract
    # members below are not enforced at instantiation time.
    __metaclass__ = ABCMeta

    @abstractproperty
    def code_size(self):
        """Size of each code produced by `encode`.

        The value is a constant and should agree with the last dimension of
        the tensor returned by `encode` (shape [N, code_size]). Subclasses
        are expected to override this property.

        Returns:
          an integer constant
        """
        return None

    @abstractmethod
    def _encode(self, boxes, anchors):
        """Encoding hook to be overridden by implementations.

        Args:
          boxes: BoxList holding N boxes to be encoded
          anchors: BoxList of N anchors

        Returns:
          a tensor representing N relative-encoded boxes
        """
        return None

    @abstractmethod
    def _decode(self, rel_codes, anchors):
        """Decoding hook to be overridden by implementations.

        Args:
          rel_codes: a tensor representing N relative-encoded boxes
          anchors: BoxList of anchors

        Returns:
          boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
            with corners y_min, x_min, y_max, x_max)
        """
        return None

    def encode(self, boxes, anchors):
        """Encodes a box list relative to an anchor collection.

        Args:
          boxes: BoxList holding N boxes to be encoded
          anchors: BoxList of N anchors

        Returns:
          a tensor representing N relative-encoded boxes
        """
        with tf.name_scope('Encode'):
            rel_codes = self._encode(boxes, anchors)
        return rel_codes

    def decode(self, rel_codes, anchors):
        """Decodes boxes that were encoded relative to an anchor collection.

        Args:
          rel_codes: a tensor representing N relative-encoded boxes
          anchors: BoxList of anchors

        Returns:
          boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
            with corners y_min, x_min, y_max, x_max)
        """
        with tf.name_scope('Decode'):
            boxlist = self._decode(rel_codes, anchors)
        return boxlist
def batch_decode(encoded_boxes, box_coder, anchors):
    """Decodes every image of a batch of encoded boxes.

    Each batch element is decoded separately with `box_coder.decode` against
    the same anchors, and the per-image corner tensors are stacked back into
    one batch in the order [y_min, x_min, y_max, x_max].

    Args:
      encoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
        code_size] representing the location of the objects.
      box_coder: a BoxCoder object.
      anchors: a BoxList of anchors used to encode `encoded_boxes`.

    Returns:
      decoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
        coder_size] representing the corners of the objects in the order
        of [y_min, x_min, y_max, x_max].

    Raises:
      ValueError: if batch sizes of the inputs are inconsistent, or if
        the number of anchors inferred from encoded_boxes and anchors are
        inconsistent.
    """
    static_shape = encoded_boxes.get_shape()
    static_shape.assert_has_rank(3)

    # Compare the statically known anchor counts before building any ops.
    anchors_in_codes = static_shape[1].value
    anchors_in_list = anchors.num_boxes_static()
    if anchors_in_codes != anchors_in_list:
        raise ValueError('The number of anchors inferred from encoded_boxes'
                         ' and anchors are inconsistent: shape[1] of encoded_boxes'
                         ' %s should be equal to the number of anchors: %s.' %
                         (anchors_in_codes, anchors_in_list))

    per_image_boxes = []
    for image_codes in tf.unstack(encoded_boxes):
        per_image_boxes.append(box_coder.decode(image_codes, anchors).get())
    return tf.stack(per_image_boxes)
@@ -0,0 +1,207 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bounding Box List definition.
BoxList represents a list of bounding boxes as tensorflow
tensors, where each bounding box is represented as a row of 4 numbers,
[y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes
within a given list correspond to a single image. See also
box_list_ops.py for common box related operations (such as area, iou, etc).
Optionally, users can add additional related fields (such as weights).
We assume the following things to be true about fields:
* they correspond to boxes in the box_list along the 0th dimension
* they have inferrable rank at graph construction time
* all dimensions except for possibly the 0th can be inferred
(i.e., not None) at graph construction time.
Some other notes:
* Following tensorflow conventions, we use height, width ordering,
and correspondingly, y,x (or ymin, xmin, ymax, xmax) ordering
* Tensors are always provided as (flat) [N, 4] tensors.
"""
import tensorflow as tf
class BoxList(object):
    """Collection of boxes plus optional per-box fields.

    All data lives in `self.data`, a dict whose 'boxes' entry holds the
    [N, 4] corner tensor; any other key is an extra field.
    """

    def __init__(self, boxes):
        """Creates the collection from a corner tensor.

        Args:
          boxes: a tensor of shape [N, 4] representing box corners

        Raises:
          ValueError: if invalid dimensions for bbox data or if bbox data is
            not in float32 format.
        """
        static_shape = boxes.get_shape()
        if len(static_shape) != 2 or static_shape[-1] != 4:
            raise ValueError('Invalid dimensions for box data.')
        if boxes.dtype != tf.float32:
            raise ValueError('Invalid tensor type: should be tf.float32')
        self.data = {'boxes': boxes}

    def num_boxes(self):
        """Returns the number of boxes as a run-time tensor.

        Returns:
          a tensor representing the number of boxes held in the collection.
        """
        dynamic_shape = tf.shape(self.data['boxes'])
        return dynamic_shape[0]

    def num_boxes_static(self):
        """Returns the number of boxes known at graph construction time.

        Returns:
          Number of boxes held in collection (integer) or None if this is not
            inferrable at graph construction time.
        """
        leading_dim = self.data['boxes'].get_shape()[0]
        return leading_dim.value

    def get_all_fields(self):
        """Returns the names of all fields, 'boxes' included."""
        return self.data.keys()

    def get_extra_fields(self):
        """Returns the names of all fields other than 'boxes'."""
        extra = []
        for name in self.data.keys():
            if name != 'boxes':
                extra.append(name)
        return extra

    def add_field(self, field, field_data):
        """Stores `field_data` under `field`, e.g. weights or labels.

        Args:
          field: a string key to access the data via `get_field`
          field_data: a tensor containing the data to store in the BoxList
        """
        self.data[field] = field_data

    def has_field(self, field):
        """Returns True if `field` is stored in this BoxList."""
        return field in self.data

    def get(self):
        """Returns the box coordinates.

        Returns:
          a tensor with shape [N, 4] representing box coordinates.
        """
        return self.get_field('boxes')

    def set(self, boxes):
        """Replaces the box coordinates.

        Args:
          boxes: a tensor of shape [N, 4] representing box corners

        Raises:
          ValueError: if invalid dimensions for bbox data
        """
        static_shape = boxes.get_shape()
        if len(static_shape) != 2 or static_shape[-1] != 4:
            raise ValueError('Invalid dimensions for box data.')
        self.data['boxes'] = boxes

    def get_field(self, field):
        """Returns the tensor stored under `field`.

        Args:
          field: string name of the field to read ('boxes' for coordinates).

        Returns:
          a tensor representing the box collection or an associated field.

        Raises:
          ValueError: if invalid field
        """
        if self.has_field(field):
            return self.data[field]
        raise ValueError('field ' + str(field) + ' does not exist')

    def set_field(self, field, value):
        """Overwrites an existing field.

        Args:
          field: (string) name of the field to set value.
          value: the value to assign to the field.

        Raises:
          ValueError: if the box_list does not have specified field.
        """
        if self.has_field(field):
            self.data[field] = value
            return
        raise ValueError('field %s does not exist' % field)

    def get_center_coordinates_and_sizes(self, scope=None):
        """Computes the center coordinates, height and width of the boxes.

        Args:
          scope: name scope of the function.

        Returns:
          a list of 4 1-D tensors [ycenter, xcenter, height, width].
        """
        with tf.name_scope(scope, 'get_center_coordinates_and_sizes'):
            # Transposing [N, 4] to [4, N] lets unstack yield one tensor per
            # coordinate.
            corner_rows = tf.unstack(tf.transpose(self.get()))
            ymin, xmin, ymax, xmax = corner_rows
            width = xmax - xmin
            height = ymax - ymin
            ycenter = ymin + height / 2.
            xcenter = xmin + width / 2.
            return [ycenter, xcenter, height, width]

    def transpose_coordinates(self, scope=None):
        """Swaps the y and x coordinates of every box in place.

        Args:
          scope: name scope of the function.
        """
        with tf.name_scope(scope, 'transpose_coordinates'):
            columns = tf.split(
                value=self.get(), num_or_size_splits=4, axis=1)
            y_min, x_min, y_max, x_max = columns
            swapped = tf.concat([x_min, y_min, x_max, y_max], 1)
            self.set(swapped)

    def as_tensor_dict(self, fields=None):
        """Returns the requested fields as a dictionary of tensors.

        Args:
          fields: (optional) list of fields to return in the dictionary.
            If None (default), all fields are returned.

        Returns:
          tensor_dict: A dictionary of tensors specified by fields.

        Raises:
          ValueError: if specified field is not contained in boxlist.
        """
        wanted = self.get_all_fields() if fields is None else fields
        tensor_dict = {}
        for name in wanted:
            if name not in self.data:
                raise ValueError('boxlist must contain all specified fields')
            tensor_dict[name] = self.get_field(name)
        return tensor_dict
@@ -0,0 +1,118 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Faster RCNN box coder.
Faster RCNN box coder follows the coding schema described below:
ty = (y - ya) / ha
tx = (x - xa) / wa
th = log(h / ha)
tw = log(w / wa)
where x, y, w, h denote the box's center coordinates, width and height
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
center, width and height respectively.
See http://arxiv.org/abs/1506.01497 for details.
"""
import tensorflow as tf
from object_detection import box_coder
from object_detection import box_list
EPSILON = 1e-8
class FasterRcnnBoxCoder(box_coder.BoxCoder):
    """Box coder using the Faster RCNN center/size parameterization."""

    def __init__(self, scale_factors=None):
        """Constructor for FasterRcnnBoxCoder.

        Args:
          scale_factors: List of 4 positive scalars to scale ty, tx, th and
            tw. If set to None, does not perform scaling. For Faster RCNN,
            the open-source implementation recommends using
            [10.0, 10.0, 5.0, 5.0].
        """
        if scale_factors:
            assert len(scale_factors) == 4
            for factor in scale_factors:
                assert factor > 0
        self._scale_factors = scale_factors

    @property
    def code_size(self):
        """Number of values in each code: ty, tx, th, tw."""
        return 4

    def _encode(self, boxes, anchors):
        """Encodes a box collection with respect to an anchor collection.

        Args:
          boxes: BoxList holding N boxes to be encoded.
          anchors: BoxList of anchors.

        Returns:
          a tensor representing N anchor-encoded boxes of the format
          [ty, tx, th, tw].
        """
        # Both collections are converted to the center/size representation.
        anchor_cy, anchor_cx, anchor_h, anchor_w = (
            anchors.get_center_coordinates_and_sizes())
        box_cy, box_cx, box_h, box_w = (
            boxes.get_center_coordinates_and_sizes())

        # EPSILON keeps the divisions and logarithms below finite.
        anchor_h = anchor_h + EPSILON
        anchor_w = anchor_w + EPSILON
        box_h = box_h + EPSILON
        box_w = box_w + EPSILON

        tx = (box_cx - anchor_cx) / anchor_w
        ty = (box_cy - anchor_cy) / anchor_h
        tw = tf.log(box_w / anchor_w)
        th = tf.log(box_h / anchor_h)

        # Optional per-component scaling of the location targets.
        scales = self._scale_factors
        if scales:
            ty = ty * scales[0]
            tx = tx * scales[1]
            th = th * scales[2]
            tw = tw * scales[3]

        stacked_codes = tf.stack([ty, tx, th, tw])
        return tf.transpose(stacked_codes)

    def _decode(self, rel_codes, anchors):
        """Decodes relative codes back to corner boxes.

        Args:
          rel_codes: a tensor representing N anchor-encoded boxes.
          anchors: BoxList of anchors.

        Returns:
          boxes: BoxList holding N bounding boxes.
        """
        anchor_cy, anchor_cx, anchor_h, anchor_w = (
            anchors.get_center_coordinates_and_sizes())
        ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))

        # Undo the scaling applied during encoding.
        scales = self._scale_factors
        if scales:
            ty = ty / scales[0]
            tx = tx / scales[1]
            th = th / scales[2]
            tw = tw / scales[3]

        box_w = tf.exp(tw) * anchor_w
        box_h = tf.exp(th) * anchor_h
        box_cy = ty * anchor_h + anchor_cy
        box_cx = tx * anchor_w + anchor_cx

        half_h = box_h / 2.
        half_w = box_w / 2.
        corners = tf.stack([box_cy - half_h, box_cx - half_w,
                            box_cy + half_h, box_cx + half_w])
        return box_list.BoxList(tf.transpose(corners))
@@ -0,0 +1,241 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Matcher interface and Match class.
This module defines the Matcher interface and the Match object. The job of the
matcher is to match row and column indices based on the similarity matrix and
other optional parameters. Each column is matched to at most one row. There
are three possibilities for the matching:
1) match: A column matches a row.
2) no_match: A column does not match any row.
3) ignore: A column that is neither 'match' nor no_match.
The ignore case is regularly encountered in object detection: when an anchor has
a relatively small overlap with a ground-truth box, one neither wants to
consider this box a positive example (match) nor a negative example (no match).
The Match class is used to store the match results and it provides simple apis
to query the results.
"""
from abc import ABCMeta
from abc import abstractmethod
import tensorflow as tf
class Match(object):
"""Class to store results from the matcher.
This class is used to store the results from the matcher. It provides
convenient methods to query the matching results.
"""
def __init__(self, match_results):
"""Constructs a Match object.
Args:
match_results: Integer tensor of shape [N] with (1) match_results[i]>=0,
meaning that column i is matched with row match_results[i].
(2) match_results[i]=-1, meaning that column i is not matched.
(3) match_results[i]=-2, meaning that column i is ignored.
Raises:
ValueError: if match_results does not have rank 1 or is not an
integer int32 scalar tensor
"""
if match_results.shape.ndims != 1:
raise ValueError('match_results should have rank 1')
if match_results.dtype != tf.int32:
raise ValueError('match_results should be an int32 or int64 scalar '
'tensor')
self._match_results = match_results
@property
def match_results(self):
"""The accessor for match results.
Returns:
the tensor which encodes the match results.
"""
return self._match_results
def matched_column_indices(self):
"""Returns column indices that match to some row.
The indices returned by this op are always sorted in increasing order.
Returns:
column_indices: int32 tensor of shape [K] with column indices.
"""
return self._reshape_and_cast(tf.where(tf.greater(self._match_results, -1)))
def matched_column_indicator(self):
"""Returns column indices that are matched.
Returns:
column_indices: int32 tensor of shape [K] with column indices.
"""
return tf.greater_equal(self._match_results, 0)
def num_matched_columns(self):
"""Returns number (int32 scalar tensor) of matched columns."""
return tf.size(self.matched_column_indices())
def unmatched_column_indices(self):
"""Returns column indices that do not match any row.
The indices returned by this op are always sorted in increasing order.
Returns:
column_indices: int32 tensor of shape [K] with column indices.
"""
return self._reshape_and_cast(tf.where(tf.equal(self._match_results, -1)))
def unmatched_column_indicator(self):
"""Returns column indices that are unmatched.
Returns:
column_indices: int32 tensor of shape [K] with column indices.
"""
return tf.equal(self._match_results, -1)
def num_unmatched_columns(self):
"""Returns number (int32 scalar tensor) of unmatched columns."""
return tf.size(self.unmatched_column_indices())
def ignored_column_indices(self):
"""Returns column indices that are ignored (neither Matched nor Unmatched).
The indices returned by this op are always sorted in increasing order.
Returns:
column_indices: int32 tensor of shape [K] with column indices.
"""
return self._reshape_and_cast(tf.where(self.ignored_column_indicator()))
def ignored_column_indicator(self):
"""Returns boolean column indicator where True means the colum is ignored.
Returns:
column_indicator: boolean vector which is True for all ignored column
indices.
"""
return tf.equal(self._match_results, -2)
def num_ignored_columns(self):
"""Returns number (int32 scalar tensor) of matched columns."""
return tf.size(self.ignored_column_indices())
def unmatched_or_ignored_column_indices(self):
"""Returns column indices that are unmatched or ignored.
The indices returned by this op are always sorted in increasing order.
Returns:
column_indices: int32 tensor of shape [K] with column indices.
"""
return self._reshape_and_cast(tf.where(tf.greater(0, self._match_results)))
def matched_row_indices(self):
    """Returns the row index matched to each matched column.

    The output is aligned with `matched_column_indices()`. For example, if
    `self.matched_column_indices()` is [0, 2] and
    `self.matched_row_indices()` is [7, 3], then column 0 was matched to
    row 7 and column 2 was matched to row 3.

    Returns:
      row_indices: int32 tensor of shape [K] with row indices.
    """
    matched_columns = self.matched_column_indices()
    # The match result stored at a matched column is the row it matched.
    matched_rows = tf.gather(self._match_results, matched_columns)
    return self._reshape_and_cast(matched_rows)
def _reshape_and_cast(self, t):
    """Flattens `t` to rank 1 and casts the result to int32."""
    flattened = tf.reshape(t, [-1])
    return tf.cast(flattened, tf.int32)
def gather_based_on_match(self, input_tensor, unmatched_value,
                          ignored_value):
    """Gathers rows of `input_tensor` according to the match results.

    For a column matched to a row, gathered_tensor[col] is
    input_tensor[match_results[col]]. For an unmatched column it is
    `unmatched_value`, and for an ignored column it is `ignored_value`.

    Note that input_tensor.shape[1:] must match unmatched_value.shape and
    ignored_value.shape.

    Args:
      input_tensor: Tensor to gather values from.
      unmatched_value: Constant tensor value for unmatched columns.
      ignored_value: Constant tensor value for ignored columns.

    Returns:
      gathered_tensor: A tensor containing values gathered from input_tensor.
        The shape of the gathered tensor is [match_results.shape[0]] +
        input_tensor.shape[1:].
    """
    # Prepend two rows: row 0 holds ignored_value, row 1 unmatched_value.
    fill_rows = tf.stack([ignored_value, unmatched_value])
    padded_input = tf.concat([fill_rows, input_tensor], axis=0)
    # Shifting by 2 maps -2 -> 0, -1 -> 1 and row k -> k + 2; anything lower
    # is clamped to 0.
    shifted_indices = tf.maximum(self.match_results + 2, 0)
    return tf.gather(padded_input, shifted_indices)
class Matcher(metaclass=ABCMeta):
    """Abstract base class for matchers.

    Subclasses implement `_match`; callers use `match`, which wraps the raw
    match results in a `Match` object.

    The metaclass is declared with Python 3 syntax so that `@abstractmethod`
    is actually enforced: a class-body `__metaclass__` attribute is ignored
    on Python 3, which left this base class (and incomplete subclasses)
    instantiable.
    """

    def match(self, similarity_matrix, scope=None, **params):
        """Computes matches among row and column indices.

        Computes matches among the row and column indices based on the
        similarity matrix and optional arguments.

        Args:
          similarity_matrix: Float tensor of shape [N, M] with pairwise
            similarity where higher value means more similar.
          scope: Op scope name. Defaults to 'Match' if None.
          **params: Additional keyword arguments for specific implementations
            of the Matcher.

        Returns:
          A Match object with the results of matching.
        """
        with tf.name_scope(scope, 'Match', [similarity_matrix, params]):
            return Match(self._match(similarity_matrix, **params))

    @abstractmethod
    def _match(self, similarity_matrix, **params):
        """Method to be overridden by implementations.

        Args:
          similarity_matrix: Float tensor of shape [N, M] with pairwise
            similarity where higher value means more similar.
          **params: Additional keyword arguments for specific implementations
            of the Matcher.

        Returns:
          match_results: Integer tensor of shape [M]: match_results[i]>=0
            means that column i is matched to row match_results[i],
            match_results[i]=-1 means that the column is not matched.
            match_results[i]=-2 means that the column is ignored (usually
            this happens when there is a very weak match which one neither
            wants as positive nor negative example).
        """
@@ -0,0 +1,442 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Preprocess images and bounding boxes for detection.
We perform two sets of operations in preprocessing stage:
(a) operations that are applied to both training and testing data,
(b) operations that are applied only to training data for the purpose of
data augmentation.
A preprocessing function receives a set of inputs,
e.g. an image and bounding boxes,
performs an operation on them, and returns them.
Some examples are: randomly cropping the image, randomly mirroring the image,
randomly changing the brightness, contrast, hue and
randomly jittering the bounding boxes.
The image is a rank 4 tensor: [1, height, width, channels] with
dtype=tf.float32. The groundtruth_boxes is a rank 2 tensor: [N, 4] where
in each row there is a box with [ymin xmin ymax xmax].
Boxes are in normalized coordinates meaning
their coordinate values range in [0, 1]
Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing
functions receive a rank 3 tensor for processing the image. Thus, inside the
preprocess function we squeeze the image to become a rank 3 tensor and then
we pass it to the functions. At the end of the preprocess we expand the image
back to rank 4.
"""
import tensorflow as tf
from object_detection import box_list
def _flip_boxes_left_right(boxes):
    """Mirrors normalized boxes about the vertical centre line.

    Args:
      boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
        Boxes are in normalized form, meaning their coordinates vary
        between [0, 1]. Each row is in the form [ymin, xmin, ymax, xmax].

    Returns:
      Flipped boxes, in the same [ymin, xmin, ymax, xmax] layout.
    """
    top, left, bottom, right = tf.split(
        value=boxes, num_or_size_splits=4, axis=1)
    # After mirroring, the old right edge becomes the new left edge.
    mirrored_left = tf.subtract(1.0, right)
    mirrored_right = tf.subtract(1.0, left)
    return tf.concat([top, mirrored_left, bottom, mirrored_right], 1)
def _flip_masks_left_right(masks):
"""Left-right flip masks.
Args:
masks: rank 3 float32 tensor with shape
[num_instances, height, width] representing instance masks.
Returns:
flipped masks: rank 3 float32 tensor with shape
[num_instances, height, width] representing instance masks.
"""
return masks[:, :, ::-1]
def keypoint_flip_horizontal(keypoints, flip_point, flip_permutation,
                             scope=None):
    """Flips keypoints horizontally around `flip_point`.

    The x coordinate of every keypoint is reflected about `flip_point`, and
    the keypoints are reordered as specified by `flip_permutation`.

    Args:
      keypoints: a tensor of shape [num_instances, num_keypoints, 2]
      flip_point: (float) scalar tensor representing the x coordinate to flip
        the keypoints around.
      flip_permutation: rank 1 int32 tensor containing the keypoint flip
        permutation. This specifies the mapping from original keypoint
        indices to the flipped keypoint indices. It matters for keypoints
        that are not reflection invariant. E.g. with 3 keypoints
        ['head', 'right_eye', 'left_eye'], a logical choice is [0, 2, 1],
        which swaps 'left_eye' and 'right_eye' after a horizontal flip.
      scope: name scope.

    Returns:
      new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    """
    with tf.name_scope(scope, 'FlipHorizontal'):
        # Bring the keypoint axis to the front so tf.gather permutes
        # keypoints rather than instances.
        by_keypoint = tf.transpose(keypoints, [1, 0, 2])
        permuted = tf.gather(by_keypoint, flip_permutation)
        first_coord, second_coord = tf.split(
            value=permuted, num_or_size_splits=2, axis=2)
        # Only the second coordinate is reflected: x -> 2 * flip_point - x.
        reflected = flip_point * 2.0 - second_coord
        flipped = tf.concat([first_coord, reflected], 2)
        return tf.transpose(flipped, [1, 0, 2])
def random_horizontal_flip(image,
                           boxes=None,
                           masks=None,
                           keypoints=None,
                           keypoint_flip_permutation=None,
                           seed=None):
    """Randomly flips the image and its annotations horizontally.

    A single uniform random draw decides the flip (taken when the draw is
    greater than 0.5), and the same decision is applied to the image and to
    every annotation that was supplied.

    Args:
      image: rank 3 float32 tensor with shape [height, width, channels].
      boxes: (optional) rank 2 float32 tensor with shape [N, 4]
        containing the bounding boxes. Boxes are in normalized form, meaning
        their coordinates vary between [0, 1]. Each row is in the form
        [ymin, xmin, ymax, xmax].
      masks: (optional) rank 3 float32 tensor with shape
        [num_instances, height, width] containing instance masks. The masks
        are of the same height, width as the input `image`.
      keypoints: (optional) rank 3 float32 tensor with shape
        [num_instances, num_keypoints, 2]. The keypoints are in y-x
        normalized coordinates.
      keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint
        flip permutation.
      seed: random seed

    Returns:
      A tuple whose first element is the (possibly flipped) image, followed,
      in this order, by each of boxes, masks and keypoints that was not None:
        image: image which is the same shape as input image.
        boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
          Boxes are in normalized form, meaning their coordinates vary
          between [0, 1].
        masks: rank 3 float32 tensor with shape
          [num_instances, height, width] containing instance masks.
        keypoints: rank 3 float32 tensor with shape
          [num_instances, num_keypoints, 2]

    Raises:
      ValueError: if keypoints are provided but keypoint_flip_permutation is
        not.
    """
    permutation_missing = keypoint_flip_permutation is None
    if keypoints is not None and permutation_missing:
        raise ValueError(
            'keypoints are provided but keypoints_flip_permutation is not provided')

    with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]):
        # One shared draw so the image and all annotations flip together.
        should_flip = tf.greater(tf.random_uniform([], seed=seed), 0.5)

        image = tf.cond(should_flip,
                        lambda: tf.image.flip_left_right(image),
                        lambda: image)
        outputs = [image]

        if boxes is not None:
            boxes = tf.cond(should_flip,
                            lambda: _flip_boxes_left_right(boxes),
                            lambda: boxes)
            outputs.append(boxes)

        if masks is not None:
            masks = tf.cond(should_flip,
                            lambda: _flip_masks_left_right(masks),
                            lambda: masks)
            outputs.append(masks)

        if keypoints is not None and not permutation_missing:
            # Normalized coordinates are mirrored about x = 0.5.
            keypoints = tf.cond(
                should_flip,
                lambda: keypoint_flip_horizontal(
                    keypoints, 0.5, keypoint_flip_permutation),
                lambda: keypoints)
            outputs.append(keypoints)

        return tuple(outputs)
def _compute_new_static_size(image, min_dimension, max_dimension):
    """Computes the resize_to_range target shape from the static image shape.

    Args:
      image: tensor whose static shape supplies height, width and channels
        (first three entries of `image.get_shape().as_list()`).
      min_dimension: desired size of the smaller spatial dimension.
      max_dimension: upper bound for the larger spatial dimension; a falsy
        value disables the bound.

    Returns:
      A constant tensor holding [new_height, new_width, num_channels].
    """
    static_shape = image.get_shape().as_list()
    height = static_shape[0]
    width = static_shape[1]
    channels = static_shape[2]

    def _scaled_size(factor):
        # Rounding to the nearest integer absorbs floating point error, so
        # the targeted side lands exactly on the requested dimension for
        # reasonably-sized images.
        return [int(round(height * factor)), int(round(width * factor))]

    # First candidate: scale so the smaller side equals min_dimension.
    target_size = _scaled_size(min_dimension / float(min(height, width)))
    if max_dimension and max(target_size) > max_dimension:
        # The first candidate is too big; scale so the larger side equals
        # max_dimension instead.
        target_size = _scaled_size(max_dimension / float(max(height, width)))
    return tf.constant(target_size + [channels])
def _compute_new_dynamic_size(image, min_dimension, max_dimension):
    """Computes the resize_to_range target shape from the dynamic image shape.

    Args:
      image: tensor whose runtime shape supplies height, width and channels
        (first three entries of `tf.shape(image)`).
      min_dimension: desired size of the smaller spatial dimension.
      max_dimension: upper bound for the larger spatial dimension; a falsy
        value disables the bound.

    Returns:
      A rank 1 tensor stacking [new_height, new_width, num_channels].
    """
    runtime_shape = tf.shape(image)
    height = tf.to_float(runtime_shape[0])
    width = tf.to_float(runtime_shape[1])
    channels = runtime_shape[2]
    shorter_side = tf.minimum(height, width)

    # First candidate: scale so the shorter side equals min_dimension.
    # Rounding to the nearest integer absorbs floating point error.
    min_dimension = tf.constant(min_dimension, dtype=tf.float32)
    upscale = min_dimension / shorter_side
    upscaled_height = tf.to_int32(tf.round(height * upscale))
    upscaled_width = tf.to_int32(tf.round(width * upscale))
    upscaled_size = tf.stack([upscaled_height, upscaled_width])

    if not max_dimension:
        chosen_size = upscaled_size
    else:
        # Second candidate: scale so the longer side equals max_dimension;
        # used only when the first candidate exceeds max_dimension.
        longer_side = tf.maximum(height, width)
        max_dimension = tf.constant(max_dimension, dtype=tf.float32)
        downscale = max_dimension / longer_side
        downscaled_height = tf.to_int32(tf.round(height * downscale))
        downscaled_width = tf.to_int32(tf.round(width * downscale))
        downscaled_size = tf.stack([downscaled_height, downscaled_width])
        chosen_size = tf.cond(
            tf.to_float(tf.reduce_max(upscaled_size)) > max_dimension,
            lambda: downscaled_size, lambda: upscaled_size)
    return tf.stack(tf.unstack(chosen_size) + [channels])
def resize_to_range(image,
                    masks=None,
                    min_dimension=None,
                    max_dimension=None,
                    method=tf.image.ResizeMethod.BILINEAR,
                    align_corners=False,
                    pad_to_max_dimension=False):
    """Resizes an image so its dimensions are within the provided values.

    The output size can be described by two cases:
    1. If the image can be rescaled so its minimum dimension is equal to the
       provided value without the other dimension exceeding max_dimension,
       then do so.
    2. Otherwise, resize so the largest dimension is equal to max_dimension.

    Args:
      image: A 3D tensor of shape [height, width, channels]
      masks: (optional) rank 3 float32 tensor with shape
        [num_instances, height, width] containing instance masks.
      min_dimension: (optional) (scalar) desired size of the smaller image
        dimension.
      max_dimension: (optional) (scalar) maximum allowed size
        of the larger image dimension.
      method: (optional) interpolation method used in resizing. Defaults to
        BILINEAR.
      align_corners: bool. If true, exactly align all 4 corners of the input
        and output. Defaults to False.
      pad_to_max_dimension: Whether to resize the image and pad it with zeros
        so the resulting image is of the spatial size
        [max_dimension, max_dimension]. If masks are included they are padded
        similarly.

    Returns:
      A list. Note that the position of resized_image_shape changes based on
      whether masks are present.
      resized_image: A 3D tensor of shape [new_height, new_width, channels],
        where the image has been resized with `method` so that
        min(new_height, new_width) == min_dimension or
        max(new_height, new_width) == max_dimension.
      resized_masks: If masks is not None, also outputs masks. A 3D tensor of
        shape [num_instances, new_height, new_width].
      resized_image_shape: A 1D tensor of shape [3] containing the shape of
        the resized image (before any padding).

    Raises:
      ValueError: if the image is not a 3D tensor.
    """
    if len(image.get_shape()) != 3:
        raise ValueError('Image should be 3D tensor')

    with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
        if image.get_shape().is_fully_defined():
            new_size = _compute_new_static_size(
                image, min_dimension, max_dimension)
        else:
            new_size = _compute_new_dynamic_size(
                image, min_dimension, max_dimension)
        # new_size is [height, width, channels]; resizing needs only H, W.
        new_image = tf.image.resize_images(
            image, new_size[:-1], method=method, align_corners=align_corners)

        if pad_to_max_dimension:
            new_image = tf.image.pad_to_bounding_box(
                new_image, 0, 0, max_dimension, max_dimension)

        result = [new_image]
        if masks is not None:
            # Add a trailing channel axis so each mask is resized like a
            # one-channel image: [num_instances, height, width, 1].
            new_masks = tf.expand_dims(masks, 3)
            new_masks = tf.image.resize_images(
                new_masks,
                new_size[:-1],
                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                align_corners=align_corners)
            if pad_to_max_dimension:
                # Pad while the masks are still rank 4 so that
                # pad_to_bounding_box treats axes 1 and 2 as height and
                # width. On the squeezed rank 3 tensor it would read the
                # layout as [height, width, channels] and pad the
                # num_instances and height axes instead.
                new_masks = tf.image.pad_to_bounding_box(
                    new_masks, 0, 0, max_dimension, max_dimension)
            new_masks = tf.squeeze(new_masks, 3)
            result.append(new_masks)

        result.append(new_size)
        return result
def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
"""Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to.
Args:
boxlist_to_copy_to: BoxList to which extra fields are copied.
boxlist_to_copy_from: BoxList from which fields are copied.
Returns:
boxlist_to_copy_to with extra fields.
"""
for field in boxlist_to_copy_from.get_extra_fields():
boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field))
return boxlist_to_copy_to
def box_list_scale(boxlist, y_scale, x_scale, scope=None):
    """Scales box coordinates in the y and x dimensions.

    Args:
      boxlist: BoxList holding N boxes
      y_scale: (float) scalar tensor
      x_scale: (float) scalar tensor
      scope: name scope.

    Returns:
      boxlist: a new BoxList holding the N scaled boxes, carrying the extra
        fields of the input BoxList.
    """
    with tf.name_scope(scope, 'Scale'):
        # The scales may arrive as integers (e.g. image sizes); cast them to
        # float32 before multiplying.
        y_factor = tf.cast(y_scale, tf.float32)
        x_factor = tf.cast(x_scale, tf.float32)
        top, left, bottom, right = tf.split(
            value=boxlist.get(), num_or_size_splits=4, axis=1)
        scaled_top = y_factor * top
        scaled_bottom = y_factor * bottom
        scaled_left = x_factor * left
        scaled_right = x_factor * right
        scaled_corners = tf.concat(
            [scaled_top, scaled_left, scaled_bottom, scaled_right], 1)
        result = box_list.BoxList(scaled_corners)
        return _copy_extra_fields(result, boxlist)
def keypoint_scale(keypoints, y_scale, x_scale, scope=None):
    """Scales keypoint coordinates in the y and x dimensions.

    Args:
      keypoints: a tensor of shape [num_instances, num_keypoints, 2]
      y_scale: (float) scalar tensor
      x_scale: (float) scalar tensor
      scope: name scope.

    Returns:
      new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    """
    with tf.name_scope(scope, 'Scale'):
        y_factor = tf.cast(y_scale, tf.float32)
        x_factor = tf.cast(x_scale, tf.float32)
        # A [1, 1, 2] factor broadcasts over instances and keypoints.
        per_axis_factor = [[[y_factor, x_factor]]]
        return keypoints * per_axis_factor
def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
    """Converts boxes (and keypoints) from normalized to pixel coordinates.

    The y coordinates are multiplied by the runtime image height and the x
    coordinates by the runtime image width.

    Args:
      image: A 3D float32 tensor of shape [height, width, channels].
      boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the
        bounding boxes in normalized coordinates. Each row is of the form
        [ymin, xmin, ymax, xmax].
      keypoints: (optional) rank 3 float32 tensor with shape
        [num_instances, num_keypoints, 2]. The keypoints are in y-x
        normalized coordinates.

    Returns:
      A tuple (image, scaled_boxes), extended with scaled_keypoints when
      keypoints is not None:
        image: unchanged input image.
        scaled_boxes: a 2D float32 tensor of shape [num_boxes, 4] containing
          the bounding boxes in pixel coordinates.
        scaled_keypoints: a 3D float32 tensor with shape
          [num_instances, num_keypoints, 2] containing the keypoints in pixel
          coordinates.
    """
    normalized_boxlist = box_list.BoxList(boxes)
    height_px = tf.shape(image)[0]
    width_px = tf.shape(image)[1]
    pixel_boxes = box_list_scale(
        normalized_boxlist, height_px, width_px).get()
    if keypoints is None:
        return (image, pixel_boxes)
    pixel_keypoints = keypoint_scale(keypoints, height_px, width_px)
    return (image, pixel_boxes, pixel_keypoints)
@@ -0,0 +1,135 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Region Similarity Calculators for BoxLists.
Region Similarity Calculators compute a pairwise measure of similarity
between the boxes in two BoxLists.
"""
from abc import ABCMeta
from abc import abstractmethod
import tensorflow as tf
def area(boxlist, scope=None):
    """Computes the area of every box in a BoxList.

    Args:
      boxlist: BoxList holding N boxes
      scope: name scope.

    Returns:
      a tensor with shape [N] representing box areas.
    """
    with tf.name_scope(scope, 'Area'):
        top, left, bottom, right = tf.split(
            value=boxlist.get(), num_or_size_splits=4, axis=1)
        heights = bottom - top
        widths = right - left
        # The split leaves a trailing axis of size 1; drop it to get [N].
        return tf.squeeze(heights * widths, [1])
def intersection(boxlist1, boxlist2, scope=None):
    """Computes pairwise intersection areas between two sets of boxes.

    Args:
      boxlist1: BoxList holding N boxes
      boxlist2: BoxList holding M boxes
      scope: name scope.

    Returns:
      a tensor with shape [N, M] representing pairwise intersections
    """
    with tf.name_scope(scope, 'Intersection'):
        top1, left1, bottom1, right1 = tf.split(
            value=boxlist1.get(), num_or_size_splits=4, axis=1)
        top2, left2, bottom2, right2 = tf.split(
            value=boxlist2.get(), num_or_size_splits=4, axis=1)
        # Columns of shape [N, 1] broadcast against transposed rows of shape
        # [1, M], giving one value per (box1, box2) pair.
        overlap_bottom = tf.minimum(bottom1, tf.transpose(bottom2))
        overlap_top = tf.maximum(top1, tf.transpose(top2))
        # Disjoint boxes give a negative extent; clamp it to zero.
        overlap_heights = tf.maximum(0.0, overlap_bottom - overlap_top)
        overlap_right = tf.minimum(right1, tf.transpose(right2))
        overlap_left = tf.maximum(left1, tf.transpose(left2))
        overlap_widths = tf.maximum(0.0, overlap_right - overlap_left)
        return overlap_heights * overlap_widths
def iou(boxlist1, boxlist2, scope=None):
    """Computes pairwise intersection-over-union between box collections.

    Args:
      boxlist1: BoxList holding N boxes
      boxlist2: BoxList holding M boxes
      scope: name scope.

    Returns:
      a tensor with shape [N, M] representing pairwise iou scores.
    """
    with tf.name_scope(scope, 'IOU'):
        overlap = intersection(boxlist1, boxlist2)
        box_areas1 = area(boxlist1)
        box_areas2 = area(boxlist2)
        # [N, 1] + [1, M] broadcasts to the pairwise sum of areas.
        summed_areas = (
            tf.expand_dims(box_areas1, 1) + tf.expand_dims(box_areas2, 0))
        union = summed_areas - overlap
        # Pairs without overlap are reported as exactly 0 rather than as the
        # result of the division.
        no_overlap = tf.equal(overlap, 0.0)
        return tf.where(
            no_overlap, tf.zeros_like(overlap), tf.truediv(overlap, union))
class RegionSimilarityCalculator(metaclass=ABCMeta):
    """Abstract base class for region similarity calculators.

    Subclasses implement `_compare`; callers use `compare`.

    The metaclass is declared with Python 3 syntax so that `@abstractmethod`
    is actually enforced: a class-body `__metaclass__` attribute is ignored
    on Python 3, which left this base class (and incomplete subclasses)
    instantiable.
    """

    def compare(self, boxlist1, boxlist2, scope=None):
        """Computes a matrix of pairwise similarity between BoxLists.

        This op delegates to `_compare` (to be overridden), which computes a
        measure of pairwise similarity between the boxes in the given
        BoxLists. Higher values indicate more similarity.

        Note that this method simply measures similarity and does not
        explicitly perform a matching.

        Args:
          boxlist1: BoxList holding N boxes.
          boxlist2: BoxList holding M boxes.
          scope: Op scope name. Defaults to 'Compare' if None.

        Returns:
          a (float32) tensor of shape [N, M] with pairwise similarity score.
        """
        with tf.name_scope(scope, 'Compare', [boxlist1, boxlist2]):
            return self._compare(boxlist1, boxlist2)

    @abstractmethod
    def _compare(self, boxlist1, boxlist2):
        """Computes the pairwise similarity; implemented by subclasses.

        Args:
          boxlist1: BoxList holding N boxes.
          boxlist2: BoxList holding M boxes.

        Returns:
          a tensor of shape [N, M] with pairwise similarity scores.
        """
class IouSimilarity(RegionSimilarityCalculator):
    """Similarity calculator based on Intersection over Union (IOU).

    Pairwise similarity between two BoxLists is their pairwise IOU.
    """

    def _compare(self, boxlist1, boxlist2):
        """Computes pairwise IOU similarity between the two BoxLists.

        Args:
          boxlist1: BoxList holding N boxes.
          boxlist2: BoxList holding M boxes.

        Returns:
          A tensor with shape [N, M] representing pairwise iou scores.
        """
        pairwise_iou = iou(boxlist1, boxlist2)
        return pairwise_iou
@@ -0,0 +1,70 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils used to manipulate tensor shapes."""
import tensorflow as tf
def assert_shape_equal(shape_a, shape_b):
    """Asserts that shape_a and shape_b are equal.

    When every dimension of both shapes is a Python int the comparison is
    done immediately and a mismatch raises ValueError. Otherwise the check is
    deferred to a `tf.assert_equal` op.

    Args:
      shape_a: a list containing shape of the first tensor.
      shape_b: a list containing shape of the second tensor.

    Returns:
      A tf.no_op() when the shapes are all static and equal, or a
      tf.assert_equal() op when any dimension is dynamic.

    Raises:
      ValueError: When shapes are both static and unequal.
    """
    def _is_fully_static(shape):
        return all(isinstance(dim, int) for dim in shape)

    if not (_is_fully_static(shape_a) and _is_fully_static(shape_b)):
        return tf.assert_equal(shape_a, shape_b)
    if shape_a != shape_b:
        raise ValueError('Unequal shapes {}, {}'.format(shape_a, shape_b))
    return tf.no_op()
def combined_static_and_dynamic_shape(tensor):
    """Returns the shape of `tensor`, static where known and dynamic otherwise.

    Each dimension is reported as a Python int when the static shape knows
    it, and as a scalar slice of `tf.shape(tensor)` when it does not. This is
    useful to preserve static shapes when available in reshape operations.

    Args:
      tensor: A tensor of any type.

    Returns:
      A list of size tensor.shape.ndims containing integers or scalar
      tensors.
    """
    static_dims = tensor.shape.as_list()
    dynamic_dims = tf.shape(tensor)
    return [
        dynamic_dims[axis] if size is None else size
        for axis, size in enumerate(static_dims)
    ]
@@ -0,0 +1,310 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base target assigner module.
The job of a TargetAssigner is, for a given set of anchors (bounding boxes) and
groundtruth detections (bounding boxes), to assign classification and regression
targets to each anchor as well as weights to each anchor (specifying, e.g.,
which anchors should not contribute to training loss).
It assigns classification/regression targets by performing the following steps:
1) Computing pairwise similarity between anchors and groundtruth boxes using a
provided RegionSimilarityCalculator
2) Computing a matching based on the similarity matrix using a provided Matcher
3) Assigning regression targets based on the matching and a provided BoxCoder
4) Assigning classification targets based on the matching and groundtruth labels
Note that TargetAssigners only operate on detections from a single
image at a time, so any logic for applying a TargetAssigner to multiple
images must be handled externally.
"""
import tensorflow as tf
from object_detection import box_list
from object_detection import shape_utils
KEYPOINTS_FIELD_NAME = 'keypoints'
class TargetAssigner(object):
"""Target assigner to compute classification and regression targets."""
def __init__(self, similarity_calc, matcher, box_coder,
negative_class_weight=1.0, unmatched_cls_target=None):
"""Construct Object Detection Target Assigner.
Args:
similarity_calc: a RegionSimilarityCalculator
matcher: Matcher used to match groundtruth to anchors.
box_coder: BoxCoder used to encode matching groundtruth boxes with
respect to anchors.
negative_class_weight: classification weight to be associated to negative
anchors (default: 1.0). The weight must be in [0., 1.].
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
which is consistent with the classification target for each
anchor (and can be empty for scalar targets). This shape must thus be
compatible with the groundtruth labels that are passed to the "assign"
function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
If set to None, unmatched_cls_target is set to be [0] for each anchor.
Raises:
ValueError: if similarity_calc is not a RegionSimilarityCalculator or
if matcher is not a Matcher or if box_coder is not a BoxCoder
"""
self._similarity_calc = similarity_calc
self._matcher = matcher
self._box_coder = box_coder
self._negative_class_weight = negative_class_weight
if unmatched_cls_target is None:
self._unmatched_cls_target = tf.constant([0], tf.float32)
else:
self._unmatched_cls_target = unmatched_cls_target
@property
def box_coder(self):
return self._box_coder
def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None,
groundtruth_weights=None, **params):
"""Assign classification and regression targets to each anchor.
For a given set of anchors and groundtruth detections, match anchors
to groundtruth_boxes and assign classification and regression targets to
each anchor as well as weights based on the resulting match (specifying,
e.g., which anchors should not contribute to training loss).
Anchors that are not matched to anything are given a classification target
of self._unmatched_cls_target which can be specified via the constructor.
Two shape assertions are built first (label sub-shape against the unmatched
class target, and number of labels against number of groundtruth boxes) and
are passed to tf.control_dependencies ahead of the matching computation.
Args:
anchors: a BoxList representing N anchors
groundtruth_boxes: a BoxList representing M groundtruth boxes
groundtruth_labels: a tensor of shape [M, d_1, ... d_k]
with labels for each of the ground_truth boxes. The subshape
[d_1, ... d_k] can be empty (corresponding to scalar inputs). When set
to None, groundtruth_labels assumes a binary problem where all
ground_truth boxes get a positive label (of 1).
groundtruth_weights: a float tensor of shape [M] indicating the weight to
assign to all anchors matched to a particular groundtruth box. The weights
must be in [0., 1.]. If None, all weights are set to 1.
**params: Additional keyword arguments for specific implementations of
the Matcher.
Returns:
cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
which has shape [num_gt_boxes, d_1, d_2, ... d_k].
cls_weights: a float32 tensor with shape [num_anchors]
reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
reg_weights: a float32 tensor with shape [num_anchors]
match: a matcher.Match object encoding the match between anchors and
groundtruth boxes, with rows corresponding to groundtruth boxes
and columns corresponding to anchors.
Raises:
ValueError: if anchors or groundtruth_boxes are not of type
box_list.BoxList
"""
if not isinstance(anchors, box_list.BoxList):
raise ValueError('anchors must be an BoxList')
if not isinstance(groundtruth_boxes, box_list.BoxList):
raise ValueError('groundtruth_boxes must be an BoxList')
# Default labels: a tensor of ones with one entry per groundtruth box, given a
# trailing axis of size 1.
if groundtruth_labels is None:
groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(),
0))
groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
# The per-box label shape (everything after the first axis) must equal the
# shape of the unmatched class target.
unmatched_shape_assert = shape_utils.assert_shape_equal(
shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[1:],
shape_utils.combined_static_and_dynamic_shape(
self._unmatched_cls_target))
# There must be exactly one label per groundtruth box.
labels_and_box_shapes_assert = shape_utils.assert_shape_equal(
shape_utils.combined_static_and_dynamic_shape(
groundtruth_labels)[:1],
shape_utils.combined_static_and_dynamic_shape(
groundtruth_boxes.get())[:1])
# Default weights: ones, one per groundtruth box. The static box count is
# preferred; the dynamic count is used when the static one is falsy
# (None or 0).
if groundtruth_weights is None:
num_gt_boxes = groundtruth_boxes.num_boxes_static()
if not num_gt_boxes:
num_gt_boxes = groundtruth_boxes.num_boxes()
groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)
# NOTE(review): block extent is not visible in this copy; presumably the
# similarity, match, target and weight computations below all sit inside this
# control_dependencies context - confirm against the original file.
with tf.control_dependencies(
[unmatched_shape_assert, labels_and_box_shapes_assert]):
# Groundtruth boxes are passed first, so they index the rows of the
# similarity matrix and anchors index its columns.
match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
anchors)
match = self._matcher.match(match_quality_matrix, **params)
reg_targets = self._create_regression_targets(anchors,
groundtruth_boxes,
match)
cls_targets = self._create_classification_targets(groundtruth_labels,
match)
reg_weights = self._create_regression_weights(match, groundtruth_weights)
cls_weights = self._create_classification_weights(match,
groundtruth_weights)
# When the anchor count is known statically, record it as the first dimension
# of every output's static shape.
num_anchors = anchors.num_boxes_static()
if num_anchors is not None:
reg_targets = self._reset_target_shape(reg_targets, num_anchors)
cls_targets = self._reset_target_shape(cls_targets, num_anchors)
reg_weights = self._reset_target_shape(reg_weights, num_anchors)
cls_weights = self._reset_target_shape(cls_weights, num_anchors)
return cls_targets, cls_weights, reg_targets, reg_weights, match
def _reset_target_shape(self, target, num_anchors):
  """Overwrites the leading static dimension of `target`.

  Args:
    target: the target tensor. Its static shape is updated in place through
      `set_shape`.
    num_anchors: the number of anchors, recorded as the static size of the
      first dimension of `target`.

  Returns:
    The same `target` object with the shape info filled in.
  """
  static_dims = target.get_shape().as_list()
  # Only the first entry is replaced; the remaining dims are kept as reported.
  static_dims[0] = num_anchors
  target.set_shape(static_dims)
  return target
def _create_regression_targets(self, anchors, groundtruth_boxes, match):
  """Builds the box regression target for every anchor.

  Args:
    anchors: a BoxList representing N anchors
    groundtruth_boxes: a BoxList representing M groundtruth_boxes
    match: a matcher.Match object

  Returns:
    reg_targets: a float32 tensor with shape [N, box_code_dimension]
  """
  # Gather the groundtruth box selected for each anchor; anchors that are
  # unmatched or ignored receive an all-zero placeholder box.
  gt_per_anchor = box_list.BoxList(
      match.gather_based_on_match(
          groundtruth_boxes.get(),
          unmatched_value=tf.zeros(4),
          ignored_value=tf.zeros(4)))

  # Carry keypoints along when the groundtruth provides them.
  if groundtruth_boxes.has_field(KEYPOINTS_FIELD_NAME):
    keypoints = groundtruth_boxes.get_field(KEYPOINTS_FIELD_NAME)
    per_box_shape = keypoints.get_shape()[1:]
    gathered_keypoints = match.gather_based_on_match(
        keypoints,
        unmatched_value=tf.zeros(per_box_shape),
        ignored_value=tf.zeros(per_box_shape))
    gt_per_anchor.add_field(KEYPOINTS_FIELD_NAME, gathered_keypoints)

  encoded_targets = self._box_coder.encode(gt_per_anchor, anchors)

  # Zero out the unmatched and ignored regression targets.
  num_rows = shape_utils.combined_static_and_dynamic_shape(
      match.match_results)[0]
  default_targets = tf.tile(self._default_regression_target(), [num_rows, 1])
  return tf.where(match.matched_column_indicator(),
                  encoded_targets,
                  default_targets)
def _default_regression_target(self):
  """Returns the all-zero target used for anchors without a regression target.

  The caller tiles this single row to cover every anchor and then selects it
  for anchors that are not matched.

  Returns:
    default_target: a float32 tensor with shape [1, box_code_dimension]
  """
  zero_row = self._box_coder.code_size * [0]
  return tf.constant([zero_row], tf.float32)
def _create_classification_targets(self, groundtruth_labels, match):
  """Creates the classification target for every anchor.

  Each matched anchor receives the label of the groundtruth box it is matched
  to. Anchors that are unmatched or ignored both receive
  `self._unmatched_cls_target`.

  Args:
    groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k]
      with labels for each of the ground_truth boxes. The subshape
      [d_1, ... d_k] can be empty (corresponding to scalar labels).
    match: a matcher.Match object that provides a matching between anchors
      and groundtruth boxes.

  Returns:
    a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], where the
    subshape [d_1, ..., d_k] is compatible with groundtruth_labels which has
    shape [num_gt_boxes, d_1, d_2, ... d_k].
  """
  background_target = self._unmatched_cls_target
  return match.gather_based_on_match(
      groundtruth_labels,
      unmatched_value=background_target,
      ignored_value=background_target)
def _create_regression_weights(self, match, groundtruth_weights):
  """Sets the regression weight for every anchor.

  A matched anchor takes the weight of the groundtruth box it is matched to;
  unmatched and ignored anchors get a weight of 0, so only matched anchors
  contribute to the regression loss.

  Args:
    match: a matcher.Match object that provides a matching between anchors
      and groundtruth boxes.
    groundtruth_weights: a float tensor of shape [M] indicating the weight to
      assign to all anchors match to a particular groundtruth box.

  Returns:
    a float32 tensor with shape [num_anchors] representing regression weights.
  """
  no_weight = 0.
  return match.gather_based_on_match(
      groundtruth_weights,
      unmatched_value=no_weight,
      ignored_value=no_weight)
def _create_classification_weights(self, match, groundtruth_weights):
  """Creates the classification weight for every anchor.

  A matched anchor takes the weight of the groundtruth box it is matched to,
  an unmatched anchor takes `self._negative_class_weight`, and an ignored
  anchor gets a weight of zero.

  Args:
    match: a matcher.Match object that provides a matching between anchors
      and groundtruth boxes.
    groundtruth_weights: a float tensor of shape [M] indicating the weight to
      assign to all anchors match to a particular groundtruth box.

  Returns:
    a float32 tensor with shape [num_anchors] representing classification
    weights.
  """
  negative_weight = self._negative_class_weight
  return match.gather_based_on_match(
      groundtruth_weights,
      unmatched_value=negative_weight,
      ignored_value=0.)
def get_box_coder(self):
  """Returns the box coder held by this object.

  Returns:
    The object stored in `self._box_coder`.
  """
  box_coder = self._box_coder
  return box_coder
@@ -0,0 +1,210 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tensorflow Example proto decoder for object detection.
A decoder to decode string tensors containing serialized tensorflow.Example
protos for object detection.
"""
import tensorflow as tf
slim_example_decoder = tf.contrib.slim.tfexample_decoder
class TfExampleDecoder(object):
  """Decodes serialized tf.Example protos carrying detection annotations."""

  def __init__(self):
    """Sets up `keys_to_features` and `items_to_handlers`."""
    fixed = tf.FixedLenFeature
    var_len = tf.VarLenFeature
    tensor_handler = slim_example_decoder.Tensor

    # Raw proto keys and how each one is parsed.
    self.keys_to_features = {
        # Per-image scalar fields.
        'image/encoded': fixed((), tf.string, default_value=''),
        'image/format': fixed((), tf.string, default_value='jpeg'),
        'image/filename': fixed((), tf.string, default_value=''),
        'image/key/sha256': fixed((), tf.string, default_value=''),
        'image/source_id': fixed((), tf.string, default_value=''),
        'image/height': fixed((), tf.int64, default_value=1),
        'image/width': fixed((), tf.int64, default_value=1),
        # Object boxes and classes.
        'image/object/bbox/xmin': var_len(tf.float32),
        'image/object/bbox/xmax': var_len(tf.float32),
        'image/object/bbox/ymin': var_len(tf.float32),
        'image/object/bbox/ymax': var_len(tf.float32),
        'image/object/class/label': var_len(tf.int64),
        'image/object/class/text': var_len(tf.string),
        'image/object/area': var_len(tf.float32),
        'image/object/is_crowd': var_len(tf.int64),
        'image/object/difficult': var_len(tf.int64),
        'image/object/group_of': var_len(tf.int64),
        'image/object/weight': var_len(tf.float32),
    }

    # Output item names and the handler producing each one.
    self.items_to_handlers = {
        'image': slim_example_decoder.Image(
            image_key='image/encoded',
            format_key='image/format',
            channels=3),
        'source_id': tensor_handler('image/source_id'),
        'key': tensor_handler('image/key/sha256'),
        'filename': tensor_handler('image/filename'),
        # Object boxes and classes.
        'groundtruth_boxes': slim_example_decoder.BoundingBox(
            ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
        'groundtruth_area': tensor_handler('image/object/area'),
        'groundtruth_is_crowd': tensor_handler('image/object/is_crowd'),
        'groundtruth_difficult': tensor_handler('image/object/difficult'),
        'groundtruth_group_of': tensor_handler('image/object/group_of'),
        'groundtruth_weights': tensor_handler('image/object/weight'),
        'groundtruth_classes': tensor_handler('image/object/class/label'),
    }

  def decode(self, tf_example_string_tensor):
    """Decodes serialized tensorflow example and returns a tensor dictionary.

    Args:
      tf_example_string_tensor: a string tensor holding a serialized tensorflow
        example proto.

    Returns:
      A dictionary of the following tensors.
      image - 3D uint8 tensor of shape [None, None, 3] containing image.
      source_id - string tensor containing original image id.
      key - string tensor with unique sha256 hash key.
      filename - string tensor with original dataset filename.
      groundtruth_boxes - 2D float32 tensor of shape [None, 4] containing
        box corners.
      groundtruth_classes - 1D int64 tensor of shape [None] containing
        classes for the boxes.
      groundtruth_weights - 1D float32 tensor of shape [None] indicating the
        weights of groundtruth boxes. All ones (one per box) when the example
        stores no weights.
      groundtruth_area - 1D float32 tensor of shape [None] containing object
        mask area in pixel squared.
      groundtruth_is_crowd - 1D bool tensor of shape [None] indicating if the
        boxes enclose a crowd.
      groundtruth_difficult - 1D int64 tensor of shape [None] indicating if
        the boxes represent `difficult` instances.
      groundtruth_group_of - 1D int64 tensor of shape [None] indicating if
        the boxes represent `group_of` instances.
    """
    scalar_example = tf.reshape(tf_example_string_tensor, shape=[])
    proto_decoder = slim_example_decoder.TFExampleDecoder(
        self.keys_to_features, self.items_to_handlers)
    item_names = sorted(proto_decoder.list_items())
    decoded = proto_decoder.decode(scalar_example, items=item_names)
    tensor_dict = dict(zip(item_names, decoded))

    # The crowd flag is parsed as int64; expose it as a boolean tensor.
    tensor_dict['groundtruth_is_crowd'] = tf.cast(
        tensor_dict['groundtruth_is_crowd'], dtype=tf.bool)
    tensor_dict['image'].set_shape([None, None, 3])

    boxes = tensor_dict['groundtruth_boxes']
    stored_weights = tensor_dict['groundtruth_weights']

    def _unit_weights():
      # One weight of 1.0 per groundtruth box.
      return tf.ones(tf.shape(boxes)[0], dtype=tf.float32)

    # Fall back to unit weights when the example carries no weight values.
    has_stored_weights = tf.greater(tf.shape(stored_weights)[0], 0)
    tensor_dict['groundtruth_weights'] = tf.cond(
        has_stored_weights, lambda: stored_weights, _unit_weights)
    return tensor_dict
class TfExampleSegmentationDecoder(object):
  """Decodes serialized tf.Example protos carrying segmentation labels."""

  def __init__(self):
    """Constructor sets keys_to_features and items_to_handlers."""
    # Raw proto keys and how each one is parsed.
    self.keys_to_features = {
        'image/encoded':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/filename':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
            tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/height':
            tf.FixedLenFeature((), tf.int64, default_value=0),
        'image/width':
            tf.FixedLenFeature((), tf.int64, default_value=0),
        'image/segmentation/class/encoded':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/segmentation/class/format':
            tf.FixedLenFeature((), tf.string, default_value='png'),
    }
    # Output item names and the handler producing each one.
    self.items_to_handlers = {
        'image': slim_example_decoder.Image(
            image_key='image/encoded', format_key='image/format', channels=3),
        'labels_class': slim_example_decoder.Image(
            image_key='image/segmentation/class/encoded',
            format_key='image/segmentation/class/format',
            channels=1)
    }

  def decode(self, tf_example_string_tensor):
    """Decodes serialized tensorflow example and returns a tensor dictionary.

    Args:
      tf_example_string_tensor: a string tensor holding a serialized tensorflow
        example proto.

    Returns:
      A dictionary of the following tensors.
      image - 3D uint8 tensor of shape [None, None, 3] containing image.
      labels_class - uint8 tensor containing pixel-wise class labels.
        NOTE(review): decoded through an Image handler with channels=1, so
        it presumably carries a trailing channel dimension - confirm.
    """
    serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
    decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features,
                                                    self.items_to_handlers)
    # The requested items are fixed, so the decoder is not asked for its item
    # list (the previous code computed it and immediately discarded it).
    keys = ['image', 'labels_class']
    tensors = decoder.decode(serialized_example, items=keys)
    tensor_dict = dict(zip(keys, tensors))
    tensor_dict['image'].set_shape([None, None, 3])
    return tensor_dict
@@ -0,0 +1,44 @@
#!/bin/bash
# Launches SSD-ResNet34 training on a single NPU device (RANK_SIZE=1).
# Run it from the directory that contains exec_main.sh: the script copies that
# file by relative path into a per-device working directory.

# Clean slog.
rm -rf /var/log/npu/slog/host-0/*.log
rm -rf /var/log/npu/slog/device-*/*.log

# Set env.
export PYTHONPATH=/usr/local/Ascend/ops/op_impl/built-in/ai_core/tbe
export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu
# Append each extra directory once; the search order is unchanged.
export PATH="$PATH:$HOME/bin:/usr/local/Ascend/fwkacllib/ccec_compiler/bin"
export ASCEND_OPP_PATH=/usr/local/Ascend/opp
export DDK_VERSION_FLAG=1.71.T5.0.B060
export NEW_GE_FE_ID=1
export GE_AICPU_FLAG=1
export SOC_VERSION=Ascend910
export DUMP_GE_GRAPH=1
export DUMP_GRAPH_LEVEL=3
export PRINT_MODEL=1
export SLOG_PRINT_TO_STDOUT=1
export RANK_SIZE=1

# NOTE(review): this launcher starts at id 1 while the 8-device launcher starts
# at 0 -- confirm that device 1 is the intended target.
RANK_ID_START=1
SAVE_PATH=training
BASE_PATH="$(pwd)"
echo "$BASE_PATH"

for ((RANK_ID = RANK_ID_START; RANK_ID < RANK_SIZE + RANK_ID_START; RANK_ID++)); do
  echo
  su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device $RANK_ID"

  # Each rank runs from its own directory so logs and outputs stay separate.
  TMP_PATH="$SAVE_PATH/D$RANK_ID"
  mkdir -p "$TMP_PATH"
  cp exec_main.sh "$TMP_PATH/"

  # Abort instead of launching training from the wrong directory.
  cd "$TMP_PATH" || exit 1
  bash exec_main.sh "$RANK_ID" "$RANK_SIZE" "$BASE_PATH" > "train_$RANK_ID.log" &
  cd - || exit 1
done
@@ -0,0 +1,41 @@
#!/bin/bash
# Launches SSD-ResNet34 training on eight NPU devices (RANK_SIZE=8), one
# background process per device.
# Run it from the directory that contains exec_main.sh and npu_config/: both
# are resolved relative to the current directory.

# Clean slog.
rm -rf /var/log/npu/slog/host-0/*.log
rm -rf /var/log/npu/slog/device-*/*.log

# Set env.
export PYTHONPATH=/usr/local/Ascend/ops/op_impl/built-in/ai_core/tbe/
export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu
# Append each extra directory once; the search order is unchanged.
export PATH="$PATH:$HOME/bin:/usr/local/Ascend/fwkacllib/ccec_compiler/bin"
export ASCEND_OPP_PATH=/usr/local/Ascend/opp
export DDK_VERSION_FLAG=1.71.T5.0.B060
export NEW_GE_FE_ID=1
export GE_AICPU_FLAG=1
export SOC_VERSION=Ascend910
export DUMP_GE_GRAPH=1
export DUMP_GRAPH_LEVEL=3
export PRINT_MODEL=1
export SLOG_PRINT_TO_STDOUT=1
export RANK_SIZE=8
export RANK_TABLE_FILE="${PWD}/npu_config/${RANK_SIZE}p.json"

RANK_ID_START=0
BASE_PATH="$(pwd)"
SAVE_PATH=training

for ((RANK_ID = RANK_ID_START; RANK_ID < RANK_SIZE + RANK_ID_START; RANK_ID++)); do
  echo
  su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device $RANK_ID"

  # Each rank runs from its own directory so logs and outputs stay separate.
  TMP_PATH="$SAVE_PATH/D$RANK_ID"
  mkdir -p "$TMP_PATH"
  cp exec_main.sh "$TMP_PATH/"

  # Abort instead of launching training from the wrong directory.
  cd "$TMP_PATH" || exit 1
  nohup bash exec_main.sh "$RANK_ID" "$RANK_SIZE" "$BASE_PATH" > "train_$RANK_ID.log" &
  cd - || exit 1
done
@@ -0,0 +1,484 @@
# Copyright 2018 Google. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSD (via ResNet34) model definition.
Defines the SSD model and loss functions from this paper:
https://arxiv.org/pdf/1512.02325
Uses the ResNet model as a basis.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import ssd_constants
def batch_norm_relu(inputs,
                    is_training_bn,
                    params,
                    relu=True,
                    init_zero=False,
                    data_format='channels_last',
                    name=None):
  """Applies batch normalization, optionally followed by a ReLU.

  Args:
    inputs: `Tensor` laid out according to `data_format`.
    is_training_bn: `bool` for whether the model is training.
    params: params of the model, a dict. Not read by this function.
    relu: `bool` if False, omits the ReLU operation.
    init_zero: `bool` if True, initializes scale parameter of batch
      normalization with 0 instead of 1 (default).
    data_format: `str` either "channels_first" for `[batch, channels, height,
      width]` or "channels_last" for `[batch, height, width, channels]`.
    name: the name of the batch normalization layer

  Returns:
    A normalized `Tensor` with the same `data_format`.
  """
  scale_initializer = (
      tf.zeros_initializer() if init_zero else tf.ones_initializer())
  # Normalize over the channel dimension of the chosen layout.
  channel_axis = 1 if data_format == 'channels_first' else 3

  normalized = tf.layers.batch_normalization(
      inputs=inputs,
      axis=channel_axis,
      momentum=ssd_constants.BATCH_NORM_DECAY,
      epsilon=ssd_constants.BATCH_NORM_EPSILON,
      center=True,
      scale=True,
      training=is_training_bn,
      fused=True,
      gamma_initializer=scale_initializer,
      name=name)

  if not relu:
    return normalized
  return tf.nn.relu(normalized)
def fixed_padding(inputs, kernel_size, data_format='channels_last'):
  """Pads the two spatial dimensions by an amount that depends only on the kernel.

  Args:
    inputs: `Tensor` of size `[batch, channels, height, width]` or
      `[batch, height, width, channels]` depending on `data_format`.
    kernel_size: `int` kernel size to be used for `conv2d` or `max_pool2d`
      operations. Should be a positive integer.
    data_format: `str` either "channels_first" for `[batch, channels, height,
      width]` or "channels_last" for `[batch, height, width, channels]`.

  Returns:
    A padded `Tensor` of the same `data_format` with size either intact
    (if `kernel_size == 1`) or padded (if `kernel_size > 1`).
  """
  total = kernel_size - 1
  before = total // 2
  after = total - before  # Receives the extra element when `total` is odd.

  untouched = [0, 0]
  spatial = [before, after]
  if data_format == 'channels_first':
    paddings = [untouched, untouched, spatial, spatial]
  else:
    paddings = [untouched, spatial, spatial, untouched]
  return tf.pad(inputs, paddings)
def conv2d_fixed_padding(inputs,
                         filters,
                         kernel_size,
                         strides,
                         data_format='channels_last'):
  """Bias-free 2-D convolution whose padding depends only on `kernel_size`.

  When `strides` is greater than 1 the input is padded explicitly with
  `fixed_padding` and the convolution itself runs with 'VALID' padding;
  with a stride of exactly 1 the convolution uses 'SAME' padding.

  Args:
    inputs: `Tensor` laid out according to `data_format`.
    filters: `int` number of filters in the convolution.
    kernel_size: `int` size of the kernel to be used in the convolution.
    strides: `int` strides of the convolution.
    data_format: `str` either "channels_first" for `[batch, channels, height,
      width]` or "channels_last" for `[batch, height, width, channels]`.

  Returns:
    The output `Tensor` of the convolution.
  """
  conv_input = inputs
  if strides > 1:
    conv_input = fixed_padding(inputs, kernel_size, data_format=data_format)
  padding_mode = 'SAME' if strides == 1 else 'VALID'

  return tf.layers.conv2d(
      inputs=conv_input,
      filters=filters,
      kernel_size=kernel_size,
      strides=strides,
      padding=padding_mode,
      use_bias=False,
      kernel_initializer=tf.variance_scaling_initializer(),
      data_format=data_format)
def residual_block(inputs,
                   filters,
                   is_training_bn,
                   strides,
                   params,
                   use_projection=False,
                   data_format='channels_last'):
  """Two-convolution residual block with BN after each convolution.

  Args:
    inputs: `Tensor` laid out according to `data_format`.
    filters: `int` number of filters used by both 3x3 convolutions and by
      the projection shortcut.
    is_training_bn: `bool` for whether the model is in training.
    strides: `int` block stride. If greater than 1, this block will ultimately
      downsample the input.
    params: params of the model, a dict.
    use_projection: `bool` for whether this block should use a projection
      shortcut (versus the default identity shortcut). This is usually `True`
      for the first block of a block group, which may change the number of
      filters and the resolution.
    data_format: `str` either "channels_first" for `[batch, channels, height,
      width]` or "channels_last" for `[batch, height, width, channels]`.

  Returns:
    The output `Tensor` of the block.
  """
  # The shortcut layers are created before the main branch; the layers are
  # unnamed, so creation order must stay as it is.
  if use_projection:
    # Projection shortcut in first layer to match filters and strides
    projected = conv2d_fixed_padding(
        inputs=inputs,
        filters=filters,
        kernel_size=1,
        strides=strides,
        data_format=data_format)
    shortcut = batch_norm_relu(
        projected, is_training_bn, params, relu=False, data_format=data_format)
  else:
    shortcut = inputs

  branch = conv2d_fixed_padding(
      inputs=inputs,
      filters=filters,
      kernel_size=3,
      strides=strides,
      data_format=data_format)
  branch = batch_norm_relu(
      branch, is_training_bn, params, data_format=data_format)

  branch = conv2d_fixed_padding(
      inputs=branch,
      filters=filters,
      kernel_size=3,
      strides=1,
      data_format=data_format)
  # The last BN has no ReLU and a zero-initialized scale; the ReLU is applied
  # after the shortcut is added.
  branch = batch_norm_relu(
      branch,
      is_training_bn,
      params,
      relu=False,
      init_zero=True,
      data_format=data_format)

  return tf.nn.relu(branch + shortcut)
def block_group(inputs,
                filters,
                block_fn,
                blocks,
                strides,
                is_training_bn,
                name,
                params,
                data_format='channels_last',
                use_projection=True):
  """Stacks `blocks` residual blocks and names the result.

  Args:
    inputs: `Tensor` laid out according to `data_format`.
    filters: `int` number of filters passed to every block.
    block_fn: `function` for the block to use within the model
    blocks: `int` number of blocks contained in the layer.
    strides: `int` stride to use for the first block of the layer. If
      greater than 1, this layer will downsample the input.
    is_training_bn: `bool` for whether the model is training.
    name: `str` name for the Tensor output of the block layer.
    params: params of the model, a dict.
    data_format: `str` either "channels_first" for `[batch, channels, height,
      width]` or "channels_last" for `[batch, height, width, channels]`.
    use_projection: `bool` for whether the first block should use a projection
      shortcut (versus the default identity shortcut).

  Returns:
    The output `Tensor` of the block layer.
  """
  # Only the first block per block_group uses projection shortcut and strides.
  net = block_fn(
      inputs,
      filters,
      is_training_bn,
      strides,
      params,
      use_projection=use_projection,
      data_format=data_format)

  # Every following block runs with stride 1 and block_fn's default shortcut.
  following_blocks = blocks - 1
  for _ in range(following_blocks):
    net = block_fn(
        net, filters, is_training_bn, 1, params, data_format=data_format)

  return tf.identity(net, name)
def resnet_v1_generator(block_fn, layers, params, data_format='channels_last'):
  """Generator of ResNet v1 model with classification layers removed.

  The returned model builds an initial 7x7 convolution, batch norm + ReLU and
  a max pool, followed by three block groups whose outputs (c2, c3, c4) are
  returned. Only the first three entries of `layers` are read.

  Args:
    block_fn: `function` for the block to use within the model.
    layers: list of `int`s denoting the number of blocks to include in each
      block group.
    params: params of the model, a dict.
    data_format: `str` either "channels_first" for `[batch, channels, height,
      width]` or "channels_last" for `[batch, height, width, channels]`.

  Returns:
    Model `function` that takes in `inputs` and `is_training_bn` and returns
    the tuple `(c2, c3, c4)` of block-group output `Tensor`s.
  """

  def model(inputs, is_training_bn=False):
    """Creation of the model graph."""
    stem = conv2d_fixed_padding(
        inputs=inputs,
        filters=64,
        kernel_size=7,
        strides=2,
        data_format=data_format)
    stem = tf.identity(stem, 'initial_conv')
    stem = batch_norm_relu(
        stem, is_training_bn, params, data_format=data_format)

    pooled = tf.layers.max_pooling2d(
        inputs=stem,
        pool_size=3,
        strides=2,
        padding='SAME',
        data_format=data_format)
    pooled = tf.identity(pooled, 'initial_max_pool')

    # Arguments shared by every block group.
    shared = dict(
        block_fn=block_fn,
        is_training_bn=is_training_bn,
        params=params,
        data_format=data_format)

    c2 = block_group(
        inputs=pooled,
        filters=64,
        blocks=layers[0],
        strides=1,
        name='block_group1',
        use_projection=False,
        **shared)
    c3 = block_group(
        inputs=c2,
        filters=128,
        blocks=layers[1],
        strides=2,
        name='block_group2',
        **shared)
    # The third group runs with stride 1, so it does not downsample.
    c4 = block_group(
        inputs=c3,
        filters=256,
        blocks=layers[2],
        strides=1,
        name='block_group3',
        **shared)
    return c2, c3, c4

  return model
def resnet_v1(resnet_depth, params, data_format='channels_last'):
  """Returns the ResNet model function for a given depth.

  Args:
    resnet_depth: `int` depth of the network. Only 34 is supported.
    params: params of the model, a dict; forwarded to the generator.
    data_format: `str` either "channels_first" or "channels_last".

  Returns:
    The model `function` built by `resnet_v1_generator`.

  Raises:
    ValueError: if `resnet_depth` is not a supported depth.
  """
  model_params = {
      34: {'block': residual_block, 'layers': [3, 4, 6, 3]}
  }
  if resnet_depth not in model_params:
    # Format the depth into the message; passing it as a second exception
    # argument made the error render as a tuple.
    raise ValueError('Not a valid resnet_depth: %s' % (resnet_depth,))
  resnet_params = model_params[resnet_depth]
  return resnet_v1_generator(resnet_params['block'], resnet_params['layers'],
                             params, data_format)
def class_net(images, level, num_classes):
  """Class prediction network for SSD.

  Args:
    images: feature map `Tensor` for this level.
    level: `int` feature level; selects the number of default boxes and the
      layer name.
    num_classes: `int` number of classes predicted per default box.

  Returns:
    Output of a 3x3 'same' convolution with
    `num_classes * NUM_DEFAULTS_BY_LEVEL[level]` filters and no activation.
  """
  defaults_at_level = ssd_constants.NUM_DEFAULTS_BY_LEVEL[level]
  layer_name = 'class-%d' % level
  return tf.layers.conv2d(
      images,
      num_classes * defaults_at_level,
      kernel_size=(3, 3),
      padding='same',
      activation=None,
      name=layer_name,
  )
def box_net(images, level):
  """Box regression network for SSD.

  Args:
    images: feature map `Tensor` for this level.
    level: `int` feature level; selects the number of default boxes and the
      layer name.

  Returns:
    Output of a 3x3 'same' convolution with
    `4 * NUM_DEFAULTS_BY_LEVEL[level]` filters and no activation.
  """
  defaults_at_level = ssd_constants.NUM_DEFAULTS_BY_LEVEL[level]
  layer_name = 'box-%d' % level
  return tf.layers.conv2d(
      images,
      4 * defaults_at_level,
      kernel_size=(3, 3),
      padding='same',
      activation=None,
      name=layer_name,
  )
def ssd(features, params, is_training_bn=False):
"""SSD classification and regression model.

Builds the ResNet backbone, five extra feature levels on top of its last
output, and one class head and one box head per feature level.

Args:
features: input `Tensor` fed to the ResNet backbone.
params: params of the model, a dict; forwarded to `resnet_v1`.
is_training_bn: `bool` forwarded to the backbone to select batch norm
training mode.

Returns:
A tuple `(class_outputs, box_outputs)` of dicts keyed by feature level
(`ssd_constants.MIN_LEVEL` through `ssd_constants.MAX_LEVEL`) holding the
output of `class_net` and `box_net` for that level.
"""
# upward layers
with tf.variable_scope(
'resnet%s' % ssd_constants.RESNET_DEPTH, reuse=tf.AUTO_REUSE):
resnet_fn = resnet_v1(ssd_constants.RESNET_DEPTH, params)
# Only the last of the three backbone outputs is used.
_, _, u4 = resnet_fn(features, is_training_bn)
with tf.variable_scope('ssd', reuse=tf.AUTO_REUSE):
feats = {}
# output channels for mlperf logging.
# NOTE(review): out_channels is only appended to in this function and never
# read - confirm whether it can be removed.
out_channels = [256]
# Level 3 is the backbone output; each further level is a 1x1 convolution
# followed by a 3x3 convolution applied to the previous level.
feats[3] = u4
feats[4] = tf.layers.conv2d(
feats[3],
filters=256,
kernel_size=(1, 1),
padding='same',
activation=tf.nn.relu,
name='block7-conv1x1')
feats[4] = tf.layers.conv2d(
feats[4],
filters=512,
strides=(2, 2),
kernel_size=(3, 3),
padding='same',
activation=tf.nn.relu,
name='block7-conv3x3')
out_channels.append(512)
feats[5] = tf.layers.conv2d(
feats[4],
filters=256,
kernel_size=(1, 1),
padding='same',
activation=tf.nn.relu,
name='block8-conv1x1')
feats[5] = tf.layers.conv2d(
feats[5],
filters=512,
strides=(2, 2),
kernel_size=(3, 3),
padding='same',
activation=tf.nn.relu,
name='block8-conv3x3')
out_channels.append(512)
feats[6] = tf.layers.conv2d(
feats[5],
filters=128,
kernel_size=(1, 1),
padding='same',
activation=tf.nn.relu,
name='block9-conv1x1')
feats[6] = tf.layers.conv2d(
feats[6],
filters=256,
strides=(2, 2),
kernel_size=(3, 3),
padding='same',
activation=tf.nn.relu,
name='block9-conv3x3')
out_channels.append(256)
# Levels 7 and 8 pass no `strides` to the 3x3 convolution and use 'valid'
# padding instead of 'same'.
feats[7] = tf.layers.conv2d(
feats[6],
filters=128,
kernel_size=(1, 1),
padding='same',
activation=tf.nn.relu,
name='block10-conv1x1')
feats[7] = tf.layers.conv2d(
feats[7],
filters=256,
kernel_size=(3, 3),
padding='valid',
activation=tf.nn.relu,
name='block10-conv3x3')
out_channels.append(256)
feats[8] = tf.layers.conv2d(
feats[7],
filters=128,
kernel_size=(1, 1),
padding='same',
activation=tf.nn.relu,
name='block11-conv1x1')
feats[8] = tf.layers.conv2d(
feats[8],
filters=256,
kernel_size=(3, 3),
padding='valid',
activation=tf.nn.relu,
name='block11-conv3x3')
out_channels.append(256)
# Prediction heads: one class head and one box head per feature level.
class_outputs = {}
box_outputs = {}
min_level = ssd_constants.MIN_LEVEL
max_level = ssd_constants.MAX_LEVEL
num_classes = ssd_constants.NUM_CLASSES
with tf.variable_scope('class_net', reuse=tf.AUTO_REUSE):
for level in range(min_level, max_level + 1):
class_outputs[level] = class_net(
feats[level], level, num_classes)
with tf.variable_scope('box_net', reuse=tf.AUTO_REUSE):
for level in range(min_level, max_level + 1):
box_outputs[level] = box_net(
feats[level], level)
return class_outputs, box_outputs
@@ -0,0 +1,122 @@
# Copyright 2018 Google. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Central location for all constants related to MLPerf SSD."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# ==============================================================================
# == Model =====================================================================
# ==============================================================================
IMAGE_SIZE = 300
SPACE_TO_DEPTH_BLOCK_SIZE = 2

# TODO(taylorrobie): MLPerf uses 80, but COCO documents 90. (RetinaNet uses 90)
# Update(taylorrobie): Labels > 81 show up in the pipeline. This will need to
#                      be resolved.
NUM_CLASSES = 81  # Including "no class". Not all COCO classes are used.

# Note: Zero is special. (Background class) CLASS_INV_MAP[0] must be zero.
# CLASS_INV_MAP[i] is the raw label id stored for contiguous class index i.
CLASS_INV_MAP = (
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21,
    22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
    44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87,
    88, 89, 90)
# Raw label id -> contiguous class index.
_MAP = dict(zip(CLASS_INV_MAP, range(len(CLASS_INV_MAP))))
# Same mapping as a dense tuple; raw ids that are not used map to -1.
CLASS_MAP = tuple(
    _MAP.get(raw_id, -1) for raw_id in range(max(CLASS_INV_MAP) + 1))

NUM_SSD_BOXES = 8732

RESNET_DEPTH = 34

# SSD specific
MIN_LEVEL = 3
MAX_LEVEL = 8

FEATURE_SIZES = (38, 19, 10, 5, 3, 1)
STEPS = (8, 16, 32, 64, 100, 300)

# https://github.com/amdegroot/ssd.pytorch/blob/master/data/config.py
SCALES = (21, 45, 99, 153, 207, 261, 315)
ASPECT_RATIOS = ((2,), (2, 3), (2, 3), (2, 3), (2,), (2,))
NUM_DEFAULTS = (4, 6, 6, 6, 4, 4)
# Number of default boxes per location, keyed by feature level.
NUM_DEFAULTS_BY_LEVEL = dict(zip(range(MIN_LEVEL, MAX_LEVEL + 1), NUM_DEFAULTS))
SCALE_XY = 0.1
SCALE_HW = 0.2
BOX_CODER_SCALES = tuple(
    1 / scale for scale in (SCALE_XY, SCALE_XY, SCALE_HW, SCALE_HW))
MATCH_THRESHOLD = 0.5

# https://discuss.pytorch.org/t/how-to-preprocess-input-for-pre-trained-networks/683
NORMALIZATION_MEAN = (0.485, 0.456, 0.406)
NORMALIZATION_STD = (0.229, 0.224, 0.225)

# SSD Cropping
NUM_CROP_PASSES = 50
CROP_MIN_IOU_CHOICES = (0, 0.1, 0.3, 0.5, 0.7, 0.9)
P_NO_CROP_PER_PASS = 1 / (1 + len(CROP_MIN_IOU_CHOICES))

# Hard example mining
NEGS_PER_POSITIVE = 3

# Batch normalization
BATCH_NORM_DECAY = 0.9
BATCH_NORM_EPSILON = 1e-5

# ==============================================================================
# == Optimizer =================================================================
# ==============================================================================
BASE_LEARNING_RATE = 3.0e-3
FIRST_LR_DROP_STEP = 160000  # This parameter has no effect.
SECOND_LR_DROP_STEP = 200000  # This parameter has no effect.
MOMENTUM = 0.9
WEIGHT_DECAY = 5e-4
DEFAULT_BATCH_SIZE = 32.0

# ==============================================================================
# == Keys ======================================================================
# ==============================================================================
BOXES = 'boxes'
CLASSES = 'classes'
NUM_MATCHED_BOXES = 'num_matched_boxes'
IMAGE = 'image'
SOURCE_ID = 'source_id'
RAW_SHAPE = 'raw_shape'
IS_PADDED = 'is_padded'

# ==============================================================================
# == Evaluation ================================================================
# ==============================================================================
# Note: This is based on a batch size of 32
#   https://github.com/mlperf/reference/blob/master/single_stage_detector/ssd/train.py#L21-L37  # pylint: disable=line-too-long
EVAL_SAMPLES = 5000
CHECKPOINT_FREQUENCY = 5000
MAX_NUM_EVAL_BOXES = 200
OVERLAP_CRITERIA = 0.5  # Used for nonmax supression
MIN_SCORE = 0.05  # Minimum score to be considered during evaluation.
DUMMY_SCORE = -1e5  # If no boxes are matched.

# Eval step intervals starting from 0
#EVAL_STEPS = (24000, 24000, 24000, 24000, 24000,24000, 24000, 24000, 24000, 24000)
EVAL_STEPS = (432000,)

# Target COCO/AP for mlperf.
EVAL_TARGET = 0.24

# For multiprocessing.
QUEUE_SIZE = 24
WORKER_COUNT = 10
@@ -0,0 +1,309 @@
# Copyright 2018 Google. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Training script for SSD.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import multiprocessing
import os
import sys
sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)),'../../../../utils/atlasboost'))
import threading
from absl import app
import numpy as np
import tensorflow as tf
from npu_bridge.estimator import npu_ops
from tensorflow.core.protobuf import rewriter_config_pb2
from npu_bridge.estimator.npu.npu_config import NPURunConfig
from npu_bridge.estimator.npu.npu_estimator import NPUEstimator
import coco_metric
import dataloader
import ssd_constants
import ssd_model
def get_rank_size():
  """Return the value of the ``RANK_SIZE`` environment variable as an int.

  Raises:
    KeyError: if ``RANK_SIZE`` is not set.
    ValueError: if its value cannot be parsed as an integer.
  """
  raw_rank_size = os.environ['RANK_SIZE']
  return int(raw_rank_size)
from benchmark_log import hwlog
from benchmark_log.basic_utils import get_environment_info
from benchmark_log.basic_utils import get_model_parameter
# Command-line flags. model_dir and the file-pattern / JSON flags default to
# None; main() rejects a None file pattern or JSON path for the modes that
# need them.
tf.flags.DEFINE_string('model_dir', None, 'Location of model_dir')
tf.flags.DEFINE_string('resnet_checkpoint', '',
                       'Location of the ResNet checkpoint to use for model '
                       'initialization.')
tf.flags.DEFINE_integer('train_batch_size', 64, 'training batch size')
tf.flags.DEFINE_integer('eval_batch_size', 1, 'evaluation batch size')
tf.flags.DEFINE_integer('eval_samples', 5000, 'The number of samples for '
                        'evaluation.')
tf.flags.DEFINE_string(
    'training_file_pattern', None,
    'Glob for training data files (e.g., COCO train - minival set)')
tf.flags.DEFINE_string(
    'validation_file_pattern', None,
    'Glob for evaluation tfrecords (e.g., COCO val2017 set)')
tf.flags.DEFINE_string(
    'val_json_file',
    None,
    'COCO validation JSON containing golden bounding boxes.')
tf.flags.DEFINE_integer('num_examples_per_epoch', 120000,
                        'Number of examples in one epoch')
tf.flags.DEFINE_float('num_epochs', 58, 'Number of epochs for training')
tf.flags.DEFINE_string('mode', 'train_and_eval',
                       'Mode to run: train_and_eval, train, eval')
tf.flags.DEFINE_integer(
    'keep_checkpoint_max', 32,
    'Maximum number of checkpoints to keep.')
FLAGS = tf.flags.FLAGS
# Set to True by coco_eval() once COCO/AP reaches ssd_constants.EVAL_TARGET;
# main() checks it to leave the train/eval loop.
SUCCESS = False
def construct_run_config():
  """Build the NPU run configuration and the model parameter dict.

  Returns:
    A `(run_config, params)` tuple. `params` holds the values of
    `ssd_model.default_hparams()` overlaid with the command-line flags;
    `run_config` is the `NPURunConfig` used to build the estimators.
  """
  # Start from the model defaults, then let the flags override them.
  params = dict(ssd_model.default_hparams().values())
  params.update(
      num_examples_per_epoch=FLAGS.num_examples_per_epoch,
      resnet_checkpoint=FLAGS.resnet_checkpoint,
      val_json_file=FLAGS.val_json_file,
      mode=FLAGS.mode,
      model_dir=FLAGS.model_dir,
      eval_samples=FLAGS.eval_samples,
  )
  run_config = NPURunConfig(
      model_dir=FLAGS.model_dir,
      session_config=tf.ConfigProto(),
      keep_checkpoint_max=FLAGS.keep_checkpoint_max,
      save_checkpoints_steps=ssd_constants.CHECKPOINT_FREQUENCY,
      enable_data_pre_proc=True,
      save_summary_steps=100,
      iterations_per_loop=100,
      precision_mode='allow_mix_precision')
  return run_config, params
def coco_eval(predictions,
              current_step,
              summary_writer,
              coco_gt,
              use_cpp_extension=True,
              nms_on_tpu=True):
  """Compute COCO metrics for `predictions`, then log and summarize them.

  Sets the module-level `SUCCESS` flag when 'COCO/AP' reaches
  `ssd_constants.EVAL_TARGET`.

  Args:
    predictions: predictions forwarded to `coco_metric.compute_map`.
    current_step: global step the summaries are recorded at.
    summary_writer: writer that receives one scalar summary per metric.
    coco_gt: ground truth object forwarded to `coco_metric.compute_map`.
    use_cpp_extension: forwarded to `coco_metric.compute_map`.
    nms_on_tpu: forwarded to `coco_metric.compute_map`.
  """
  global SUCCESS
  metrics = coco_metric.compute_map(
      predictions,
      coco_gt,
      use_cpp_extension=use_cpp_extension,
      nms_on_tpu=nms_on_tpu)
  reached_target = metrics['COCO/AP'] >= ssd_constants.EVAL_TARGET
  if reached_target and not SUCCESS:
    SUCCESS = True
  tf.logging.info('Eval results: %s' % metrics)
  hwlog.remark_print(key=hwlog.EVAL_RESULTS, value=metrics)
  # Write out eval results for the checkpoint.
  with tf.Graph().as_default():
    scalar_values = [
        tf.Summary.Value(tag=name, simple_value=metrics[name])
        for name in metrics
    ]
    summary_writer.add_summary(tf.Summary(value=scalar_values), current_step)
def init_npu():
  """Prepare a session and the op needed to initialize the NPU manually.

  Returns:
    `(init_sess, npu_init)`: a `tf.Session` configured with the NpuOptimizer
    custom optimizer, and the `npu_ops.initialize_system()` op to run in it.
  """
  npu_init = npu_ops.initialize_system()
  session_config = tf.ConfigProto()
  rewrite_options = session_config.graph_options.rewrite_options
  # Turn off the remapping rewrite and register the NPU graph optimizer.
  rewrite_options.remapping = rewriter_config_pb2.RewriterConfig.OFF
  npu_optimizer = rewrite_options.custom_optimizers.add()
  npu_optimizer.name = "NpuOptimizer"
  npu_optimizer.parameter_map["use_off_line"].b = True
  init_sess = tf.Session(config=session_config)
  return init_sess, npu_init
def _run_training(run_config, params, steps):
  """Train the SSD model for `steps` steps with a freshly built estimator."""
  train_params = dict(params)
  hwlog.remark_print(key=hwlog.CURRENT_EPOCH,
                     value=train_params['num_examples_per_epoch'])
  train_params['batch_size'] = FLAGS.train_batch_size
  train_estimator = NPUEstimator(
      model_fn=ssd_model.ssd_model_fn,
      model_dir=FLAGS.model_dir,
      config=run_config,
      params=train_params)
  tf.logging.info(params)
  train_estimator.train(
      input_fn=dataloader.SSDInputReader(
          FLAGS.training_file_pattern,
          params['transpose_input'],
          is_training=True),
      steps=steps)


def main(argv):
  """Run training and/or COCO evaluation as selected by --mode.

  The NPU is initialized first and is always shut down again, even when flag
  validation, training or evaluation raises.

  Args:
    argv: unused positional command-line arguments passed by `app.run`.

  Raises:
    RuntimeError: if a file pattern / JSON flag required by the selected mode
      is missing, or if --mode=eval finds no checkpoint in --model_dir.
  """
  del argv  # Unused.
  global SUCCESS
  init_sess, npu_init = init_npu()
  init_sess.run(npu_init)
  try:
    # Check data path
    if FLAGS.mode in ('train',
                      'train_and_eval') and FLAGS.training_file_pattern is None:
      raise RuntimeError('You must specify --training_file_pattern for training.')
    if FLAGS.mode in ('train_and_eval', 'eval'):
      if FLAGS.validation_file_pattern is None:
        raise RuntimeError('You must specify --validation_file_pattern '
                           'for evaluation.')
      if FLAGS.val_json_file is None:
        raise RuntimeError('You must specify --val_json_file for evaluation.')
    run_config, params = construct_run_config()
    if FLAGS.mode == 'train':
      _run_training(
          run_config, params,
          steps=int((FLAGS.num_epochs * FLAGS.num_examples_per_epoch) /
                    FLAGS.train_batch_size / get_rank_size()))
    elif FLAGS.mode == 'train_and_eval':
      output_dir = os.path.join(FLAGS.model_dir, 'eval')
      tf.gfile.MakeDirs(output_dir)
      # Summary writer writes out eval metrics.
      summary_writer = tf.summary.FileWriter(output_dir)
      current_step = 0
      coco_gt = coco_metric.create_coco(
          FLAGS.val_json_file, use_cpp_extension=params['use_cocoeval_cc'])
      for eval_step in ssd_constants.EVAL_STEPS:
        # Compute the actual eval steps based on the actual train_batch_size
        steps = int(eval_step / get_rank_size() * ssd_constants.DEFAULT_BATCH_SIZE /
                    FLAGS.train_batch_size)
        print('###################################', steps)
        tf.logging.info('Starting training cycle for %d steps.' % steps)
        run_config, params = construct_run_config()
        _run_training(run_config, params, steps)
        # NOTE(review): SUCCESS is only set by coco_eval() below, so this
        # check fires after the training cycle that follows a successful
        # evaluation - confirm that is intended.
        if SUCCESS:
          break
        current_step = current_step + steps
        tf.logging.info('Starting evaluation cycle at step %d.' % current_step)
        # Run evaluation at the given step.
        eval_params = dict(params)
        eval_params['batch_size'] = FLAGS.eval_batch_size
        eval_estimator = NPUEstimator(
            model_fn=ssd_model.ssd_model_fn,
            model_dir=FLAGS.model_dir,
            config=run_config,
            params=eval_params)
        predictions = list(
            eval_estimator.predict(
                input_fn=dataloader.SSDInputReader(
                    FLAGS.validation_file_pattern,
                    is_training=False)))
        coco_eval(predictions, current_step, summary_writer, coco_gt,
                  params['use_cocoeval_cc'], False)
      summary_writer.close()
    elif FLAGS.mode == 'eval':
      # Fail early with a clear message instead of predicting from
      # uninitialized weights and crashing later while parsing the step.
      ckpt = tf.train.latest_checkpoint(FLAGS.model_dir)
      if ckpt is None:
        raise RuntimeError(
            'No checkpoint found in --model_dir %s.' % FLAGS.model_dir)
      coco_gt = coco_metric.create_coco(
          FLAGS.val_json_file, use_cpp_extension=params['use_cocoeval_cc'])
      eval_params = dict(params)
      eval_params['batch_size'] = FLAGS.eval_batch_size
      eval_estimator = NPUEstimator(
          model_fn=ssd_model.ssd_model_fn,
          model_dir=FLAGS.model_dir,
          config=run_config,
          params=eval_params)
      output_dir = os.path.join(FLAGS.model_dir, 'eval')
      tf.gfile.MakeDirs(output_dir)
      # Summary writer writes out eval metrics.
      summary_writer = tf.summary.FileWriter(output_dir)
      tf.logging.info('Starting to evaluate on newest checkpoint.')
      predictions = list(
          eval_estimator.predict(
              checkpoint_path=ckpt,
              input_fn=dataloader.SSDInputReader(
                  FLAGS.validation_file_pattern,
                  is_training=False)))
      tf.logging.info('Starting to cal coco ap.')
      # The global step is the suffix after the last '-' of the checkpoint
      # name; rsplit keeps this working when the prefix itself has hyphens.
      current_step = int(os.path.basename(ckpt).rsplit('-', 1)[-1])
      coco_eval(predictions, current_step, summary_writer, coco_gt,
                params['use_cocoeval_cc'], False)
      tf.logging.info('end to evaluate.')
      summary_writer.close()
  finally:
    # Always release the NPU, even if validation/training/evaluation failed.
    try:
      npu_shutdown = npu_ops.shutdown_system()
      init_sess.run(npu_shutdown)
    finally:
      init_sess.close()
if __name__ == '__main__':
  # Point hwlog at the directory that contains this script.
  hwlog.ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
  (cpu_info, npu_info, framework_info, os_info,
   benchmark_version) = get_environment_info("tensorflow")
  config_info = get_model_parameter("tensorflow_config")
  initinal_data = {"base_lr": 0.01, "dataset": "imagenet1024", "optimizer": "SGD", "loss_scale": 512}
  # Emit the benchmark header records in a fixed order.
  # NOTE(review): "batchsize" is not a key of initinal_data, so
  # INPUT_BATCH_SIZE is logged as None - confirm the intended value.
  for log_key, log_value in (
      (hwlog.CPU_INFO, cpu_info),
      (hwlog.NPU_INFO, npu_info),
      (hwlog.OS_INFO, os_info),
      (hwlog.FRAMEWORK_INFO, framework_info),
      (hwlog.BENCHMARK_VERSION, benchmark_version),
      (hwlog.CONFIG_INFO, config_info),
      (hwlog.BASE_LR, initinal_data.get("base_lr")),
      (hwlog.DATASET, initinal_data.get("dataset")),
      (hwlog.OPT_NAME, initinal_data.get("optimizer")),
      (hwlog.LOSS_SCALE, initinal_data.get("loss_scale")),
      (hwlog.INPUT_BATCH_SIZE, initinal_data.get("batchsize")),
  ):
    hwlog.remark_print(key=log_key, value=log_value)
  tf.logging.set_verbosity(tf.logging.INFO)
  app.run(main)
@@ -0,0 +1,500 @@
# Copyright 2018 Google. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Model definition for the SSD Model.
Defines model_fn of SSD for TF Estimator. The model_fn includes SSD
model architecture, loss function, learning rate schedule, and evaluation
procedure.
T.-Y. Lin, P. Goyal, R. Girshick, K. He, and P. Dollar
Focal Loss for Dense Object Detection. arXiv:1708.02002
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import tensorflow as tf
from object_detection import box_coder
from object_detection import box_list
from object_detection import faster_rcnn_box_coder
from tensorflow.python.estimator import model_fn as model_fn_lib
import dataloader
import ssd_architecture
import ssd_constants
from benchmark_log import hwlog
from benchmark_log.basic_utils import get_environment_info
from benchmark_log.basic_utils import get_model_parameter
def get_rank_size():
  """Return the value of the ``RANK_SIZE`` environment variable as an int.

  Raises:
    KeyError: if ``RANK_SIZE`` is not set.
    ValueError: if its value cannot be parsed as an integer.
  """
  raw_rank_size = os.environ['RANK_SIZE']
  return int(raw_rank_size)
def select_top_k_scores(scores_in, pre_nms_num_detections=5000):
  """Pick the top-k scores (and their anchor indices) for every class.

  Args:
    scores_in: a Tensor with shape [batch_size, N, num_classes], which stacks
      class outputs on all feature levels. N is the total number of anchors
      on all levels; num_classes is the number of classes predicted by the
      model.
    pre_nms_num_detections: number of candidates to keep before NMS.

  Returns:
    A `(scores, indices)` pair of Tensors, each with shape
    [batch_size, pre_nms_num_detections, num_classes].
  """
  # top_k works on the last axis, so move the anchor axis there first.
  class_major = tf.transpose(scores_in, perm=[0, 2, 1])
  best_scores, best_indices = tf.nn.top_k(
      class_major, k=pre_nms_num_detections, sorted=True)
  # Restore the [batch, candidates, classes] layout.
  scores_out = tf.transpose(best_scores, [0, 2, 1])
  indices_out = tf.transpose(best_indices, [0, 2, 1])
  return scores_out, indices_out
def concat_outputs(cls_outputs, box_outputs):
  """Flatten per-level predictions and concatenate them along the anchor axis.

  Takes the dicts of class and box prediction tensors and turns them into one
  class tensor and one box tensor for comparison with the ground truth boxes
  and class labels.

  Args:
    cls_outputs: an OrderedDict with keys representing levels and values
      representing logits in
      [batch_size, height, width, num_anchors * num_classes].
    box_outputs: an OrderedDict with keys representing levels and values
      representing box regression targets in
      [batch_size, height, width, num_anchors * 4].

  Returns:
    Concatenated cls_outputs and box_outputs.
  """
  assert set(cls_outputs.keys()) == set(box_outputs.keys())
  # This sort matters. The labels assume a certain order based on
  # ssd_constants.FEATURE_SIZES, and this sort matches that convention.
  levels = sorted(cls_outputs.keys())
  batch_size = int(cls_outputs[levels[0]].shape[0])

  def _flatten_level(level_output, grid_size, num_defaults, depth):
    """Reshape [batch, grid, grid, defaults * depth] to [batch, -1, depth]."""
    assert level_output.shape[3] == num_defaults * depth
    # e.g. (32, 38, 38) + (4, 81) = (32, 38, 38, 4, 81)
    expanded_shape = (batch_size, grid_size, grid_size) + (num_defaults, depth)
    # e.g. (32, 38^2 * 4, 81)
    collapsed_shape = (batch_size, grid_size ** 2 * num_defaults, depth)
    # TODO(taylorrobie): confirm that this reshape, transpose,
    # reshape is correct.
    return tf.reshape(tf.reshape(level_output, expanded_shape), collapsed_shape)

  flat_cls = []
  flat_box = []
  for idx, level in enumerate(levels):
    grid_size = ssd_constants.FEATURE_SIZES[idx]  # one entry per feature scale
    num_defaults = ssd_constants.NUM_DEFAULTS[idx]
    flat_cls.append(_flatten_level(
        cls_outputs[level], grid_size, num_defaults,
        ssd_constants.NUM_CLASSES))
    flat_box.append(_flatten_level(
        box_outputs[level], grid_size, num_defaults, 4))
  return tf.concat(flat_cls, axis=1), tf.concat(flat_box, axis=1)
def _localization_loss(pred_locs, gt_locs, gt_labels, num_matched_boxes):
  """Computes the box regression loss with a Huber (smooth L1) penalty.

  Only anchors whose groundtruth label is greater than 0 contribute.

  Args:
    pred_locs: a dict from index to tensor of predicted locations. The shape
      of each tensor is [batch_size, num_anchors, 4].
    gt_locs: a list of tensors representing box regression targets in
      [batch_size, num_anchors, 4].
    gt_labels: a list of tensors that represents the classification
      groundtruth targets. The shape is [batch_size, num_anchors, 1].
    num_matched_boxes: the number of anchors that are matched to groundtruth
      targets, used as the loss normalizer. The shape is [batch_size].

  Returns:
    box_loss: a float32 representing total box regression loss.
  """
  accumulated = 0
  for idx, level in enumerate(sorted(pred_locs.keys())):
    target_label = gt_labels[idx]
    target_loc = gt_locs[idx]
    predicted_loc = tf.reshape(pred_locs[level], target_loc.shape)
    positive_mask = tf.cast(tf.greater(target_label, 0), tf.float32)
    per_anchor = tf.reduce_sum(
        tf.losses.huber_loss(
            target_loc, predicted_loc, reduction=tf.losses.Reduction.NONE),
        axis=-1)
    masked = tf.multiply(per_anchor, positive_mask)
    # Sum over every axis except the batch axis.
    non_batch_axes = list(range(1, masked.shape.ndims))
    accumulated = accumulated + tf.reduce_sum(masked, axis=non_batch_axes)
  # TODO(taylorrobie): Confirm that normalizing by the number of boxes matches
  # reference
  return tf.reduce_mean(accumulated / num_matched_boxes)
@tf.custom_gradient
def _softmax_cross_entropy(logits, label):
  """Helper function to compute softmax cross entropy loss.

  Wrapped in `tf.custom_gradient`, so it returns both the loss and the
  function that computes its gradient.

  Args:
    logits: class logits; the class axis is the last one.
    label: integer class ids, one-hot encoded here with
      ssd_constants.NUM_CLASSES classes.

  Returns:
    loss: log-sum-exp of the logits minus the logit of the labelled class,
      reduced over the last axis.
    grad: the gradient function for `loss`.
  """
  # Shift so the largest logit on the class axis is 0 before exponentiating.
  shifted_logits = logits - tf.expand_dims(tf.reduce_max(logits, -1), -1)
  exp_shifted_logits = tf.math.exp(shifted_logits)
  sum_exp = tf.reduce_sum(exp_shifted_logits, -1)
  log_sum_exp = tf.math.log(sum_exp)
  one_hot_label = tf.one_hot(label, ssd_constants.NUM_CLASSES)
  # From here on shifted_logits holds only the logit of the labelled class.
  shifted_logits = tf.reduce_sum(shifted_logits * one_hot_label, -1)
  loss = log_sum_exp - shifted_logits
  def grad(dy):
    # First value: (softmax - one_hot) * dy, the gradient w.r.t. logits.
    # NOTE(review): the second value (for `label`) is dy passed through
    # unchanged - confirm this is intended for the integer label input.
    return (exp_shifted_logits / tf.expand_dims(sum_exp, -1) -
            one_hot_label) * tf.expand_dims(dy, -1), dy
  return loss, grad
def _classification_loss(pred_labels, gt_labels, num_matched_boxes):
  """Computes the classification loss.

  Computes the classification loss with hard negative mining.

  Args:
    pred_labels: a dict from index to tensor of predicted class. The shape
      of the tensor is [batch_size, num_anchors, num_classes].
    gt_labels: a list of tensor that represents the classification groundtruth
      targets. The shape is [batch_size, num_anchors, 1].
    num_matched_boxes: the number of anchors that are matched to a groundtruth
      targets. This is used as the loss normalizer.

  Returns:
    class_loss: a float32 representing total classification loss.
  """
  keys = sorted(pred_labels.keys())
  batch_size = gt_labels[0].shape[0]
  cross_entropy = []
  for i, k in enumerate(keys):
    gt_label = gt_labels[i]
    pred_label = tf.reshape(
        pred_labels[k],
        gt_label.get_shape().as_list() + [ssd_constants.NUM_CLASSES])
    cross_entropy.append(
        tf.reshape(
            _softmax_cross_entropy(pred_label, gt_label), [batch_size, -1]))
  # Put the rest of the loss computation on one device to avoid excessive
  # communication inside topk_mask with spatial partition
  #with tf.device(tf.contrib.tpu.core(0)):
  cross_entropy = tf.concat(cross_entropy, 1)
  gt_label = tf.concat([tf.reshape(l, [batch_size, -1]) for l in gt_labels],
                       1)
  # Anchors with a label greater than 0 are the positives.
  mask = tf.greater(gt_label, 0)
  float_mask = tf.cast(mask, tf.float32)
  # Hard example mining
  # Cross entropy of the negatives only (positives are zeroed out).
  neg_masked_cross_entropy = cross_entropy * (1 - float_mask)
  # The negatives are ranked in three top_k passes of 4096 + 4096 + 540 = 8732
  # values; each later pass zeroes every entry that is not strictly below the
  # smallest value kept by the previous pass.
  # NOTE(review): presumably split this way because of a top_k size limit on
  # the target device, and 8732 is presumably the total anchor count -
  # confirm.
  value1, _ = tf.math.top_k(neg_masked_cross_entropy, k=4096)
  kth1 = tf.reduce_min(value1, 1, keepdims=True)
  mask1 = tf.cast(tf.less(neg_masked_cross_entropy, kth1), tf.float32)
  value2, _ = tf.math.top_k(tf.multiply(neg_masked_cross_entropy, mask1), k=4096)
  kth2 = tf.reduce_min(value2, 1, keepdims=True)
  mask2 = tf.cast(tf.less(neg_masked_cross_entropy, kth2), tf.float32)
  value3, _ = tf.math.top_k(tf.multiply(neg_masked_cross_entropy, mask2), k=540)
  value = tf.concat([value1, value2, value3], axis=1)
  # Index into the ranked values: NEGS_PER_POSITIVE negatives per matched box,
  # clipped to 8731, the last valid column of `value`.
  num_neg_boxes = tf.minimum(
      tf.to_int32(num_matched_boxes) * ssd_constants.NEGS_PER_POSITIVE, 8731)
  # Per-example threshold: the ranked value at position num_neg_boxes.
  large_neg_ce = tf.batch_gather(value, num_neg_boxes[:, tf.newaxis])
  # Keep every negative whose cross entropy is at least that threshold.
  top_k_neg_mask = tf.cast(tf.greater_equal(neg_masked_cross_entropy, large_neg_ce), tf.float32)
  class_loss = tf.reduce_sum(
      tf.multiply(cross_entropy, float_mask + top_k_neg_mask), axis=1)
  # TODO(taylorrobie): Confirm that normalizing by the number of boxes matches
  # reference
  return tf.reduce_mean(class_loss / num_matched_boxes)
def detection_loss(cls_outputs, box_outputs, labels):
  """Computes the total detection loss (class loss plus box loss).

  Args:
    cls_outputs: an OrderedDict with keys representing levels and values
      representing class logits, as accepted by `concat_outputs`.
    box_outputs: an OrderedDict with keys representing levels and values
      representing box regression targets in
      [batch_size, height, width, num_anchors * 4].
    labels: the dictionary returned from the dataloader that includes the
      groundtruth targets.

  Returns:
    total_loss: a float32 representing the sum of class and box losses.
    cls_loss: a float32 representing total class loss.
    box_loss: a float32 representing total box regression loss.
  """
  box_targets = labels[ssd_constants.BOXES]
  if isinstance(box_targets, dict):
    # Per-level targets: use them in the dict's own order.
    gt_boxes = list(box_targets.values())
    gt_classes = list(labels[ssd_constants.CLASSES].values())
  else:
    gt_boxes = [box_targets]
    gt_classes = [labels[ssd_constants.CLASSES]]
  flat_cls, flat_box = concat_outputs(cls_outputs, box_outputs)
  # The loss helpers expect dicts keyed by level; everything is one level now.
  flat_cls = {'flatten': flat_cls}
  flat_box = {'flatten': flat_box}
  box_loss = _localization_loss(
      flat_box, gt_boxes, gt_classes, labels[ssd_constants.NUM_MATCHED_BOXES])
  class_loss = _classification_loss(
      flat_cls, gt_classes, labels[ssd_constants.NUM_MATCHED_BOXES])
  return class_loss + box_loss, class_loss, box_loss
def update_learning_rate_schedule_parameters(params):
  """Converts the epoch-based schedule settings in `params` into steps.

  Writes `lr_warmup_step` and `cos_decay_step` into `params` in place.

  Args:
    params: a parameter dictionary that includes batch_size,
      num_examples_per_epoch, lr_warmup_epoch and cos_decay_epoch.
  """
  per_device_batch = params['batch_size']
  # Steps one rank takes per epoch: examples / batch size, floor-divided by
  # the rank size.
  steps_per_epoch = (
      params['num_examples_per_epoch'] / per_device_batch // get_rank_size())
  for epoch_key, step_key in (('lr_warmup_epoch', 'lr_warmup_step'),
                              ('cos_decay_epoch', 'cos_decay_step')):
    params[step_key] = int(params[epoch_key] * steps_per_epoch)
def learning_rate_schedule(params, global_step):
  """Builds the learning rate: batch scaling, linear warmup, cosine decay.

  Args:
    params: A dictionary that defines hyperparameters of model.
    global_step: A tensor representing current global step.

  Returns:
    A tensor representing current learning rate.
  """
  base_lr = params['base_learning_rate']
  warmup_steps = params['lr_warmup_step']
  decay_steps = params['cos_decay_step']
  per_device_batch = params['batch_size']
  # Scale the base rate by the global batch size relative to the default.
  batch_ratio = (
      get_rank_size() * per_device_batch / ssd_constants.DEFAULT_BATCH_SIZE)
  peak_lr = base_lr * batch_ratio
  # Linear ramp from 0 towards peak_lr during warmup.
  step_as_float = tf.cast(global_step, dtype=tf.float32)
  warmup_lr = (step_as_float / warmup_steps) * peak_lr
  in_warmup = global_step < warmup_steps
  decayed_lr = tf.train.cosine_decay(
      peak_lr, global_step, decay_steps, alpha=0.01)
  return tf.where(in_warmup, warmup_lr, decayed_lr)
class ExamplesPerSecondHook(tf.train.SessionRunHook):
  """Session hook that periodically logs throughput, learning rate and loss."""

  def __init__(self,
               batch_size,
               lr=0,
               loss=0,
               every_n_steps=100,
               every_n_secs=None):
    """Creates the hook.

    Args:
      batch_size: number of examples per step, used to turn steps/sec into
        examples/sec.
      lr: fetched on every run and logged as 'learning rate'.
      loss: fetched on every run and logged as 'loss'.
      every_n_steps: log every N steps.
      every_n_secs: log every N seconds.

    Raises:
      ValueError: unless exactly one of every_n_steps / every_n_secs is set.
    """
    steps_given = every_n_steps is not None
    secs_given = every_n_secs is not None
    if steps_given == secs_given:
      raise ValueError('exactly one of every_n_steps'
                       ' and every_n_secs should be provided.')
    self._timer = tf.train.SecondOrStepTimer(
        every_steps=every_n_steps, every_secs=every_n_secs)
    # Running totals over all reported intervals.
    self._step_train_time = 0
    self._total_steps = 0
    self._batch_size = batch_size
    self._lr = lr
    self._loss = loss

  def begin(self):
    """Looks up the global step tensor; raises RuntimeError if missing."""
    self._global_step_tensor = tf.compat.v1.train.get_global_step()
    if self._global_step_tensor is None:
      raise RuntimeError(
          'Global step should be created to use StepCounterHook.')

  def before_run(self, run_context):  # pylint: disable=unused-argument
    """Requests the global step, learning rate and loss for this run."""
    fetches = [self._global_step_tensor, self._lr, self._loss]
    return tf.train.SessionRunArgs(fetches)

  def after_run(self, run_context, run_values):
    """Logs FPS, learning rate and loss whenever the timer triggers."""
    del run_context  # Unused.
    step, lr_value, loss_value = run_values.results
    if not self._timer.should_trigger_for_step(step):
      return
    interval_secs, interval_steps = self._timer.update_last_triggered_step(
        step)
    # The first trigger has no previous reference point to measure from.
    if interval_secs is None:
      return
    steps_per_sec = interval_steps / interval_secs
    self._step_train_time += interval_secs
    self._total_steps += interval_steps
    examples_per_sec = steps_per_sec * self._batch_size
    tf.logging.info('%s: %g, %s: %s, %s: %s', 'FPS', examples_per_sec, 'learning rate', lr_value, 'loss', loss_value)
    hwlog.remark_print(key=hwlog.FPS, value='%7.1f' % examples_per_sec)
def _model_fn(features, labels, mode, params, model):
  """Model definition for the SSD model.

  Args:
    features: the input image tensor with shape [batch_size, height, width, 3].
      In PREDICT mode this is instead a dict that holds the image under the
      'image' key; the remaining entries are passed through to the
      predictions.
    labels: the input labels in a dictionary. The labels include class targets
      and box targets which are dense label maps.
    mode: the estimator mode: TRAIN, EVAL, or PREDICT.
    params: the dictionary defines hyperparameters of model. The default
      settings are in default_hparams function in this file.
    model: the SSD model outputs class logits and box regression outputs.

  Returns:
    spec: the EstimatorSpec to run training or prediction.

  Raises:
    NotImplementedError: if `mode` is EVAL.
  """
  if mode == tf.estimator.ModeKeys.PREDICT:
    # The input dict doubles as the pass-through part of the predictions.
    labels = features
    features = labels.pop('image')
  features -= tf.constant(
      ssd_constants.NORMALIZATION_MEAN, shape=[1, 1, 3], dtype=features.dtype)
  features /= tf.constant(
      ssd_constants.NORMALIZATION_STD, shape=[1, 1, 3], dtype=features.dtype)
  cls_outputs, box_outputs = model(
      features, params, is_training_bn=(mode == tf.estimator.ModeKeys.TRAIN))
  # First check if it is in PREDICT mode.
  if mode == tf.estimator.ModeKeys.PREDICT:
    flattened_cls, flattened_box = concat_outputs(cls_outputs, box_outputs)
    ssd_box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=ssd_constants.BOX_CODER_SCALES)
    anchors = box_list.BoxList(
        tf.convert_to_tensor(dataloader.DefaultBoxes()('ltrb')))
    decoded_boxes = box_coder.batch_decode(
        encoded_boxes=flattened_box, box_coder=ssd_box_coder, anchors=anchors)
    pred_scores = tf.nn.softmax(flattened_cls, axis=2)
    pred_scores, indices = select_top_k_scores(pred_scores,
                                               ssd_constants.MAX_NUM_EVAL_BOXES)
    predictions = dict(
        labels,
        indices=indices,
        pred_scores=pred_scores,
        pred_box=decoded_boxes,
    )
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
  # Load pretrained model from checkpoint.
  if params['resnet_checkpoint'] and mode == tf.estimator.ModeKeys.TRAIN:
    def scaffold_fn():
      """Loads pretrained model through scaffold function."""
      tf.train.init_from_checkpoint(params['resnet_checkpoint'], {
          '/': 'resnet%s/' % ssd_constants.RESNET_DEPTH,
      })
      return tf.train.Scaffold()
  else:
    scaffold_fn = None
  # Set up training loss and learning rate.
  update_learning_rate_schedule_parameters(params)
  global_step = tf.train.get_or_create_global_step()
  learning_rate = learning_rate_schedule(params, global_step)
  # cls_loss and box_loss are for logging. only total_loss is optimized.
  total_loss, cls_loss, box_loss = detection_loss(
      cls_outputs, box_outputs, labels)
  total_loss += params['weight_decay'] * tf.add_n(
      [tf.nn.l2_loss(v) for v in tf.trainable_variables()])
  if mode == tf.estimator.ModeKeys.TRAIN:
    total_loss_t = tf.reduce_mean(tf.reshape(total_loss, [1]))
    cls_loss_t = tf.reduce_mean(tf.reshape(cls_loss, [1]))
    box_loss_t = tf.reduce_mean(tf.reshape(box_loss, [1]))
    learning_rate_t = tf.reduce_mean(tf.reshape(learning_rate, [1]))
    tf.summary.scalar('total_loss', total_loss_t)
    tf.summary.scalar('cls_loss_t', cls_loss_t)
    tf.summary.scalar('box_loss_t', box_loss_t)
    tf.summary.scalar('learning_rate_t', learning_rate_t)
    optimizer = tf.train.MomentumOptimizer(
        learning_rate, momentum=ssd_constants.MOMENTUM)
    from npu_bridge.estimator.npu.npu_optimizer import NPUDistributedOptimizer
    # Use NPU distributed computation to update the gradients.
    optimizer = NPUDistributedOptimizer(optimizer)
    # Batch norm requires update_ops to be added as a train_op dependency.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # Report throughput for the real global batch size (ranks * per-device
    # batch) rather than assuming a per-device batch of 32.
    examples_sec_hook = ExamplesPerSecondHook(
        get_rank_size() * params['batch_size'], learning_rate, total_loss)
    train_op = tf.group(optimizer.minimize(total_loss, global_step),
                        update_ops)
    # Without a pretrained checkpoint there is no scaffold function to call;
    # fall back to the default scaffold instead of calling None.
    scaffold = scaffold_fn() if scaffold_fn is not None else None
    return model_fn_lib.EstimatorSpec(
        mode=mode, loss=total_loss, train_op=train_op, scaffold=scaffold,
        training_hooks=[examples_sec_hook])
  if mode == tf.estimator.ModeKeys.EVAL:
    raise NotImplementedError
def ssd_model_fn(features, labels, mode, params):
  """Estimator model_fn for SSD: `_model_fn` bound to `ssd_architecture.ssd`."""
  network = ssd_architecture.ssd
  return _model_fn(features, labels, mode, params, model=network)
def default_hparams():
  """Return the default hyperparameters as a `tf.contrib.training.HParams`."""
  # TODO(taylorrobie): replace params useages with global constants.
  defaults = {
      'num_examples_per_epoch': 120000,
      'lr_warmup_epoch': 0.8,
      'cos_decay_epoch': 106,
      'weight_decay': ssd_constants.WEIGHT_DECAY,
      'base_learning_rate': ssd_constants.BASE_LEARNING_RATE,
      'eval_every_checkpoint': False,
      'transpose_input': False,
      'use_cocoeval_cc': False,
  }
  return tf.contrib.training.HParams(**defaults)
@@ -0,0 +1,197 @@
# Alexnet for Tensorflow
This repository provides a script and recipe to train the AlexNet model.
## Table Of Contents
* [Model overview](#model-overview)
* [Model Architecture](#model-architecture)
* [Default configuration](#default-configuration)
* [Data augmentation](#data-augmentation)
* [Setup](#setup)
* [Requirements](#requirements)
* [Quick start guide](#quick-start-guide)
* [Advanced](#advanced)
* [Command line arguments](#command-line-arguments)
* [Training process](#training-process)
* [Performance](#performance)
* [Results](#results)
* [Training accuracy results](#training-accuracy-results)
* [Training performance results](#training-performance-results)
## Model overview
AlexNet model from
`Alex Krizhevsky. "One weird trick for parallelizing convolutional neural networks". <https://arxiv.org/abs/1404.5997>.`
reference implementation: <https://pytorch.org/docs/stable/_modules/torchvision/models/alexnet.html#alexnet>
### Model architecture
### Default configuration
The following sections introduce the default configurations and hyperparameters for AlexNet model.
#### Optimizer
This model uses Momentum optimizer from Tensorflow with the following hyperparameters:
- Momentum : 0.9
- Learning rate (LR) : 0.06
- LR schedule: cosine_annealing
- Batch size : 128
- Weight decay : 0.0001.
- Label smoothing = 0.1
- We train for:
- 150 epochs -> 60.1% top1 accuracy
#### Data augmentation
This model uses the following data augmentation:
- For training:
- RandomResizeCrop, scale=(0.08, 1.0), ratio=(0.75, 1.333)
- RandomHorizontalFlip, prob=0.5
- Normalize, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)
- For inference:
- Resize to (256, 256)
- CenterCrop to (224, 224)
- Normalize, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)
## Setup
The following section lists the requirements to start training the Alexnet model.
### Requirements
Tensorflow
NPU environment
## Quick Start Guide
### 1. Clone the repository
```shell
git clone xxx
cd Model_zoo_Alexnet_HARD
```
### 2. Download and preprocess the dataset
1. Download the ImageNet dataset
2. Extract the training data
3. The train and val images are under the train/ and val/ directories, respectively. All images within one folder have the same label.
### 3. Train
- train on single NPU
- **edit** *scripts/train_alexnet_1p.sh* (see example below)
- bash scripts/run_npu_1p.sh
- train on 8 NPUs
- **edit** *scripts/train_alexnet_8p.sh*(see example below)
- bash scripts/run_npu_8p.sh
for example:
- case for single NPU
- In scripts/train_alexnet_1p.sh, the python command should look as follows. For more details on the command line arguments, please refer to [Command line arguments](#command-line-arguments)
```shell
python3.7 ${EXEC_DIR}/train.py --rank_size=1 \
--iterations_per_loop=100 \
--batch_size=256 \
--data_dir=/path/to/dataset \
--mode=train \
--lr=0.015 \
--log_dir=./model_1p > ./train_${device_id}.log 2>&1
```
run the program
```
bash scripts/run_npu_1p.sh
```
- case for 8 NPUs
- In scripts/train_alexnet_8p.sh , python scripts part should look like as follows.
```shell
python3.7 ${EXEC_DIR}/train.py --rank_size=8 \
--iterations_per_loop=100 \
--batch_size=128 \
--data_dir=/path/to/dataset \
--mode=train \
--lr=0.06 \
--log_dir=./model_8p > ./train_${device_id}.log 2>&1
```
run the program
```
bash scripts/run_npu_8p.sh
```
### 4. Test
- same procedure as training except 2 following modifications
- change `--mode=train` to `--mode=evaluate`
- add `--checkpoint_dir=/path/to/checkpoints`
## Advanced
### Command line arguments
```
--data_dir train data dir
--num_classes num of classes in ImageNet (default: 1000)
--image_size image size of the dataset
--batch_size mini-batch size (default: 128) per npu
--pretrained path of pretrained model
--lr initial learning rate
--max_epochs max epoch num to train the model
--warmup_epochs warmup epoch(when batchsize is large)
--weight_decay weight decay (default: 1e-4)
--momentum momentum(default: 0.9)
--label_smoothing use label smooth in CE, default 0.1
--save_summary_steps logging interval (default: 100)
--log_dir path to save checkpoint and log
--log_name name of log file
--save_checkpoints_steps the interval to save checkpoint
--mode mode to run the program (train, evaluate)
--checkpoint_dir path to checkpoint for evaluation
--max_train_steps max number of training steps
--synthetic whether to use synthetic data or not
--version weight initialization for model
--do_checkpoint whether to save checkpoint or not
--rank_size local rank of distributed(default: 0)
--group_size world size of distributed(default: 1)
--max_train_steps number of training step , default : None, when set ,it will override the max_epoch
```
for a complete list of options, please refer to `train.py`
### Training process
All the results of the training will be stored in the directory `results`.
Script will store:
- checkpoints.
- log.
## Performance
### Results
Our results were obtained by running the applicable training script. To achieve the same results, follow the steps in the Quick Start Guide.
#### Training accuracy results
| **epochs** | Top1/Top5 |
| :--------: | :-----------: |
| 150 | 60.12%/82.06% |
#### Training performance results
| **NPUs** | train performance |
| :------: | :---------------: |
| 8 | 30000+ img/s |
@@ -0,0 +1,9 @@
{
"server_count": "1",
"server_list": [{
"device": [{devices}],
"server_id": "127.0.0.1"
}],
"status": "completed",
"version": "1.0"
}
@@ -0,0 +1,36 @@
#!/bin/bash
# Environment setup for Ascend NPU training. It only removes old host logs and
# exports variables, so it is meant to be sourced by the training scripts.

# Remove everything under the host-0 slog directory before a new run.
rm -rf /var/log/npu/slog/host-0/*
# main env
# Pick the library/python/tool paths from the nnae package when it is
# installed, otherwise fall back to the ascend-toolkit package.
# NOTE: LD_LIBRARY_PATH is overwritten (not appended to) in both branches.
if [ -d /usr/local/Ascend/nnae/latest ];then
export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib/:/usr/local/Ascend/nnae/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/local/Ascend/driver/tools/hccn_tool/:/usr/local/mpirun4.0/lib
# NOTE(review): the tfplugin site-packages path appears twice in this value.
export PYTHONPATH=$PYTHONPATH:/usr/local/Ascend/tfplugin/latest/tfplugin/python/site-packages:/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/tfplugin/latest/tfplugin/python/site-packages
export PATH=$PATH:/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin:/usr/local/mpirun4.0/bin
export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp
else
export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/local/mpirun4.0/lib
# NOTE(review): $projectDir is not defined in this file; it must come from the
# sourcing script, otherwise an empty entry is appended -- confirm with caller.
export PYTHONPATH=$PYTHONPATH:/usr/local/Ascend/tfplugin/latest/tfplugin/python/site-packages:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest//fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/tfplugin/python/site-packages:$projectDir
export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:/usr/local/mpirun4.0/bin
export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
fi
# Settings shared by both installations.
export DDK_VERSION_FLAG=1.60.T17.B830
export HCCL_CONNECT_TIMEOUT=600
export JOB_ID=9999001
export NEW_GE_FE_ID=1
export GE_AICPU_FLAG=1
export SOC_VERSION=Ascend910
# Graph dump / model print / log-to-stdout switches are all enabled here.
export DUMP_GE_GRAPH=1
export DUMP_GRAPH_LEVEL=3
export PRINT_MODEL=1
export SLOG_PRINT_TO_STDOUT=1
# Profiling is switched off; FP_POINT/BP_POINT name graph nodes of the
# SSD-Resnet34 model (presumably the profiling start/end markers -- confirm).
export PROFILING_MODE=false
export PROFILING_OPTIONS=training_trace
export FP_POINT=ssd/block7-conv1x1/Relu
export BP_POINT=gradients/resnet34/Relu_grad/ReluGrad
export AICPU_PROFILING_MODE=false
@@ -0,0 +1,70 @@
#!/bin/bash
# Launcher for SSD-Resnet34 training: starts scripts/train.sh once per device
# (single machine) or through mpirun (cluster mode), then waits for completion.
#
# Positional arguments:
#   $1  rank size (number of devices)
#   $2  path of the model yaml configuration
#   $3  directory containing get_params_for_yaml.sh
#   $4  cluster flag ("True" enables mpirun)   -- only read inside docker
#   $5  list of all cluster IPs                -- only read inside docker
rank_size=$1
yamlPath=$2
toolsPath=$3
# Inside a docker container the cluster settings arrive as arguments;
# otherwise CLUSTER / MPIRUN_ALL_IP are taken from the inherited environment.
if [ -f /.dockerenv ];then
CLUSTER=$4
MPIRUN_ALL_IP="$5"
export CLUSTER=${CLUSTER}
fi
# Parent directory of the directory holding this script.
currentDir=$(cd "$(dirname "$0")/.."; pwd)
model_name=$(cd $currentDir/..;basename `pwd`)
# Load the "tensorflow_config" section of the yaml file as shell variables
eval $(${toolsPath}/get_params_for_yaml.sh ${yamlPath} "tensorflow_config")
#mkdir train job path
currtime=`date +%Y%m%d%H%M%S`
mkdir -p ${currentDir%train*}/train/result/tf_ssd_resnet34/training_job_${currtime}/
train_job_dir=${currentDir%train*}/train/result/tf_ssd_resnet34/training_job_${currtime}/
echo "[`date +%Y%m%d-%H:%M:%S`] [INFO] ${train_job_dir} &"
# NOTE: despite its name, jsonFilePath points at the python constants file.
jsonFilePath=${currentDir}/code/ssd_constants.py
echo "start to modify inner config file"
echo "jsonfilepath is "${jsonFilePath}
# Rewrite EVAL_STEPS in ssd_constants.py in place with max_steps from the yaml.
sed -i "s/EVAL_STEPS = (.*,)$/EVAL_STEPS = (${max_steps},)/g" ${jsonFilePath}
# Device list: use device_group_<rank_size>p from the yaml; when it is not set,
# or when rank_size >= 8, select devices 0..rank_size-1 in order
eval device_group=\$device_group_${rank_size}p
if [ x"${device_group}" == x"" ] || [ ${rank_size} -ge 8 ];then
device_group="$(seq 0 "$(expr $rank_size - 1)")"
fi
# Take the FIRST character of the space-stripped device list as the first
# device id; the hw log of that device directory is linked into the job dir.
# NOTE(review): only correct while device ids are single digits.
device_group_str=`echo ${device_group} | sed 's/ //g'`
first_device_id=`echo ${device_group_str: 0:1}`
if [ x"${CLUSTER}" == x"True" ];then
# ln hw log
ln -snf ${train_job_dir}/0/hw_SSD-Resnet34.log ${train_job_dir}
# Copy the yaml and the patched constants file to every other cluster node.
this_ip=$(hostname -I |awk '{print $1}')
for ip in $MPIRUN_ALL_IP;do
if [ x"$ip" != x"$this_ip" ];then
scp $yamlPath root@$ip:$yamlPath
scp $jsonFilePath root@$ip:$jsonFilePath
fi
done
export PATH=$PATH:/usr/local/mpirun4.0/bin
# In cluster mode train.sh receives device id 0 and the cluster flag as $6.
mpirun -H ${mpirun_ip} \
--bind-to none -map-by slot\
--allow-run-as-root \
--mca btl_tcp_if_exclude lo,docker0,endvnic,virbr0,vethf40501b,docker_gwbridge,br-f42ac38052b4\
--prefix /usr/local/mpirun4.0/ \
${currentDir}/scripts/train.sh 0 $rank_size $yamlPath $currtime ${toolsPath} ${CLUSTER}
else
# ln hw log
ln -snf ${train_job_dir}/${first_device_id}/hw_SSD-Resnet34.log ${train_job_dir}
# Start one background train.sh per device; rank ids are assigned 0,1,2,...
rank_id=0
for device_id in $device_group;do
${currentDir}/scripts/train.sh $device_id $rank_size $yamlPath $currtime ${toolsPath} $rank_id &
let rank_id++
done
fi
# Block until all background training processes have exited.
wait
#echo "[`date +%Y%m%d-%H:%M:%S`] [INFO] all train exit " >> ${currentDir}/result/main.log
@@ -0,0 +1,104 @@
#!/usr/bin/env bash
# Run SSD-Resnet34 training for a single device / rank and record the result.
#
# Positional arguments:
#   $1  device id (replaced by the id reported by atlasboost in cluster mode)
#   $2  rank size (total number of devices)
#   $3  path of the model yaml configuration
#   $4  timestamp identifying the training job directory
#   $5  directory containing get_params_for_yaml.sh
#   $6  rank id of this process, or the literal "True" in cluster (mpirun) mode

device_id=$1
rank_size=$2
yamlPath=$3
currentDir=$(cd "$(dirname "$0")/.."; pwd)
#model_name="SSD-Resnet34"
currtime=$4
toolsPath=$5
export YAML_PATH=$3

mkdir -p ${currentDir%train*}/train/result/tf_ssd_resnet34/training_job_${currtime}/
train_job_dir=${currentDir%train*}/train/result/tf_ssd_resnet34/training_job_${currtime}/

# Load the "tensorflow_config" section of the yaml file as shell variables
eval $(${toolsPath}/get_params_for_yaml.sh ${yamlPath} "tensorflow_config")

source ${currentDir}/config/npu_set_env.sh

# Name of the marker log file; must be "hw_" followed by the model name
export REMARK_LOG_FILE=hw_SSD-Resnet34.log

# Make the benchmark logging module importable
benchmark_log_path=${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils
export PYTHONPATH=$PYTHONPATH:${benchmark_log_path}

# user env (these override the values exported by npu_set_env.sh)
export DDK_VERSION_FLAG=1.60.T17.B830
export HCCL_CONNECT_TIMEOUT=600
export RANK_TABLE_FILE=${currentDir}/config/${rank_size}p.json
export RANK_SIZE=${rank_size}
export SLOG_PRINT_TO_STDOUT=0
export DEVICE_ID=${device_id}
export JOB_ID=990
export FUSION_TENSOR_SIZE=1000000000

startTime=`date +%Y%m%d-%H:%M:%S`
startTime_s=`date +%s`

cd ${train_job_dir}

curd_dir=${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils/atlasboost
export PYTHONPATH=$PYTHONPATH:${curd_dir}

if [ x"$6" != x"True" ];then
    # Single-machine mode: the launcher passes the rank id directly.
    rank_id=$6
    export RANK_ID=$6
else
    # Cluster mode: ask atlasboost for the rank and device of this process.
    # The rank is the last word of the captured output; the device id is the
    # text between "deviceid = " and " phyid=".
    device_id_mo=$(python3.7 -c "import src.tensorflow.mpi_ops as atlasboost;atlasboost.init(); \
device_id = atlasboost.local_rank();cluster_device_id = str(device_id); \
atlasboost.set_device_id(device_id);print(atlasboost.rank())")
    # Re-echo unquoted to collapse newlines and repeated blanks into spaces.
    device_id_mo=`echo $device_id_mo`
    rank_id=${device_id_mo##* }
    export RANK_ID=${rank_id}
    device=${device_id_mo##*deviceid = }
    device_id=${device%% phyid=*}
    export DEVICE_ID=${device_id}
    # Use the json file found in the job directory as the rank table.
    hccljson=${train_job_dir}/*.json
    cp ${hccljson} ${currentDir}/config/${rank_size}p.json
fi

# DEVICE_INDEX mirrors RANK_ID, so it can only be exported once RANK_ID is
# known. (It used to be exported before RANK_ID was assigned and was therefore
# always empty unless RANK_ID happened to be inherited from the environment.)
export DEVICE_INDEX=${RANK_ID}

#mkdir exec path
mkdir -p ${train_job_dir}/${device_id}
cd ${train_job_dir}/${device_id}

# Distinguish single-machine and cluster runs for the python program
if [ x"$6" == x"True" ];then
    # multi-device, multi-machine
    export CLUSTER=True
fi

python3.7 ${currentDir}/code/ssd_main.py \
    --mode=${runmode} \
    --train_batch_size=${train_batch_size} \
    --training_file_pattern=${training_file_pattern} \
    --resnet_checkpoint=${resnet_checkpoint} \
    --validation_file_pattern=${validation_file_pattern} \
    --val_json_file=${val_json_file} \
    --eval_batch_size=${eval_batch_size} \
    --num_epochs=${num_epochs} \
    --model_dir=${model_dir} > ${train_job_dir}/train_${device_id}.log 2>&1
# Capture the exit status immediately so no later command can overwrite it.
train_status=$?

if [ ${train_status} -eq 0 ] ;then
    echo ":::ABK 1.0.0 SSD-Resnet34 train success"
    echo ":::ABK 1.0.0 SSD-Resnet34 train success" >> ${train_job_dir}/train_${device_id}.log
    echo ":::ABK 1.0.0 SSD-Resnet34 train success" >> ${train_job_dir}/${device_id}/hw_SSD-Resnet34.log
else
    echo ":::ABK 1.0.0 SSD-Resnet34 train failed"
    echo ":::ABK 1.0.0 SSD-Resnet34 train failed" >> ${train_job_dir}/train_${device_id}.log
    echo ":::ABK 1.0.0 SSD-Resnet34 train failed" >> ${train_job_dir}/${device_id}/hw_SSD-Resnet34.log
fi

# Report the wall-clock duration as hours:minutes:seconds (not zero-padded).
endTime=`date +%Y%m%d-%H:%M:%S`
endTime_s=`date +%s`
sumTime=$(( endTime_s - startTime_s ))
hour=$(( sumTime / 3600 ))
min=$(( (sumTime - hour * 3600) / 60 ))
sec=$(( sumTime - hour * 3600 - min * 60 ))
echo ${hour}:${min}:${sec}
echo ":::ABK 1.0.0 SSD-Resnet34 train total time ${hour}:${min}:${sec}" >> ${train_job_dir}/${device_id}/hw_SSD-Resnet34.log
@@ -0,0 +1,141 @@
# YOLOv3_TensorFlow训练说明
### 1. 介绍
YOLOv3是基于第三方TensorFlow开源代码,使用darknet-53作为主干网络,同时支持单尺度与多尺度训练。包含训练集和验证集两部分,可选用包括COCO2014、COCO2017等, 本文档以COCO2014数据集为例,说明yolov3训练操作步骤。
### 2. 运行环境
Python版本: 3.7.5
主要python三方库:
- tensorflow >= 1.15.0 (satisfied with NPU)
- opencv-python
1、直接pip install opencv-python
2、如果直接使用pip install opencv-python无法正常安装三方库,则采用离线安装方法安装。
(1)'解压opencv包'
(2)'进入解压后的opencv包 cd opencv'
(3)'mkdir -p build'
(4)'cd build'
(5)'cmake -D BUILD_opencv_python3=yes -D BUILD_opencv_python2=no -D PYTHON3_EXECUTABLE=/usr/local/python3.7.5/bin/python3.7m -D PYTHON3_INCLUDE_DIR=/usr/local/python3.7.5/include/python3.7m -D PYTHON3_LIBRARY=/usr/local/python3.7.5/lib/libpython3.7m.so -D PYTHON3_NUMPY_INCLUDE_DIRS=/usr/local/python3.7.5/lib/python3.7/site-packages/numpy/core/include -D PYTHON3_PACKAGES_PATH=/usr/local/python3.7.5/lib/python3.7/site-packages -D PYTHON_DEFAULT_EXECUTABLE=/usr/local/python3.7.5/bin/python3.7m ..'
(6)'make -j4'
(7)'make install'
说明:cmake -D 后参数匹配当前环境
- tqdm 安装方式:pip install tqdm
- pycocotools 安装方式:pip install pycocotools
说明: 评测的时候需要用到三方库pycocotools
### 3. 数据集预处理
#### 3.1 修改coco_dataset_path的值
在yolov3/tensorflow/code下对coco_minival_anns.py和coco_trainval_anns.py中coco_dataset_path的值改为当前环境的数据集路径, 如/opt/dataset/coco2014。
#### 3.2 运行脚本
```
python3.7 coco_minival_anns.py
python3.7 coco_trainval_anns.py
```
生成训练和验证样本标注文件coco2014_trainval.txt和coco2014_minival.txt,请将这2个文件放置到yolov3/tensorflow/code/data下。
生成的txt文件内容示例如下:
```
0 xxx/xxx/a.jpg 1920 1080 0 453 369 473 391 1 588 245 608 268
1 xxx/xxx/b.jpg 1920 1080 1 466 403 485 422 2 793 300 809 320
...
```
### 4. 准备预训练模型
#### 4.1 下载预训练模型
请从链接https://pjreddie.com/media/files/yolov3.weights下载darknet框架下的预训练模型。
#### 4.2 模型转换
使用train/atlas_benchmark-master/object_detection/yolov3/tensorflow/code下的convert_weight.py将预处理模型转换为TensorFlow框架的ckpt文件:
在convert_weight.py中将weight_path修改为下载下的预训练模型文件的路径,save_path的值修改为命名的转换为TensorFlow框架的ckpt文件的路径; 如
```
weight_path = '../yolov3-tf2/data/darknet53.conv.74'
save_path = './data/darknet_weights/darknet53.ckpt'
```
然后执行
```
python3.7 convert_weight.py
```
注意:save_path中ckpt文件的路径不是在train/atlas_benchmark-master/object_detection/yolov3/tensorflow/code/data/darknet_weights/下时, 请将其手动移至该路径;
### 5. 模型训练
#### 5.1 训练参数配置
在train/yaml/YoLoV3.yaml中修改相应配置, 配置项含义:
```
mode: yolov3的单尺度或者多尺度模式,值为single或者 multi
data_url:数据集路径
runmode: 运行模式,是训练还是评测,值为train或者evaluate
ckpt_path: 评测时要用到的ckpt文件的路径, 仅在evaluate时用到
total_epoches: 跑多少个epoch
save_epoch: 多少epoch保存一次ckpt文件
device_group_1p: 跑1p时的device_id
device_group_2p: 跑2p时的device_id
device_group_4p: 跑4p时的device_id
mpirun_ip: 仅集群场景时需要配置, 格式ip1:卡数量1,ip2:卡数量2
docker_image: docker镜像名称:版本号
```
YoLoV3.yaml中配置项示例:
```
mode: single
data_url: /opt/npu/dataset
runmode: train
ckpt_path: /home/benchmark-master720/train/atlas_benchmark-master/object_detection/yolov3/tensorflow/result/TrainingJob-20200724115042
total_epoches: 1
save_epoch: 3
device_group_1p: 0
device_group_2p: 0 1
device_group_4p: 0 1 2 3
mpirun_ip: 90.90.176.152:8,90.90.176.154:8
docker_image: mpirun3:latest
```
#### 5.2 训练脚本启动
当前路径为benchmark包的train文件夹下
```
bash benchmark.sh -e YoLoV3 -hw 1p # host侧1p
bash benchmark.sh -e YoLoV3 -hw 8p # host侧8p
bash benchmark.sh -e YoLoV3 -hw 1p -docker # docker侧1p
bash benchmark.sh -e YoLoV3 -hw 8p -docker # docker侧8p
bash benchmark.sh -e YoLoV3 -ct # host侧集群
bash benchmark.sh -e YoLoV3 -ct -docker # docker侧集群
```
#### 5.3 训练日志
日志在benchmark包的train路径下result中找到YoLoV3的文件夹里。
```
./result/tf_yolov3/TrainingJob-2020xxxxxxxxxx/train_${device_id}.log
./result/TrainingJob-2020xxxxxxxxxx/train_${device_id}.log
./result/tensorflow/yolov3t/TrainingJob-2020xxxxxxxxxx/device_id/hw_yolov3.log
```
### 6. 模型评测
将train/yaml/YoLoV3.yaml中ckpt_path的值改为训练产生的日志的路径, runmode的值改为evaluate,如5.1中示例;
然后运行与训练时相同的脚本,结果参见train.log。
### 7. 训练结果参考
| Model | Npu_nums | mAP | FPS |
| :-------------------- | :------: | :------: | :------: |
| single_scale | 8 | 30.0 | 740 |
| multi_scale | 8 | 31.0 | 340 |
| single_scale | 1 | ---- | 96 |
| multi_scale | 1 | ---- | 44 |
-------
@@ -0,0 +1,13 @@
# dirs
.idea/
__pycache__/
tmp*/
# files
*.pyc
*.log
*.out
data/darknet_weights/*.ckpt*
@@ -0,0 +1,140 @@
# YOLOv3_TensorFlow
### 1. Introduction
This is npu implementation of [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf) using TensorFlow modified from [YOLOv3_TensorFlow](https://github.com/wizyoung/YOLOv3_TensorFlow).
### 2. Requirements
Python version: 3.7.5
Main Python Packages:
- tensorflow >= 1.15.0 (satisfied with NPU)
- opencv-python
- tqdm
### 3. Weights conversion
The pretrained darknet53 weights file can be downloaded [here](https://pjreddie.com/media/files/darknet53.conv.74).
Place this weights file under directory `./data/darknet_weights/` and then run:
```python
python3 convert_weight.py
```
Then the converted TensorFlow checkpoint file will be saved to `./data/darknet_weights/` directory.
The converted weight file may already be included in this repo.
### 4. Training
#### 4.1 Data preparation
0. dataset
To compare with the official implementation, we use, for example, [get_coco_dataset.sh](https://github.com/pjreddie/darknet/blob/master/scripts/get_coco_dataset.sh) to prepare our dataset.
1. annotation file
- ATTENTION: you can use easy tricks to fit default setting
- ln -s ${real_dataset_path} /opt/npu/dataset/coco
Use the scripts below to generate the `coco2014_trainval.txt/coco2014_minival.txt` files under the `./data/` directory.
```python
python3 coco_trainval_anns.py
python3 coco_minival_anns.py
```
One line for one image, in the format like `image_index image_absolute_path img_width img_height box_1 box_2 ... box_n`.
Box_x format:
- `label_index x_min y_min x_max y_max`. (The origin of coordinates is at the left top corner, left top => (xmin, ymin), right bottom => (xmax, ymax).)
- `image_index` is the line index which starts from zero. `label_index` is in range [0, class_num - 1].
For example:
```
0 xxx/xxx/a.jpg 1920 1080 0 453 369 473 391 1 588 245 608 268
1 xxx/xxx/b.jpg 1920 1080 1 466 403 485 422 2 793 300 809 320
...
```
(2) class_names file:
Generate the `data.names` file under `./data/` directory. Each line represents a class name.
For example:
```
bird
person
bike
...
```
The COCO dataset class names file is placed at `./data/coco.names`.
(3) prior anchor file:
Using the kmeans algorithm to get the prior anchors:
```
python get_kmeans.py
```
Then you will get 9 anchors and the average IoU. Save the anchors to a txt file.
The COCO dataset anchors offered by YOLO's author is placed at `./data/yolo_anchors.txt`, you can use that one too.
The yolo anchors computed by the kmeans script is on the resized image scale. The default resize method is the letterbox resize, i.e., keep the original aspect ratio in the resized image.
#### 4.2 Training
1. single scale
Using `npu_train_*p_single.sh`. The hyper-parameters and the corresponding annotations can be found in `args_single.py`:
```shell
bash npu_train_1p_single.sh
or
bash npu_train_8p_single.sh
```
2. multi scale
Using `npu_train_*p_multi.sh`. The hyper-parameters and the corresponding annotations can be found in `args_multi.py`:
```shell
bash npu_train_1p_multi.sh
or
bash npu_train_8p_multi.sh
```
Check the `args.py` for more details. You should set the parameters yourself in your own specific task.
3. training details
1. nohup.out -- training task main_log
2. ./training/t1/D0/train_0.log -- training host log
3. training/t1/D0/training/train.log -- training perf log
### 5. Evaluation
Using `eval.sh` to evaluate the validation or test dataset. The parameters are as following:
```shell
bash eval.sh
```
Check the `eval.py` for more details. You could set the parameters yourself.
You will get the mAP metrics results using official cocoapi.
Use `tail -f eval_*.out` to watch the evaluation results of the models.
### 6. Training result
| Model | Npu_nums | mAP | FPS |
| :-------------------- | :------: | :------: | :------: |
| single_scale | 8 | 30.0 | 740 |
| multi_scale | 8 | 31.0 | 340 |
| single_scale | 1 | ---- | 96 |
| multi_scale | 1 | ---- | 44 |
-------
### Credits:
I referred to many fantastic repos during the implementation:
[YunYang1994/tensorflow-yolov3](https://github.com/YunYang1994/tensorflow-yolov3)
[qqwweee/keras-yolo3](https://github.com/qqwweee/keras-yolo3)
[eriklindernoren/PyTorch-YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3)
[pjreddie/darknet](https://github.com/pjreddie/darknet)
[dmlc/gluon-cv](https://github.com/dmlc/gluon-cv/tree/master/scripts/detection/yolo)
@@ -0,0 +1,110 @@
# coding: utf-8
# This file contains the parameter used in train.py
from __future__ import division, print_function
from utils.misc_utils import parse_anchors, read_class_names
import math
import os
# Output locations. These are relative to the current working directory, so
# each training process writes below the directory it was started in.
save_dir = './training/'  # The directory of the weights to save.
log_dir = './training/logs/'  # The directory to store the tensorboard log files.
progress_log_path = './training/train.log'  # The path to record the training progress.

if not os.path.exists(save_dir):
    os.makedirs(save_dir)
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

# Directory containing this file; input data paths are resolved against it.
work_path = os.path.realpath(__file__+"/..")

### Some paths
train_file = os.path.realpath(os.path.join(work_path, './data/coco2014_trainval.txt'))  # The path of the training txt file.
val_file = os.path.realpath(os.path.join(work_path, './data/coco2014_minival.txt'))  # The path of the validation txt file.
restore_path = os.path.realpath(os.path.join(work_path, './data/darknet_weights/darknet53.ckpt'))  # The path of the weights to restore.
anchor_path = os.path.realpath(os.path.join(work_path, './data/yolo_anchors.txt'))  # The path of the anchor txt file.
class_name_path = os.path.realpath(os.path.join(work_path, './data/coco.names'))  # The path of the class names.

### Distribution setting
# RANK_SIZE must be set in the environment; a missing value raises KeyError.
num_gpus = int(os.environ['RANK_SIZE'])
iterations_per_loop = 10

### Training related numbers
batch_size = 16
img_size = [608, 608]  # Images will be resized to `img_size` and fed to the network, size format: [width, height]
letterbox_resize = True  # Whether to use the letterbox resize, i.e., keep the original aspect ratio in the resized image.
total_epoches = 200
train_evaluation_step = 1000  # Evaluate on the training batch after some steps.
val_evaluation_epoch = 2  # Evaluate on the whole validation dataset after some epochs. Set to None to evaluate every epoch.
save_epoch = 10  # Save the model after some epochs.
batch_norm_decay = 0.99  # decay in bn ops
weight_decay = 5e-4  # l2 weight decay
global_step = 0  # used when resuming training

### tf.data parameters
num_threads = 8  # Number of threads for image processing used in tf.data pipeline.
prefetech_buffer = batch_size * 4  # Prefetch buffer used in tf.data pipeline.

### Learning rate and optimizer
optimizer_name = 'momentum'  # Chosen from [sgd, momentum, adam, rmsprop]
save_optimizer = True  # Whether to save the optimizer parameters into the checkpoint file.
learning_rate_base = 75e-4
learning_rate_base_batch_size = 64
# Scale the base learning rate linearly with the global batch size.
learning_rate_init = learning_rate_base * ((batch_size * num_gpus) / learning_rate_base_batch_size)
lr_type = 'piecewise'  # Chosen from [fixed, exponential, cosine_decay, cosine_decay_restart, piecewise]
lr_decay_epoch = 5  # Epochs after which learning rate decays. Int or float. Used when chosen `exponential` and `cosine_decay_restart` lr_type.
lr_decay_factor = 0.96  # The learning rate decay factor. Used when chosen `exponential` lr_type.
lr_lower_bound = 1e-6  # The minimum learning rate.
# only used in piecewise lr type
pw_boundaries = [80, 90]  # epoch based boundaries
pw_values = [learning_rate_init, learning_rate_init*0.1, learning_rate_init*0.01]

### Load and finetune
# Choose the parts you want to restore the weights. List form.
# restore_include: None, restore_exclude: None => restore the whole model
# restore_include: None, restore_exclude: scope => restore the whole model except `scope`
# restore_include: scope1, restore_exclude: scope2 => if scope1 contains scope2, restore scope1 and not restore scope2 (scope1 - scope2)
# choice 1: only restore the darknet body
# restore_include = ['yolov3/darknet53_body']
restore_exclude = None
# choice 2: restore all layers except the last 3 conv2d layers in 3 scale
restore_include = None
# restore_exclude = ['yolov3/yolov3_head/Conv_14', 'yolov3/yolov3_head/Conv_6', 'yolov3/yolov3_head/Conv_22']
# restore_exclude = None
# Choose the parts you want to finetune. List form.
# Set to None to train the whole model.
# update_part = ['yolov3/yolov3_head']
update_part = None

### other training strategies
multi_scale_train = True  # Whether to apply multi-scale training strategy. Image size varies from [320, 320] to [640, 640] by default.
use_label_smooth = False  # Whether to use class label smoothing strategy.
use_focal_loss = False  # Whether to apply focal loss on the conf loss.
use_mix_up = False  # Whether to use mix up data augmentation strategy.
use_warm_up = True  # whether to use warm up strategy to prevent from gradient exploding.
warm_up_epoch = min(total_epoches*0.1, 3)  # Warm up training epoches. Set to a larger value if gradient explodes.

### some constants in validation
# nms
nms_threshold = 0.5  # iou threshold in nms operation
score_threshold = 0.001  # threshold of the probability of the classes in nms operation, i.e. score = pred_confs * pred_probs. set lower for higher recall.
nms_topk = 100  # keep at most nms_topk outputs after nms
# mAP eval
eval_threshold = 0.5  # the iou threshold applied in mAP evaluation
use_voc_07_metric = False  # whether to use voc 2007 evaluation metric, i.e. the 11-point metric


def _count_lines(path):
    """Return the number of lines in the text file at `path`.

    The file is closed before returning, so no handle is leaked.
    """
    with open(path, 'r') as handle:
        return sum(1 for _ in handle)


### parse some params
anchors = parse_anchors(anchor_path)
classes = read_class_names(class_name_path)
class_num = len(classes)
# One annotation line per image, so the line count is the image count.
train_img_cnt = _count_lines(train_file)
val_img_cnt = _count_lines(val_file)
# Number of steps each device runs per epoch.
train_batch_num = int(float(train_img_cnt) / batch_size / num_gpus)

lr_decay_freq = int(train_batch_num * lr_decay_epoch)
# Convert the epoch based boundaries into global-step based boundaries.
pw_boundaries = [float(i) * train_batch_num + global_step for i in pw_boundaries]
@@ -0,0 +1,105 @@
# coding: utf-8
# This file contains the parameter used in train.py
from __future__ import division, print_function
from utils.misc_utils import parse_anchors, read_class_names
import math
import os
# Output locations. These are relative to the current working directory, so
# each training process writes below the directory it was started in.
save_dir = './training/'  # The directory of the weights to save.
log_dir = './training/logs/'  # The directory to store the tensorboard log files.
progress_log_path = './training/train.log'  # The path to record the training progress.

if not os.path.exists(save_dir):
    os.makedirs(save_dir)
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

# Directory containing this file; input data paths are resolved against it.
work_path = os.path.realpath(__file__+"/..")

### Some paths
train_file = os.path.realpath(os.path.join(work_path, './data/coco2014_trainval.txt'))  # The path of the training txt file.
val_file = os.path.realpath(os.path.join(work_path, './data/coco2014_minival.txt'))  # The path of the validation txt file.
restore_path = os.path.realpath(os.path.join(work_path, './data/darknet_weights/darknet53.ckpt'))  # The path of the weights to restore.
anchor_path = os.path.realpath(os.path.join(work_path, './data/yolo_anchors.txt'))  # The path of the anchor txt file.
class_name_path = os.path.realpath(os.path.join(work_path, './data/coco.names'))  # The path of the class names.

### Distribution setting
# RANK_SIZE must be set in the environment; a missing value raises KeyError.
num_gpus = int(os.environ['RANK_SIZE'])
iterations_per_loop = 10

### Training related numbers
batch_size = 32
img_size = [416, 416]  # Images will be resized to `img_size` and fed to the network, size format: [width, height]
letterbox_resize = True  # Whether to use the letterbox resize, i.e., keep the original aspect ratio in the resized image.
total_epoches = 200
train_evaluation_step = 1000  # Evaluate on the training batch after some steps.
val_evaluation_epoch = 2  # Evaluate on the whole validation dataset after some epochs. Set to None to evaluate every epoch.
save_epoch = 10  # Save the model after some epochs.
batch_norm_decay = 0.99  # decay in bn ops
weight_decay = 5e-4  # l2 weight decay
global_step = 0  # used when resuming training

### tf.data parameters
num_threads = 8  # Number of threads for image processing used in tf.data pipeline.
prefetech_buffer = batch_size * 4  # Prefetch buffer used in tf.data pipeline.

### Learning rate and optimizer
optimizer_name = 'momentum'  # Chosen from [sgd, momentum, adam, rmsprop]
save_optimizer = True  # Whether to save the optimizer parameters into the checkpoint file.
learning_rate_base = 5e-3
learning_rate_base_batch_size = 64
# Scale the base learning rate linearly with the global batch size.
learning_rate_init = learning_rate_base * ((batch_size * num_gpus) / learning_rate_base_batch_size)
lr_type = 'piecewise'  # Chosen from [fixed, exponential, cosine_decay, cosine_decay_restart, piecewise]
lr_decay_epoch = 5  # Epochs after which learning rate decays. Int or float. Used when chosen `exponential` and `cosine_decay_restart` lr_type.
lr_decay_factor = 0.96  # The learning rate decay factor. Used when chosen `exponential` lr_type.
lr_lower_bound = 1e-6  # The minimum learning rate.
# only used in piecewise lr type
pw_boundaries = [80, 90]  # epoch based boundaries
pw_values = [learning_rate_init, learning_rate_init*0.1, learning_rate_init*0.01]

### Load and finetune
# Choose the parts you want to restore the weights. List form.
# restore_include: None, restore_exclude: None => restore the whole model
# restore_include: None, restore_exclude: scope => restore the whole model except `scope`
# restore_include: scope1, restore_exclude: scope2 => if scope1 contains scope2, restore scope1 and not restore scope2 (scope1 - scope2)
# choice 1: only restore the darknet body
# restore_include = ['yolov3/darknet53_body']
restore_exclude = None
# choice 2: restore all layers except the last 3 conv2d layers in 3 scale
restore_include = None
# restore_exclude = ['yolov3/yolov3_head/Conv_14', 'yolov3/yolov3_head/Conv_6', 'yolov3/yolov3_head/Conv_22']
# Choose the parts you want to finetune. List form.
# Set to None to train the whole model.
# update_part = ['yolov3/yolov3_head']
update_part = None

### other training strategies
multi_scale_train = False  # Whether to apply multi-scale training strategy. Image size varies from [320, 320] to [640, 640] by default.
use_label_smooth = False  # Whether to use class label smoothing strategy.
use_focal_loss = False  # Whether to apply focal loss on the conf loss.
use_mix_up = False  # Whether to use mix up data augmentation strategy.
use_warm_up = True  # whether to use warm up strategy to prevent from gradient exploding.
warm_up_epoch = min(total_epoches*0.1, 3)  # Warm up training epoches. Set to a larger value if gradient explodes.

### some constants in validation
# nms
nms_threshold = 0.5  # iou threshold in nms operation
score_threshold = 0.001  # threshold of the probability of the classes in nms operation, i.e. score = pred_confs * pred_probs. set lower for higher recall.
nms_topk = 100  # keep at most nms_topk outputs after nms
# mAP eval
eval_threshold = 0.5  # the iou threshold applied in mAP evaluation
use_voc_07_metric = False  # whether to use voc 2007 evaluation metric, i.e. the 11-point metric


def _count_lines(path):
    """Return the number of lines in the text file at `path`.

    The file is closed before returning, so no handle is leaked.
    """
    with open(path, 'r') as handle:
        return sum(1 for _ in handle)


### parse some params
anchors = parse_anchors(anchor_path)
classes = read_class_names(class_name_path)
class_num = len(classes)
# One annotation line per image, so the line count is the image count.
train_img_cnt = _count_lines(train_file)
val_img_cnt = _count_lines(val_file)
# Number of steps each device runs per epoch.
train_batch_num = int(float(train_img_cnt) / batch_size / num_gpus)

lr_decay_freq = int(train_batch_num * lr_decay_epoch)
# Convert the epoch based boundaries into global-step based boundaries.
pw_boundaries = [float(i) * train_batch_num + global_step for i in pw_boundaries]
@@ -0,0 +1,113 @@
import json,cv2
from collections import defaultdict
# Builds data/coco2014_minival.txt: one line per image that IS listed in the
# 5k split file, in the format
#   <index> <image path> <width> <height> [<class> <x_min> <y_min> <x_max> <y_max>]...

ban_path = './data/5k.txt'
coco_dataset_path = '/opt/npu/dataset/coco/coco2014'
output_path = 'data/coco2014_minival.txt'

# (first_id, last_id, offset): category ids in [first_id, last_id] are shifted
# down by `offset`, which turns the sparse COCO ids 1..90 into indices 0..79.
_CATEGORY_OFFSETS = (
    (1, 11, 1),
    (13, 25, 2),
    (27, 28, 3),
    (31, 44, 5),
    (46, 65, 6),
    (67, 67, 7),
    (70, 70, 9),
    (72, 82, 10),
    (84, 90, 11),
)


def coco_category_to_index(cat):
    """Map a COCO `category_id` to its contiguous class index.

    Ids outside every range of `_CATEGORY_OFFSETS` are returned unchanged.
    """
    for first, last, offset in _CATEGORY_OFFSETS:
        if first <= cat <= last:
            return cat - offset
    return cat


def parse_ban_list(text):
    """Return the set of image file names (basenames) listed in `text`.

    `text` holds one path per line. Blank lines are ignored, and the last
    entry is kept whether or not the text ends with a newline.
    """
    return {line.strip().split('/')[-1] for line in text.splitlines() if line.strip()}


def format_box(bbox, cat):
    """Format one box as ' <class> <x_min> <y_min> <x_max> <y_max>'.

    `bbox` is `[x, y, width, height]`; every value is truncated with `int`
    before the max corner is computed.
    """
    x_min = int(bbox[0])
    y_min = int(bbox[1])
    x_max = x_min + int(bbox[2])
    y_max = y_min + int(bbox[3])
    return " %d %d %d %d %d" % (int(cat), x_min, y_min, x_max, y_max)


def collect_boxes(annotation_file, image_pattern, name_box_id):
    """Append `[bbox, class_index]` for every annotation to `name_box_id`.

    `annotation_file` and `image_pattern` are relative to `coco_dataset_path`;
    `image_pattern` is %-formatted with the annotation's `image_id`.
    """
    with open(coco_dataset_path + annotation_file, encoding='utf-8') as f:
        annotations = json.load(f)['annotations']
    for ant in annotations:
        name = coco_dataset_path + image_pattern % ant['image_id']
        name_box_id[name].append([ant['bbox'], coco_category_to_index(ant['category_id'])])


def main():
    """Write the annotation lines of all images that belong to the 5k split."""
    with open(ban_path, 'r') as f:
        ban_list = parse_ban_list(f.read())

    name_box_id = defaultdict(list)
    collect_boxes("/annotations/instances_train2014.json",
                  '/train2014/COCO_train2014_%012d.jpg', name_box_id)
    collect_boxes("/annotations/instances_val2014.json",
                  '/val2014/COCO_val2014_%012d.jpg', name_box_id)

    with open(output_path, 'w') as out:
        ii = 0  # running index of the images actually written
        for key, box_infos in name_box_id.items():
            file_name = key.split('/')[-1]
            if file_name not in ban_list:
                continue
            print('5k', file_name)
            img = cv2.imread(key)
            # cv2.imread returns None instead of raising when it cannot read.
            if img is None:
                raise IOError('cannot read image: %s' % key)
            h, w = img.shape[:2]
            boxes = ''.join(format_box(info[0], info[1]) for info in box_infos)
            out.write('%d %s %d %d%s\n' % (ii, key, w, h, boxes))
            ii += 1


if __name__ == '__main__':
    main()
@@ -0,0 +1,113 @@
import json,cv2
from collections import defaultdict
# Builds data/coco2014_trainval.txt: one line per image that is NOT listed in
# the 5k split file, in the format
#   <index> <image path> <width> <height> [<class> <x_min> <y_min> <x_max> <y_max>]...

ban_path = './data/5k.txt'
coco_dataset_path = '/opt/npu/dataset/coco/coco2014'
output_path = 'data/coco2014_trainval.txt'

# (first_id, last_id, offset): category ids in [first_id, last_id] are shifted
# down by `offset`, which turns the sparse COCO ids 1..90 into indices 0..79.
_CATEGORY_OFFSETS = (
    (1, 11, 1),
    (13, 25, 2),
    (27, 28, 3),
    (31, 44, 5),
    (46, 65, 6),
    (67, 67, 7),
    (70, 70, 9),
    (72, 82, 10),
    (84, 90, 11),
)


def coco_category_to_index(cat):
    """Map a COCO `category_id` to its contiguous class index.

    Ids outside every range of `_CATEGORY_OFFSETS` are returned unchanged.
    """
    for first, last, offset in _CATEGORY_OFFSETS:
        if first <= cat <= last:
            return cat - offset
    return cat


def parse_ban_list(text):
    """Return the set of image file names (basenames) listed in `text`.

    `text` holds one path per line. Blank lines are ignored, and the last
    entry is kept whether or not the text ends with a newline.
    """
    return {line.strip().split('/')[-1] for line in text.splitlines() if line.strip()}


def format_box(bbox, cat):
    """Format one box as ' <class> <x_min> <y_min> <x_max> <y_max>'.

    `bbox` is `[x, y, width, height]`; every value is truncated with `int`
    before the max corner is computed.
    """
    x_min = int(bbox[0])
    y_min = int(bbox[1])
    x_max = x_min + int(bbox[2])
    y_max = y_min + int(bbox[3])
    return " %d %d %d %d %d" % (int(cat), x_min, y_min, x_max, y_max)


def collect_boxes(annotation_file, image_pattern, name_box_id):
    """Append `[bbox, class_index]` for every annotation to `name_box_id`.

    `annotation_file` and `image_pattern` are relative to `coco_dataset_path`;
    `image_pattern` is %-formatted with the annotation's `image_id`.
    """
    with open(coco_dataset_path + annotation_file, encoding='utf-8') as f:
        annotations = json.load(f)['annotations']
    for ant in annotations:
        name = coco_dataset_path + image_pattern % ant['image_id']
        name_box_id[name].append([ant['bbox'], coco_category_to_index(ant['category_id'])])


def main():
    """Write the annotation lines of all images outside the 5k split."""
    with open(ban_path, 'r') as f:
        ban_list = parse_ban_list(f.read())

    name_box_id = defaultdict(list)
    collect_boxes("/annotations/instances_train2014.json",
                  '/train2014/COCO_train2014_%012d.jpg', name_box_id)
    collect_boxes("/annotations/instances_val2014.json",
                  '/val2014/COCO_val2014_%012d.jpg', name_box_id)

    with open(output_path, 'w') as out:
        ii = 0  # running index of the images actually written
        for key, box_infos in name_box_id.items():
            file_name = key.split('/')[-1]
            if file_name in ban_list:
                continue
            print('trainval', file_name)
            img = cv2.imread(key)
            # cv2.imread returns None instead of raising when it cannot read.
            if img is None:
                raise IOError('cannot read image: %s' % key)
            h, w = img.shape[:2]
            boxes = ''.join(format_box(info[0], info[1]) for info in box_infos)
            out.write('%d %s %d %d%s\n' % (ii, key, w, h, boxes))
            ii += 1


if __name__ == '__main__':
    main()
@@ -0,0 +1,38 @@
# coding: utf-8
# for more details about the yolo darknet weights file, refer to
# https://itnext.io/implementing-yolo-v3-in-tensorflow-tf-slim-c3c55ff59dbe
from __future__ import division, print_function
import os
import sys
import tensorflow as tf
import numpy as np
from model import yolov3
from utils.misc_utils import parse_anchors, load_weights
# Convert the darknet weight file into a TensorFlow checkpoint.
num_class = 80
img_size = 416
weight_path = '../yolov3-tf2/data/darknet53.conv.74'
save_path = './data/darknet_weights/darknet53.ckpt'
anchors = parse_anchors('./data/yolo_anchors.txt')

# Use the configured class count instead of a second hard-coded literal.
model = yolov3(num_class, anchors)

# Saver.save() does not create missing parent directories, so make sure the
# target directory exists before the (slow) graph construction starts.
save_dir = os.path.dirname(save_path)
if save_dir and not os.path.isdir(save_dir):
    os.makedirs(save_dir)

with tf.Session() as sess:
    inputs = tf.placeholder(tf.float32, [1, img_size, img_size, 3])

    # Build the network once so that its variables exist under 'yolov3'.
    with tf.variable_scope('yolov3'):
        feature_map = model.forward(inputs)

    yolo_variables = tf.global_variables(scope='yolov3')
    saver = tf.train.Saver(var_list=yolo_variables)

    # load_weights returns the ops that assign the darknet values.
    load_ops = load_weights(yolo_variables, weight_path)
    sess.run(tf.global_variables_initializer())
    sess.run(load_ops)
    saver.save(sess, save_path=save_path)
    print('TensorFlow model checkpoint has been saved to {}'.format(save_path))
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,80 @@
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
@@ -0,0 +1 @@
10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 84 KiB

@@ -0,0 +1,220 @@
# coding: utf-8
from __future__ import division, print_function
import tensorflow as tf
import numpy as np
import argparse
import cv2
from utils.misc_utils import parse_anchors, read_class_names
from utils.nms_utils import gpu_nms, cpu_nms
from utils.plot_utils import get_color_table, plot_one_box
from utils.data_aug import letterbox_resize
from model import yolov3
from tqdm import trange
import json
import os,time
# npu modified
from npu_bridge.estimator import npu_ops
from npu_bridge.estimator.npu.npu_optimizer import NPUDistributedOptimizer
from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig
from npu_bridge.estimator.npu import util
'''
coco weight from official checked
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.309
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.555
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.311
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.136
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.337
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.460
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.273
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.430
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.465
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.270
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.511
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.629
'''
def _str2bool(value):
    """Parse a command-line boolean that is written as text.

    argparse's `type=bool` calls bool() on the raw string, so every non-empty
    value -- including "False" -- becomes True. This helper interprets the
    text explicitly and rejects anything it does not recognise.
    """
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))


parser = argparse.ArgumentParser(description="YOLO-V3 test single image test procedure.")
parser.add_argument("--annotation_txt", type=str, default='../code/data/coco2014_minival.txt',
                    help="The path of the input image. Or annotation label txt.")
parser.add_argument("--anchor_path", type=str, default="../code/data/yolo_anchors.txt",
                    help="The path of the anchor txt file.")
parser.add_argument("--new_size", nargs='*', type=int, default=[416, 416],
                    help="Resize the input image with `new_size`, size format: [width, height]")
parser.add_argument("--max_test", type=int, default=-1,
                    help="max step for test")
parser.add_argument("--score_thresh", type=float, default=1e-3,
                    help="score_threshold for test")
parser.add_argument("--nms_thresh", type=float, default=0.5,
                    help="iou_threshold for test")
parser.add_argument("--max_boxes", type=int, default=100,
                    help="max_boxes for test")
parser.add_argument("--letterbox_resize", type=lambda x: (str(x).lower() == 'true'), default=True,
                    help="Whether to use the letterbox resize.")
parser.add_argument("--class_name_path", type=str, default="../code/data/coco.names",
                    help="The path of the class names.")
# Alternative default kept for reference:
#   "./training_s2/checkpoint_dir/model.ckpt-45800"
parser.add_argument("--restore_path", type=str, default="../code/data/darknet_weights/yolo3.ckpt",
                    help="The path of the weights to restore.")
# These two flags take an explicit value ("--save_json True"); _str2bool makes
# "--save_json False" actually disable the feature.
parser.add_argument("--save_img", type=_str2bool, default=False,
                    help="whether to save detected-result image")
parser.add_argument("--save_json", type=_str2bool, default=False,
                    help="whether to save detected-result cocolike json")
parser.add_argument("--save_json_path", type=str, default="../result/result.json",
                    help="The path of the result.json.")
args = parser.parse_args()

# Values derived from the parsed arguments.
args.anchors = parse_anchors(args.anchor_path)
args.classes = read_class_names(args.class_name_path)
args.num_class = len(args.classes)
color_table = get_color_table(args.num_class)
# The 80 category ids used by the COCO annotations. They span 1..90 but skip
# 12, 26, 29, 30, 45, 66, 68, 69, 71 and 83.
_COCO_CATEGORY_IDS = (
    list(range(1, 12)) + list(range(13, 26)) + list(range(27, 29)) +
    list(range(31, 45)) + list(range(46, 66)) + [67, 70] +
    list(range(72, 83)) + list(range(84, 91))
)

# COCO category id -> contiguous 1-based class id (1..80), in ascending order.
cat_id_to_real_id = {
    coco_id: real_id for real_id, coco_id in enumerate(_COCO_CATEGORY_IDS, 1)
}

# Inverse lookup: contiguous 1-based class id -> COCO category id.
real_id_to_cat_id = {real_id: coco_id for coco_id, real_id in cat_id_to_real_id.items()}
def get_default_dict():
    """Return a fresh, unfilled COCO-style detection record.

    Every call builds a new dict (and a new empty bbox list), so callers can
    fill in the fields without affecting other records.
    """
    return dict(image_id=-1, category_id=-1, bbox=[], score=0)
# Parse the annotation list. Every non-empty line has the form
#   <index> <image_path> <width> <height> [<label> <x1> <y1> <x2> <y2>] ...
eval_path = args.annotation_txt
with open(eval_path, 'r') as f:
    # Keep every non-blank line. The previous `split('\n')[:-1]` silently
    # dropped the last record when the file had no trailing newline.
    eval_file_list = [line.strip() for line in f.read().split('\n') if line.strip()]
print(len(eval_file_list))

# Maps the integer index at the start of each line to that image's metadata.
eval_file_dict = {}
for i in eval_file_list:
    tmp_list = i.split(' ')
    idx = int(tmp_list[0])
    path = tmp_list[1]
    w = float(tmp_list[2])
    h = float(tmp_list[3])
    box_fields = tmp_list[4:]
    # Each ground-truth box occupies five fields; values are kept as strings.
    bbox = [
        box_fields[start:start + 5]
        for start in range(0, (len(box_fields) // 5) * 5, 5)
    ]
    eval_file_dict[idx] = {
        'path': path,
        'w': w,
        'h': h,
        'bbox': bbox
    }
# Session configuration: register a custom graph optimizer named
# "NpuOptimizer" in the rewrite options so the session is set up for the NPU.
config = tf.ConfigProto()
custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
custom_op.name = "NpuOptimizer"
custom_op.parameter_map["use_off_line"].b = True # training on Ascend chips
# Switch off TensorFlow's remapping rewrite pass.
# NOTE(review): presumably required by the NPU bridge -- confirm against the npu_bridge docs.
config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
# Collects one COCO-style detection record (see get_default_dict) per predicted box.
json_out = []
with tf.Session(config=config) as sess:
# with tf.Session() as sess:
# Build the inference graph. The placeholder holds a single image laid out as
# [1, height, width, 3]; args.new_size is given as [width, height].
input_data = tf.placeholder(tf.float32, [1, args.new_size[1], args.new_size[0], 3], name='input_data')
yolo_model = yolov3(args.num_class, args.anchors)
with tf.variable_scope('yolov3'):
# NOTE(review): the second positional argument is presumably is_training=False -- confirm in model.py.
pred_feature_maps = yolo_model.forward(input_data, False)
pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
# Final score used for NMS: confidence output multiplied by the class probabilities.
pred_scores = pred_confs * pred_probs
# boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, args.num_class, max_boxes=100, score_thresh=args.score_thresh, nms_thresh=0.5)
saver = tf.train.Saver()
# If restore_path contains neither '.ckpt' nor 'model-', it is treated as a
# directory: the first line of its 'checkpoint' file is read and the text after
# the colon (quotes removed) is joined to the directory as the checkpoint path.
if args.restore_path.find('.ckpt') < 0 and args.restore_path.find('model-') < 0:
with open(os.path.join(args.restore_path, 'checkpoint'), 'r')as f:
tmp_checkpoint = f.readline()
tmp_checkpoint = tmp_checkpoint.replace('"', '').split(':')[1].strip()
args.restore_path = os.path.join(args.restore_path, tmp_checkpoint)
print('tmp_checkpoint: ', tmp_checkpoint)
# input()
saver.restore(sess, args.restore_path)
# A positive --max_test caps the number of evaluated images; otherwise all are used.
if args.max_test > 0:
test_len = min(args.max_test, len(eval_file_dict.keys()))
else:
test_len = len(eval_file_dict.keys())
# eval_file_dict is indexed with 0..test_len-1 here, so the indices in the
# annotation file have to be contiguous and start at 0.
for test_idx in trange(test_len):
img_path = eval_file_dict[test_idx]['path']
# NOTE(review): cv2.imread returns None for an unreadable file; that case is not handled here.
img_ori = cv2.imread(img_path)
if args.letterbox_resize:
img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0], args.new_size[1])
else:
height_ori, width_ori = img_ori.shape[:2]
img = cv2.resize(img_ori, tuple(args.new_size))
# Convert BGR (OpenCV order) to RGB, cast to float32, add a batch axis and scale by 1/255.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.asarray(img, np.float32)
img = img[np.newaxis, :] / 255.
# boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input_data: img})
# print('bbox: ',boxes_)
# `t` is only consumed by the commented-out FPS print below.
t = time.time()
boxes_, scores_ = sess.run([pred_boxes, pred_scores], feed_dict={input_data: img})
# print("FPS: ", 1/(time.time() - t))
# Non-maximum suppression is done on the host with cpu_nms rather than inside the graph.
boxes_, scores_, labels_ = cpu_nms(boxes_, scores_, args.num_class, args.max_boxes, args.score_thresh, args.nms_thresh)
# print('bbox: ', boxes_)
# try:
# boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
# except:
# print("boxes_: ", boxes_)
# continue
# print("boxes_: ", boxes_)
# rescale the coordinates to the original image
if args.letterbox_resize:
# Undo the letterbox transform: subtract the offsets (dw, dh), then divide by the resize ratio.
boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
else:
# Plain resize: scale x and y independently back to the original width/height.
boxes_[:, [0, 2]] *= (width_ori / float(args.new_size[0]))
boxes_[:, [1, 3]] *= (height_ori / float(args.new_size[1]))
if args.save_img:
# print("box coords:")
# print(boxes_)
# print('*' * 30)
# print("scores:")
# print(scores_)
# print('*' * 30)
# print("labels:")
# print(labels_)
# Draw every detection on the original image and write it below 'tmp/'.
# NOTE(review): the 'tmp' directory is not created by this script.
for i in range(len(boxes_)):
x0, y0, x1, y1 = boxes_[i]
plot_one_box(img_ori, [x0, y0, x1, y1],
label=args.classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100),
color=color_table[labels_[i]])
cv2.imwrite('tmp/%d_detection_result.jpg' % test_idx, img_ori)
print('%d done' % test_idx)
if args.save_json:
# Append one record per detection; bbox is stored as [x, y, width, height].
for i in range(len(boxes_)):
x0, y0, x1, y1 = boxes_[i]
bw = x1 - x0
bh = y1 - y0
s = scores_[i]
c = labels_[i]
t_dict = get_default_dict()
# The image id is the integer after the last '_' in the file name (extension removed).
t_dict['image_id'] = int(img_path.split('/')[-1].split('.')[0].split('_')[-1])
# c + 1 is the key into real_id_to_cat_id (keys 1..80), which yields the COCO category id.
t_dict['category_id'] = real_id_to_cat_id[int(c) + 1]
# Coordinates are truncated to integers before being stored.
t_dict['bbox'] = [int(i) for i in [x0, y0, bw, bh]]
t_dict['score'] = float(s)
json_out.append(t_dict)
if args.save_json:
with open(args.save_json_path, 'w')as f:
json.dump(json_out, f)
print('output json saved to: ', args.save_json_path)
# Resolve eval_coco.py next to this script and run it on the saved JSON in a python3.7 subprocess.
# NOTE(review): confirm this call only runs when the JSON above was actually written.
eval_coco = os.path.realpath(__file__ + "/../eval_coco.py")
os.system('python3.7 %s %s' % (eval_coco, args.save_json_path))
@@ -0,0 +1,61 @@
#export CUDA_VISIBLE_DEVICES=''
#export CUDA_VISIBLE_DEVICES=7
# setting main path
MAIN_PATH=$(dirname $(readlink -f $0))
## set env
#export PYTHONPATH=/usr/local/Ascend/ops/op_impl/built-in/ai_core/tbe/:$MAIN_PATH/../../../
#export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu
#PATH=$PATH:$HOME/bin
#export PATH=$PATH:/usr/local/Ascend/fwkacllib/ccec_compiler/bin:$PATH
#export ASCEND_OPP_PATH=/usr/local/Ascend/opp
# set env
# Ascend installation root.
export ASCEND_HOME=/usr/local/Ascend
# Shared libraries of the Ascend toolkit (fwkacllib), the driver and the add-ons.
export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/
# Python packages shipped with the toolkit (tbe, te, topi, hccl, tfplugin).
# NOTE(review): $currentDir is never assigned in this script, so the last entry
# expands to an empty string -- confirm whether $MAIN_PATH was intended.
export PYTHONPATH=$PYTHONPATH:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/te:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/topi:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/hccl:/usr/local/Ascend/ascend-toolkit/latest/tfplugin/python/site-packages:$currentDir
export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin
export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
# Fixed runtime settings exported for every evaluation process started below.
export DDK_VERSION_FLAG=1.60.T49.0.B201
export NEW_GE_FE_ID=1
export GE_AICPU_FLAG=1
export SOC_VERSION=Ascend910
export JOB_ID=10087
export FUSION_TENSOR_SIZE=1000000000
#export SLOG_PRINT_TO_STDOUT=1
#export DUMP_GE_GRAPH=2
#export DUMP_GRAPH_LEVEL=3
# Start one independent single-device evaluation per device (0-7), each in the
# background and each restoring the checkpoint directory of its own device.
for ((RANK_ID = 0; RANK_ID < 8; RANK_ID++)); do
    export RANK_ID
    export RANK_SIZE=1
    export DEVICE_ID=$RANK_ID
    export DEVICE_INDEX=$RANK_ID

    # Set the device log level; the whole command is passed to su as one quoted string.
    su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[debug]\" --device $RANK_ID"

    RESTORE_PATH=./training/t1/D$RANK_ID/training/

    # Redirect stderr together with stdout so Python tracebacks end up in the
    # per-device log even when the script is not started from a terminal.
    nohup python3.7 eval.py \
        --save_json True \
        --score_thresh 0.0001 \
        --nms_thresh 0.55 \
        --max_boxes 100 \
        --restore_path "$RESTORE_PATH" \
        --max_test 10000 \
        --save_json_path "eval_res_D$RANK_ID.json" > "eval_$RANK_ID.out" 2>&1 &
done
@@ -0,0 +1,57 @@
#-*- coding:utf-8 -*-
# import matplotlib.pyplot as plt
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import numpy as np
import pylab,json
import sys
# pylab.rcParams['figure.figsize'] = (10.0, 8.0)
def get_img_id(file_name):
    """Return the distinct image ids referenced by a COCO result file.

    param:
        file_name: path of a JSON file holding a list of detection records,
            each with an 'image_id' entry
    return:
        a dict keys view with every image id once, in order of first appearance
    """
    # Close the file deterministically instead of leaking the handle.
    with open(file_name, 'r') as result_file:
        annos = json.load(result_file)
    # dict.fromkeys removes duplicates while preserving insertion order.
    return dict.fromkeys(anno['image_id'] for anno in annos).keys()
'''
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.317
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.562
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.321
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.162
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.343
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.448
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.278
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.438
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.464
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.275
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.497
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.625
'''
if __name__ == '__main__':
    # COCOeval accepts the iouTypes 'segm', 'bbox' and 'keypoints';
    # detection results are scored as bounding boxes.
    supported_types = ['segm', 'bbox', 'keypoints']
    iou_type = supported_types[1]

    # Ground-truth annotations (COCO 2014 validation split).
    cocoGt_file = '/opt/npu/dataset/coco/coco2014/annotations/instances_val2014.json'
    cocoGt = COCO(cocoGt_file)

    # The detection result JSON is passed as the first command-line argument.
    cocoDt_file = sys.argv[1]
    detected_ids = get_img_id(cocoDt_file)
    cocoDt = cocoGt.loadRes(cocoDt_file)

    # Restrict the evaluation to the images that actually appear in the
    # result file, in ascending id order.
    imgIds = sorted(detected_ids)

    cocoEval = COCOeval(cocoGt, cocoDt, iou_type)
    cocoEval.params.imgIds = imgIds
    cocoEval.evaluate()    # per-image, per-category matching
    cocoEval.accumulate()  # aggregate the per-image results
    cocoEval.summarize()   # print the AP / AR table
@@ -0,0 +1,155 @@
# coding: utf-8
# This script is modified from https://github.com/lars76/kmeans-anchor-boxes
from __future__ import division, print_function
import numpy as np
def iou(box, clusters):
    """
    Computes the Intersection over Union (IoU) of one box against k clusters,
    with the box and all clusters anchored at the origin.
    param:
        box: tuple or array holding (width, height)
        clusters: numpy array of shape (k, 2), one (width, height) per cluster
    return:
        numpy array of shape (k,) with one IoU value per cluster
    raise:
        ValueError if any overlap width or height is zero
    """
    cluster_w = clusters[:, 0]
    cluster_h = clusters[:, 1]

    # With a shared origin, the overlap is bounded by the smaller width/height.
    overlap_w = np.minimum(cluster_w, box[0])
    overlap_h = np.minimum(cluster_h, box[1])
    if (overlap_w == 0).any() or (overlap_h == 0).any():
        raise ValueError("Box has no area")

    intersection = overlap_w * overlap_h
    union = box[0] * box[1] + cluster_w * cluster_h - intersection

    # The small epsilon keeps the division well-defined.
    return np.true_divide(intersection, union + 1e-10)
def avg_iou(boxes, clusters):
    """
    Computes the mean, over all boxes, of each box's best IoU with any cluster.
    param:
        boxes: numpy array of shape (r, 2), where r is the number of rows
        clusters: numpy array of shape (k, 2) where k is the number of clusters
    return:
        average IoU as a single float
    """
    best_per_box = []
    for row in range(boxes.shape[0]):
        # Best match of this box among all clusters.
        best_per_box.append(np.max(iou(boxes[row], clusters)))
    return np.mean(best_per_box)
def translate_boxes(boxes):
    """
    Moves every box to the origin, keeping only its extent.
    param:
        boxes: numpy array of shape (r, 4); columns 0/1 and 2/3 are the two corners
    return:
        numpy array of shape (r, 2) holding (|col2 - col0|, |col3 - col1|)
    """
    # Work on a copy so the caller's array is left untouched.
    shifted = boxes.copy()
    shifted[:, 2] = np.abs(shifted[:, 2] - shifted[:, 0])
    shifted[:, 3] = np.abs(shifted[:, 3] - shifted[:, 1])
    # Drop the two leading corner columns.
    return np.delete(shifted, [0, 1], axis=1)
def kmeans(boxes, k, dist=np.median):
    """
    Runs k-means clustering on box sizes using 1 - IoU as the distance.
    param:
        boxes: numpy array of shape (r, 2), where r is the number of rows
        k: number of clusters
        dist: function used to update a cluster from its members (called with axis=0)
    return:
        numpy array of shape (k, 2)
    """
    num_boxes = boxes.shape[0]

    distances = np.empty((num_boxes, k))
    previous_assignment = np.zeros((num_boxes,))

    np.random.seed()

    # the Forgy method will fail if the whole array contains the same rows
    clusters = boxes[np.random.choice(num_boxes, k, replace=False)]

    converged = False
    while not converged:
        # Distance of every box to every current cluster.
        for idx in range(num_boxes):
            distances[idx] = 1 - iou(boxes[idx], clusters)

        assignment = np.argmin(distances, axis=1)

        if (previous_assignment == assignment).all():
            # No box changed its cluster: stop without updating further.
            converged = True
        else:
            # Recompute every cluster from the boxes currently assigned to it.
            for cluster_idx in range(k):
                clusters[cluster_idx] = dist(boxes[assignment == cluster_idx], axis=0)
            previous_assignment = assignment

    return clusters
def parse_anno(annotation_path, target_size=None):
    """
    Reads the box sizes from an annotation txt file.
    Each line has the form
        <index> <image_path> <width> <height> [<label> <x_min> <y_min> <x_max> <y_max>] ...
    Blank lines are ignored.
    param:
        annotation_path: path of the annotation txt file
        target_size: optional [width, height]; when given, box sizes are scaled
            with the letterbox ratio (aspect ratio kept) to that image size,
            otherwise they stay on the original image scale
    return:
        numpy array with one [width, height] row per box
    """
    result = []
    # The context manager closes the file even if a line fails to parse.
    with open(annotation_path, 'r') as anno:
        for line in anno:
            line = line.strip()
            if not line:
                continue
            s = line.split(' ')
            img_w = int(s[2])
            img_h = int(s[3])
            s = s[4:]
            box_cnt = len(s) // 5
            for i in range(box_cnt):
                x_min, y_min, x_max, y_max = (float(v) for v in s[i * 5 + 1:i * 5 + 5])
                width = x_max - x_min
                height = y_max - y_min
                assert width > 0
                assert height > 0
                # use letterbox resize, i.e. keep the original aspect ratio
                # get k-means anchors on the resized target image size
                if target_size is not None:
                    resize_ratio = min(target_size[0] / img_w, target_size[1] / img_h)
                    width *= resize_ratio
                    height *= resize_ratio
                result.append([width, height])
    return np.asarray(result)
def get_kmeans(anno, cluster_num=9):
    """
    Clusters the box sizes into anchors.
    param:
        anno: numpy array of shape (r, 2) with box widths and heights
        cluster_num: number of anchors to produce
    return:
        (anchors, ave_iou): anchors as a list of [width, height] integer pairs
        sorted by area, and the average IoU computed before integer truncation
    """
    centroids = kmeans(anno, cluster_num)
    mean_iou = avg_iou(anno, centroids)

    int_anchors = centroids.astype('int').tolist()
    int_anchors.sort(key=lambda wh: wh[0] * wh[1])
    return int_anchors, mean_iou
if __name__ == '__main__':
    # target resize format: [width, height]
    # if target_size is specified, the anchors are on the resized image scale
    # if target_size is set to None, the anchors are on the original image scale
    target_size = [416, 416]

    annotation_path = "train.txt"
    anno_result = parse_anno(annotation_path, target_size=target_size)
    anchors, ave_iou = get_kmeans(anno_result, 9)

    # Format as "w1,h1, w2,h2, ..." -- the layout used by yolo_anchors.txt.
    anchor_string = ', '.join(
        '{},{}'.format(anchor[0], anchor[1]) for anchor in anchors)

    print('anchors are:')
    print(anchor_string)
    print('the average iou is:')
    print(ave_iou)
@@ -0,0 +1,32 @@
{
"board_id": "0x002f",
"chip_info": "910",
"deploy_mode": "lab",
"group_count": "1",
"group_list": [
{
"device_num": "1",
"server_num": "1",
"group_name": "",
"instance_count": "1",
"instance_list": [
{
"devices": [
{
"device_id": "0",
"device_ip": "192.168.100.101"
}
],
"rank_id": "0",
"server_id": "0.0.0.0"
}
]
}
],
"para_plane_nic_location": "device",
"para_plane_nic_name": [
"eth0"
],
"para_plane_nic_num": "1",
"status": "completed"
}
@@ -0,0 +1,43 @@
{
"board_id": "0x002f",
"chip_info": "910",
"deploy_mode": "lab",
"group_count": "1",
"group_list": [
{
"device_num": "2",
"server_num": "1",
"group_name": "",
"instance_count": "2",
"instance_list": [
{
"devices": [
{
"device_id": "0",
"device_ip": "192.168.100.101"
}
],
"rank_id": "0",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "1",
"device_ip": "192.168.101.101"
}
],
"rank_id": "1",
"server_id": "0.0.0.0"
}
]
}
],
"para_plane_nic_location": "device",
"para_plane_nic_name": [
"eth0",
"eth1"
],
"para_plane_nic_num": "2",
"status": "completed"
}
@@ -0,0 +1,65 @@
{
"board_id": "0x002f",
"chip_info": "910",
"deploy_mode": "lab",
"group_count": "1",
"group_list": [
{
"device_num": "4",
"server_num": "1",
"group_name": "",
"instance_count": "4",
"instance_list": [
{
"devices": [
{
"device_id": "0",
"device_ip": "192.168.100.101"
}
],
"rank_id": "0",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "1",
"device_ip": "192.168.101.101"
}
],
"rank_id": "1",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "2",
"device_ip": "192.168.102.101"
}
],
"rank_id": "2",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "3",
"device_ip": "192.168.103.101"
}
],
"rank_id": "3",
"server_id": "0.0.0.0"
}
]
}
],
"para_plane_nic_location": "device",
"para_plane_nic_name": [
"eth0",
"eth1",
"eth2",
"eth3"
],
"para_plane_nic_num": "4",
"status": "completed"
}
@@ -0,0 +1,109 @@
{
"board_id": "0x002f",
"chip_info": "910",
"deploy_mode": "lab",
"group_count": "1",
"group_list": [
{
"device_num": "8",
"server_num": "1",
"group_name": "",
"instance_count": "8",
"instance_list": [
{
"devices": [
{
"device_id": "0",
"device_ip": "192.168.100.101"
}
],
"rank_id": "0",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "1",
"device_ip": "192.168.101.101"
}
],
"rank_id": "1",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "2",
"device_ip": "192.168.102.101"
}
],
"rank_id": "2",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "3",
"device_ip": "192.168.103.101"
}
],
"rank_id": "3",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "4",
"device_ip": "192.168.100.100"
}
],
"rank_id": "4",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "5",
"device_ip": "192.168.101.100"
}
],
"rank_id": "5",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "6",
"device_ip": "192.168.102.100"
}
],
"rank_id": "6",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "7",
"device_ip": "192.168.103.100"
}
],
"rank_id": "7",
"server_id": "0.0.0.0"
}
]
}
],
"para_plane_nic_location": "device",
"para_plane_nic_name": [
"eth0",
"eth1",
"eth2",
"eth3",
"eth4",
"eth5",
"eth6",
"eth7"
],
"para_plane_nic_num": "8",
"status": "completed"
}
@@ -0,0 +1,88 @@
# coding: utf-8
# This file contains the parameter used in train.py
from __future__ import division, print_function
from utils.misc_utils import parse_anchors, read_class_names
import math
### Some paths
train_file = './data/my_data/train.txt' # The path of the training txt file.
val_file = './data/my_data/val.txt' # The path of the validation txt file.
restore_path = './data/darknet_weights/yolov3.ckpt' # The path of the weights to restore.
save_dir = './checkpoint/' # The directory of the weights to save.
log_dir = './data/logs/' # The directory to store the tensorboard log files.
progress_log_path = './data/progress.log' # The path to record the training progress.
anchor_path = './data/yolo_anchors.txt' # The path of the anchor txt file.
class_name_path = './data/voc.names' # The path of the class names.
### Training related numbers
batch_size = 6
img_size = [416, 416] # Images will be resized to `img_size` and fed to the network, size format: [width, height]
letterbox_resize = False # Whether to use the letterbox resize, i.e., keep the original aspect ratio in the resized image.
total_epoches = 100
train_evaluation_step = 100 # Evaluate on the training batch after some steps.
val_evaluation_epoch = 1 # Evaluate on the whole validation dataset after some steps. Set to None to evaluate every epoch.
save_epoch = 10 # Save the model after some epochs.
batch_norm_decay = 0.99 # decay in bn ops
weight_decay = 5e-4 # l2 weight decay
global_step = 0 # used when resuming training
### tf.data parameters
num_threads = 10 # Number of threads for image processing used in tf.data pipeline.
prefetech_buffer = 5 # Prefetech_buffer used in tf.data pipeline.
### Learning rate and optimizer
optimizer_name = 'momentum' # Chosen from [sgd, momentum, adam, rmsprop]
save_optimizer = False # Whether to save the optimizer parameters into the checkpoint file.
learning_rate_init = 1e-4
lr_type = 'piecewise' # Chosen from [fixed, exponential, cosine_decay, cosine_decay_restart, piecewise]
lr_decay_epoch = 5 # Epochs after which learning rate decays. Int or float. Used when chosen `exponential` and `cosine_decay_restart` lr_type.
lr_decay_factor = 0.96 # The learning rate decay factor. Used when chosen `exponential` lr_type.
lr_lower_bound = 1e-6 # The minimum learning rate.
# piecewise params
pw_boundaries = [25, 40] # epoch based boundaries
pw_values = [learning_rate_init, 3e-5, 1e-4]
### Load and finetune
# Choose the parts you want to restore the weights. List form.
# restore_include: None, restore_exclude: None => restore the whole model
# restore_include: None, restore_exclude: scope => restore the whole model except `scope`
# restore_include: scope1, restore_exclude: scope2 => if scope1 contains scope2, restore scope1 and not restore scope2 (scope1 - scope2)
# choice 1: only restore the darknet body
# restore_include = ['yolov3/darknet53_body']
# restore_exclude = None
# choice 2: restore all layers except the last conv2d layer of each of the 3 scales
restore_include = None
restore_exclude = ['yolov3/yolov3_head/Conv_14', 'yolov3/yolov3_head/Conv_6', 'yolov3/yolov3_head/Conv_22']
# Choose the parts you want to finetune. List form.
# Set to None to train the whole model.
update_part = None
### other training strategies
multi_scale_train = True # Whether to apply multi-scale training strategy. Image size varies from [320, 320] to [640, 640] by default.
use_label_smooth = True # Whether to use class label smoothing strategy.
use_focal_loss = True # Whether to apply focal loss on the conf loss.
use_mix_up = True # Whether to use mix up data augmentation strategy.
use_warm_up = True # whether to use warm up strategy to prevent from gradient exploding.
warm_up_epoch = 3 # Warm up training epoches. Set to a larger value if gradient explodes.
### some constants in validation
# nms
nms_threshold = 0.45 # iou threshold in nms operation
score_threshold = 0.01 # threshold of the probability of the classes in nms operation, i.e. score = pred_confs * pred_probs. set lower for higher recall.
nms_topk = 150 # keep at most nms_topk outputs after nms
# mAP eval
eval_threshold = 0.5 # the iou threshold applied in mAP evaluation
use_voc_07_metric = False # whether to use voc 2007 evaluation metric, i.e. the 11-point metric
### parse some params
anchors = parse_anchors(anchor_path)
classes = read_class_names(class_name_path)
class_num = len(classes)


def _count_lines(path):
    """Return the number of lines in the text file at `path`, closing it afterwards."""
    with open(path, 'r') as f:
        return len(f.readlines())


# One annotation line per image.
train_img_cnt = _count_lines(train_file)
val_img_cnt = _count_lines(val_file)
# Number of batches per epoch (the last batch may be partial).
train_batch_num = int(math.ceil(float(train_img_cnt) / batch_size))

# Convert the epoch-based schedule settings into global-step units.
lr_decay_freq = int(train_batch_num * lr_decay_epoch)
pw_boundaries = [float(i) * train_batch_num + global_step for i in pw_boundaries]
@@ -0,0 +1,140 @@
# coding: utf-8
from __future__ import division, print_function
import tensorflow as tf
import numpy as np
import argparse
from tqdm import trange
from utils.data_utils import get_batch_data
from utils.misc_utils import parse_anchors, read_class_names, AverageMeter
from utils.eval_utils import evaluate_on_cpu, evaluate_on_gpu, get_preds_gpu, voc_eval, parse_gt_rec
from utils.nms_utils import gpu_nms
from model import yolov3
#################
# ArgumentParser
#################
parser = argparse.ArgumentParser(description="YOLO-V3 eval procedure.")
# some paths
parser.add_argument("--eval_file", type=str, default="./data/my_data/val.txt",
help="The path of the validation or test txt file.")
parser.add_argument("--restore_path", type=str, default="./data/checkpoint_whole_finetune_no_letterbox/best_model_Epoch_32_step_91046_mAP_0.8754_loss_2.2147_lr_3e-05",
help="The path of the weights to restore.")
parser.add_argument("--anchor_path", type=str, default="./data/yolo_anchors.txt",
help="The path of the anchor txt file.")
parser.add_argument("--class_name_path", type=str, default="./data/voc.names",
help="The path of the class names.")
# some numbers
parser.add_argument("--img_size", nargs='*', type=int, default=[416, 416],
help="Resize the input image to `img_size`, size format: [width, height]")
parser.add_argument("--letterbox_resize", type=lambda x: (str(x).lower() == 'true'), default=False,
help="Whether to use the letterbox resize.")
parser.add_argument("--num_threads", type=int, default=10,
help="Number of threads for image processing used in tf.data pipeline.")
parser.add_argument("--prefetech_buffer", type=int, default=5,
help="Prefetech_buffer used in tf.data pipeline.")
parser.add_argument("--nms_threshold", type=float, default=0.45,
help="IOU threshold in nms operation.")
parser.add_argument("--score_threshold", type=float, default=0.01,
help="Threshold of the probability of the classes in nms operation.")
parser.add_argument("--nms_topk", type=int, default=150,
help="Keep at most nms_topk outputs after nms.")
parser.add_argument("--use_voc_07_metric", type=lambda x: (str(x).lower() == 'true'), default=False,
help="Whether to use the voc 2007 mAP metrics.")
args = parser.parse_args()
# args params derived from the parsed command-line arguments
args.anchors = parse_anchors(args.anchor_path)
args.classes = read_class_names(args.class_name_path)
args.class_num = len(args.classes)
# One annotation line per image; close the file once the lines are counted.
with open(args.eval_file, 'r') as eval_file:
    args.img_cnt = len(eval_file.readlines())
# setting placeholders
is_training = tf.placeholder(dtype=tf.bool, name="phase_train")
handle_flag = tf.placeholder(tf.string, [], name='iterator_handle_flag')
pred_boxes_flag = tf.placeholder(tf.float32, [1, None, None])
pred_scores_flag = tf.placeholder(tf.float32, [1, None, None])
gpu_nms_op = gpu_nms(pred_boxes_flag, pred_scores_flag, args.class_num, args.nms_topk, args.score_threshold, args.nms_threshold)
##################
# tf.data pipeline
##################
# Each element is one annotation line, parsed into image + labels by get_batch_data.
val_dataset = tf.data.TextLineDataset(args.eval_file)
val_dataset = val_dataset.batch(1)
val_dataset = val_dataset.map(
    lambda x: tf.py_func(get_batch_data, [x, args.class_num, args.img_size, args.anchors, 'val', False, False, args.letterbox_resize], [tf.int64, tf.float32, tf.float32, tf.float32, tf.float32]),
    num_parallel_calls=args.num_threads
)
# Dataset transformations return a new dataset; the result has to be
# re-assigned, otherwise the prefetch stage is silently dropped.
val_dataset = val_dataset.prefetch(args.prefetech_buffer)
iterator = val_dataset.make_one_shot_iterator()

image_ids, image, y_true_13, y_true_26, y_true_52 = iterator.get_next()
# tf.py_func outputs carry no static shape information, so restore it here.
image_ids.set_shape([None])
y_true = [y_true_13, y_true_26, y_true_52]
image.set_shape([None, args.img_size[1], args.img_size[0], 3])
for y in y_true:
    y.set_shape([None, None, None, None, None])
##################
# Model definition
##################
yolo_model = yolov3(args.class_num, args.anchors)
with tf.variable_scope('yolov3'):
pred_feature_maps = yolo_model.forward(image, is_training=is_training)
loss = yolo_model.compute_loss(pred_feature_maps, y_true)
y_pred = yolo_model.predict(pred_feature_maps)
saver_to_restore = tf.train.Saver()
# Restore the checkpoint, run the model over every validation image, then
# report per-class and overall detection metrics together with the mean losses.
with tf.Session() as sess:
sess.run([tf.global_variables_initializer()])
saver_to_restore.restore(sess, args.restore_path)
print('\n----------- start to eval -----------\n')
# Running averages for the five values returned by compute_loss
# (indexed below as total, xy, wh, conf, class).
val_loss_total, val_loss_xy, val_loss_wh, val_loss_conf, val_loss_class = \
AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
# Predictions of all images, in the form produced by get_preds_gpu.
val_preds = []
# The dataset is batched with size 1, so one iteration handles one image.
for j in trange(args.img_cnt):
__image_ids, __y_pred, __loss = sess.run([image_ids, y_pred, loss], feed_dict={is_training: False})
pred_content = get_preds_gpu(sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag, __image_ids, __y_pred)
val_preds.extend(pred_content)
val_loss_total.update(__loss[0])
val_loss_xy.update(__loss[1])
val_loss_wh.update(__loss[2])
val_loss_conf.update(__loss[3])
val_loss_class.update(__loss[4])
rec_total, prec_total, ap_total = AverageMeter(), AverageMeter(), AverageMeter()
gt_dict = parse_gt_rec(args.eval_file, args.img_size, args.letterbox_resize)
print('mAP eval:')
# Evaluate every class separately; the IoU threshold is fixed at 0.5 here.
for ii in range(args.class_num):
npos, nd, rec, prec, ap = voc_eval(gt_dict, val_preds, ii, iou_thres=0.5, use_07_metric=args.use_voc_07_metric)
# Recall is weighted by npos and precision by nd; AP counts once per class.
# NOTE(review): npos / nd are presumably the ground-truth and detection counts -- confirm in voc_eval.
rec_total.update(rec, npos)
prec_total.update(prec, nd)
ap_total.update(ap, 1)
print('Class {}: Recall: {:.4f}, Precision: {:.4f}, AP: {:.4f}'.format(ii, rec, prec, ap))
# mAP is the unweighted mean of the per-class AP values.
mAP = ap_total.average
print('final mAP: {:.4f}'.format(mAP))
print("recall: {:.3f}, precision: {:.3f}".format(rec_total.average, prec_total.average))
print("total_loss: {:.3f}, loss_xy: {:.3f}, loss_wh: {:.3f}, loss_conf: {:.3f}, loss_class: {:.3f}".format(
val_loss_total.average, val_loss_xy.average, val_loss_wh.average, val_loss_conf.average, val_loss_class.average
))
@@ -0,0 +1,20 @@
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
@@ -0,0 +1,96 @@
# coding: utf-8
import xml.etree.ElementTree as ET
import os
# Map every class name (one per line of voc_names.txt) to its 0-based line
# index. The file is read inside a context manager so the handle is closed
# deterministically instead of being left to the garbage collector.
names_dict = {}
with open('./voc_names.txt', 'r') as names_file:
    f = names_file.readlines()
for cnt, line in enumerate(f):
    names_dict[line.strip()] = cnt
# Keep `cnt` equal to the number of lines read, as before.
cnt = len(f)
# Dataset roots. The list-valued settings below are index-aligned:
# entry 0 belongs to VOC2007 and entry 1 to VOC2012.
voc_07 = '/data/VOCdevkit/VOC2007'
voc_12 = '/data/VOCdevkit/VOC2012'
_voc_roots = (voc_07, voc_12)

# Annotation (.xml) and image (.jpg) directories, one per dataset root.
anno_path = [os.path.join(root, 'Annotations') for root in _voc_roots]
img_path = [os.path.join(root, 'JPEGImages') for root in _voc_roots]

# Image-id lists: trainval comes from both roots, test only from VOC2007.
trainval_path = [os.path.join(root, 'ImageSets/Main/trainval.txt') for root in _voc_roots]
test_path = [os.path.join(voc_07, 'ImageSets/Main/test.txt')]
def parse_xml(path):
    """Flatten one Pascal-VOC style annotation file into a list of strings.

    Args:
        path: '/'-separated path of the annotation file; the last four
            characters of the file name (the '.xml' suffix) are dropped to
            obtain the image name.

    Returns:
        ``[img_name, width, height, class_id, xmin, ymin, xmax, ymax, ...]``
        with one ``class_id, xmin, ymin, xmax, ymax`` group per object whose
        ``difficult`` flag is not '1', or ``None`` when the file contains no
        such object.

    Raises:
        KeyError: if an object name is not present in ``names_dict``.
    """
    tree = ET.parse(path)
    img_name = path.split('/')[-1][:-4]
    height = tree.findtext("./size/height")
    width = tree.findtext("./size/width")

    objects = [img_name, width, height]
    # Number of leading per-image fields; anything beyond it is box data.
    header_len = len(objects)

    for obj in tree.findall('object'):
        # A missing <difficult> tag is treated as "not difficult".
        if obj.findtext('difficult') == '1':
            continue
        name = obj.find('name').text
        bbox = obj.find('bndbox')
        xmin = bbox.find('xmin').text
        ymin = bbox.find('ymin').text
        xmax = bbox.find('xmax').text
        ymax = bbox.find('ymax').text

        name = str(names_dict[name])
        objects.extend([name, xmin, ymin, xmax, ymax])

    # The previous check (`len(objects) > 1`) was always true because the list
    # starts with three header fields, so images without any usable object
    # were never filtered out.
    if len(objects) > header_len:
        return objects
    return None
# Running index written as the first field of every line of the test file.
test_cnt = 0


def gen_test_txt(txt_path):
    """Write the test-split annotation file to ``txt_path``.

    For every image id listed in the files of ``test_path`` the matching
    annotation is parsed with ``parse_xml``. Images that yield no annotation
    or whose .jpg file does not exist are skipped. Each remaining image
    produces one space-separated line:
    ``index image_path width height class_id xmin ymin xmax ymax ...``.

    The module-level counter ``test_cnt`` supplies the index and is advanced
    for every line written.
    """
    global test_cnt
    # Context managers guarantee both files are closed even if parsing fails.
    with open(txt_path, 'w') as out_file:
        for i, path in enumerate(test_path):
            with open(path, 'r') as split_file:
                img_names = split_file.readlines()
            for img_name in img_names:
                img_name = img_name.strip()
                if not img_name:
                    # Blank line in the split file: there is no image to look up.
                    continue
                xml_path = anno_path[i] + '/' + img_name + '.xml'
                objects = parse_xml(xml_path)
                if not objects:
                    continue
                objects[0] = img_path[i] + '/' + img_name + '.jpg'
                if not os.path.exists(objects[0]):
                    continue
                objects.insert(0, str(test_cnt))
                test_cnt += 1
                out_file.write(' '.join(objects) + '\n')
# Running index written as the first field of every line of the train file.
train_cnt = 0


def gen_train_txt(txt_path):
    """Write the trainval-split annotation file to ``txt_path``.

    For every image id listed in the files of ``trainval_path`` the matching
    annotation is parsed with ``parse_xml``. Images that yield no annotation
    or whose .jpg file does not exist are skipped. Each remaining image
    produces one space-separated line:
    ``index image_path width height class_id xmin ymin xmax ymax ...``.

    The module-level counter ``train_cnt`` supplies the index and is advanced
    for every line written.
    """
    global train_cnt
    # Context managers guarantee both files are closed even if parsing fails.
    with open(txt_path, 'w') as out_file:
        for i, path in enumerate(trainval_path):
            with open(path, 'r') as split_file:
                img_names = split_file.readlines()
            for img_name in img_names:
                img_name = img_name.strip()
                if not img_name:
                    # Blank line in the split file: there is no image to look up.
                    continue
                xml_path = anno_path[i] + '/' + img_name + '.xml'
                objects = parse_xml(xml_path)
                if not objects:
                    continue
                objects[0] = img_path[i] + '/' + img_name + '.jpg'
                if not os.path.exists(objects[0]):
                    continue
                objects.insert(0, str(train_cnt))
                train_cnt += 1
                out_file.write(' '.join(objects) + '\n')
# Script entry: write both annotation files into the current working directory.
# The trainval split goes to train.txt and the test split to val.txt.
gen_train_txt('train.txt')
gen_test_txt('val.txt')
@@ -0,0 +1,32 @@
# coding: utf-8
# This script is used to remove the optimizer parameters in the saved checkpoint files.
# These parameters are useless in the forward process.
# Removing them will shrink the checkpoint size a lot.
import sys
sys.path.append('..')
import os
import tensorflow as tf
from model import yolov3
# params
ckpt_path = ''  # checkpoint to read; must be filled in before running
class_num = 20
save_dir = 'shrinked_ckpt'

# Create the output directory on first use.
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# Build only the forward graph. Both savers are created from this graph, so
# they cover just the variables that exist at this point.
image = tf.placeholder(tf.float32, [1, 416, 416, 3])
yolo_model = yolov3(class_num, None)
with tf.variable_scope('yolov3'):
    pred_feature_maps = yolo_model.forward(image)

saver_to_restore = tf.train.Saver()
saver_to_save = tf.train.Saver()

with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    # Load the original weights, then write them back under save_dir.
    saver_to_restore.restore(sess, ckpt_path)
    saver_to_save.save(sess, save_dir + '/shrinked')
@@ -0,0 +1,457 @@
# coding=utf-8
# for better understanding about yolov3 architecture, refer to this website (in Chinese):
# https://blog.csdn.net/leviopku/article/details/82660381
from __future__ import division, print_function
import tensorflow as tf
slim = tf.contrib.slim
from utils.layer_utils import conv2d, darknet53_body, yolo_block, upsample_layer
class yolov3(object):
    """YOLOv3 graph builder: darknet53 body, three-scale head, decoding and loss.

    Typical use is ``forward`` to obtain the three raw feature maps, followed by
    either ``predict`` (inference) or ``compute_loss`` (training).
    """

    def __init__(self, class_num, anchors, use_label_smooth=False, use_focal_loss=False, batch_norm_decay=0.999,
                 weight_decay=5e-4, use_static_shape=True,
                 img_size=(416, 416), batch_size=None):
        """Store the model hyper-parameters.

        Args:
            class_num: number of object classes.
            anchors: sequence of 9 anchors in [w, h] format; entries 6:9, 3:6
                and 0:3 are used for feature maps 1, 2 and 3 respectively.
            use_label_smooth: apply label smoothing to the class targets.
            use_focal_loss: weight the confidence loss with a focal term.
            batch_norm_decay: decay passed to slim.batch_norm.
            weight_decay: L2 regularisation scale for the conv weights.
            use_static_shape: if True use tensor.get_shape(), otherwise
                tf.shape(tensor); the static form is slightly faster.
            img_size: input size as (height, width).
            batch_size: stored on the instance; not read by the methods of
                this class.
        """
        self.class_num = class_num
        self.anchors = anchors
        self.batch_norm_decay = batch_norm_decay
        self.use_label_smooth = use_label_smooth
        self.use_focal_loss = use_focal_loss
        self.weight_decay = weight_decay

        # inference speed optimization, see `use_static_shape` above
        self.use_static_shape = use_static_shape
        self.batch_size = batch_size
        self.img_size = img_size
        # Down-sampling factor of each output scale and the resulting
        # (height, width) of each feature map.
        self.featrue_map_shape_base = [32, 16, 8]
        self.featrue_map_shape = [(self.img_size[0] // i, self.img_size[1] // i) for i in self.featrue_map_shape_base]

    def forward(self, inputs, is_training=False, reuse=False):
        """Build the network and return the three raw detection feature maps.

        Args:
            inputs: input image tensor fed to ``darknet53_body``.
            is_training: forwarded to batch norm as ``is_training``.
            reuse: forwarded to the slim conv2d / batch_norm arg scope.

        Returns:
            (feature_map_1, feature_map_2, feature_map_3); each has
            ``3 * (5 + class_num)`` channels.
        """
        # set batch norm params
        batch_norm_params = {
            'decay': self.batch_norm_decay,
            'epsilon': 1e-05,
            'scale': True,
            'is_training': is_training,
            'fused': None,  # Use fused batch norm if possible.
        }

        with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
            with slim.arg_scope([slim.conv2d],
                                normalizer_fn=slim.batch_norm,
                                normalizer_params=batch_norm_params,
                                biases_initializer=None,
                                activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1),
                                weights_regularizer=slim.l2_regularizer(self.weight_decay)):
                with tf.variable_scope('darknet53_body'):
                    route_1, route_2, route_3 = darknet53_body(inputs)

                with tf.variable_scope('yolov3_head'):
                    # First scale, built directly on route_3.
                    inter1, net = yolo_block(route_3, 512)
                    feature_map_1 = slim.conv2d(net, 3 * (5 + self.class_num), 1,
                                                stride=1, normalizer_fn=None,
                                                activation_fn=None, biases_initializer=tf.zeros_initializer())
                    feature_map_1 = tf.identity(feature_map_1, name='feature_map_1')

                    # Second scale: upsample to route_2's size and concatenate.
                    inter1 = conv2d(inter1, 256, 1)
                    inter1 = upsample_layer(inter1,
                                            route_2.get_shape().as_list() if self.use_static_shape else tf.shape(
                                                route_2))
                    concat1 = tf.concat([inter1, route_2], axis=3)

                    inter2, net = yolo_block(concat1, 256)
                    feature_map_2 = slim.conv2d(net, 3 * (5 + self.class_num), 1,
                                                stride=1, normalizer_fn=None,
                                                activation_fn=None, biases_initializer=tf.zeros_initializer())
                    feature_map_2 = tf.identity(feature_map_2, name='feature_map_2')

                    # Third scale: upsample to route_1's size and concatenate.
                    inter2 = conv2d(inter2, 128, 1)
                    inter2 = upsample_layer(inter2,
                                            route_1.get_shape().as_list() if self.use_static_shape else tf.shape(
                                                route_1))
                    concat2 = tf.concat([inter2, route_1], axis=3)

                    _, feature_map_3 = yolo_block(concat2, 128)
                    feature_map_3 = slim.conv2d(feature_map_3, 3 * (5 + self.class_num), 1,
                                                stride=1, normalizer_fn=None,
                                                activation_fn=None, biases_initializer=tf.zeros_initializer())
                    feature_map_3 = tf.identity(feature_map_3, name='feature_map_3')

            return feature_map_1, feature_map_2, feature_map_3

    def reorg_layer(self, feature_map, anchors):
        '''
        Decode one raw feature map into boxes at the input-image scale.

        feature_map: a feature_map from [feature_map_1, feature_map_2, feature_map_3] returned
            from `forward` function
        anchors: shape: [3, 2], in [w, h] format

        Returns (shapes for a 416x416 input and the 13x13 feature map):
            x_y_offset: [13, 13, 1, 2]
            boxes: [N, 13, 13, 3, 4], (center_x, center_y, w, h), rescaled to the original image scale
            conf_logits: [N, 13, 13, 3, 1]
            prob_logits: [N, 13, 13, 3, class_num]
        '''
        # NOTE: size in [h, w] format! don't get messed up!
        grid_size = feature_map.get_shape().as_list()[1:3] if self.use_static_shape else tf.shape(feature_map)[
                                                                                       1:3]  # [13, 13]
        # the downscale ratio in height and width
        ratio = tf.cast([self.img_size[0] / grid_size[0], self.img_size[1] / grid_size[1]], tf.float32)
        # rescale the anchors to the feature_map
        # NOTE: the anchor is in [w, h] format!
        rescaled_anchors = [(anchor[0] / ratio[1], anchor[1] / ratio[0]) for anchor in anchors]

        feature_map = tf.reshape(feature_map, [-1, grid_size[0], grid_size[1], 3, 5 + self.class_num])

        # split the feature_map along the last dimension
        # box_centers: last dimension is [center_x, center_y]
        # box_sizes: last dimension is [width, height]
        box_centers = feature_map[..., :2]
        box_sizes = feature_map[..., 2:4]
        conf_logits = feature_map[..., 4:5]
        prob_logits = feature_map[..., 5:]

        box_centers = tf.nn.sigmoid(box_centers)

        # use some broadcast tricks to get the mesh coordinates
        grid_x = tf.range(grid_size[1], dtype=tf.int32)
        grid_y = tf.range(grid_size[0], dtype=tf.int32)
        grid_x, grid_y = tf.meshgrid(grid_x, grid_y)
        x_offset = tf.reshape(grid_x, (-1, 1))
        y_offset = tf.reshape(grid_y, (-1, 1))
        x_y_offset = tf.concat([x_offset, y_offset], axis=-1)
        # shape: [13, 13, 1, 2]
        x_y_offset = tf.cast(tf.reshape(x_y_offset, [grid_size[0], grid_size[1], 1, 2]), tf.float32)

        # get the absolute box coordinates on the feature_map
        box_centers = box_centers + x_y_offset
        # rescale to the original image scale
        box_centers = box_centers * ratio[::-1]

        box_sizes = tf.exp(box_sizes) * rescaled_anchors
        # rescale to the original image scale
        box_sizes = box_sizes * ratio[::-1]

        # shape: [N, 13, 13, 3, 4]
        # last dimension: (center_x, center_y, w, h)
        boxes = tf.concat([box_centers, box_sizes], axis=-1)

        return x_y_offset, boxes, conf_logits, prob_logits

    def predict(self, feature_maps):
        '''
        Receive the returned feature_maps from `forward` function,
        then produce the output predictions at the test stage.

        Returns:
            boxes: [N, num_boxes, 4] as (x_min, y_min, x_max, y_max)
            confs: [N, num_boxes, 1], sigmoid of the confidence logits
            probs: [N, num_boxes, class_num], sigmoid of the class logits
        '''
        feature_map_1, feature_map_2, feature_map_3 = feature_maps

        feature_map_anchors = [(feature_map_1, self.anchors[6:9]),
                               (feature_map_2, self.anchors[3:6]),
                               (feature_map_3, self.anchors[0:3])]
        reorg_results = [self.reorg_layer(feature_map, anchors) for (feature_map, anchors) in feature_map_anchors]

        def _reshape(result):
            # Flatten the grid and anchor axes of one scale into a single box axis.
            x_y_offset, boxes, conf_logits, prob_logits = result
            grid_size = x_y_offset.get_shape().as_list()[:2] if self.use_static_shape else tf.shape(x_y_offset)[:2]
            boxes = tf.reshape(boxes, [-1, grid_size[0] * grid_size[1] * 3, 4])
            conf_logits = tf.reshape(conf_logits, [-1, grid_size[0] * grid_size[1] * 3, 1])
            prob_logits = tf.reshape(prob_logits, [-1, grid_size[0] * grid_size[1] * 3, self.class_num])
            # shape: (take 416*416 input image and feature_map_1 for example)
            # boxes: [N, 13*13*3, 4]
            # conf_logits: [N, 13*13*3, 1]
            # prob_logits: [N, 13*13*3, class_num]
            return boxes, conf_logits, prob_logits

        boxes_list, confs_list, probs_list = [], [], []
        for result in reorg_results:
            boxes, conf_logits, prob_logits = _reshape(result)
            confs = tf.sigmoid(conf_logits)
            probs = tf.sigmoid(prob_logits)
            boxes_list.append(boxes)
            confs_list.append(confs)
            probs_list.append(probs)

        # collect results on three scales
        # take 416*416 input image for example:
        # shape: [N, (13*13+26*26+52*52)*3, 4]
        boxes = tf.concat(boxes_list, axis=1)
        # shape: [N, (13*13+26*26+52*52)*3, 1]
        confs = tf.concat(confs_list, axis=1)
        # shape: [N, (13*13+26*26+52*52)*3, class_num]
        probs = tf.concat(probs_list, axis=1)

        # Convert (center_x, center_y, w, h) to corner coordinates.
        center_x = boxes[..., 0:1]
        center_y = boxes[..., 1:2]
        width = boxes[..., 2:3]
        height = boxes[..., 3:]

        x_min = center_x - width / 2
        y_min = center_y - height / 2
        x_max = center_x + width / 2
        y_max = center_y + height / 2

        boxes = tf.concat([x_min, y_min, x_max, y_max], axis=-1)

        return boxes, confs, probs

    def loss_layer(self, feature_map_i, y_true, anchors, feature_map_shape_i, gt_box_i):
        '''
        calc loss function from a certain scale
        input:
            feature_map_i: feature maps of a certain scale. shape: [N, 13, 13, 3*(5 + num_class)] etc.
            y_true: y_true from a certain scale. shape: [N, 13, 13, 3, 5 + num_class + 1] etc.
                The trailing channel is the mix-up weight.
            anchors: the anchors of this scale (`compute_loss` passes a slice of three)
            feature_map_shape_i: not used by this method
            gt_box_i: ground-truth boxes (center_x, center_y, w, h) passed to `box_iou`
                to build the ignore mask
        return:
            xy_loss, wh_loss, conf_loss, class_loss, each summed and divided by the batch size
        '''
        # size in [h, w] format! don't get messed up!
        grid_size = tf.shape(feature_map_i)[1:3]
        # the downscale ratio in height and width
        ratio = tf.cast(self.img_size / grid_size, tf.float32)
        # N: batch_size
        N = tf.cast(tf.shape(feature_map_i)[0], tf.float32)

        x_y_offset, pred_boxes, pred_conf_logits, pred_prob_logits = self.reorg_layer(feature_map_i, anchors)

        ###########
        # get mask
        ###########

        # shape: take 416x416 input image and 13*13 feature_map for example:
        # [N, 13, 13, 3, 1]
        object_mask = y_true[..., 4:5]

        # the calculation of ignore mask is referred from
        # https://github.com/pjreddie/darknet/blob/master/src/yolo_layer.c#L179
        # A prediction whose best IoU with the ground-truth boxes reaches 0.5
        # gets mask value 0, i.e. it is excluded from the negative confidence loss.
        iou = self.box_iou(pred_boxes, gt_box_i)  # [N, 13, 13, 3, V]
        best_iou = tf.reduce_max(iou, axis=-1)  # [N, 13, 13, 3]
        ignore_mask = tf.cast(best_iou < 0.5, tf.float32)  # [N, 13, 13, 3]
        # shape: [N, 13, 13, 3, 1]
        ignore_mask = tf.expand_dims(ignore_mask, -1)
        ignore_mask = tf.stop_gradient(ignore_mask)

        # shape: [N, 13, 13, 3, 2]
        pred_box_xy = pred_boxes[..., 0:2]
        pred_box_wh = pred_boxes[..., 2:4]

        # get xy coordinates in one cell from the feature_map
        # numerical range: 0 ~ 1
        # shape: [N, 13, 13, 3, 2]
        print(y_true[..., 0:2], ratio[::-1], x_y_offset)
        true_xy = y_true[..., 0:2] / ratio[::-1] - x_y_offset
        pred_xy = pred_box_xy / ratio[::-1] - x_y_offset

        # get_tw_th
        # numerical range: 0 ~ 1
        # shape: [N, 13, 13, 3, 2]
        true_tw_th = y_true[..., 2:4] / anchors
        pred_tw_th = pred_box_wh / anchors
        # for numerical stability: replace exact zeros by one before the log
        true_tw_th = tf.where(condition=tf.equal(true_tw_th, 0),
                              x=tf.ones_like(true_tw_th), y=true_tw_th)
        pred_tw_th = tf.where(condition=tf.equal(pred_tw_th, 0),
                              x=tf.ones_like(pred_tw_th), y=pred_tw_th)
        true_tw_th = tf.log(tf.clip_by_value(true_tw_th, 1e-9, 1e9))
        pred_tw_th = tf.log(tf.clip_by_value(pred_tw_th, 1e-9, 1e9))

        # box size punishment:
        # box with smaller area has bigger weight. This is taken from the yolo darknet C source code.
        # shape: [N, 13, 13, 3, 1]
        box_loss_scale = 2. - (y_true[..., 2:3] / tf.cast(self.img_size[1], tf.float32)) * (
                y_true[..., 3:4] / tf.cast(self.img_size[0], tf.float32))

        ############
        # loss_part
        ############
        # mix_up weight, [N, 13, 13, 3, 1]
        # It sits after the 5 box/objectness channels and the class channels.
        # The start index was hard-coded to 85 (5 + 80 classes); deriving it
        # from class_num gives the same slice for 80 classes and the right one
        # otherwise.
        mix_w = y_true[..., 5 + self.class_num:]
        # shape: [N, 13, 13, 3, 1]
        xy_loss = tf.reduce_sum(tf.square(true_xy - pred_xy) * object_mask * box_loss_scale * mix_w) / N
        wh_loss = tf.reduce_sum(tf.square(true_tw_th - pred_tw_th) * object_mask * box_loss_scale * mix_w) / N

        # shape: [N, 13, 13, 3, 1]
        conf_pos_mask = object_mask
        conf_neg_mask = (1 - object_mask) * ignore_mask
        conf_loss_pos = conf_pos_mask * tf.nn.sigmoid_cross_entropy_with_logits(labels=object_mask,
                                                                                logits=pred_conf_logits)
        conf_loss_neg = conf_neg_mask * tf.nn.sigmoid_cross_entropy_with_logits(labels=object_mask,
                                                                                logits=pred_conf_logits)
        # TODO: may need to balance the pos-neg by multiplying some weights
        conf_loss = conf_loss_pos + conf_loss_neg
        if self.use_focal_loss:
            alpha = 1.0
            gamma = 2.0
            # TODO: alpha should be a mask array if needed
            focal_mask = alpha * tf.pow(tf.abs(object_mask - tf.sigmoid(pred_conf_logits)), gamma)
            conf_loss *= focal_mask
        conf_loss = tf.reduce_sum(conf_loss * mix_w) / N

        # shape: [N, 13, 13, 3, 1]
        # whether to use label smooth
        if self.use_label_smooth:
            delta = 0.01
            label_target = (1 - delta) * y_true[..., 5:(5 + self.class_num)] + delta * 1. / self.class_num
        else:
            label_target = y_true[..., 5:(5 + self.class_num)]
        class_loss = object_mask * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_target,
                                                                           logits=pred_prob_logits) * mix_w
        class_loss = tf.reduce_sum(class_loss) / N

        return xy_loss, wh_loss, conf_loss, class_loss

    def box_iou(self, pred_boxes, valid_true_boxes):
        '''
        IoU between every predicted box and every ground-truth box.

        param:
            pred_boxes: [..., 13, 13, 3, 4], (center_x, center_y, w, h)
            valid_true_boxes: [..., 1, V, 4], (center_x, center_y, w, h); two singleton
                axes are inserted at position 1 so it broadcasts against the
                grid and anchor axes of pred_boxes
        return:
            iou: [..., 13, 13, 3, V]
        '''
        # [13, 13, 3, 2]
        pred_box_xy = pred_boxes[..., 0:2]
        pred_box_wh = pred_boxes[..., 2:4]

        # shape: [13, 13, 3, 1, 2]
        pred_box_xy = tf.expand_dims(pred_box_xy, -2)
        pred_box_wh = tf.expand_dims(pred_box_wh, -2)
        print('##################pred_box_wh', pred_box_wh)

        valid_true_boxes = tf.expand_dims(valid_true_boxes, 1)  # [1, 1, 16, 4]
        valid_true_boxes = tf.expand_dims(valid_true_boxes, 1)  # [1, 1, 1, 16, 4]
        print('##################valid_true_boxes', valid_true_boxes)

        true_box_xy = valid_true_boxes[..., :2]  # [1, 1, 1, 16, 2]
        true_box_wh = valid_true_boxes[..., 2:]  # [1, 1, 1, 16, 2]
        print('##################true_box_wh', true_box_wh)

        # [13, 13, 3, 1, 2] & [1, 1, 1, 16, 2] ==> [13, 13, 3, 16, 2]
        intersect_mins = tf.maximum(pred_box_xy - pred_box_wh / 2.,
                                    true_box_xy - true_box_wh / 2.)
        intersect_maxs = tf.minimum(pred_box_xy + pred_box_wh / 2.,
                                    true_box_xy + true_box_wh / 2.)
        # Non-overlapping boxes give a negative extent, clamped to zero here.
        intersect_wh = tf.maximum(intersect_maxs - intersect_mins, 0.)
        print('##################intersect_mins', intersect_mins)
        print('##################intersect_wh', intersect_wh)

        # shape: [13, 13, 3, 16]
        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
        # shape: [13, 13, 3, 1]
        pred_box_area = pred_box_wh[..., 0] * pred_box_wh[..., 1]
        # shape: [1, 1, 1, 16]
        true_box_area = true_box_wh[..., 0] * true_box_wh[..., 1]
        print('##################intersect_area', intersect_area)
        print('##################pred_box_area', pred_box_area)
        print('##################true_box_area', true_box_area)

        # [13, 13, 3, 16]; the small epsilon avoids a division by zero
        iou = intersect_area / (pred_box_area + true_box_area - intersect_area + 1e-10)
        print('##################iou', iou)

        return iou

    def compute_loss(self, y_pred, y_true, gt_box):
        '''
        Sum the per-scale losses over the three scales.

        param:
            y_pred: returned feature_map list by `forward` function: [feature_map_1, feature_map_2, feature_map_3]
            y_true: input y_true by the tf.data pipeline, one entry per scale
            gt_box: ground-truth boxes, one entry per scale
        return:
            [total_loss, loss_xy, loss_wh, loss_conf, loss_class]
        '''
        loss_xy, loss_wh, loss_conf, loss_class = 0., 0., 0., 0.
        anchor_group = [self.anchors[6:9], self.anchors[3:6], self.anchors[0:3]]

        # calc loss in 3 scales
        for i in range(len(y_pred)):
            print('##################level', i)
            result = self.loss_layer(y_pred[i], y_true[i], anchor_group[i], self.featrue_map_shape[i], gt_box[i])
            loss_xy += result[0]
            loss_wh += result[1]
            loss_conf += result[2]
            loss_class += result[3]
        total_loss = loss_xy + loss_wh + loss_conf + loss_class
        return [total_loss, loss_xy, loss_wh, loss_conf, loss_class]
@@ -0,0 +1,58 @@
#!/bin/bash
# Multi-device training launcher.
#   $1: number of ranks to start (also selects ./hccl_config/<N>p.json)
#   $2: training mode, forwarded to run_yolov3.sh as its 4th argument
# One background run_yolov3.sh process is started per rank, each in its own
# working directory training/t1/D<rank>.

# Absolute directory of this script, and its parent directory.
scriptDir=$(cd "$(dirname "$0")"; pwd)
currentDir=$(cd "$(dirname "$scriptDir")"; pwd)

# set env
source ${currentDir}/config/npu_set_env.sh

# setting main path
# The variable must be expanded here: the previous value was the literal
# string "currentDir/code", which run_yolov3.sh then used as the code directory.
CODE_PATH=${currentDir}/code

# set env
export ASCEND_HOME=/usr/local/Ascend
export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/
export PYTHONPATH=$PYTHONPATH:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/te:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/topi:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/hccl:/usr/local/Ascend/ascend-toolkit/latest/tfplugin/python/site-packages:$currentDir
export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin
export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
export DDK_VERSION_FLAG=1.60.T49.0.B201
export NEW_GE_FE_ID=1
export GE_AICPU_FLAG=1
export SOC_VERSION=Ascend910

#export DUMP_GE_GRAPH=2
#export DUMP_GRAPH_LEVEL=3
#export PRINT_MODEL=1
export SLOG_PRINT_TO_STDOUT=0

# dump op data
#export DISABLE_REUSE_MEMORY=1
#export DUMP_OP=1

ulimit -c unlimited

# local variable
RANK_SIZE=$1
RANK_TABLE_FILE=./hccl_config/${RANK_SIZE}p.json
RANK_ID_START=0
SAVE_PATH=training/t1

# training stage
MODE=$2

for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
do
    echo
    su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device "$RANK_ID
    # Give every rank a private working directory holding its own copy of the
    # runner script and the rank table.
    TMP_PATH=$SAVE_PATH/D$RANK_ID
    mkdir -p $TMP_PATH
    cp run_yolov3.sh $TMP_PATH/
    cp $RANK_TABLE_FILE $TMP_PATH/rank_table.json
    cd $TMP_PATH
    nohup bash run_yolov3.sh $RANK_ID $RANK_SIZE $CODE_PATH $MODE > train_$RANK_ID.log &
    cd -
done
@@ -0,0 +1 @@
# Start npu_train.sh in the background with arguments "1" and "multi".
nohup bash npu_train.sh 1 multi &
@@ -0,0 +1 @@
# Start npu_train.sh in the background with arguments "1" and "single".
nohup bash npu_train.sh 1 single &
@@ -0,0 +1 @@
# Start npu_train.sh in the background with arguments "8" and "multi".
nohup bash npu_train.sh 8 multi &
@@ -0,0 +1 @@
# Start npu_train.sh in the background with arguments "8" and "single".
nohup bash npu_train.sh 8 single &
@@ -0,0 +1,50 @@
# Training launcher: $1 is the number of ranks to start. For each rank a
# working directory training/t1/D<rank> is created and run_yolov3.sh is
# started there in the background.
#clean slog
rm -rf /var/log/npu/slog/host-0/*.log
rm -rf /var/log/npu/slog/device-*/*.log
# setting main path
# MAIN_PATH is the directory containing this script (symlinks resolved).
MAIN_PATH=$(dirname $(readlink -f $0))
# set env
export PYTHONPATH=/usr/local/Ascend/ops/op_impl/built-in/ai_core/tbe/:$MAIN_PATH/../../../
export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu
PATH=$PATH:$HOME/bin
# NOTE(review): $PATH is appended a second time at the end of this value.
export PATH=$PATH:/usr/local/Ascend/fwkacllib/ccec_compiler/bin:$PATH
export ASCEND_OPP_PATH=/usr/local/Ascend/opp
export DDK_VERSION_FLAG=1.60.T49.0.B201
export NEW_GE_FE_ID=1
export GE_AICPU_FLAG=1
export SOC_VERSION=Ascend910
export DUMP_GE_GRAPH=1
export DUMP_GRAPH_LEVEL=1
export PRINT_MODEL=1
#export SLOG_PRINT_TO_STDOUT=1
ulimit -c unlimited
# local variable
RANK_SIZE=$1
RANK_TABLE_FILE=./configs/${RANK_SIZE}p.json
# The first rank id used by the loop below is 1, not 0.
RANK_ID_START=1
SAVE_PATH=training/t1
for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
do
echo
su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[debug]\" --device "$RANK_ID
# Per-rank working directory with its own copy of the runner and rank table.
TMP_PATH=$SAVE_PATH/D$RANK_ID
mkdir -p $TMP_PATH
cp run_yolov3.sh $TMP_PATH/
cp $RANK_TABLE_FILE $TMP_PATH/rank_table.json
cd $TMP_PATH
# NOTE(review): only three arguments are passed (rank id, rank size, code path);
# confirm that the run_yolov3.sh copied above does not expect a 4th (mode) argument.
nohup bash run_yolov3.sh $RANK_ID $RANK_SIZE $MAIN_PATH > train_$RANK_ID.log &
cd -
done
@@ -0,0 +1,29 @@
#!/bin/bash
# Per-rank training runner.
#   $1: rank id (also used as device id)
#   $2: rank size
#   $3: directory that contains train.py
#   $4: value passed to train.py as --mode

# Export the rank settings first: the cleanup below uses $RANK_ID, which was
# previously read before it had been assigned from $1.
export RANK_ID=$1
export RANK_SIZE=$2
export DEVICE_ID=$RANK_ID
export DEVICE_INDEX=$RANK_ID
export RANK_TABLE_FILE=rank_table.json
export JOB_ID=123678
export FUSION_TENSOR_SIZE=1000000000

# Remove artifacts left in this working directory by a previous run.
rm -rf Onnxgraph
rm -rf Partition
rm -rf OptimizeSubGraph
rm -rf Aicpu_Optimized
# -f: do not complain when there is nothing to delete.
rm -f *txt
rm -rf result_$RANK_ID

# Pin the process to a block of KERNEL_NUM consecutive CPU ids selected by the rank id.
KERNEL_NUM=20
PID_START=$((KERNEL_NUM * RANK_ID))
PID_END=$((PID_START + KERNEL_NUM - 1))

#sleep 5
taskset -c $PID_START-$PID_END python3 $3/train.py \
    --mode $4

# Collect the generated .txt / .pbtxt files under ./graph.
mkdir -p graph
mv *.txt graph
mv *.pbtxt graph
@@ -0,0 +1,57 @@
# Evaluation launcher: sets the Ascend environment for a single rank/device
# (id 7) and runs eval.py once against the checkpoint location in RESTORE_PATH.
#export CUDA_VISIBLE_DEVICES=''
#export CUDA_VISIBLE_DEVICES=7
# setting main path
# MAIN_PATH is the directory containing this script (symlinks resolved).
MAIN_PATH=$(dirname $(readlink -f $0))
# set env
export PYTHONPATH=/usr/local/Ascend/ops/op_impl/built-in/ai_core/tbe/:$MAIN_PATH/../../../
export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu
PATH=$PATH:$HOME/bin
# NOTE(review): $PATH is appended a second time at the end of this value.
export PATH=$PATH:/usr/local/Ascend/fwkacllib/ccec_compiler/bin:$PATH
export ASCEND_OPP_PATH=/usr/local/Ascend/opp
export DDK_VERSION_FLAG=1.60.T49.0.B201
export NEW_GE_FE_ID=1
export GE_AICPU_FLAG=1
export SOC_VERSION=Ascend910
export RANK_ID=7
export RANK_SIZE=1
export DEVICE_ID=$RANK_ID
export DEVICE_INDEX=$RANK_ID
export JOB_ID=10087
export FUSION_TENSOR_SIZE=1000000000
#export SLOG_PRINT_TO_STDOUT=1
#export DUMP_GE_GRAPH=2
#export DUMP_GRAPH_LEVEL=3
su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[debug]\" --device "$RANK_ID
# Hard-coded checkpoint location; edit before running on another machine.
#RESTORE_PATH=/opt/npu/wujianping/epoch200/
RESTORE_PATH=/opt/npu/w00558981/yolov3_ok_bak_zip/training/t1/D0/training/
#RESTORE_PATH=/opt/npu/w00558981/training_done_yolov3/training/t1/D0/training/model-epoch_200_step_182000_loss_20.7852_lr_0
# The loop body ends with `break` right after eval.py, so eval.py runs exactly
# once and the `sleep 1200` below is never reached.
while :
do
#python3.7 eval.py \
#--save_img True \
#--score_thresh 0.2 \
#--restore_path $RESTORE_PATH \
#--max_test 10 \
python3.7 eval.py \
--save_json True \
--score_thresh 0.001 \
--restore_path $RESTORE_PATH \
--max_test 10000
break
sleep 1200
done
@@ -0,0 +1,86 @@
# coding: utf-8
from __future__ import division, print_function
import tensorflow as tf
import numpy as np
import argparse
import cv2
from utils.misc_utils import parse_anchors, read_class_names
from utils.nms_utils import gpu_nms
from utils.plot_utils import get_color_table, plot_one_box
from utils.data_aug import letterbox_resize
from model import yolov3
def _parse_bool_flag(value):
    # Only the text 'true' (any letter case) counts as True.
    return str(value).lower() == 'true'


# Command-line interface of the single-image test script.
parser = argparse.ArgumentParser(description="YOLO-V3 test single image test procedure.")
parser.add_argument(
    "input_image", type=str,
    help="The path of the input image.")
parser.add_argument(
    "--anchor_path", type=str, default="./data/yolo_anchors.txt",
    help="The path of the anchor txt file.")
parser.add_argument(
    "--new_size", nargs='*', type=int, default=[416, 416],
    help="Resize the input image with `new_size`, size format: [width, height]")
parser.add_argument(
    "--letterbox_resize", type=_parse_bool_flag, default=True,
    help="Whether to use the letterbox resize.")
parser.add_argument(
    "--class_name_path", type=str, default="./data/coco.names",
    help="The path of the class names.")
parser.add_argument(
    "--restore_path", type=str, default="./data/darknet_weights/yolov3.ckpt",
    help="The path of the weights to restore.")
args = parser.parse_args()

# Values derived from the parsed options are attached to the same namespace.
args.anchors = parse_anchors(args.anchor_path)
args.classes = read_class_names(args.class_name_path)
args.num_class = len(args.classes)

# Colour table built from the class count; indexed by label id when drawing.
color_table = get_color_table(args.num_class)
# --new_size is declared with nargs='*', so any number of values is accepted
# on the command line; the code below needs exactly [width, height].
if len(args.new_size) != 2:
    raise ValueError('--new_size expects exactly two integers: width height')

img_ori = cv2.imread(args.input_image)
# cv2.imread reports an unreadable or missing file by returning None instead
# of raising; fail here with a clear message rather than further down.
if img_ori is None:
    raise IOError('Failed to read input image: {}'.format(args.input_image))

if args.letterbox_resize:
    img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0], args.new_size[1])
else:
    height_ori, width_ori = img_ori.shape[:2]
    img = cv2.resize(img_ori, tuple(args.new_size))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.asarray(img, np.float32)
# Add the batch axis and scale pixel values to [0, 1].
img = img[np.newaxis, :] / 255.

with tf.Session() as sess:
    # Placeholder layout is [batch, height, width, channels]; new_size is [width, height].
    input_data = tf.placeholder(tf.float32, [1, args.new_size[1], args.new_size[0], 3], name='input_data')
    yolo_model = yolov3(args.num_class, args.anchors)
    with tf.variable_scope('yolov3'):
        pred_feature_maps = yolo_model.forward(input_data, False)
    pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)

    pred_scores = pred_confs * pred_probs

    boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, args.num_class, max_boxes=200, score_thresh=0.3, nms_thresh=0.45)

    saver = tf.train.Saver()
    saver.restore(sess, args.restore_path)

    boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input_data: img})

    # rescale the coordinates to the original image
    if args.letterbox_resize:
        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
    else:
        boxes_[:, [0, 2]] *= (width_ori/float(args.new_size[0]))
        boxes_[:, [1, 3]] *= (height_ori/float(args.new_size[1]))

    print("box coords:")
    print(boxes_)
    print('*' * 30)
    print("scores:")
    print(scores_)
    print('*' * 30)
    print("labels:")
    print(labels_)

    # Draw every detection on the original image with its class name and score.
    for box, score, label in zip(boxes_, scores_, labels_):
        x0, y0, x1, y1 = box
        plot_one_box(img_ori, [x0, y0, x1, y1], label=args.classes[label] + ', {:.2f}%'.format(score * 100), color=color_table[label])
    cv2.imshow('Detection result', img_ori)
    cv2.imwrite('detection_result.jpg', img_ori)
    cv2.waitKey(0)
@@ -0,0 +1,287 @@
# coding: utf-8
from __future__ import division, print_function
import tensorflow as tf
import numpy as np
import logging
from tqdm import trange
import random
import time
import datetime
from utils.data_utils import get_batch_data, color_jitter
from utils.misc_utils import shuffle_and_overwrite, make_summary, config_learning_rate, config_optimizer, AverageMeter
from utils.eval_utils import evaluate_on_cpu, evaluate_on_gpu, get_preds_gpu, voc_eval, parse_gt_rec
from model import yolov3
import time
import os
import sys
# npu modified
from npu_bridge.estimator import npu_ops
from npu_bridge.estimator.npu.npu_optimizer import NPUDistributedOptimizer
from npu_bridge.estimator.npu.npu_loss_scale_optimizer import NPULossScaleOptimizer
from npu_bridge.estimator.npu.npu_loss_scale_manager import FixedLossScaleManager
from npu_bridge.estimator.npu.npu_loss_scale_manager import ExponentialUpdateLossScaleManager
from tensorflow.core.protobuf.rewriter_config_pb2 import RewriterConfig
from npu_bridge.estimator.npu import util
sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)),'../../../../../'))
sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)),'../../../../utils/atlasboost'))
from benchmark_log import hwlog
from benchmark_log.basic_utils import get_environment_info
from benchmark_log.basic_utils import get_model_parameter
import argparse
# Benchmark logging setup: the log root is the directory of this file.
hwlog.ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

cpu_info, npu_info, framework_info, os_info, benchmark_version = get_environment_info("tensorflow")
config_info = get_model_parameter("tensorflow_config")
initinal_data = {
    "base_lr": 0.128,
    "dataset": "coco1024",
    "optimizer": "Adam",
    "loss_scale": 512,
    "batchsize": 32,
}

# Emit one remark per item, in this order: environment, config, training settings.
for _remark_key, _remark_value in (
        (hwlog.CPU_INFO, cpu_info),
        (hwlog.NPU_INFO, npu_info),
        (hwlog.OS_INFO, os_info),
        (hwlog.FRAMEWORK_INFO, framework_info),
        (hwlog.BENCHMARK_VERSION, benchmark_version),
        (hwlog.CONFIG_INFO, config_info),
        (hwlog.BASE_LR, initinal_data.get("base_lr")),
        (hwlog.DATASET, initinal_data.get("dataset")),
        (hwlog.OPT_NAME, initinal_data.get("optimizer")),
        (hwlog.LOSS_SCALE, initinal_data.get("loss_scale")),
        (hwlog.INPUT_BATCH_SIZE, initinal_data.get("batchsize")),
):
    hwlog.remark_print(key=_remark_key, value=_remark_value)
def _str2bool(value):
    """Parse a command-line boolean.

    argparse's ``type=bool`` calls ``bool()`` on the raw string, so every
    non-empty value, including 'False', becomes True. Here only the usual
    affirmative spellings are treated as True.
    """
    return str(value).strip().lower() in ('true', 't', 'yes', 'y', '1')


parser = argparse.ArgumentParser(description="YOLO-V3 training setting.")
# `choices` rejects an unknown mode at parse time; previously such a value
# skipped both imports below and the script failed later with a NameError on `args`.
parser.add_argument("--mode", type=str, default='single', choices=['single', 'multi'],
                    help="setting train mode of training.")
parser.add_argument("--resume", type=_str2bool, default=False,
                    help="setting if train from resume.")
args_input = parser.parse_args()

# The training configuration is a plain module, selected by the mode.
if args_input.mode == 'single':
    import args_single as args
elif args_input.mode == 'multi':
    import args_multi as args
print('setting train mode %s.' %args_input.mode)

# setting loggers
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(levelname)s %(message)s',
                    datefmt='%a, %d %b %Y %H:%M:%S', filename=args.progress_log_path, filemode='w')
##################
# tf.data pipeline
##################
train_dataset = tf.data.TextLineDataset(args.train_file)
print('##########################args_input.rank_id', os.environ['RANK_ID'])
logging.info('shuffle seed_%s args.', os.environ['RANK_ID'])
train_dataset = train_dataset.shuffle(args.train_img_cnt, seed=int(os.environ['RANK_ID']),
reshuffle_each_iteration=True)
print('##########################args.train_img_cnt', args.train_img_cnt)
train_dataset = train_dataset.repeat()
train_dataset = train_dataset.batch(args.batch_size, drop_remainder=True) # npu modified
train_dataset = train_dataset.map(
lambda x: tf.py_func(get_batch_data,
inp=[x, args.class_num, args.img_size, args.anchors, 'train', args.multi_scale_train,
args.use_mix_up, args.letterbox_resize],
Tout=[tf.float32,
tf.float32, tf.float32, tf.float32,
tf.float32, tf.float32, tf.float32]),
num_parallel_calls=20
)
def valid_shape(*x):
    """Attach static shapes to one batch and colour-jitter its image.

    ``x`` is the 7-tuple ``(image, y_true_13, y_true_26, y_true_52,
    gt_box_13, gt_box_26, gt_box_52)``.  The label tensors get a fixed shape
    that depends on ``args_input.mode``; the image is then passed through
    ``color_jitter``.  The same 7-tuple layout is returned, with the jittered
    image in first position.
    """
    image, y_true_13, y_true_26, y_true_52, gt_box_13, gt_box_26, gt_box_52 = x

    # npu modified
    # Feature-map sizes of the three label tensors for each training mode.
    if args_input.mode == 'single':
        grids = (13, 26, 52)
    elif args_input.mode == 'multi':
        grids = (19*1, 19*2, 19*4)
    else:
        grids = None

    if grids is not None:
        image.set_shape([args.batch_size, args.img_size[0], args.img_size[1], 3])
        for label, grid in zip((y_true_13, y_true_26, y_true_52), grids):
            label.set_shape([args.batch_size, grid, grid, 3, 86])

    # NOTE(review): the source indentation was lost; the gt_box shapes are
    # assumed to be set for every mode (not only 'multi') -- confirm.
    for boxes, box_count in zip((gt_box_13, gt_box_26, gt_box_52), (32, 64, 128)):
        boxes.set_shape([args.batch_size, 1, box_count, 4])

    image = color_jitter(
        image, brightness=0.125, contrast=0.5, saturation=0.5, hue=0.05)
    return image, y_true_13, y_true_26, y_true_52, gt_box_13, gt_box_26, gt_box_52
train_dataset = train_dataset.map(valid_shape, num_parallel_calls=20)
train_dataset = train_dataset.prefetch(args.prefetech_buffer)
iterator = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes)
train_init_op = iterator.make_initializer(train_dataset)
# get an element from the chosen dataset iterator
image, y_true_13, y_true_26, y_true_52, gt_box_13, gt_box_26, gt_box_52 = iterator.get_next()
y_true = [y_true_13, y_true_26, y_true_52]
gt_box = [gt_box_13, gt_box_26, gt_box_52]
##################
# Model definition
##################
yolo_model = yolov3(args.class_num, args.anchors, args.use_label_smooth, args.use_focal_loss, args.batch_norm_decay,
args.weight_decay, use_static_shape=False,
batch_size=args.batch_size, img_size=args.img_size)
with tf.variable_scope('yolov3'):
pred_feature_maps = yolo_model.forward(image, is_training=True)
loss = yolo_model.compute_loss(pred_feature_maps, y_true, gt_box)
l2_loss = tf.losses.get_regularization_loss()
# setting restore parts and vars to update
saver_to_restore = tf.train.Saver(
var_list=tf.contrib.framework.get_variables_to_restore(include=args.restore_include, exclude=args.restore_exclude))
update_vars = tf.contrib.framework.get_variables_to_restore(include=args.update_part)
tf.summary.scalar('train_batch_statistics/total_loss', loss[0])
tf.summary.scalar('train_batch_statistics/loss_xy', loss[1])
tf.summary.scalar('train_batch_statistics/loss_wh', loss[2])
tf.summary.scalar('train_batch_statistics/loss_conf', loss[3])
tf.summary.scalar('train_batch_statistics/loss_class', loss[4])
tf.summary.scalar('train_batch_statistics/loss_l2', l2_loss)
tf.summary.scalar('train_batch_statistics/loss_ratio', l2_loss / loss[0])
def learning_rate_fn(global_step):
    """Build the learning-rate tensor: linear warm-up, then cosine decay.

    Reads ``args.learning_rate_init``, ``args.train_batch_num``,
    ``args.iterations_per_loop``, ``args.total_epoches`` and
    ``args.warm_up_epoch``.  While ``global_step < warmup_steps`` the rate
    grows linearly from 0 to the initial rate; afterwards it follows a
    cosine decay over the remaining steps, floored at 0.
    """
    base_lr = args.learning_rate_init
    # Steps per epoch, rounded down to a multiple of iterations_per_loop.
    steps_per_epoch = args.train_batch_num // args.iterations_per_loop * args.iterations_per_loop
    total_steps = int(args.total_epoches * steps_per_epoch)
    warmup_steps = int(steps_per_epoch * args.warm_up_epoch)
    tf.compat.v1.logging.info('total_steps: %d', int(total_steps))
    tf.compat.v1.logging.info('warmup_steps: %d', int(warmup_steps))

    decayed = tf.compat.v1.train.cosine_decay(
        learning_rate=base_lr,
        global_step=global_step - warmup_steps,
        decay_steps=total_steps - warmup_steps,
    )
    lr = tf.maximum(decayed, 0)

    step_f = tf.cast(global_step, tf.float32)
    warmup_f = tf.cast(warmup_steps, tf.float32)
    warmup_lr = base_lr * step_f / warmup_f

    return tf.cond(pred=global_step < warmup_steps,
                   true_fn=lambda: warmup_lr,
                   false_fn=lambda: lr)
global_step = tf.train.get_or_create_global_step()
learning_rate = learning_rate_fn(global_step)
tf.summary.scalar('learning_rate', learning_rate)
if not args.save_optimizer:
saver_to_save = tf.train.Saver()
saver_best = tf.train.Saver()
optimizer = config_optimizer(args.optimizer_name, learning_rate)
optimizer = NPUDistributedOptimizer(optimizer)
loss_scale_manager = FixedLossScaleManager(loss_scale=128)
if args.num_gpus > 1:
optimizer = NPULossScaleOptimizer(optimizer, loss_scale_manager, is_distributed=True)
else:
optimizer = NPULossScaleOptimizer(optimizer, loss_scale_manager, is_distributed=False)
# set dependencies for BN ops
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
# apply gradient clip to avoid gradient exploding
gvs = optimizer.compute_gradients(loss[0] + l2_loss, var_list=update_vars)
clip_grad_var = [gv if gv[0] is None else [
tf.clip_by_norm(gv[0], 100.), gv[1]] for gv in gvs]
train_op = optimizer.apply_gradients(clip_grad_var, global_step=tf.train.get_global_step())
if args.save_optimizer:
print(
'Saving optimizer parameters to checkpoint! Remember to restore the global_step in the fine-tuning afterwards.')
saver_to_save = tf.train.Saver()
saver_best = tf.train.Saver()
# npu modified
config = tf.ConfigProto()
custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
custom_op.name = "NpuOptimizer"
custom_op.parameter_map["use_off_line"].b = True # training on Ascend chips
custom_op.parameter_map["enable_data_pre_proc"].b = True
custom_op.parameter_map["iterations_per_loop"].i = args.iterations_per_loop
config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
with tf.Session(config=config) as sess:
# yolov3 finetuning训练开启(darknet53.ckpt
sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
# 断点续训开启
if args_input.resume:
saver_to_restore = tf.train.Saver()
saver_to_restore.restore(sess, tf.train.latest_checkpoint(args.save_dir))
else:
saver_to_restore.restore(sess, args.restore_path)
merged = tf.summary.merge_all()
writer = tf.summary.FileWriter(args.log_dir, sess.graph)
print('\n----------- start to train -----------\n')
#hwlog.logger.info("time_ts:%s, hardware:%s current os:%s" %(date_time,'Ascend910','Ubuntu 18.04'))
#hwlog.logger.info("time_ts:%s, framework is tensorflow 1.15.0 " %(date_time))
#remark_logger.info("ABK time_ts: %s, yolov3 %s model train begain, total train_epoches:%d, file: %s, lineno: %s" %(date_time,args_input.mode,args.total_epoches,file_name,sys._getframe().f_lineno))
hwlog.remark_print(key=hwlog.TOTAL_TRAIN_EPOCH, value=f"{args.total_epoches}")
best_mAP = -np.Inf
train_op = util.set_iteration_per_loop(sess, train_op, args.iterations_per_loop)
sess.run(train_init_op)
for epoch in range(args.total_epoches):
loss_total, loss_xy, loss_wh, loss_conf, loss_class = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
for i in trange(args.train_batch_num // args.iterations_per_loop):
t = time.time()
_, summary, __y_true, __loss, __global_step, __lr = sess.run(
[train_op, merged, y_true, loss, global_step, learning_rate]
)
fps = 1 / (time.time() - t) * args.iterations_per_loop * args.num_gpus * args.batch_size
writer.add_summary(summary, global_step=__global_step)
loss_total.update(__loss[0], len(__y_true[0]))
loss_xy.update(__loss[1], len(__y_true[0]))
loss_wh.update(__loss[2], len(__y_true[0]))
loss_conf.update(__loss[3], len(__y_true[0]))
loss_class.update(__loss[4], len(__y_true[0]))
info = "Epoch: {}, global_step: {} fps: {:.2f} lr: {:.5f} | loss: total: {:.2f}, xy: {:.2f}, wh: {:.2f}, conf: {:.2f}, class: {:.2f} | ".format(
epoch, int(__global_step), fps, __lr, loss_total.average, loss_xy.average, loss_wh.average,
loss_conf.average,
loss_class.average)
print(info)
logging.info(info)
#remark_logger.info("ABK time_ts:%s, global_steps %d, learning rate %2f, file: %s, lineno: %s" %(date_time,int(__global_step),__lr,file_name,sys._getframe().f_lineno))
#remark_logger.info("ABK time_ts:%s, fps %2f, loss_total %2f, file: %s, lineno: %s" %(date_time,fps,loss_total.average,file_name,sys._getframe().f_lineno))
hwlog.remark_print(key=hwlog.FPS, value=f"{fps}")
hwlog.remark_print(key=hwlog.GLOBAL_STEP, value=f"{int(__global_step)}")
# NOTE: this is just demo. You can set the conditions when to save the weights.
temp_epoch = epoch + 1
if temp_epoch % args.save_epoch == 0 and epoch > 0:
saver_to_save.save(sess, args.save_dir + 'model-epoch_{}_step_{}_loss_{:.4f}_lr_{:.5g}'.format( \
temp_epoch,
int(__global_step),
loss_total.average,
__lr))
if __lr <= 0:
break
saver_to_save.save(sess, args.save_dir + 'model-final_step_{}_loss_{:.4f}_lr_{:.5g}'.format( \
int(__global_step),
loss_total.average,
__lr))
@@ -0,0 +1,109 @@
{
"board_id": "0x002f",
"chip_info": "910",
"deploy_mode": "lab",
"group_count": "1",
"group_list": [
{
"device_num": "8",
"server_num": "1",
"group_name": "",
"instance_count": "8",
"instance_list": [
{
"devices": [
{
"device_id": "0",
"device_ip": "192.168.100.101"
}
],
"rank_id": "0",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "1",
"device_ip": "192.168.101.101"
}
],
"rank_id": "1",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "2",
"device_ip": "192.168.102.101"
}
],
"rank_id": "2",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "3",
"device_ip": "192.168.103.101"
}
],
"rank_id": "3",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "4",
"device_ip": "192.168.100.100"
}
],
"rank_id": "4",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "5",
"device_ip": "192.168.101.100"
}
],
"rank_id": "5",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "6",
"device_ip": "192.168.102.100"
}
],
"rank_id": "6",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "7",
"device_ip": "192.168.103.100"
}
],
"rank_id": "7",
"server_id": "0.0.0.0"
}
]
}
],
"para_plane_nic_location": "device",
"para_plane_nic_name": [
"eth0",
"eth1",
"eth2",
"eth3",
"eth4",
"eth5",
"eth6",
"eth7"
],
"para_plane_nic_num": "8",
"status": "completed"
}
@@ -0,0 +1,29 @@
#!/bin/bash
# Launch one training process pinned to its own range of CPU cores.
#
# Usage: <this script> RANK_ID RANK_SIZE TRAIN_DIR MODE
#   $1 RANK_ID    rank of this process; reused as DEVICE_ID and DEVICE_INDEX
#   $2 RANK_SIZE  exported unchanged as RANK_SIZE
#   $3 TRAIN_DIR  directory that contains train.py
#   $4 MODE       forwarded to train.py as --mode

if [ "$#" -lt 4 ]; then
    echo "Usage: $0 RANK_ID RANK_SIZE TRAIN_DIR MODE" >&2
    exit 1
fi

# Export the rank before cleaning up, so the result directory removed below
# belongs to the rank given on the command line and not to an inherited
# (or empty) RANK_ID.
export RANK_ID=$1
export RANK_SIZE=$2
export DEVICE_ID=$RANK_ID
export DEVICE_INDEX=$RANK_ID
export RANK_TABLE_FILE=rank_table.json
export JOB_ID=123678
export FUSION_TENSOR_SIZE=1000000000

# Remove artifacts left in the working directory by a previous run.
rm -rf Onnxgraph
rm -rf Partition
rm -rf OptimizeSubGraph
rm -rf Aicpu_Optimized
rm -f -- *txt
rm -rf "result_${RANK_ID}"

# Each rank is bound to a contiguous range of KERNEL_NUM CPU cores.
KERNEL_NUM=20
PID_START=$((KERNEL_NUM * RANK_ID))
PID_END=$((PID_START + KERNEL_NUM - 1))

taskset -c "${PID_START}-${PID_END}" python3 "$3/train.py" \
    --mode "$4"

# Move any *.txt / *.pbtxt files from the working directory into ./graph.
mkdir -p graph
for dump in *.txt *.pbtxt; do
    if [ -e "$dump" ]; then
        mv -- "$dump" graph/
    fi
done
@@ -0,0 +1,109 @@
{
"board_id": "0x002f",
"chip_info": "910",
"deploy_mode": "lab",
"group_count": "1",
"group_list": [
{
"device_num": "8",
"server_num": "1",
"group_name": "",
"instance_count": "8",
"instance_list": [
{
"devices": [
{
"device_id": "0",
"device_ip": "192.168.100.101"
}
],
"rank_id": "0",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "1",
"device_ip": "192.168.101.101"
}
],
"rank_id": "1",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "2",
"device_ip": "192.168.102.101"
}
],
"rank_id": "2",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "3",
"device_ip": "192.168.103.101"
}
],
"rank_id": "3",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "4",
"device_ip": "192.168.100.100"
}
],
"rank_id": "4",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "5",
"device_ip": "192.168.101.100"
}
],
"rank_id": "5",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "6",
"device_ip": "192.168.102.100"
}
],
"rank_id": "6",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "7",
"device_ip": "192.168.103.100"
}
],
"rank_id": "7",
"server_id": "0.0.0.0"
}
]
}
],
"para_plane_nic_location": "device",
"para_plane_nic_name": [
"eth0",
"eth1",
"eth2",
"eth3",
"eth4",
"eth5",
"eth6",
"eth7"
],
"para_plane_nic_num": "8",
"status": "completed"
}
@@ -0,0 +1,29 @@
#!/bin/bash
# Launch one training process pinned to its own range of CPU cores.
#
# Usage: <this script> RANK_ID RANK_SIZE TRAIN_DIR MODE
#   $1 RANK_ID    rank of this process; reused as DEVICE_ID and DEVICE_INDEX
#   $2 RANK_SIZE  exported unchanged as RANK_SIZE
#   $3 TRAIN_DIR  directory that contains train.py
#   $4 MODE       forwarded to train.py as --mode

if [ "$#" -lt 4 ]; then
    echo "Usage: $0 RANK_ID RANK_SIZE TRAIN_DIR MODE" >&2
    exit 1
fi

# Export the rank before cleaning up, so the result directory removed below
# belongs to the rank given on the command line and not to an inherited
# (or empty) RANK_ID.
export RANK_ID=$1
export RANK_SIZE=$2
export DEVICE_ID=$RANK_ID
export DEVICE_INDEX=$RANK_ID
export RANK_TABLE_FILE=rank_table.json
export JOB_ID=123678
export FUSION_TENSOR_SIZE=1000000000

# Remove artifacts left in the working directory by a previous run.
rm -rf Onnxgraph
rm -rf Partition
rm -rf OptimizeSubGraph
rm -rf Aicpu_Optimized
rm -f -- *txt
rm -rf "result_${RANK_ID}"

# Each rank is bound to a contiguous range of KERNEL_NUM CPU cores.
KERNEL_NUM=20
PID_START=$((KERNEL_NUM * RANK_ID))
PID_END=$((PID_START + KERNEL_NUM - 1))

taskset -c "${PID_START}-${PID_END}" python3 "$3/train.py" \
    --mode "$4"

# Move any *.txt / *.pbtxt files from the working directory into ./graph.
mkdir -p graph
for dump in *.txt *.pbtxt; do
    if [ -e "$dump" ]; then
        mv -- "$dump" graph/
    fi
done
@@ -0,0 +1,109 @@
{
"board_id": "0x002f",
"chip_info": "910",
"deploy_mode": "lab",
"group_count": "1",
"group_list": [
{
"device_num": "8",
"server_num": "1",
"group_name": "",
"instance_count": "8",
"instance_list": [
{
"devices": [
{
"device_id": "0",
"device_ip": "192.168.100.101"
}
],
"rank_id": "0",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "1",
"device_ip": "192.168.101.101"
}
],
"rank_id": "1",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "2",
"device_ip": "192.168.102.101"
}
],
"rank_id": "2",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "3",
"device_ip": "192.168.103.101"
}
],
"rank_id": "3",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "4",
"device_ip": "192.168.100.100"
}
],
"rank_id": "4",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "5",
"device_ip": "192.168.101.100"
}
],
"rank_id": "5",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "6",
"device_ip": "192.168.102.100"
}
],
"rank_id": "6",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "7",
"device_ip": "192.168.103.100"
}
],
"rank_id": "7",
"server_id": "0.0.0.0"
}
]
}
],
"para_plane_nic_location": "device",
"para_plane_nic_name": [
"eth0",
"eth1",
"eth2",
"eth3",
"eth4",
"eth5",
"eth6",
"eth7"
],
"para_plane_nic_num": "8",
"status": "completed"
}
@@ -0,0 +1,29 @@
#!/bin/bash
# Launch one training process pinned to its own range of CPU cores.
#
# Usage: <this script> RANK_ID RANK_SIZE TRAIN_DIR MODE
#   $1 RANK_ID    rank of this process; reused as DEVICE_ID and DEVICE_INDEX
#   $2 RANK_SIZE  exported unchanged as RANK_SIZE
#   $3 TRAIN_DIR  directory that contains train.py
#   $4 MODE       forwarded to train.py as --mode

if [ "$#" -lt 4 ]; then
    echo "Usage: $0 RANK_ID RANK_SIZE TRAIN_DIR MODE" >&2
    exit 1
fi

# Export the rank before cleaning up, so the result directory removed below
# belongs to the rank given on the command line and not to an inherited
# (or empty) RANK_ID.
export RANK_ID=$1
export RANK_SIZE=$2
export DEVICE_ID=$RANK_ID
export DEVICE_INDEX=$RANK_ID
export RANK_TABLE_FILE=rank_table.json
export JOB_ID=123678
export FUSION_TENSOR_SIZE=1000000000

# Remove artifacts left in the working directory by a previous run.
rm -rf Onnxgraph
rm -rf Partition
rm -rf OptimizeSubGraph
rm -rf Aicpu_Optimized
rm -f -- *txt
rm -rf "result_${RANK_ID}"

# Each rank is bound to a contiguous range of KERNEL_NUM CPU cores.
KERNEL_NUM=20
PID_START=$((KERNEL_NUM * RANK_ID))
PID_END=$((PID_START + KERNEL_NUM - 1))

taskset -c "${PID_START}-${PID_END}" python3 "$3/train.py" \
    --mode "$4"

# Move any *.txt / *.pbtxt files from the working directory into ./graph.
mkdir -p graph
for dump in *.txt *.pbtxt; do
    if [ -e "$dump" ]; then
        mv -- "$dump" graph/
    fi
done
@@ -0,0 +1,109 @@
{
"board_id": "0x002f",
"chip_info": "910",
"deploy_mode": "lab",
"group_count": "1",
"group_list": [
{
"device_num": "8",
"server_num": "1",
"group_name": "",
"instance_count": "8",
"instance_list": [
{
"devices": [
{
"device_id": "0",
"device_ip": "192.168.100.101"
}
],
"rank_id": "0",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "1",
"device_ip": "192.168.101.101"
}
],
"rank_id": "1",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "2",
"device_ip": "192.168.102.101"
}
],
"rank_id": "2",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "3",
"device_ip": "192.168.103.101"
}
],
"rank_id": "3",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "4",
"device_ip": "192.168.100.100"
}
],
"rank_id": "4",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "5",
"device_ip": "192.168.101.100"
}
],
"rank_id": "5",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "6",
"device_ip": "192.168.102.100"
}
],
"rank_id": "6",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "7",
"device_ip": "192.168.103.100"
}
],
"rank_id": "7",
"server_id": "0.0.0.0"
}
]
}
],
"para_plane_nic_location": "device",
"para_plane_nic_name": [
"eth0",
"eth1",
"eth2",
"eth3",
"eth4",
"eth5",
"eth6",
"eth7"
],
"para_plane_nic_num": "8",
"status": "completed"
}
@@ -0,0 +1,29 @@
#!/bin/bash
# Launch one training process pinned to its own range of CPU cores.
#
# Usage: <this script> RANK_ID RANK_SIZE TRAIN_DIR MODE
#   $1 RANK_ID    rank of this process; reused as DEVICE_ID and DEVICE_INDEX
#   $2 RANK_SIZE  exported unchanged as RANK_SIZE
#   $3 TRAIN_DIR  directory that contains train.py
#   $4 MODE       forwarded to train.py as --mode

if [ "$#" -lt 4 ]; then
    echo "Usage: $0 RANK_ID RANK_SIZE TRAIN_DIR MODE" >&2
    exit 1
fi

# Export the rank before cleaning up, so the result directory removed below
# belongs to the rank given on the command line and not to an inherited
# (or empty) RANK_ID.
export RANK_ID=$1
export RANK_SIZE=$2
export DEVICE_ID=$RANK_ID
export DEVICE_INDEX=$RANK_ID
export RANK_TABLE_FILE=rank_table.json
export JOB_ID=123678
export FUSION_TENSOR_SIZE=1000000000

# Remove artifacts left in the working directory by a previous run.
rm -rf Onnxgraph
rm -rf Partition
rm -rf OptimizeSubGraph
rm -rf Aicpu_Optimized
rm -f -- *txt
rm -rf "result_${RANK_ID}"

# Each rank is bound to a contiguous range of KERNEL_NUM CPU cores.
KERNEL_NUM=20
PID_START=$((KERNEL_NUM * RANK_ID))
PID_END=$((PID_START + KERNEL_NUM - 1))

taskset -c "${PID_START}-${PID_END}" python3 "$3/train.py" \
    --mode "$4"

# Move any *.txt / *.pbtxt files from the working directory into ./graph.
mkdir -p graph
for dump in *.txt *.pbtxt; do
    if [ -e "$dump" ]; then
        mv -- "$dump" graph/
    fi
done
@@ -0,0 +1,109 @@
{
"board_id": "0x002f",
"chip_info": "910",
"deploy_mode": "lab",
"group_count": "1",
"group_list": [
{
"device_num": "8",
"server_num": "1",
"group_name": "",
"instance_count": "8",
"instance_list": [
{
"devices": [
{
"device_id": "0",
"device_ip": "192.168.100.101"
}
],
"rank_id": "0",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "1",
"device_ip": "192.168.101.101"
}
],
"rank_id": "1",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "2",
"device_ip": "192.168.102.101"
}
],
"rank_id": "2",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "3",
"device_ip": "192.168.103.101"
}
],
"rank_id": "3",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "4",
"device_ip": "192.168.100.100"
}
],
"rank_id": "4",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "5",
"device_ip": "192.168.101.100"
}
],
"rank_id": "5",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "6",
"device_ip": "192.168.102.100"
}
],
"rank_id": "6",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "7",
"device_ip": "192.168.103.100"
}
],
"rank_id": "7",
"server_id": "0.0.0.0"
}
]
}
],
"para_plane_nic_location": "device",
"para_plane_nic_name": [
"eth0",
"eth1",
"eth2",
"eth3",
"eth4",
"eth5",
"eth6",
"eth7"
],
"para_plane_nic_num": "8",
"status": "completed"
}
@@ -0,0 +1,29 @@
#!/bin/bash
# Launch one training process pinned to its own range of CPU cores.
#
# Usage: <this script> RANK_ID RANK_SIZE TRAIN_DIR MODE
#   $1 RANK_ID    rank of this process; reused as DEVICE_ID and DEVICE_INDEX
#   $2 RANK_SIZE  exported unchanged as RANK_SIZE
#   $3 TRAIN_DIR  directory that contains train.py
#   $4 MODE       forwarded to train.py as --mode

if [ "$#" -lt 4 ]; then
    echo "Usage: $0 RANK_ID RANK_SIZE TRAIN_DIR MODE" >&2
    exit 1
fi

# Export the rank before cleaning up, so the result directory removed below
# belongs to the rank given on the command line and not to an inherited
# (or empty) RANK_ID.
export RANK_ID=$1
export RANK_SIZE=$2
export DEVICE_ID=$RANK_ID
export DEVICE_INDEX=$RANK_ID
export RANK_TABLE_FILE=rank_table.json
export JOB_ID=123678
export FUSION_TENSOR_SIZE=1000000000

# Remove artifacts left in the working directory by a previous run.
rm -rf Onnxgraph
rm -rf Partition
rm -rf OptimizeSubGraph
rm -rf Aicpu_Optimized
rm -f -- *txt
rm -rf "result_${RANK_ID}"

# Each rank is bound to a contiguous range of KERNEL_NUM CPU cores.
KERNEL_NUM=20
PID_START=$((KERNEL_NUM * RANK_ID))
PID_END=$((PID_START + KERNEL_NUM - 1))

taskset -c "${PID_START}-${PID_END}" python3 "$3/train.py" \
    --mode "$4"

# Move any *.txt / *.pbtxt files from the working directory into ./graph.
mkdir -p graph
for dump in *.txt *.pbtxt; do
    if [ -e "$dump" ]; then
        mv -- "$dump" graph/
    fi
done
@@ -0,0 +1,109 @@
{
"board_id": "0x002f",
"chip_info": "910",
"deploy_mode": "lab",
"group_count": "1",
"group_list": [
{
"device_num": "8",
"server_num": "1",
"group_name": "",
"instance_count": "8",
"instance_list": [
{
"devices": [
{
"device_id": "0",
"device_ip": "192.168.100.101"
}
],
"rank_id": "0",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "1",
"device_ip": "192.168.101.101"
}
],
"rank_id": "1",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "2",
"device_ip": "192.168.102.101"
}
],
"rank_id": "2",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "3",
"device_ip": "192.168.103.101"
}
],
"rank_id": "3",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "4",
"device_ip": "192.168.100.100"
}
],
"rank_id": "4",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "5",
"device_ip": "192.168.101.100"
}
],
"rank_id": "5",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "6",
"device_ip": "192.168.102.100"
}
],
"rank_id": "6",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "7",
"device_ip": "192.168.103.100"
}
],
"rank_id": "7",
"server_id": "0.0.0.0"
}
]
}
],
"para_plane_nic_location": "device",
"para_plane_nic_name": [
"eth0",
"eth1",
"eth2",
"eth3",
"eth4",
"eth5",
"eth6",
"eth7"
],
"para_plane_nic_num": "8",
"status": "completed"
}
@@ -0,0 +1,29 @@
#!/bin/bash
# Launch one training process pinned to its own range of CPU cores.
#
# Usage: <this script> RANK_ID RANK_SIZE TRAIN_DIR MODE
#   $1 RANK_ID    rank of this process; reused as DEVICE_ID and DEVICE_INDEX
#   $2 RANK_SIZE  exported unchanged as RANK_SIZE
#   $3 TRAIN_DIR  directory that contains train.py
#   $4 MODE       forwarded to train.py as --mode

if [ "$#" -lt 4 ]; then
    echo "Usage: $0 RANK_ID RANK_SIZE TRAIN_DIR MODE" >&2
    exit 1
fi

# Export the rank before cleaning up, so the result directory removed below
# belongs to the rank given on the command line and not to an inherited
# (or empty) RANK_ID.
export RANK_ID=$1
export RANK_SIZE=$2
export DEVICE_ID=$RANK_ID
export DEVICE_INDEX=$RANK_ID
export RANK_TABLE_FILE=rank_table.json
export JOB_ID=123678
export FUSION_TENSOR_SIZE=1000000000

# Remove artifacts left in the working directory by a previous run.
rm -rf Onnxgraph
rm -rf Partition
rm -rf OptimizeSubGraph
rm -rf Aicpu_Optimized
rm -f -- *txt
rm -rf "result_${RANK_ID}"

# Each rank is bound to a contiguous range of KERNEL_NUM CPU cores.
KERNEL_NUM=20
PID_START=$((KERNEL_NUM * RANK_ID))
PID_END=$((PID_START + KERNEL_NUM - 1))

taskset -c "${PID_START}-${PID_END}" python3 "$3/train.py" \
    --mode "$4"

# Move any *.txt / *.pbtxt files from the working directory into ./graph.
mkdir -p graph
for dump in *.txt *.pbtxt; do
    if [ -e "$dump" ]; then
        mv -- "$dump" graph/
    fi
done
@@ -0,0 +1,109 @@
{
"board_id": "0x002f",
"chip_info": "910",
"deploy_mode": "lab",
"group_count": "1",
"group_list": [
{
"device_num": "8",
"server_num": "1",
"group_name": "",
"instance_count": "8",
"instance_list": [
{
"devices": [
{
"device_id": "0",
"device_ip": "192.168.100.101"
}
],
"rank_id": "0",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "1",
"device_ip": "192.168.101.101"
}
],
"rank_id": "1",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "2",
"device_ip": "192.168.102.101"
}
],
"rank_id": "2",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "3",
"device_ip": "192.168.103.101"
}
],
"rank_id": "3",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "4",
"device_ip": "192.168.100.100"
}
],
"rank_id": "4",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "5",
"device_ip": "192.168.101.100"
}
],
"rank_id": "5",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "6",
"device_ip": "192.168.102.100"
}
],
"rank_id": "6",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "7",
"device_ip": "192.168.103.100"
}
],
"rank_id": "7",
"server_id": "0.0.0.0"
}
]
}
],
"para_plane_nic_location": "device",
"para_plane_nic_name": [
"eth0",
"eth1",
"eth2",
"eth3",
"eth4",
"eth5",
"eth6",
"eth7"
],
"para_plane_nic_num": "8",
"status": "completed"
}
@@ -0,0 +1,29 @@
#!/bin/bash
# Launch one training process pinned to its own range of CPU cores.
#
# Usage: <this script> RANK_ID RANK_SIZE TRAIN_DIR MODE
#   $1 RANK_ID    rank of this process; reused as DEVICE_ID and DEVICE_INDEX
#   $2 RANK_SIZE  exported unchanged as RANK_SIZE
#   $3 TRAIN_DIR  directory that contains train.py
#   $4 MODE       forwarded to train.py as --mode

if [ "$#" -lt 4 ]; then
    echo "Usage: $0 RANK_ID RANK_SIZE TRAIN_DIR MODE" >&2
    exit 1
fi

# Export the rank before cleaning up, so the result directory removed below
# belongs to the rank given on the command line and not to an inherited
# (or empty) RANK_ID.
export RANK_ID=$1
export RANK_SIZE=$2
export DEVICE_ID=$RANK_ID
export DEVICE_INDEX=$RANK_ID
export RANK_TABLE_FILE=rank_table.json
export JOB_ID=123678
export FUSION_TENSOR_SIZE=1000000000

# Remove artifacts left in the working directory by a previous run.
rm -rf Onnxgraph
rm -rf Partition
rm -rf OptimizeSubGraph
rm -rf Aicpu_Optimized
rm -f -- *txt
rm -rf "result_${RANK_ID}"

# Each rank is bound to a contiguous range of KERNEL_NUM CPU cores.
KERNEL_NUM=20
PID_START=$((KERNEL_NUM * RANK_ID))
PID_END=$((PID_START + KERNEL_NUM - 1))

taskset -c "${PID_START}-${PID_END}" python3 "$3/train.py" \
    --mode "$4"

# Move any *.txt / *.pbtxt files from the working directory into ./graph.
mkdir -p graph
for dump in *.txt *.pbtxt; do
    if [ -e "$dump" ]; then
        mv -- "$dump" graph/
    fi
done
@@ -0,0 +1,109 @@
{
"board_id": "0x002f",
"chip_info": "910",
"deploy_mode": "lab",
"group_count": "1",
"group_list": [
{
"device_num": "8",
"server_num": "1",
"group_name": "",
"instance_count": "8",
"instance_list": [
{
"devices": [
{
"device_id": "0",
"device_ip": "192.168.100.101"
}
],
"rank_id": "0",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "1",
"device_ip": "192.168.101.101"
}
],
"rank_id": "1",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "2",
"device_ip": "192.168.102.101"
}
],
"rank_id": "2",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "3",
"device_ip": "192.168.103.101"
}
],
"rank_id": "3",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "4",
"device_ip": "192.168.100.100"
}
],
"rank_id": "4",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "5",
"device_ip": "192.168.101.100"
}
],
"rank_id": "5",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "6",
"device_ip": "192.168.102.100"
}
],
"rank_id": "6",
"server_id": "0.0.0.0"
},
{
"devices": [
{
"device_id": "7",
"device_ip": "192.168.103.100"
}
],
"rank_id": "7",
"server_id": "0.0.0.0"
}
]
}
],
"para_plane_nic_location": "device",
"para_plane_nic_name": [
"eth0",
"eth1",
"eth2",
"eth3",
"eth4",
"eth5",
"eth6",
"eth7"
],
"para_plane_nic_num": "8",
"status": "completed"
}
@@ -0,0 +1,29 @@
#!/bin/bash
# Launch one training process pinned to its own range of CPU cores.
#
# Usage: <this script> RANK_ID RANK_SIZE TRAIN_DIR MODE
#   $1 RANK_ID    rank of this process; reused as DEVICE_ID and DEVICE_INDEX
#   $2 RANK_SIZE  exported unchanged as RANK_SIZE
#   $3 TRAIN_DIR  directory that contains train.py
#   $4 MODE       forwarded to train.py as --mode

if [ "$#" -lt 4 ]; then
    echo "Usage: $0 RANK_ID RANK_SIZE TRAIN_DIR MODE" >&2
    exit 1
fi

# Export the rank before cleaning up, so the result directory removed below
# belongs to the rank given on the command line and not to an inherited
# (or empty) RANK_ID.
export RANK_ID=$1
export RANK_SIZE=$2
export DEVICE_ID=$RANK_ID
export DEVICE_INDEX=$RANK_ID
export RANK_TABLE_FILE=rank_table.json
export JOB_ID=123678
export FUSION_TENSOR_SIZE=1000000000

# Remove artifacts left in the working directory by a previous run.
rm -rf Onnxgraph
rm -rf Partition
rm -rf OptimizeSubGraph
rm -rf Aicpu_Optimized
rm -f -- *txt
rm -rf "result_${RANK_ID}"

# Each rank is bound to a contiguous range of KERNEL_NUM CPU cores.
KERNEL_NUM=20
PID_START=$((KERNEL_NUM * RANK_ID))
PID_END=$((PID_START + KERNEL_NUM - 1))

taskset -c "${PID_START}-${PID_END}" python3 "$3/train.py" \
    --mode "$4"

# Move any *.txt / *.pbtxt files from the working directory into ./graph.
mkdir -p graph
for dump in *.txt *.pbtxt; do
    if [ -e "$dump" ]; then
        mv -- "$dump" graph/
    fi
done
@@ -0,0 +1,450 @@
# coding: utf-8
# part of this is taken from Gluon's repo:
# https://github.com/dmlc/gluon-cv/blob/master/gluoncv/data/transforms/presets/yolo.py
from __future__ import division, print_function
import random
import numpy as np
import cv2
# from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
def mix_up(img1, img2, bbox1, bbox2):
    '''
    Blend two images with a random weight and merge their bounding boxes.

    Both images are pasted at the top-left corner of a zero canvas that is
    as tall and as wide as the larger of the two; `img1` is weighted by a
    Beta(1.5, 1.5) sample and `img2` by its complement.

    return:
        mix_img: HWC format mix up image (uint8)
        mix_bbox: [N, 5] shape mix up bbox, i.e. `x_min, y_min, x_max, y_max, mixup_weight`.
    '''
    h1, w1 = img1.shape[0], img1.shape[1]
    h2, w2 = img2.shape[0], img2.shape[1]
    canvas = np.zeros(shape=(max(h1, h2), max(w1, w2), 3), dtype='float32')

    # blending weight of img1, clamped to [0, 1]
    weight = max(0, min(1, np.random.beta(1.5, 1.5)))

    canvas[:h1, :w1, :] = img1.astype('float32') * weight
    canvas[:h2, :w2, :] += img2.astype('float32') * (1. - weight)
    mix_img = canvas.astype('uint8')

    # every box carries the weight of the image it came from as last column
    col1 = np.full(shape=(bbox1.shape[0], 1), fill_value=weight)
    col2 = np.full(shape=(bbox2.shape[0], 1), fill_value=1. - weight)
    weighted1 = np.concatenate((bbox1, col1), axis=-1)
    weighted2 = np.concatenate((bbox2, col2), axis=-1)
    mix_bbox = np.concatenate((weighted1, weighted2), axis=0)

    return mix_img, mix_bbox
def bbox_crop(bbox, crop_box=None, allow_outside_center=True):
    """Clip bounding boxes to a crop window and shift them into its frame.

    Boxes that end up with no positive width or height are dropped.  The
    input array is never modified; a copy is returned.

    Parameters
    ----------
    bbox : numpy.ndarray
        Array of shape (N, 4+).  The first four columns are
        :math:`(x_{min}, y_{min}, x_{max}, y_{max})`; any further columns
        are carried over untouched.
    crop_box : tuple
        Tuple of length 4, :math:`(x_{min}, y_{min}, width, height)`.
        A falsy offset is treated as 0 and a falsy width/height as
        unbounded.  ``None`` (or four ``None`` entries) disables cropping.
    allow_outside_center : bool
        If `False`, boxes whose center lies outside the crop window are
        removed as well.

    Returns
    -------
    numpy.ndarray
        Cropped bounding boxes with shape (M, 4+) where M <= N.

    Raises
    ------
    ValueError
        If `crop_box` does not have exactly four entries.
    """
    bbox = bbox.copy()
    if crop_box is None:
        return bbox
    if len(crop_box) != 4:
        raise ValueError(
            "Invalid crop_box parameter, requires length 4, given {}".format(str(crop_box)))
    if all(c is None for c in crop_box):
        return bbox

    x0, y0, crop_w, crop_h = crop_box
    left = x0 or 0
    top = y0 or 0
    right = left + (crop_w or np.inf)
    bottom = top + (crop_h or np.inf)
    window = np.array((left, top, right, bottom))

    if allow_outside_center:
        keep = np.ones(bbox.shape[0], dtype=bool)
    else:
        centers = (bbox[:, :2] + bbox[:, 2:4]) / 2
        inside = np.logical_and(window[:2] <= centers, centers < window[2:])
        keep = inside.all(axis=1)

    # clip the corners to the window, then move the origin to its top-left
    bbox[:, :2] = np.maximum(bbox[:, :2], window[:2])
    bbox[:, 2:4] = np.minimum(bbox[:, 2:4], window[2:4])
    bbox[:, :2] -= window[:2]
    bbox[:, 2:4] -= window[:2]

    # discard boxes that collapsed to an empty area
    non_empty = (bbox[:, :2] < bbox[:, 2:4]).all(axis=1)
    keep = np.logical_and(keep, non_empty)
    return bbox[keep]
def bbox_iou(bbox_a, bbox_b, offset=0):
    """Compute the pairwise Intersection-Over-Union of two sets of boxes.

    Parameters
    ----------
    bbox_a : numpy.ndarray
        An ndarray with shape :math:`(N, 4)`.
    bbox_b : numpy.ndarray
        An ndarray with shape :math:`(M, 4)`.
    offset : float or int, default is 0
        Added to every width and height, i.e. a side is measured as
        (right - left + ``offset``).
        Note that the offset must be 0 for normalized bboxes, whose ranges are in ``[0, 1]``.

    Returns
    -------
    numpy.ndarray
        An ndarray with shape :math:`(N, M)` holding the IOU of every pair
        of boxes from `bbox_a` and `bbox_b`.

    Raises
    ------
    IndexError
        If either input has fewer than four columns.
    """
    if not (bbox_a.shape[1] >= 4 and bbox_b.shape[1] >= 4):
        raise IndexError("Bounding boxes axis 1 must have at least length 4")

    # broadcast (N, 1, 2) against (M, 2) to get every pair's overlap corners
    inter_tl = np.maximum(bbox_a[:, None, :2], bbox_b[:, :2])
    inter_br = np.minimum(bbox_a[:, None, 2:4], bbox_b[:, 2:4])

    # pairs without a real overlap get zero intersection area
    overlaps = (inter_tl < inter_br).all(axis=2)
    area_i = np.prod(inter_br - inter_tl + offset, axis=2) * overlaps

    size_a = bbox_a[:, 2:4] - bbox_a[:, :2] + offset
    size_b = bbox_b[:, 2:4] - bbox_b[:, :2] + offset
    area_a = np.prod(size_a, axis=1)
    area_b = np.prod(size_b, axis=1)

    union = area_a[:, None] + area_b - area_i
    return area_i / union
def random_crop_with_constraints(bbox, size, min_scale=0.25, max_scale=1,
                                 max_aspect_ratio=2, constraints=None,
                                 max_trial=10):
    """Crop an image randomly with bounding box constraints.

    This data augmentation is used in training of
    Single Shot Multibox Detector [#]_. More details can be found in
    data augmentation section of the original paper.
    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy,
       Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
       SSD: Single Shot MultiBox Detector. ECCV 2016.

    Parameters
    ----------
    bbox : numpy.ndarray
        Numpy.ndarray with shape (N, 4+) where N is the number of bounding boxes.
        The second axis represents attributes of the bounding box.
        Specifically, these are :math:`(x_{min}, y_{min}, x_{max}, y_{max})`,
        we allow additional attributes other than coordinates, which stay intact
        during bounding box transformations.
    size : tuple
        Tuple of length 2 of image shape as (width, height).
    min_scale : float
        The minimum ratio between a cropped region and the original image.
        The default value is :obj:`0.25`.
    max_scale : float
        The maximum ratio between a cropped region and the original image.
        The default value is :obj:`1`.
    max_aspect_ratio : float
        The maximum aspect ratio of cropped region.
        The default value is :obj:`2`.
    constraints : iterable of tuples
        An iterable of constraints.
        Each constraint should be :obj:`(min_iou, max_iou)` format.
        It means no constraint if :obj:`min_iou` or :obj:`max_iou` is :obj:`None`.
        If this argument is :obj:`None`, :obj:`((0.3, None), (0.5, None),
        (0.7, None), (0.9, None), (None, 1))` will be used.
    max_trial : int, default 10
        Maximum number of trials for each constraint before exit no matter what.

    Returns
    -------
    numpy.ndarray
        Cropped bounding boxes with shape :obj:`(M, 4+)` where M <= N.
    tuple
        Tuple of length 4 as (x_offset, y_offset, new_width, new_height).

    Notes
    -----
    When `bbox` is empty the very first sampled crop is returned without
    checking any constraint.
    """
    # default params in paper
    if constraints is None:
        constraints = (
            (0.3, None),
            (0.5, None),
            (0.7, None),
            (0.9, None),
            (None, 1),
        )

    w, h = size

    # the full image is always a valid fallback candidate
    candidates = [(0, 0, w, h)]
    for min_iou, max_iou in constraints:
        min_iou = -np.inf if min_iou is None else min_iou
        max_iou = np.inf if max_iou is None else max_iou

        for _ in range(max_trial):
            scale = random.uniform(min_scale, max_scale)
            aspect_ratio = random.uniform(
                max(1 / max_aspect_ratio, scale * scale),
                min(max_aspect_ratio, 1 / (scale * scale)))
            # never let rounding make the crop larger than the image
            crop_h = min(int(h * scale / np.sqrt(aspect_ratio)), h)
            crop_w = min(int(w * scale * np.sqrt(aspect_ratio)), w)

            # randrange(0) raises ValueError; a crop that spans the whole
            # side can only start at offset 0, which randrange(1) yields.
            crop_t = random.randrange(max(h - crop_h, 1))
            crop_l = random.randrange(max(w - crop_w, 1))
            crop = (crop_l, crop_t, crop_w, crop_h)

            if len(bbox) == 0:
                return bbox, crop

            crop_bb = np.array((crop_l, crop_t, crop_l + crop_w, crop_t + crop_h))
            iou = bbox_iou(bbox, crop_bb[np.newaxis])
            if min_iou <= iou.min() and iou.max() <= max_iou:
                candidates.append(crop)
                break

    # pick candidates in random order until one keeps at least one box
    while candidates:
        crop = candidates.pop(np.random.randint(0, len(candidates)))
        new_bbox = bbox_crop(bbox, crop, allow_outside_center=False)
        if new_bbox.size < 1:
            continue
        return new_bbox, crop
    return bbox, (0, 0, w, h)
def _rand(a=0., b=1.):
    """Draw one sample from ``np.random.rand()`` rescaled to [a, b)."""
    span = b - a
    return np.random.rand() * span + a
def random_color_distort(image_data, _hue=0.1, _sat=1.5, _val=1.5):
    '''
    Randomly jitter the hue, saturation and value of an image in HSV space.
    param:
        image_data: image handed to `rgb_to_hsv`
            (presumably RGB floats in [0, 1] -- TODO confirm against the caller).
        _hue: the hue shift is drawn uniformly from [-_hue, _hue).
        _sat: the saturation gain is drawn from [1, _sat) and, with probability
            0.5, replaced by its reciprocal.
        _val: same as `_sat`, applied to the value channel.
    return:
        The distorted image converted back with `hsv_to_rgb`, as float32.
    '''
    # The order of the random draws is kept: hue, then saturation, then value.
    hue_shift = _rand(-_hue, _hue)
    if _rand() < .5:
        sat_gain = _rand(1, _sat)
    else:
        sat_gain = 1 / _rand(1, _sat)
    if _rand() < .5:
        val_gain = _rand(1, _val)
    else:
        val_gain = 1 / _rand(1, _val)

    hsv = rgb_to_hsv(image_data)
    hue_channel = hsv[..., 0]  # view: edits below write through to `hsv`
    hue_channel += hue_shift
    # wrap the hue back into [0, 1]
    hue_channel[hue_channel > 1] -= 1
    hue_channel[hue_channel < 0] += 1
    hsv[..., 1] *= sat_gain
    hsv[..., 2] *= val_gain
    # clamp every channel to [0, 1]
    hsv[hsv > 1] = 1
    hsv[hsv < 0] = 0
    return hsv_to_rgb(hsv).astype(np.float32)
def random_color_distort_1(img, bgain=16, hgain=0.0138, sgain=0.678, vgain=0.36):
    '''
    Randomly scale the H, S and V channels of a BGR image through lookup tables.
    param:
        img: image converted with cv2.COLOR_BGR2HSV; its dtype is reused for
            the lookup tables and the result.
        bgain: currently unused (the brightness jitter is disabled).
        hgain, sgain, vgain: each channel gain is drawn uniformly from
            [1 - gain, 1 + gain).
    return:
        The distorted image, converted back to BGR.
    '''
    gains = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1
    hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
    out_dtype = img.dtype

    # One table entry per possible 8-bit channel value.
    levels = np.arange(0, 256, dtype=np.int16)
    hue_table = ((levels * gains[0]) % 180).astype(out_dtype)
    sat_table = np.clip(levels * gains[1], 0, 255).astype(out_dtype)
    val_table = np.clip(levels * gains[2], 0, 255).astype(out_dtype)

    remapped = cv2.merge((cv2.LUT(hue, hue_table),
                          cv2.LUT(sat, sat_table),
                          cv2.LUT(val, val_table))).astype(out_dtype)
    return cv2.cvtColor(remapped, cv2.COLOR_HSV2BGR)
def random_color_distort_raw(img, brightness_delta=16, hue_vari=0.01, sat_vari=0.15, val_vari=0.15, p=0.2):
    '''
    Randomly distort image color. Adjust brightness, hue, saturation, value.
    param:
        img: a BGR uint8 format OpenCV image. HWC format.
        brightness_delta: the brightness offset is int(uniform(-delta, delta)).
        hue_vari: half-width of the hue shift range. A positive integer keeps
            the integer draw `randint(-hue_vari, hue_vari)`; any other value
            (such as the fractional default) is drawn with `uniform`, because
            `randint` would truncate it to an empty integer range.
        sat_vari, val_vari: the channel is multiplied by 1 + uniform(-v, v).
        p: each jitter is SKIPPED with probability p (it is applied when a
            uniform draw exceeds p).
    return:
        The distorted BGR uint8 image.
    '''
    def random_hue(img_hsv, hue_vari, p=p):
        if np.random.uniform(0, 1) > p:
            if isinstance(hue_vari, (int, np.integer)) and hue_vari > 0:
                hue_delta = np.random.randint(-hue_vari, hue_vari)
            else:
                # fractional (or zero) range: randint cannot represent it
                hue_delta = np.random.uniform(-hue_vari, hue_vari)
            # OpenCV 8-bit hue lives in [0, 180)
            img_hsv[:, :, 0] = (img_hsv[:, :, 0] + hue_delta) % 180
        return img_hsv

    def random_saturation(img_hsv, sat_vari, p=p):
        if np.random.uniform(0, 1) > p:
            sat_mult = 1 + np.random.uniform(-sat_vari, sat_vari)
            img_hsv[:, :, 1] *= sat_mult
        return img_hsv

    def random_value(img_hsv, val_vari, p=p):
        if np.random.uniform(0, 1) > p:
            val_mult = 1 + np.random.uniform(-val_vari, val_vari)
            img_hsv[:, :, 2] *= val_mult
        return img_hsv

    def random_brightness(img, brightness_delta, p=p):
        if np.random.uniform(0, 1) > p:
            img = img.astype(np.float32)
            brightness_delta = int(np.random.uniform(-brightness_delta, brightness_delta))
            img = img + brightness_delta
        return np.clip(img, 0, 255)

    # brightness
    img = random_brightness(img, brightness_delta)
    img = img.astype(np.uint8)

    # color jitter, applied in one of two random orders
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.float32)
    if np.random.randint(0, 2):
        img_hsv = random_value(img_hsv, val_vari)
        img_hsv = random_saturation(img_hsv, sat_vari)
        img_hsv = random_hue(img_hsv, hue_vari)
    else:
        img_hsv = random_saturation(img_hsv, sat_vari)
        img_hsv = random_hue(img_hsv, hue_vari)
        img_hsv = random_value(img_hsv, val_vari)

    img_hsv = np.clip(img_hsv, 0, 255)
    img = cv2.cvtColor(img_hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)
    return img
def letterbox_resize(img, new_width, new_height, interp=0):
    '''
    Letterbox resize: scale the image to fit inside (new_width, new_height)
    while keeping its aspect ratio, then center it on a canvas filled with 128.
    return:
        image_padded: uint8 canvas of shape (new_height, new_width, 3).
        resize_ratio: the scale factor applied to both axes.
        dw, dh: left and top offsets of the resized image inside the canvas.
    '''
    src_h, src_w = img.shape[:2]
    resize_ratio = min(new_width / src_w, new_height / src_h)
    scaled_w = int(resize_ratio * src_w)
    scaled_h = int(resize_ratio * src_h)
    resized = cv2.resize(img, (scaled_w, scaled_h), interpolation=interp)

    dw = int((new_width - scaled_w) / 2)
    dh = int((new_height - scaled_h) / 2)
    canvas = np.full((new_height, new_width, 3), 128, np.uint8)
    canvas[dh:dh + scaled_h, dw:dw + scaled_w, :] = resized
    return canvas, resize_ratio, dw, dh
def resize_with_bbox(img, bbox, new_width, new_height, interp=0, letterbox=False):
    '''
    Resize the image and rescale the boxes to match.
    `bbox` is modified in place (columns 0/2 are x, columns 1/3 are y) and
    the same array is returned.
    '''
    x_cols, y_cols = [0, 2], [1, 3]

    if not letterbox:
        # plain stretch to the target size
        src_h, src_w = img.shape[:2]
        resized = cv2.resize(img, (new_width, new_height), interpolation=interp)
        bbox[:, x_cols] = bbox[:, x_cols] / src_w * new_width
        bbox[:, y_cols] = bbox[:, y_cols] / src_h * new_height
        return resized, bbox

    # aspect-preserving resize: scale, then shift by the padding offsets
    padded, ratio, pad_x, pad_y = letterbox_resize(img, new_width, new_height, interp)
    bbox[:, x_cols] = bbox[:, x_cols] * ratio + pad_x
    bbox[:, y_cols] = bbox[:, y_cols] * ratio + pad_y
    return padded, bbox
def random_flip(img, bbox, px=0, py=0):
    '''
    Randomly flip the image and mirror the boxes accordingly.
    `bbox` is modified in place and returned.
    param:
        px:
            the probability of horizontal flip
        py:
            the probability of vertical flip
    '''
    height, width = img.shape[:2]

    if np.random.uniform(0, 1) < px:
        img = cv2.flip(img, 1)
        # new xmin = width - old xmax, new xmax = width - old xmin
        bbox[:, [0, 2]] = width - bbox[:, [2, 0]]

    if np.random.uniform(0, 1) < py:
        img = cv2.flip(img, 0)
        # new ymin = height - old ymax, new ymax = height - old ymin
        bbox[:, [1, 3]] = height - bbox[:, [3, 1]]

    return img, bbox
def random_resize(img, bbox, min_ratio=0.25, max_ratio=2, jitter=0.3, max_size=608):
    '''
    Randomly rescale the image, with independent jitter on each axis, and
    scale the boxes to match. `bbox` is modified in place and returned.
    param:
        min_ratio, max_ratio:
            The base scale is drawn uniformly from [min_ratio, max_ratio].
        jitter:
            Each axis multiplies the base scale by an independent factor drawn
            from [1 - jitter, 1 + jitter].
        max_size:
            The base scale is capped at max_size / max(height, width). The cap
            is applied before the jitter, so the output may still exceed it.
    return:
        The resized image and the rescaled boxes.
    '''
    h, w = img.shape[:2]
    max_ratio_limited = max_size / max(h, w)
    scale = min(max_ratio_limited, random.uniform(min_ratio, max_ratio))
    w_ratio = random.uniform(1 - jitter, 1 + jitter) * scale
    h_ratio = random.uniform(1 - jitter, 1 + jitter) * scale
    dst = cv2.resize(img, None, fx=w_ratio, fy=h_ratio)

    # correct bbox
    bbox[:, 0] *= w_ratio
    bbox[:, 2] *= w_ratio
    bbox[:, 1] *= h_ratio
    bbox[:, 3] *= h_ratio
    return dst, bbox
def random_expand(img, bbox, max_ratio=2, fill=0, keep_ratio=True):
    '''
    Place the image at a random position on a larger, constant-filled canvas
    and shift the boxes by the same offset. `bbox` is modified in place.
    param:
        max_ratio :
            Upper bound of the canvas/image size ratio on each axis
            (the ratio is drawn uniformly from [1, max_ratio]).
        fill :
            The value(s) for padded borders.
        keep_ratio : bool
            If `True`, the same ratio is used for both axes.
    return:
        The expanded image and the shifted boxes.
    '''
    src_h, src_w, channels = img.shape

    ratio_x = random.uniform(1, max_ratio)
    ratio_y = ratio_x if keep_ratio else random.uniform(1, max_ratio)
    canvas_h, canvas_w = int(src_h * ratio_y), int(src_w * ratio_x)

    # top-left corner of the original image inside the canvas
    off_y = random.randint(0, canvas_h - src_h)
    off_x = random.randint(0, canvas_w - src_w)

    canvas = np.full(shape=(canvas_h, canvas_w, channels), fill_value=fill, dtype=img.dtype)
    canvas[off_y:off_y + src_h, off_x:off_x + src_w, :] = img

    # correct bbox
    shift = (off_x, off_y)
    bbox[:, :2] += shift
    bbox[:, 2:4] += shift
    return canvas, bbox
@@ -0,0 +1,294 @@
# coding: utf-8
from __future__ import division, print_function
import numpy as np
import cv2
import sys
from utils.data_aug import *
import random
import tensorflow as tf
# Major version number of the running Python interpreter.
PY_VERSION = sys.version_info[0]
# Number of get_batch_data() calls made so far in this process
# (get_batch_data increments it once per call).
iter_cnt = 0
# Multi-scale resizing and the 608x608 canvas padding only start once
# iter_cnt has reached this value.
IterControl = 50
def color_jitter(image, brightness=0, contrast=0, saturation=0, hue=0):
    """Randomly distort the color of an image tensor.

    Each distortion is only added to the graph when its strength is > 0.

    Args:
      image: The input image tensor.
      brightness: A float, max delta passed to `tf.image.random_brightness`.
      contrast: A float; the contrast factor range is [1-contrast, 1+contrast].
      saturation: A float; the saturation factor range is
        [1-saturation, 1+saturation].
      hue: A float, max delta passed to `tf.image.random_hue`.

    Returns:
      The distorted image tensor.
    """
    with tf.name_scope('distort_color'):
        if brightness > 0:
            image = tf.image.random_brightness(image, max_delta=brightness)
        if contrast > 0:
            contrast_lo, contrast_hi = 1 - contrast, 1 + contrast
            image = tf.image.random_contrast(image, lower=contrast_lo, upper=contrast_hi)
        if saturation > 0:
            sat_lo, sat_hi = 1 - saturation, 1 + saturation
            image = tf.image.random_saturation(image, lower=sat_lo, upper=sat_hi)
        if hue > 0:
            image = tf.image.random_hue(image, max_delta=hue)
    return image
def parse_line(line):
    '''
    Given a line from the training/test txt file, return parsed info.
    line format: line_index, img_path, img_width, img_height, [box_info_1 (5 number)], ...
    Each box_info is `label x_min y_min x_max y_max`. The fields are separated by
    single spaces; `line` may be str or bytes (bytes are decoded first).
    NOTE: the leading line_index is parsed past but NOT returned.
    return (a 5-tuple, in this order):
        pic_path: string.
        boxes: shape [N, 4], float32. N is the ground truth count, elements in the second
            dimension are [x_min, y_min, x_max, y_max]
        labels: shape [N], int32. class index.
        img_width: int.
        img_height: int
    '''
    if not isinstance(line, str):
        line = line.decode()
    s = line.strip().split(' ')
    assert len(
        s) > 8, 'Annotation error! Please check your annotation file. Make sure there is at least one target object in each image.'
    pic_path = s[1]
    img_width = int(s[2])
    img_height = int(s[3])

    # everything after the 4 header fields is groups of 5 numbers
    s = s[4:]
    assert len(
        s) % 5 == 0, 'Annotation error! Please check your annotation file. Maybe partially missing some coordinates?'

    boxes = []
    labels = []
    for start in range(0, len(s), 5):
        label, x_min, y_min, x_max, y_max = s[start:start + 5]
        labels.append(int(label))
        boxes.append([float(x_min), float(y_min), float(x_max), float(y_max)])

    boxes = np.asarray(boxes, np.float32)
    labels = np.asarray(labels, np.int32)
    return pic_path, boxes, labels, img_width, img_height
def process_box(boxes, labels, img_size, class_num, anchors):
    '''
    Generate the y_true label, i.e. the ground truth feature maps at the three
    strides 32, 16 and 8, plus a fixed-size list of ground truth boxes per stride.
    params:
        boxes: [N, 5] shape, float32 dtype. `x_min, y_min, x_max, y_max, mixup_weight`.
        labels: [N] shape, int32 dtype.
        img_size: [width, height] of the network input.
        class_num: int32 num.
        anchors: [9, 2] shape, float32 dtype (width, height per anchor).
    return:
        y_true_13, y_true_26, y_true_52: [H/stride, W/stride, 3, 6 + class_num] maps holding
            (x_center, y_center, w, h, objectness, one-hot class..., mix up weight).
        gt_box_13, gt_box_26, gt_box_52: [1, 32, 4], [1, 64, 4], [1, 128, 4] arrays holding
            (x_center, y_center, w, h) of the boxes assigned to that stride; boxes
            beyond the capacity are not recorded here.
    '''
    # anchor ids served by each output map: map 0 (stride 32) uses the largest anchors
    group_anchor_ids = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    # anchor ids 0-2 / 3-5 / 6-8 live on the stride 8 / 16 / 32 map
    stride_of_anchor_band = {0: 8., 1: 16., 2: 32.}

    # (x_center, y_center), shape [N, 2]
    centers = (boxes[:, 0:2] + boxes[:, 2:4]) / 2
    # (width, height), shape [N, 1, 2] so that it broadcasts against the [9, 2] anchors
    sizes = np.expand_dims(boxes[:, 2:4] - boxes[:, 0:2], 1)

    depth = 6 + class_num
    img_w, img_h = img_size[0], img_size[1]
    y_true_13 = np.zeros((img_h // 32, img_w // 32, 3, depth), np.float32)
    y_true_26 = np.zeros((img_h // 16, img_w // 16, 3, depth), np.float32)
    y_true_52 = np.zeros((img_h // 8, img_w // 8, 3, depth), np.float32)
    y_true = [y_true_13, y_true_26, y_true_52]
    for target in y_true:
        # mix up weight defaults to 1.
        target[..., -1] = 1.

    gt_box_13 = np.zeros((1, 32, 4), np.float32)
    gt_box_26 = np.zeros((1, 64, 4), np.float32)
    gt_box_52 = np.zeros((1, 128, 4), np.float32)
    gt_box_list = [gt_box_13, gt_box_26, gt_box_52]
    gt_box_used = {0: 0, 1: 0, 2: 0}

    # IoU between every box and every anchor with both centered at the origin.
    # [N, 1, 2] & [9, 2] ==> [N, 9, 2]
    overlap = np.minimum(sizes / 2, anchors / 2) - np.maximum(- sizes / 2, - anchors / 2)
    # [N, 9]
    inter = overlap[:, :, 0] * overlap[:, :, 1]
    union = sizes[:, :, 0] * sizes[:, :, 1] + anchors[:, 0] * anchors[:, 1] - inter + 1e-10
    # [N]
    best_match_idx = np.argmax(inter / union, axis=1)

    for i, anchor_id in enumerate(best_match_idx):
        band = anchor_id // 3
        # anchor_id: 0,1,2 ==> 2; 3,4,5 ==> 1; 6,7,8 ==> 0
        feature_map_group = 2 - band
        stride = stride_of_anchor_band[band]
        col = int(np.floor(centers[i, 0] / stride))
        row = int(np.floor(centers[i, 1] / stride))
        slot = group_anchor_ids[feature_map_group].index(anchor_id)

        cell = y_true[feature_map_group][row, col, slot]  # view into the target map
        cell[:2] = centers[i]
        cell[2:4] = sizes[i]
        cell[4] = 1.
        cell[5 + labels[i]] = 1.
        cell[-1] = boxes[i, -1]

        used = gt_box_used[feature_map_group]
        gt_boxes = gt_box_list[feature_map_group]
        if used < gt_boxes.shape[1]:
            gt_boxes[0, used, :2] = centers[i]
            gt_boxes[0, used, 2:4] = sizes[i]
            gt_box_used[feature_map_group] = used + 1

    return y_true_13, y_true_26, y_true_52, gt_box_13, gt_box_26, gt_box_52
def _read_image_or_raise(pic_path):
    '''Load an image with cv2.imread, raising IOError instead of returning None.'''
    img = cv2.imread(pic_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        raise IOError('Failed to read image: %s' % pic_path)
    return img


def parse_data(line, class_num, img_size, anchors, mode, letterbox_resize, multi_scale):
    '''
    Load one sample (or a mixed-up pair), augment/resize it and build its targets.
    param:
        line: a line from the training/test txt file, or a list of two such
            lines for the mix up case.
        class_num: total class nums.
        img_size: the size of image to be resized to. [width, height] format.
        anchors: anchors.
        mode: 'train' or 'val'. When set to 'train', data_augmentation will be applied.
        letterbox_resize: whether to use the letterbox resize, i.e., keep the original aspect ratio in the resized image.
        multi_scale: when true in 'train' mode and iter_cnt >= IterControl, the
            image is pasted on the top-left of a 608x608 canvas filled with 0.5
            and the targets are built for a 608x608 input.
    return:
        img (float32 RGB in 0~1) followed by the six arrays returned by process_box.
    raise:
        IOError if an image file cannot be read.
    '''
    if not isinstance(line, list):
        print('###################### line')
        pic_path, boxes, labels, _, _ = parse_line(line)
        img = _read_image_or_raise(pic_path)
        # expand the 2nd dimension, mix up weight default to 1.
        boxes = np.concatenate((boxes, np.full(shape=(boxes.shape[0], 1), fill_value=1., dtype=np.float32)), axis=-1)
    else:
        print('###################### mixup')
        # the mix up case
        pic_path1, boxes1, labels1, _, _ = parse_line(line[0])
        img1 = _read_image_or_raise(pic_path1)
        pic_path2, boxes2, labels2, _, _ = parse_line(line[1])
        img2 = _read_image_or_raise(pic_path2)
        img, boxes = mix_up(img1, img2, boxes1, boxes2)
        labels = np.concatenate((labels1, labels2))

    if mode == 'train':
        img, boxes = random_resize(img, boxes, min_ratio=0.25, max_ratio=2, jitter=0.3)

        # random expansion with prob 0.5
        if np.random.uniform(0, 1) > 0.5:
            img, boxes = random_expand(img, boxes, max_ratio=3, fill=128, keep_ratio=False)

        # random cropping
        h, w, _ = img.shape
        boxes, crop = random_crop_with_constraints(boxes, (w, h))
        x0, y0, w, h = crop
        img = img[y0: y0 + h, x0: x0 + w]

        # resize with random interpolation
        interp = np.random.randint(0, 5)
        img, boxes = resize_with_bbox(img, boxes, img_size[0], img_size[1], interp=interp, letterbox=letterbox_resize)

        # random horizontal flip
        img, boxes = random_flip(img, boxes, px=0.5)
    else:
        img, boxes = resize_with_bbox(img, boxes, img_size[0], img_size[1], interp=1, letterbox=letterbox_resize)

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)

    # the input of yolo_v3 should be in range 0~1
    img = img / 255.

    if mode == 'train' and iter_cnt >= IterControl and multi_scale:
        # pad every multi-scale sample to one fixed 608x608 canvas
        cav = np.zeros((608, 608, 3), dtype=np.float32) + 0.5
        true_h, true_w = img.shape[:2]
        cav[:true_h, :true_w, :] = img
        img = cav.astype(np.float32)
        img_size = [608, 608]

    y_true_13, y_true_26, y_true_52, gt_box_13, gt_box_26, gt_box_52 = process_box(boxes, labels, img_size, class_num,
                                                                                   anchors)

    return img, y_true_13, y_true_26, y_true_52, gt_box_13, gt_box_26, gt_box_52
def get_batch_data(batch_line, class_num, img_size, anchors, mode, multi_scale=False, mix_up=False,
                   letterbox_resize=True, interval=10):
    '''
    generate a batch of imgs and labels
    param:
        batch_line: a batch of lines from train/val.txt files
        class_num: num of total classes.
        img_size: the image size to be resized to. format: [width, height].
        anchors: anchors. shape: [9, 2].
        mode: 'train' or 'val' (str or bytes). if set to 'train', data augmentation will be applied.
        multi_scale: whether to use multi_scale training. Once iter_cnt has reached IterControl,
            img_size is drawn from [320, 320] ... [608, 608] in steps of 32.
            Note that it will take effect only when mode is set to 'train'.
        mix_up: whether to mix up roughly half of the samples with another line of the batch
            ('train' mode only; batch_line must then provide `.tolist()`).
        letterbox_resize: whether to use the letterbox resize, i.e., keep the original aspect ratio in the resized image.
        interval: change the scale of image every interval batches. Note that it's indeterministic because of the multi threading.
    return:
        img_batch, y_true_13_batch, y_true_26_batch, y_true_52_batch (float32 arrays) and
        gt_box_13_batch, gt_box_26_batch, gt_box_52_batch.
    '''
    if isinstance(mode, bytes):
        mode = mode.decode()

    global iter_cnt
    # multi_scale training
    if multi_scale and mode == 'train' and iter_cnt >= IterControl:
        # A private generator seeded per interval picks the same size as
        # random.seed() + random.sample() would, but leaves the global `random`
        # stream alone, so the augmentation draws are not reset on every batch.
        scale_rng = random.Random(iter_cnt // interval)
        random_img_size = [[x * 32, x * 32] for x in range(10, 20)]
        img_size = scale_rng.sample(random_img_size, 1)[0]
        print('multi_scale iter: %d, img_size: %d,%d' % (iter_cnt, img_size[0], img_size[1]))
    else:
        print('single_scale iter: %d, img_size: %d,%d' % (iter_cnt, img_size[0], img_size[1]))
    iter_cnt += 1

    img_batch, y_true_13_batch, y_true_26_batch, y_true_52_batch = [], [], [], []
    gt_box_13_batch, gt_box_26_batch, gt_box_52_batch = [], [], []

    # mix up strategy
    if mix_up and mode == 'train':
        mix_lines = []
        batch_line = batch_line.tolist()
        for idx, line in enumerate(batch_line):
            # a partner can only be drawn when the batch holds another line
            if np.random.uniform(0, 1) < 0.5 and len(batch_line) > 1:
                mix_lines.append([line, random.sample(batch_line[:idx] + batch_line[idx + 1:], 1)[0]])
            else:
                mix_lines.append(line)
        batch_line = mix_lines

    for line in batch_line:
        img, y_true_13, y_true_26, y_true_52, gt_box_13, gt_box_26, gt_box_52 = parse_data(line, class_num,
                                                                                           img_size, anchors,
                                                                                           mode,
                                                                                           letterbox_resize,
                                                                                           multi_scale)
        img_batch.append(img)
        y_true_13_batch.append(y_true_13)
        y_true_26_batch.append(y_true_26)
        y_true_52_batch.append(y_true_52)
        gt_box_13_batch.append(gt_box_13)
        gt_box_26_batch.append(gt_box_26)
        gt_box_52_batch.append(gt_box_52)

    img_batch, y_true_13_batch, y_true_26_batch, y_true_52_batch = np.asarray(img_batch, np.float32), np.asarray(
        y_true_13_batch, np.float32), np.asarray(y_true_26_batch, np.float32), np.asarray(y_true_52_batch, np.float32)
    gt_box_13_batch, gt_box_26_batch, gt_box_52_batch = \
        np.asarray(gt_box_13_batch), np.asarray(gt_box_26_batch), np.asarray(gt_box_52_batch)

    return img_batch, y_true_13_batch, y_true_26_batch, y_true_52_batch, \
        gt_box_13_batch, gt_box_26_batch, gt_box_52_batch
@@ -0,0 +1,423 @@
# coding: utf-8
from __future__ import division, print_function
import numpy as np
import cv2
from collections import Counter
from utils.nms_utils import cpu_nms, gpu_nms
from utils.data_utils import parse_line
def calc_iou(pred_boxes, true_boxes):
    '''
    Compute the pairwise IoU matrix of two box sets with numpy broadcasting.
    Boxes are (x_min, y_min, x_max, y_max).
    shape_info: pred_boxes: [N, 4]
                true_boxes: [V, 4]
    return: IoU matrix: shape: [N, V]
    '''
    # [N, 1, 4] against [1, V, 4]
    preds = np.expand_dims(pred_boxes, -2)
    truths = np.expand_dims(true_boxes, 0)

    # overlap rectangle, [N, V, 2]; negative extents (no overlap) become 0
    overlap_wh = np.maximum(
        np.minimum(preds[..., 2:], truths[..., 2:]) - np.maximum(preds[..., :2], truths[..., :2]),
        0.)
    # [N, V]
    overlap_area = overlap_wh[..., 0] * overlap_wh[..., 1]

    # [N, 1]
    pred_wh = preds[..., 2:] - preds[..., :2]
    pred_area = pred_wh[..., 0] * pred_wh[..., 1]
    # [1, V]
    truth_wh = truths[..., 2:] - truths[..., :2]
    truth_area = truth_wh[..., 0] * truth_wh[..., 1]

    # the small epsilon keeps the division defined for degenerate boxes
    return overlap_area / (pred_area + truth_area - overlap_area + 1e-10)
def evaluate_on_cpu(y_pred, y_true, num_classes, calc_now=True, max_boxes=50, score_thresh=0.5, iou_thresh=0.5):
    '''
    Given y_pred and y_true of a batch of data, get the recall and precision of the current batch.
    param:
        y_pred: indexed as [boxes, confs, probs], each with the batch on axis 0.
        y_true: the three ground truth feature maps; channels 0:4 hold
            (x_center, y_center, w, h) and channels 5:-1 the class one-hot.
        num_classes: number of classes.
        calc_now: if True return (recall, precision), otherwise return the raw
            per-class count dicts (true_positive, true_labels, pred_labels).
        max_boxes, score_thresh, iou_thresh: forwarded to cpu_nms; iou_thresh is
            also the IoU a detection needs to count as a match.
    '''
    num_images = y_true[0].shape[0]
    true_labels_dict = dict.fromkeys(range(num_classes), 0)  # {class: count}
    pred_labels_dict = dict.fromkeys(range(num_classes), 0)
    true_positive_dict = dict.fromkeys(range(num_classes), 0)

    for img_idx in range(num_images):
        # gather the ground truth of this image from the three feature maps
        true_labels_list, true_boxes_list = [], []
        for scale_idx in range(3):
            fmap = y_true[scale_idx][img_idx]
            class_probs = fmap[..., 5:-1]
            # cells that carry an object have a non-zero one-hot vector
            object_mask = class_probs.sum(axis=-1) > 0
            # [V] labels and [V, 4] (x_center, y_center, w, h)
            true_labels_list += np.argmax(class_probs[object_mask], axis=-1).tolist()
            true_boxes_list += fmap[..., 0:4][object_mask].tolist()

        for cls, count in Counter(true_labels_list).items():
            true_labels_dict[cls] += count

        # convert [V, 4] from (x_center, y_center, w, h) to (xmin, ymin, xmax, ymax)
        true_boxes = np.array(true_boxes_list)
        top_left = true_boxes[:, 0:2] - true_boxes[:, 2:4] / 2.
        true_boxes[:, 2:4] = top_left + true_boxes[:, 2:4]
        true_boxes[:, 0:2] = top_left

        # keep the leading batch dimension of size 1: [1, xxx, ...]
        raw_boxes = y_pred[0][img_idx:img_idx + 1]
        raw_confs = y_pred[1][img_idx:img_idx + 1]
        raw_probs = y_pred[2][img_idx:img_idx + 1]

        # pred_boxes: [N, 4], pred_confs: [N], pred_labels: [N]
        # N: Detected box number of the current image
        pred_boxes, pred_confs, pred_labels = cpu_nms(raw_boxes, raw_confs * raw_probs, num_classes,
                                                      max_boxes=max_boxes, score_thresh=score_thresh,
                                                      iou_thresh=iou_thresh)

        pred_labels_list = [] if pred_labels is None else pred_labels.tolist()
        if pred_labels_list == []:
            continue

        # [N, V] IoU, and for every detection the best overlapping ground truth
        iou_matrix = calc_iou(pred_boxes, true_boxes)
        max_iou_idx = np.argmax(iou_matrix, axis=-1)

        # ground truth index -> highest confidence among the detections matching it,
        # so every ground truth box is counted at most once
        best_conf_by_gt = {}
        for k in range(max_iou_idx.shape[0]):
            pred_labels_dict[pred_labels_list[k]] += 1
            match_idx = max_iou_idx[k]  # V level
            is_hit = iou_matrix[k, match_idx] > iou_thresh and true_labels_list[match_idx] == pred_labels_list[k]
            if not is_hit:
                continue
            if match_idx not in best_conf_by_gt or pred_confs[k] > best_conf_by_gt[match_idx]:
                best_conf_by_gt[match_idx] = pred_confs[k]

        for gt_idx in best_conf_by_gt:
            true_positive_dict[true_labels_list[gt_idx]] += 1

    if not calc_now:
        return true_positive_dict, true_labels_dict, pred_labels_dict

    # avoid divided by 0
    total_tp = sum(true_positive_dict.values())
    recall = total_tp / (sum(true_labels_dict.values()) + 1e-6)
    precision = total_tp / (sum(pred_labels_dict.values()) + 1e-6)
    return recall, precision
def evaluate_on_gpu(sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag, y_pred, y_true, num_classes, iou_thresh=0.5, calc_now=True):
    '''
    Given y_pred and y_true of a batch of data, get the recall and precision of the current batch.
    The NMS step is executed by running `gpu_nms_op` in `sess`.
    param:
        sess: session used to run gpu_nms_op.
        gpu_nms_op: op returning (boxes, confs, labels) for one image.
        pred_boxes_flag, pred_scores_flag: feed_dict keys for the boxes and
            for the conf * prob scores.
        y_pred: indexed as [boxes, confs, probs], each with the batch on axis 0.
        y_true: the three ground truth feature maps; channels 0:4 hold
            (x_center, y_center, w, h) and channels 5:-1 the class one-hot.
        num_classes: number of classes.
        iou_thresh: IoU a detection needs to count as a match.
        calc_now: if True return (recall, precision), otherwise return the raw
            per-class count dicts (true_positive, true_labels, pred_labels).
    '''
    num_images = y_true[0].shape[0]
    true_labels_dict = dict.fromkeys(range(num_classes), 0)  # {class: count}
    pred_labels_dict = dict.fromkeys(range(num_classes), 0)
    true_positive_dict = dict.fromkeys(range(num_classes), 0)

    for img_idx in range(num_images):
        # gather the ground truth of this image from the three feature maps
        true_labels_list, true_boxes_list = [], []
        for scale_idx in range(3):
            fmap = y_true[scale_idx][img_idx]
            class_probs = fmap[..., 5:-1]
            # cells that carry an object have a non-zero one-hot vector
            object_mask = class_probs.sum(axis=-1) > 0
            # [V] labels and [V, 4] (x_center, y_center, w, h)
            true_labels_list += np.argmax(class_probs[object_mask], axis=-1).tolist()
            true_boxes_list += fmap[..., 0:4][object_mask].tolist()

        for cls, count in Counter(true_labels_list).items():
            true_labels_dict[cls] += count

        # convert [V, 4] from (x_center, y_center, w, h) to (xmin, ymin, xmax, ymax)
        true_boxes = np.array(true_boxes_list)
        top_left = true_boxes[:, 0:2] - true_boxes[:, 2:4] / 2.
        true_boxes[:, 2:4] = top_left + true_boxes[:, 2:4]
        true_boxes[:, 0:2] = top_left

        # keep the leading batch dimension of size 1: [1, xxx, ...]
        raw_boxes = y_pred[0][img_idx:img_idx + 1]
        raw_confs = y_pred[1][img_idx:img_idx + 1]
        raw_probs = y_pred[2][img_idx:img_idx + 1]

        # pred_boxes: [N, 4], pred_confs: [N], pred_labels: [N]
        # N: Detected box number of the current image
        pred_boxes, pred_confs, pred_labels = sess.run(gpu_nms_op,
                                                       feed_dict={pred_boxes_flag: raw_boxes,
                                                                  pred_scores_flag: raw_confs * raw_probs})

        pred_labels_list = [] if pred_labels is None else pred_labels.tolist()
        if pred_labels_list == []:
            continue

        # [N, V] IoU, and for every detection the best overlapping ground truth
        iou_matrix = calc_iou(pred_boxes, true_boxes)
        max_iou_idx = np.argmax(iou_matrix, axis=-1)

        # ground truth index -> highest confidence among the detections matching it,
        # so every ground truth box is counted at most once
        best_conf_by_gt = {}
        for k in range(max_iou_idx.shape[0]):
            pred_labels_dict[pred_labels_list[k]] += 1
            match_idx = max_iou_idx[k]  # V level
            is_hit = iou_matrix[k, match_idx] > iou_thresh and true_labels_list[match_idx] == pred_labels_list[k]
            if not is_hit:
                continue
            if match_idx not in best_conf_by_gt or pred_confs[k] > best_conf_by_gt[match_idx]:
                best_conf_by_gt[match_idx] = pred_confs[k]

        for gt_idx in best_conf_by_gt:
            true_positive_dict[true_labels_list[gt_idx]] += 1

    if not calc_now:
        return true_positive_dict, true_labels_dict, pred_labels_dict

    # avoid divided by 0
    total_tp = sum(true_positive_dict.values())
    recall = total_tp / (sum(true_labels_dict.values()) + 1e-6)
    precision = total_tp / (sum(pred_labels_dict.values()) + 1e-6)
    return recall, precision
def get_preds_gpu(sess, gpu_nms_op, pred_boxes_flag, pred_scores_flag, image_ids, y_pred):
    '''
    Given the y_pred of an input image, get the predicted bbox and label info.
    Only the first entry of `image_ids` and of each y_pred array is used.
    return:
        pred_content: 2d list, one
            [image_id, x_min, y_min, x_max, y_max, score, label] row per detection.
    '''
    image_id = image_ids[0]

    # slice with 0:1 to keep the first dimension 1
    feed = {
        pred_boxes_flag: y_pred[0][0:1],
        pred_scores_flag: y_pred[1][0:1] * y_pred[2][0:1],
    }
    boxes, scores, labels = sess.run(gpu_nms_op, feed_dict=feed)

    pred_content = []
    for det_idx, label in enumerate(labels):
        x_min, y_min, x_max, y_max = boxes[det_idx]
        pred_content.append([image_id, x_min, y_min, x_max, y_max, scores[det_idx], label])
    return pred_content
gt_dict = {}  # key: img_id, value: gt object list


def parse_gt_rec(gt_filename, target_img_size, letterbox_resize=True):
    '''
    parse and re-organize the gt info.
    The result is cached in the module-level `gt_dict`: once it is non-empty it
    is returned as is, regardless of the arguments of later calls.
    param:
        gt_filename: annotation txt file, one `parse_line` record per line.
        target_img_size: (width, height) the boxes are rescaled to.
        letterbox_resize: if True the boxes follow the aspect-preserving
            letterbox transform, otherwise a plain per-axis stretch.
    return:
        gt_dict: dict. Each key is a img_id, the value is the gt bboxes in the corresponding img,
            as [x_min, y_min, x_max, y_max, label] lists.
    '''
    global gt_dict

    if not gt_dict:
        new_width, new_height = target_img_size
        with open(gt_filename, 'r') as f:
            for line in f:
                parsed = parse_line(line)
                if len(parsed) == 6:
                    # parse_line variant that also returns the line index
                    img_id, pic_path, boxes, labels, ori_width, ori_height = parsed
                else:
                    # parse_line returns 5 values; the image id is the first field of the line
                    pic_path, boxes, labels, ori_width, ori_height = parsed
                    img_id = int(line.strip().split(' ')[0])

                if letterbox_resize:
                    # the letterbox geometry only depends on the image, not on the box
                    resize_ratio = min(new_width / ori_width, new_height / ori_height)
                    resize_w = int(resize_ratio * ori_width)
                    resize_h = int(resize_ratio * ori_height)
                    dw = int((new_width - resize_w) / 2)
                    dh = int((new_height - resize_h) / 2)

                objects = []
                for i in range(len(labels)):
                    x_min, y_min, x_max, y_max = boxes[i]
                    label = labels[i]

                    if letterbox_resize:
                        objects.append([x_min * resize_ratio + dw,
                                        y_min * resize_ratio + dh,
                                        x_max * resize_ratio + dw,
                                        y_max * resize_ratio + dh,
                                        label])
                    else:
                        objects.append([x_min * new_width / ori_width,
                                        y_min * new_height / ori_height,
                                        x_max * new_width / ori_width,
                                        y_max * new_height / ori_height,
                                        label])
                gt_dict[img_id] = objects
    return gt_dict
# The following two functions are modified from FAIR's Detectron repo to calculate mAP:
# https://github.com/facebookresearch/Detectron/blob/master/detectron/datasets/voc_eval.py
def voc_ap(rec, prec, use_07_metric=False):
    """Compute VOC AP given precision and recall arrays.

    If use_07_metric is true, uses the VOC 07 11-point method (default: False);
    otherwise integrates the monotone precision envelope over recall.
    """
    if use_07_metric:
        # 11 point metric: mean of the best precision at recall >= 0.0, 0.1, ..., 1.0
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            reached = rec >= t
            p = np.max(prec[reached]) if np.sum(reached) != 0 else 0
            ap = ap + p / 11.
        return ap

    # append sentinel values at both ends
    mrec = np.concatenate(([0.], rec, [1.]))
    mpre = np.concatenate(([0.], prec, [0.]))

    # precision envelope: every entry becomes the max of itself and all entries to its right
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # area under the PR curve: sum (\Delta recall) * prec at the points
    # where the recall changes value
    steps = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[steps + 1] - mrec[steps]) * mpre[steps + 1])
def voc_eval(gt_dict, val_preds, classidx, iou_thres=0.5, use_07_metric=False):
    '''
    Top level function that does the PASCAL VOC evaluation for one class.
    param:
        gt_dict: {img_id: [[x_min, y_min, x_max, y_max, label], ...]}.
        val_preds: list of [img_id, x_min, y_min, x_max, y_max, score, label].
        classidx: the class to evaluate.
        iou_thres: a detection is a true positive when its best IoU exceeds this
            and that ground truth box has not been matched yet.
        use_07_metric: forwarded to voc_ap.
    return:
        (npos, nd, recall, precision, ap) where npos is the ground truth count
        and nd the detection count of this class. When the class has no
        detections, (1e-6, 1e-6, 0, 0, 0) is returned.
    '''
    # 1.obtain gt: extract all gt objects for this class
    class_recs = {}
    npos = 0
    for img_id in gt_dict:
        R = [obj for obj in gt_dict[img_id] if obj[-1] == classidx]
        bbox = np.array([x[:4] for x in R])
        det = [False] * len(R)
        npos += len(R)
        class_recs[img_id] = {'bbox': bbox, 'det': det}

    # 2. obtain pred results
    pred = [x for x in val_preds if x[-1] == classidx]
    if not pred:
        print('no box, ignore')
        return 1e-6, 1e-6, 0, 0, 0
    img_ids = [x[0] for x in pred]
    confidence = np.array([x[-2] for x in pred])
    BB = np.array([[x[1], x[2], x[3], x[4]] for x in pred])

    # 3. sort by confidence, highest first
    sorted_ind = np.argsort(-confidence)
    BB = BB[sorted_ind, :]
    img_ids = [img_ids[x] for x in sorted_ind]

    # 4. mark TPs and FPs
    nd = len(img_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)

    for d in range(nd):
        # all the gt info in some image
        R = class_recs[img_ids[d]]
        bb = BB[d, :]
        ovmax = -np.inf
        BBGT = R['bbox']

        if BBGT.size > 0:
            # calc iou (the "+ 1." terms treat coordinates as inclusive pixel indices)
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + (BBGT[:, 2] - BBGT[:, 0] + 1.) * (
                BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > iou_thres:
            # gt not matched yet
            if not R['det'][jmax]:
                tp[d] = 1.
                R['det'][jmax] = 1
            else:
                # a second detection of an already matched gt box counts as a false positive
                fp[d] = 1.
        else:
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # the eps floor keeps the division defined when tp + fp is 0
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    # return rec, prec, ap
    return npos, nd, tp[-1] / float(npos), tp[-1] / float(nd), ap
@@ -0,0 +1,89 @@
# coding: utf-8
from __future__ import division, print_function
import numpy as np
import tensorflow as tf
slim = tf.contrib.slim
def conv2d(inputs, filters, kernel_size, strides=1):
    '''
    slim.conv2d wrapper. With strides == 1 it uses 'SAME' padding; with
    strides > 1 the input is zero-padded explicitly by kernel_size - 1 in total
    on each spatial axis and the convolution uses 'VALID' padding.
    '''
    def _fixed_padding(inputs, kernel_size):
        # split the total padding between both sides, the extra pixel goes to the end
        pad_total = kernel_size - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        spatial_pad = [pad_beg, pad_end]
        return tf.pad(inputs, [[0, 0], spatial_pad, spatial_pad, [0, 0]], mode='CONSTANT')

    if strides > 1:
        inputs = _fixed_padding(inputs, kernel_size)
    padding = 'SAME' if strides == 1 else 'VALID'
    return slim.conv2d(inputs, filters, kernel_size, stride=strides, padding=padding)
def darknet53_body(inputs):
    '''
    Build the Darknet-53 convolutional body.
    return:
        route_1, route_2, route_3: the outputs of the 256-, 512- and
        1024-channel stages (after their residual blocks).
    '''
    def res_block(inputs, filters):
        # 1x1 bottleneck followed by a 3x3 conv, added back to the input
        shortcut = inputs
        net = conv2d(inputs, filters * 1, 1)
        net = conv2d(net, filters * 2, 3)
        return net + shortcut

    # stem: a single 3x3 conv at full resolution
    net = conv2d(inputs, 32, 3, strides=1)

    # (output channels of the stride-2 conv, number of residual blocks, keep as route?)
    stages = (
        (64, 1, False),
        (128, 2, False),
        (256, 8, True),
        (512, 8, True),
        (1024, 4, True),
    )
    routes = []
    for out_channels, num_blocks, keep_route in stages:
        # every stage halves the resolution, then stacks its residual blocks
        net = conv2d(net, out_channels, 3, strides=2)
        for _ in range(num_blocks):
            net = res_block(net, out_channels // 2)
        if keep_route:
            routes.append(net)

    route_1, route_2, route_3 = routes
    return route_1, route_2, route_3
def yolo_block(inputs, filters):
    '''
    Five alternating 1x1 (`filters` channels) and 3x3 (`filters * 2` channels)
    convolutions, followed by one more 3x3 convolution.
    return:
        route: output of the fifth convolution.
        net: output of the final 3x3 convolution.
    '''
    net = inputs
    for kernel_size in (1, 3, 1, 3, 1):
        channels = filters * 1 if kernel_size == 1 else filters * 2
        net = conv2d(net, channels, kernel_size)
    route = net
    net = conv2d(net, filters * 2, 3)
    return route, net
def upsample_layer(inputs, out_shape):
    '''
    Nearest-neighbor resize of `inputs` to the spatial size taken from
    out_shape[1] (height) and out_shape[2] (width).
    '''
    # NOTE: height comes first in the target size
    # TODO: Do we need to set `align_corners` as True?
    target_hw = (out_shape[1], out_shape[2])
    return tf.image.resize_nearest_neighbor(inputs, target_hw, name='upsampled')
@@ -0,0 +1,165 @@
# coding: utf-8
import numpy as np
import tensorflow as tf
import random
from tensorflow.core.framework import summary_pb2
def make_summary(name, val):
    '''Wrap a single scalar `val` under tag `name` in a Summary protobuf.'''
    scalar = summary_pb2.Summary.Value(tag=name, simple_value=val)
    return summary_pb2.Summary(value=[scalar])
class AverageMeter(object):
    '''Keeps the latest value together with a running weighted sum, count and mean.'''

    def __init__(self):
        self.reset()

    def reset(self):
        '''Clear the latest value and all running statistics.'''
        self.val = self.average = self.sum = self.count = 0

    def update(self, val, n=1):
        '''Record `val` as the latest value, counted `n` times in the mean.'''
        self.val = val
        self.sum += val * n
        self.count += n
        self.average = self.sum / float(self.count)
def parse_anchors(anchor_path):
    '''
    parse anchors.
    The file is read as one comma-separated list of numbers, grouped in pairs.
    returned data: shape [N, 2], dtype float32
    '''
    # the context manager closes the file, which the bare open().read() never did
    with open(anchor_path, 'r') as anchor_file:
        fields = anchor_file.read().split(',')
    anchors = np.reshape(np.asarray(fields, np.float32), [-1, 2])
    return anchors
def read_class_names(class_name_path):
    '''
    Read one class name per line and return {line index: name}.
    Only newline characters are stripped from each name.
    '''
    with open(class_name_path, 'r') as data:
        return {class_id: raw_name.strip('\n') for class_id, raw_name in enumerate(data)}
def shuffle_and_overwrite(file_name):
    '''
    Shuffle the lines of a text file in place, using the global `random` state.
    The whole file is read into memory and then rewritten.
    '''
    # read through a context manager so the handle is closed before rewriting
    with open(file_name, 'r') as f:
        content = f.readlines()
    random.shuffle(content)
    with open(file_name, 'w') as f:
        f.writelines(content)
def update_dict(ori_dict, new_dict):
    '''
    Accumulate `new_dict` into `ori_dict` key by key (with `+=`) and return it.
    An empty `ori_dict` is not updated: `new_dict` itself is returned instead.
    Only the keys already present in `ori_dict` are read from `new_dict`.
    '''
    if ori_dict:
        for key in ori_dict:
            ori_dict[key] += new_dict[key]
        return ori_dict
    return new_dict
def list_add(ori_list, new_list):
    '''
    Add `new_list` element-wise into `ori_list` (in place, with `+=`) and
    return `ori_list`. Every index of `ori_list` must exist in `new_list`.
    '''
    for idx, _ in enumerate(ori_list):
        ori_list[idx] += new_list[idx]
    return ori_list
def load_weights(var_list, weights_file):
    """
    Loads and converts pre-trained weights.

    The file is read as five int32 header values followed by float32 data.
    `var_list` is walked in order; for every variable whose scope name
    contains 'Conv', the following batch-norm variables (or bias) and then
    the conv kernel are filled from consecutive slices of the float data.

    Loading is best-effort: if the walk fails part-way (for example because
    the file holds fewer values than the variables need), a warning is
    emitted and the assign ops built so far are returned.

    param:
        var_list: list of network variables.
        weights_file: name of the binary file.
    return:
        list of `tf.assign` ops.
    """
    import warnings

    with open(weights_file, "rb") as fp:
        # the first five int32 values are a header and are discarded
        np.fromfile(fp, dtype=np.int32, count=5)
        weights = np.fromfile(fp, dtype=np.float32)

    ptr = 0
    i = 0
    assign_ops = []
    try:
        while i < len(var_list) - 1:
            var1 = var_list[i]
            var2 = var_list[i + 1]
            # do something only if we process conv layer
            if 'Conv' in var1.name.split('/')[-2]:
                # check type of next layer
                if 'BatchNorm' in var2.name.split('/')[-2]:
                    # load batch norm params
                    gamma, beta, mean, variance = var_list[i + 1:i + 5]
                    # the file stores them in the order beta, gamma, mean, variance
                    batch_norm_vars = [beta, gamma, mean, variance]
                    for bn_var in batch_norm_vars:
                        shape = bn_var.shape.as_list()
                        num_params = np.prod(shape)
                        var_weights = weights[ptr:ptr + num_params].reshape(shape)
                        ptr += num_params
                        assign_ops.append(tf.assign(bn_var, var_weights, validate_shape=True))
                    # we move the pointer by 4, because we loaded 4 variables
                    i += 4
                elif 'Conv' in var2.name.split('/')[-2]:
                    # load biases
                    bias = var2
                    bias_shape = bias.shape.as_list()
                    bias_params = np.prod(bias_shape)
                    bias_weights = weights[ptr:ptr + bias_params].reshape(bias_shape)
                    ptr += bias_params
                    assign_ops.append(tf.assign(bias, bias_weights, validate_shape=True))
                    # we loaded 1 variable
                    i += 1
                # we can load weights of conv layer
                shape = var1.shape.as_list()
                num_params = np.prod(shape)
                var_weights = weights[ptr:ptr + num_params].reshape(
                    (shape[3], shape[2], shape[0], shape[1]))
                # remember to transpose to column-major
                var_weights = np.transpose(var_weights, (2, 3, 1, 0))
                ptr += num_params
                assign_ops.append(
                    tf.assign(var1, var_weights, validate_shape=True))
                i += 1
    except Exception as err:
        # Keep the best-effort behavior, but do not hide the reason and do
        # not swallow KeyboardInterrupt / SystemExit like a bare except did.
        warnings.warn('load_weights stopped early at variable index %d: %r' % (i, err))
    return assign_ops
def config_learning_rate(args, global_step):
    """Build the learning-rate value selected by `args.lr_type`.

    Supported types: 'exponential', 'cosine_decay', 'cosine_decay_restart',
    'fixed' and 'piecewise'.

    param:
        args: settings object; which attributes are read depends on the
            chosen type (e.g. `learning_rate_init`, `lr_decay_freq`,
            `lr_lower_bound`, `pw_boundaries`, `pw_values`).
        global_step: step counter handed to the TensorFlow schedules.
    return:
        the learning-rate tensor.
    raise:
        ValueError: when `args.lr_type` is none of the supported types.
    """
    schedule = args.lr_type

    if schedule == 'exponential':
        decayed = tf.train.exponential_decay(
            args.learning_rate_init, global_step, args.lr_decay_freq,
            args.lr_decay_factor, staircase=True, name='exponential_learning_rate')
        # never drop below the configured floor
        return tf.maximum(decayed, args.lr_lower_bound)

    if schedule == 'cosine_decay':
        # warm-up epochs are excluded from the cosine period
        warm_up_epochs = float(args.use_warm_up) * args.warm_up_epoch
        train_steps = (args.total_epoches - warm_up_epochs) * args.train_batch_num
        lr_span = args.learning_rate_init - args.lr_lower_bound
        return args.lr_lower_bound + 0.5 * lr_span * \
            (1 + tf.cos(global_step / train_steps * np.pi))

    if schedule == 'cosine_decay_restart':
        return tf.train.cosine_decay_restarts(
            args.learning_rate_init, global_step, args.lr_decay_freq,
            t_mul=2.0, m_mul=1.0, name='cosine_decay_learning_rate_restart')

    if schedule == 'fixed':
        return tf.convert_to_tensor(args.learning_rate_init, name='fixed_learning_rate')

    if schedule == 'piecewise':
        return tf.train.piecewise_constant(
            global_step, boundaries=args.pw_boundaries, values=args.pw_values,
            name='piecewise_learning_rate')

    raise ValueError('Unsupported learning rate type!')
def config_optimizer(optimizer_name, learning_rate, decay=0.9, momentum=0.9):
    """Create the TensorFlow optimizer selected by `optimizer_name`.

    param:
        optimizer_name: one of 'momentum', 'nesterov', 'rmsprop', 'adam',
            'sgd'.
        learning_rate: learning rate passed to the optimizer.
        decay: decay factor, used by 'rmsprop' only.
        momentum: momentum, used by 'momentum', 'nesterov' and 'rmsprop'.
    return:
        the optimizer instance.
    raise:
        ValueError: when `optimizer_name` is not supported.
    """
    if optimizer_name == 'momentum':
        return tf.train.MomentumOptimizer(learning_rate, momentum=momentum, use_nesterov=False)
    if optimizer_name == 'nesterov':
        return tf.train.MomentumOptimizer(learning_rate, momentum=momentum, use_nesterov=True)
    if optimizer_name == 'rmsprop':
        return tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=momentum)
    if optimizer_name == 'adam':
        return tf.train.AdamOptimizer(learning_rate)
    if optimizer_name == 'sgd':
        return tf.train.GradientDescentOptimizer(learning_rate)
    raise ValueError('Unsupported optimizer type!')
@@ -0,0 +1,123 @@
# coding: utf-8
from __future__ import division, print_function
import numpy as np
import tensorflow as tf
def gpu_nms(boxes, scores, num_classes, max_boxes=50, score_thresh=0.5, nms_thresh=0.5):
    """
    Per-class non-max suppression built from TensorFlow ops.

    params:
        boxes: tensor reshaped to [-1, 4]; e.g. [1, 10647, 4] where
            10647=(13*13+26*26+52*52)*3 for a 416*416 input image
        scores: tensor reshaped to [-1, num_classes], score=conf*prob
        num_classes: total number of classes
        max_boxes: integer, maximum number of boxes kept per class
            (passed as `max_output_size` for every class)
        score_thresh: boxes whose class score is below this value are
            discarded before NMS
        nms_thresh: real value, "intersection over union" threshold used
            for NMS filtering
    return:
        (boxes, score, label): the kept boxes, their scores and their int32
        class indices, concatenated over all classes.
    """
    kept_boxes, kept_labels, kept_scores = [], [], []
    max_boxes = tf.constant(max_boxes, dtype='int32')

    # NMS is done for a single image, so flatten the leading dimensions;
    # '-1' means the exact number of boxes is not known here.
    boxes = tf.reshape(boxes, [-1, 4])
    score = tf.reshape(scores, [-1, num_classes])

    # Step 1: boolean mask of the (box, class) pairs reaching the threshold.
    mask = tf.greater_equal(score, tf.constant(score_thresh))

    # Step 2: run non_max_suppression separately for every class.
    for class_id in range(num_classes):
        # Step 3: pick out the boxes and scores selected by the mask.
        filter_boxes = tf.boolean_mask(boxes, mask[:, class_id])
        filter_score = tf.boolean_mask(score[:, class_id], mask[:, class_id])
        nms_indices = tf.image.non_max_suppression(boxes=filter_boxes,
                                                   scores=filter_score,
                                                   max_output_size=max_boxes,
                                                   iou_threshold=nms_thresh, name='nms_indices')
        kept_labels.append(tf.ones_like(tf.gather(filter_score, nms_indices), 'int32') * class_id)
        kept_boxes.append(tf.gather(filter_boxes, nms_indices))
        kept_scores.append(tf.gather(filter_score, nms_indices))

    boxes = tf.concat(kept_boxes, axis=0)
    score = tf.concat(kept_scores, axis=0)
    label = tf.concat(kept_labels, axis=0)
    return boxes, score, label
def py_nms(boxes, scores, max_boxes=50, iou_thresh=0.5):
    """
    Greedy non-max suppression in NumPy.

    Arguments:
        boxes: array of shape [-1, 4] with columns x1, y1, x2, y2; '-1'
            means the number of boxes is not fixed
        scores: array of shape [-1,]
        max_boxes: maximum number of indices returned
        iou_thresh: a box is dropped when its overlap ratio with an already
            kept box is above this value
    Returns:
        list of indices into `boxes`, ordered by descending score, truncated
        to `max_boxes` entries.
    """
    assert boxes.shape[1] == 4 and len(scores.shape) == 1

    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    # NOTE(review): areas are computed without the "+ 1" that the
    # intersection width/height below use -- confirm this is intended.
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        best, rest = order[0], order[1:]
        keep.append(best)

        # intersection of the best box with every remaining candidate
        left = np.maximum(x1[best], x1[rest])
        top = np.maximum(y1[best], y1[rest])
        right = np.minimum(x2[best], x2[rest])
        bottom = np.minimum(y2[best], y2[rest])
        inter = np.maximum(0.0, right - left + 1) * np.maximum(0.0, bottom - top + 1)

        ovr = inter / (areas[best] + areas[rest] - inter)
        # only candidates that do not overlap too much stay in the queue
        survivors = np.where(ovr <= iou_thresh)[0]
        order = rest[survivors]

    return keep[:max_boxes]
def cpu_nms(boxes, scores, num_classes, max_boxes=50, score_thresh=0.5, iou_thresh=0.5):
    """
    Per-class non-max suppression on NumPy arrays.

    Arguments:
        boxes: array reshaped to [-1, 4], e.g. shape [1, 10647, 4]
        scores: array reshaped to [-1, num_classes], e.g. [1, 10647, num_classes]
        num_classes: number of score columns
        max_boxes: maximum number of boxes kept per class
        score_thresh: minimum class score for a box to be considered
        iou_thresh: threshold forwarded to `py_nms`
    Returns:
        (boxes, score, label) concatenated over all classes, or
        (None, None, None) when no box reaches `score_thresh`.
    """
    boxes = boxes.reshape(-1, 4)
    scores = scores.reshape(-1, num_classes)

    # Picked bounding boxes
    picked_boxes, picked_score, picked_label = [], [], []

    for class_id in range(num_classes):
        class_scores = scores[:, class_id]
        candidates = np.where(class_scores >= score_thresh)
        filter_boxes = boxes[candidates]
        filter_scores = class_scores[candidates]
        if len(filter_boxes) == 0:
            continue
        # do non_max_suppression on the cpu
        keep = py_nms(filter_boxes, filter_scores,
                      max_boxes=max_boxes, iou_thresh=iou_thresh)
        picked_boxes.append(filter_boxes[keep])
        picked_score.append(filter_scores[keep])
        picked_label.append(np.ones(len(keep), dtype='int32') * class_id)

    if len(picked_boxes) == 0:
        return None, None, None

    return (np.concatenate(picked_boxes, axis=0),
            np.concatenate(picked_score, axis=0),
            np.concatenate(picked_label, axis=0))
@@ -0,0 +1,35 @@
# coding: utf-8
from __future__ import division, print_function
import cv2
import random
def get_color_table(class_num, seed=2):
    """Create a reproducible table with one random color per class.

    Re-seeds the module-level `random` generator with `seed` as a side
    effect.

    param:
        class_num: number of classes.
        seed: seed passed to `random.seed`.
    return:
        dict mapping each class index in [0, class_num) to a list of three
        random integers in [0, 255].
    """
    random.seed(seed)
    return {
        class_id: [random.randint(0, 255) for _ in range(3)]
        for class_id in range(class_num)
    }
def plot_one_box(img, coord, label=None, color=None, line_thickness=None):
    '''
    Draw one bounding box (and optionally its label) on `img` in place.

    coord: [x_min, y_min, x_max, y_max] format coordinates.
    img: img to plot on.
    label: str. The label name; nothing is written when it is empty.
    color: color handed to the cv2 drawing calls; a random 3-value color is
        generated when it is not given.
    line_thickness: int. rectangle line thickness; derived from the larger
        image side when it is not given.
    '''
    if line_thickness:
        thickness = line_thickness
    else:
        thickness = int(round(0.002 * max(img.shape[0:2])))
    if not color:
        color = [random.randint(0, 255) for _ in range(3)]

    top_left = (int(coord[0]), int(coord[1]))
    bottom_right = (int(coord[2]), int(coord[3]))
    cv2.rectangle(img, top_left, bottom_right, color, thickness=thickness)
    if not label:
        return

    font_scale = float(thickness) / 3
    font_thickness = max(thickness - 1, 1)
    text_size = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=font_thickness)[0]
    # filled background sitting on top of the box's upper-left corner
    background_corner = top_left[0] + text_size[0], top_left[1] - text_size[1] - 3
    cv2.rectangle(img, top_left, background_corner, color, -1)
    cv2.putText(img, label, (top_left[0], top_left[1] - 2), 0, font_scale, [0, 0, 0],
                thickness=font_thickness, lineType=cv2.LINE_AA)
@@ -0,0 +1,102 @@
# coding: utf-8
from __future__ import division, print_function
import tensorflow as tf
import numpy as np
import argparse
import cv2
import time
from utils.misc_utils import parse_anchors, read_class_names
from utils.nms_utils import gpu_nms
from utils.plot_utils import get_color_table, plot_one_box
from utils.data_aug import letterbox_resize
from model import yolov3
def _str_to_bool(text):
    """Parse a command-line flag value: only 'true' (any case) is True."""
    return str(text).lower() == 'true'


# Command-line interface of the video test script.
parser = argparse.ArgumentParser(description="YOLO-V3 video test procedure.")
parser.add_argument("input_video", type=str,
                    help="The path of the input video.")
parser.add_argument("--anchor_path", type=str, default="./data/yolo_anchors.txt",
                    help="The path of the anchor txt file.")
parser.add_argument("--new_size", nargs='*', type=int, default=[416, 416],
                    help="Resize the input image with `new_size`, size format: [width, height]")
parser.add_argument("--letterbox_resize", type=_str_to_bool, default=True,
                    help="Whether to use the letterbox resize.")
parser.add_argument("--class_name_path", type=str, default="./data/coco.names",
                    help="The path of the class names.")
parser.add_argument("--restore_path", type=str, default="./data/darknet_weights/yolov3.ckpt",
                    help="The path of the weights to restore.")
parser.add_argument("--save_video", type=_str_to_bool, default=False,
                    help="Whether to save the video detection results.")
args = parser.parse_args()

# Derived settings shared by the rest of the script.
args.anchors = parse_anchors(args.anchor_path)
args.classes = read_class_names(args.class_name_path)
args.num_class = len(args.classes)

color_table = get_color_table(args.num_class)

# Open the input video and read its basic properties
# (property ids 7, 3, 4 and 5 respectively).
vid = cv2.VideoCapture(args.input_video)
video_frame_cnt = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
video_width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
video_height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
video_fps = int(vid.get(cv2.CAP_PROP_FPS))

# The writer is only created when the results should be saved.
if args.save_video:
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    videoWriter = cv2.VideoWriter('video_result.mp4', fourcc, video_fps, (video_width, video_height))
with tf.Session() as sess:
    # Build the inference graph: one image of size new_size in, NMS-filtered
    # boxes / scores / labels out.
    input_data = tf.placeholder(tf.float32, [1, args.new_size[1], args.new_size[0], 3], name='input_data')
    yolo_model = yolov3(args.num_class, args.anchors)
    with tf.variable_scope('yolov3'):
        pred_feature_maps = yolo_model.forward(input_data, False)
    pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)

    pred_scores = pred_confs * pred_probs

    boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, args.num_class, max_boxes=200, score_thresh=0.3, nms_thresh=0.45)

    saver = tf.train.Saver()
    saver.restore(sess, args.restore_path)

    for frame_idx in range(video_frame_cnt):
        ret, img_ori = vid.read()
        if not ret:
            # No frame could be read (e.g. the stream ended before the
            # reported frame count); stop instead of crashing on None.
            break

        if args.letterbox_resize:
            img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0], args.new_size[1])
        else:
            height_ori, width_ori = img_ori.shape[:2]
            img = cv2.resize(img_ori, tuple(args.new_size))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.asarray(img, np.float32)
        # add the batch dimension and scale pixel values by 1/255
        img = img[np.newaxis, :] / 255.

        start_time = time.time()
        boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input_data: img})
        end_time = time.time()

        # rescale the coordinates to the original image
        if args.letterbox_resize:
            boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
            boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
        else:
            boxes_[:, [0, 2]] *= (width_ori/float(args.new_size[0]))
            boxes_[:, [1, 3]] *= (height_ori/float(args.new_size[1]))

        # separate index name so the frame counter is not overwritten
        for box_idx in range(len(boxes_)):
            x0, y0, x1, y1 = boxes_[box_idx]
            plot_one_box(img_ori, [x0, y0, x1, y1],
                         label=args.classes[labels_[box_idx]] + ', {:.2f}%'.format(scores_[box_idx] * 100),
                         color=color_table[labels_[box_idx]])
        # overlay the inference time of this frame
        cv2.putText(img_ori, '{:.2f}ms'.format((end_time - start_time) * 1000), (40, 40), 0,
                    fontScale=1, color=(0, 255, 0), thickness=2)
        cv2.imshow('image', img_ori)
        if args.save_video:
            videoWriter.write(img_ori)
        # pressing 'q' stops the playback
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    vid.release()
    if args.save_video:
        videoWriter.release()
@@ -0,0 +1,9 @@
{
"server_count": "1",
"server_list": [{
"device": [{devices}],
"server_id": "127.0.0.1"
}],
"status": "completed",
"version": "1.0"
}
@@ -0,0 +1,29 @@
#!/bin/bash
# Environment setup for running on Ascend NPU; meant to be sourced.
# main env
# Use the nnae install when its directory exists, otherwise fall back to the
# ascend-toolkit install. LD_LIBRARY_PATH is replaced (not appended to) in
# both branches, while PYTHONPATH and PATH are appended to.
if [ -d /usr/local/Ascend/nnae/latest ];then
export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib/:/usr/local/Ascend/nnae/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/local/Ascend/driver/tools/hccn_tool/:/usr/local/mpirun4.0/lib
export PYTHONPATH=$PYTHONPATH:/usr/local/Ascend/tfplugin/latest/tfplugin/python/site-packages:/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/tfplugin/latest/tfplugin/python/site-packages
export PATH=$PATH:/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin:/usr/local/mpirun4.0/bin
export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp
else
export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/local/mpirun4.0/lib
# NOTE(review): $projectDir is appended only in this branch and is not set in
# this file -- presumably exported by the caller; confirm.
export PYTHONPATH=$PYTHONPATH:/usr/local/Ascend/tfplugin/latest/tfplugin/python/site-packages:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest//fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/tfplugin/python/site-packages:$projectDir
export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:/usr/local/mpirun4.0/bin
export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
fi
# Settings common to both installs.
export NEW_GE_FE_ID=1
export GE_AICPU_FLAG=1
export SOC_VERSION=Ascend910
# Optional debug switches, disabled by default.
#export DUMP_GE_GRAPH=2
#export DUMP_GRAPH_LEVEL=3
#export PRINT_MODEL=1
export SLOG_PRINT_TO_STDOUT=0
# presumably a timeout in seconds -- TODO confirm
export HCCL_CONNECT_TIMEOUT=600
# system env
# Remove the size limit on core dump files.
ulimit -c unlimited
@@ -0,0 +1,53 @@
# Run eval.py once per device against the checkpoints of a training run.
#
# Usage: eval.sh <device_num> [ckpt_path]
#   device_num  number of devices; device ids 0 .. device_num-1 are evaluated
#   ckpt_path   result directory of the run to evaluate; when omitted, the
#               most recently modified entry matching "Train*" in ../result
#               is used
# For device <i> the checkpoints are read from <run>/<i>/training/ and the
# output of eval.py is written to <run>/eval_<i>.out.

# setting main path
MAIN_PATH=$(dirname "$(readlink -f "$0")")
echo "$MAIN_PATH"

DEVICE_NUM=$1
ckpt_path=$2
#echo $1
#echo $2

# set env
export DDK_VERSION_FLAG=1.60.T49.0.B201
export NEW_GE_FE_ID=1
export GE_AICPU_FLAG=1
export SOC_VERSION=Ascend910
export JOB_ID=10087
export FUSION_TENSOR_SIZE=1000000000
export RANK_ID=yolo

#echo "device_num is $DEVICE_NUM"
for((i=0;i<${DEVICE_NUM};i++));
do
    export RANK_SIZE=$DEVICE_NUM
    export DEVICE_ID=$i
    export DEVICE_INDEX=$i
    #su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[debug]\" --device "$RANK_ID

    # All relative paths below are resolved inside the result directory, so
    # stop if it cannot be entered.
    cd "${MAIN_PATH}/../result" || exit 1

    if [ x"${ckpt_path}" == x"" ];then
        lastresult=$(ls -t | grep -E "Train*" | head -n 1)
        if [ x"${lastresult}" == x"" ];then
            # Without this guard the restore path would become /<i>/training/
            # and the log would be written to /eval_<i>.out.
            echo "[ERROR] no result directory found in ${MAIN_PATH}/../result" >&2
            exit 1
        fi
        RESTORE_PATH=${lastresult}/${i}/training/
    else
        lastresult=${ckpt_path}
        RESTORE_PATH=${ckpt_path}/${i}/training/
    fi
    echo "$RESTORE_PATH"

    python3.7 "${MAIN_PATH}/../code/eval.py" \
        --save_json True \
        --score_thresh 0.0001 \
        --nms_thresh 0.55 \
        --max_boxes 100 \
        --restore_path "$RESTORE_PATH" \
        --max_test 10000 \
        --save_json_path "eval_res_D$DEVICE_NUM.json" > "${lastresult}/eval_$i.out" 2>&1
done
@@ -0,0 +1,77 @@
#!/bin/bash
# Launcher for YOLOv3 training / evaluation.
# Positional arguments:
#   $1 rank_size   number of devices
#   $2 yamlPath    path of the yaml configuration file
#   $3 toolsPath   directory containing get_params_for_yaml.sh
#   $4 CLUSTER, $5 MPIRUN_ALL_IP   only read when /.dockerenv exists
rank_size=$1
yamlPath=$2
toolsPath=$3
if [ -f /.dockerenv ];then
CLUSTER=$4
MPIRUN_ALL_IP="$5"
export CLUSTER=${CLUSTER}
fi
# Parent directory of the directory holding this script.
currentDir=$(cd "$(dirname "$0")/.."; pwd)
# Load the configuration from the yaml file.
# NOTE(review): the helper's output is eval'ed; presumably it prints shell
# assignments for runmode, mode, batch_size, save_epoch, total_epoches,
# device_group_*p, mpirun_ip, ckpt_path ... -- confirm against the helper.
eval $(${toolsPath}/get_params_for_yaml.sh ${yamlPath} "tensorflow_config")
source ${currentDir}/config/npu_set_env.sh
# Unless only evaluating, create a timestamped directory for this job.
if [ x"$runmode" != x"evaluate" ];then
currtime=`date +%Y%m%d%H%M%S`
mkdir -p ${currentDir%train*}/train/result/tf_yolov3/training_job_${currtime}/
train_job_dir=${currentDir%train*}/train/result/tf_yolov3/training_job_${currtime}/
echo "[`date +%Y%m%d-%H:%M:%S`] [INFO] ${train_job_dir} &"
fi
# Device list: taken from device_group_<rank_size>p; when that is empty or
# rank_size >= 8, devices 0 .. rank_size-1 are used in order.
eval device_group=\$device_group_${rank_size}p
if [ x"${device_group}" == x"" ] || [ ${rank_size} -ge 8 ];then
device_group="$(seq 0 "$(expr $rank_size - 1)")"
fi
# Take the first character of the space-stripped device list as
# first_device_id; the hw log is linked from the directory with that name.
# NOTE(review): only one character is taken, so a device id with two digits
# would be truncated.
device_group_str=`echo ${device_group} | sed 's/ //g'`
first_device_id=`echo ${device_group_str: 0:1}`
# Patch the args file in place with the values from the configuration.
argsFilePath=${currentDir}/code/args_${mode}.py
#echo "argsFilePath is "${argsFilePath}
# The "0,/re/" address limits this substitution to the lines up to and
# including the first one matching batch_size.
sed -i "0,/batch_size.*$/s//batch_size\ = ${batch_size}/g" ${argsFilePath}
sed -i "s/save_epoch.*$/save_epoch\ = ${save_epoch}/g" ${argsFilePath}
sed -i "s/total_epoches =.*$/total_epoches\ = ${total_epoches}/g" ${argsFilePath}
# Strip carriage returns.
sed -i 's/\r//g' ${argsFilePath}
if [ x"${CLUSTER}" == x"True" ];then
# Cluster mode: link the hw log of device 0, copy the yaml and args files to
# every other host, then start train.sh through mpirun.
# ln hw log
ln -snf ${train_job_dir}/0/hw_yolov3.log ${train_job_dir}
this_ip=$(hostname -I |awk '{print $1}')
for ip in $MPIRUN_ALL_IP;do
if [ x"$ip" != x"$this_ip" ];then
scp $yamlPath root@$ip:$yamlPath
scp $argsFilePath root@$ip:$argsFilePath
fi
done
export PATH=$PATH:/usr/local/mpirun4.0/bin
mpirun -H ${mpirun_ip} \
--bind-to none -map-by slot\
--allow-run-as-root \
--mca btl_tcp_if_exclude lo,docker0,endvnic,virbr0,vethf40501b,docker_gwbridge,br-f42ac38052b4\
--prefix /usr/local/mpirun4.0/ \
${currentDir}/scripts/train.sh 0 $rank_size $yamlPath $currtime ${toolsPath} ${CLUSTER}
elif [ $runmode == "train" ];then
# Train only: start one background train.sh per device, with rank ids
# counting up from 0.
ln -snf ${train_job_dir}/${first_device_id}/hw_yolov3.log ${train_job_dir}
rank_id=0
for device_id in $device_group;do
#echo "[`date +%Y%m%d-%H:%M:%S`] [INFO] start: train ${device_id} & " >> ${currentDir}/result/main.log
${currentDir}/scripts/train.sh $device_id $rank_size $yamlPath $currtime ${toolsPath} $rank_id&
let rank_id++
done
else
# Any other runmode outside cluster mode: run the evaluation script.
# NOTE(review): train_job_dir is unset here when runmode is "evaluate", so
# the ln below expands to paths starting at "/" -- confirm this is intended.
echo "[`date +%Y%m%d-%H:%M:%S`] [INFO] ${ckpt_path} &"
ln -snf ${train_job_dir}/${first_device_id}/hw_yolov3.log ${train_job_dir}
bash ${currentDir}/scripts/eval.sh ${rank_size} ${ckpt_path}
fi
# Wait for all background jobs started above.
wait
#echo "[`date +%Y%m%d-%H:%M:%S`] [INFO] all train exit " >> ${currentDir}/result/main.log

Some files were not shown because too many files have changed in this diff Show More