[add]上传训练benchmark by z00560161

2020-10-19 20:22:23 +08:00
parent 22b83024f5
commit 82522e2f61
1225 changed files with 345421 additions and 0 deletions
@@ -0,0 +1,89 @@
+#  SSD-Resnet34 TensorFlow训练说明
+
+### 1. 运行环境
+Python版本: 3.7.5
+主要python三方库:
+- tensorflow >= 1.15.0 (satisfied with NPU)
+
+
+### 2. 参数配置
+在train/yaml/SSD-Resnet34.yaml中修改相应配置， 配置项含义:
+
+```
+tensorflow_config: tensorflow框架下ssd-resnet34的配置项
+
+train_batch_size: 训练时设置的batch size大小
+training_file_pattern: 数据集中训练数据集文件标签类型， 数据集中有该类型的文件夹
+resnet_checkpoint: ckpt路径
+validation_file_pattern: 数据集中验证数据文件标签类型， 数据集中有该类型的文件夹
+val_json_file: 数据集中验证数据json文件
+eval_batch_size: 评测时设置的batch size大小
+num_epochs: epochs数量
+model_dir: 存放模型graph等数据的路径
+max_steps: 最大步数
+runmode: 运行模式 边训练边评测、只训练、只评测
+device_group_1p: 跑1p时的device_id
+device_group_2p: 跑2p时的device_id
+device_group_4p: 跑4p时的device_id
+mpirun_ip: 仅集群场景时需要配置, 格式ip1:卡数量1,ip2:卡数量2
+docker_image: docker镜像名称:版本号
+```
+
+
+SSD-Resnet34.yaml中配置项示例：
+```
+tensorflow_config:
+
+    train_batch_size: 32
+    training_file_pattern: /home/data/raw_data/tfrecord/train2017*
+    resnet_checkpoint: /home/data/raw_data/resnet34_pretrain_model/model.ckpt-28152
+    validation_file_pattern: /home/data/raw_data/tfrecord/val2017*
+    val_json_file: /home/data/raw_data/annotations/instances_val2017.json
+    eval_batch_size: 32
+    num_epochs: 1
+    model_dir: result_npu
+    max_steps: 432000
+    runmode: train_and_eval
+    device_group_1p: 0
+    device_group_2p: 0 1
+    device_group_4p: 0 1 2 3
+    mpirun_ip: 90.90.176.152:8,90.90.176.154:8
+    docker_image: mpirun3:latest
+
+```
+SSD-Resnet34.yaml中配置注意事项：
+    当ssd-resnet34在docker侧进行训练时，resnet_checkpoint、validation_file_pattern和val_json_file的路径都必须规划在training_file_pattern字段路径中的raw_data下，因配置路径较多，脚本中统一只对training_file_pattern字段路径中的raw_data下文件做映射
+
+### 3. 启动训练脚本
+
+#### 3.1 训练脚本启动
+当前路径为benchmark包的train文件夹下
+```
+bash benchmark.sh -e SSD-Resnet34 -hw 1p              # host侧1p
+bash benchmark.sh -e SSD-Resnet34 -hw 8p              # host侧8p
+bash benchmark.sh -e SSD-Resnet34 -hw 1p -docker      # docker侧1p
+bash benchmark.sh -e SSD-Resnet34 -hw 8p -docker      # docker侧8p
+bash benchmark.sh -e SSD-Resnet34 -ct                 # host侧集群
+bash benchmark.sh -e SSD-Resnet34 -ct -docker         # docker侧集群
+```
+
+#### 3.2 训练日志
+日志位于benchmark包train路径下的result目录中，对应ssd-resnet34的文件夹内。
+```
+./result/tf_ssd-resnet34/TrainingJob-2020xxxxxxxxxx/train_${device_id}.log
+./result/tf_ssd-resnet34/TrainingJob-2020xxxxxxxxxx/device_id/hw_ssd-resnet34.log
+```
+
+### 4. 模型评测
+将train/yaml/SSD-Resnet34.yaml中resnet_checkpoint的值改为训练产生的日志的路径， runmode的值改为evaluate，如2中示例；
+然后运行与训练时相同的脚本，结果参见train.log。
+
+
+### 5. 训练结果参考
+
+1p: 600
+4p: 2000
+8p: 4000
+
+
+
@@ -0,0 +1,281 @@
+# Copyright 2018 Google. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""COCO-style evaluation metrics.
+
+Implements the interface of COCO API and metric_fn in tf.TPUEstimator.
+
+COCO API: github.com/cocodataset/cocoapi/
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import atexit
+import tempfile
+import time
+
+from absl import flags
+
+import numpy as np
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+import six
+
+#COCO = coco.COCO
+#COCOeval = coco.COCOeval
+
+import tensorflow as tf
+
+import ssd_constants
+
+# Shared absl flag registry; compute_map reads FLAGS.val_json_file from it.
+FLAGS = flags.FLAGS
+
+
+# Under Python 3 there is no builtin `unicode`; bind the name inside
+# pycocotools.coco to `str` as a workaround for the issue linked below.
+# https://github.com/cocodataset/cocoapi/issues/49
+if six.PY3:
+  import pycocotools.coco
+  pycocotools.coco.unicode = str
+
+
+def create_coco(val_json_file, use_cpp_extension=True):
+  """Creates Microsoft COCO helper class object and return it."""
+  if val_json_file.startswith('gs://'):
+    _, local_val_json = tempfile.mkstemp(suffix='.json')
+    tf.gfile.Remove(local_val_json)
+
+    tf.gfile.Copy(val_json_file, local_val_json)
+    atexit.register(tf.gfile.Remove, local_val_json)
+  else:
+    local_val_json = val_json_file
+
+  if use_cpp_extension:
+    coco_gt = coco.COCO(local_val_json, False)
+  else:
+    coco_gt = COCO(local_val_json)
+  return coco_gt
+
+
+def compute_map(labels_and_predictions,
+                coco_gt,
+                use_cpp_extension=True,
+                nms_on_tpu=True):
+  """Use model predictions to compute mAP.
+
+  The evaluation code is largely copied from the MLPerf reference
+  implementation. While it is possible to write the evaluation as a tensor
+  metric and use Estimator.evaluate(), this approach was selected for simplicity
+  and ease of duck testing.
+
+  Args:
+    labels_and_predictions: A map from TPU predict method.
+    coco_gt: ground truch COCO object.
+    use_cpp_extension: use cocoeval C++ library.
+    nms_on_tpu: do NMS on TPU.
+  Returns:
+    Evaluation result.
+  """
+
+  predictions = []
+  tic = time.time()
+
+  if nms_on_tpu:
+    p = []
+    for i in labels_and_predictions:
+      for j in i:
+        p.append(np.array(j, dtype=np.float32))
+    predictions = np.concatenate(list(p)).reshape((-1, 7))
+  else:
+    k = 0
+    for example in labels_and_predictions:
+      if ssd_constants.IS_PADDED in example and example[
+          ssd_constants.IS_PADDED]:
+        continue
+      print(k)
+      k += 1
+      htot, wtot, _ = example[ssd_constants.RAW_SHAPE]
+      pred_box = example['pred_box']
+      pred_scores = example['pred_scores']
+      indices = example['indices']
+      loc, label, prob = decode_single(
+          pred_box, pred_scores, indices, ssd_constants.OVERLAP_CRITERIA,
+          ssd_constants.MAX_NUM_EVAL_BOXES, ssd_constants.MAX_NUM_EVAL_BOXES)
+
+      for loc_, label_, prob_ in zip(loc, label, prob):
+        # Ordering convention differs, hence [1], [0] rather than [0], [1]
+        predictions.append([
+            int(example[ssd_constants.SOURCE_ID]),
+            loc_[1] * wtot, loc_[0] * htot, (loc_[3] - loc_[1]) * wtot,
+            (loc_[2] - loc_[0]) * htot, prob_,
+            ssd_constants.CLASS_INV_MAP[label_]
+        ])
+
+  toc = time.time()
+  tf.logging.info('Prepare predictions DONE (t={:0.2f}s).'.format(toc - tic))
+
+  if coco_gt is None:
+    coco_gt = create_coco(
+        FLAGS.val_json_file, use_cpp_extension=use_cpp_extension)
+
+  if use_cpp_extension:
+    coco_dt = coco_gt.LoadRes(np.array(predictions, dtype=np.float32))
+    coco_eval = COCOeval(coco_gt, coco_dt, iou_type='bbox')
+    coco_eval.Evaluate()
+    coco_eval.Accumulate()
+    coco_eval.Summarize()
+    stats = coco_eval.GetStats()
+
+  else:
+    coco_dt = coco_gt.loadRes(np.array(predictions))
+
+    coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
+    coco_eval.evaluate()
+    coco_eval.accumulate()
+    coco_eval.summarize()
+    stats = coco_eval.stats
+
+  print('Current AP: {:.5f}'.format(stats[0]))
+  metric_names = ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1',
+                  'ARmax10', 'ARmax100', 'ARs', 'ARm', 'ARl']
+  coco_time = time.time()
+  tf.logging.info('COCO eval DONE (t={:0.2f}s).'.format(coco_time - toc))
+
+  # Prefix with "COCO" to group in TensorBoard.
+  return {'COCO/' + key: value for key, value in zip(metric_names, stats)}
+
+
+def calc_iou(target, candidates):
+  target_tiled = np.tile(target[np.newaxis, :], (candidates.shape[0], 1))
+  # Left Top & Right Bottom
+  lt = np.maximum(target_tiled[:,:2], candidates[:,:2])
+
+  rb = np.minimum(target_tiled[:,2:], candidates[:,2:])
+
+  delta = np.maximum(rb - lt, 0)
+
+  intersect = delta[:,0] * delta[:,1]
+
+  delta1 = target_tiled[:, 2:] - target_tiled[:, :2]
+  area1 = delta1[:,0] * delta1[:,1]
+  delta2 = candidates[:, 2:] - candidates[:, :2]
+  area2 = delta2[:,0] * delta2[:,1]
+
+  iou = intersect/(area1 + area2 - intersect)
+  return iou
+
+
+def decode_single(bboxes_in,
+                  scores_in,
+                  indices,
+                  criteria,
+                  max_output,
+                  max_num=200):
+  """Implement Non-maximum suppression.
+
+    Reference to https://github.com/amdegroot/ssd.pytorch
+
+  Args:
+    bboxes_in: a Tensor with shape [N, 4], which stacks box regression outputs
+      on all feature levels. The N is the number of total anchors on all levels.
+    scores_in: a Tensor with shape [ssd_constants.MAX_NUM_EVAL_BOXES,
+      num_classes]. The top ssd_constants.MAX_NUM_EVAL_BOXES box scores for each
+      class.
+    indices: a Tensor with shape [ssd_constants.MAX_NUM_EVAL_BOXES,
+      num_classes]. The indices for these top boxes for each class.
+    criteria: a float number to specify the threshold of NMS.
+    max_output: maximum output length.
+    max_num: maximum number of boxes before NMS.
+
+  Returns:
+    boxes, labels and scores after NMS.
+  """
+
+  bboxes_out = []
+  scores_out = []
+  labels_out = []
+
+  for i, score in enumerate(np.split(scores_in, scores_in.shape[1], 1)):
+    class_indices = indices[:, i]
+    bboxes = bboxes_in[class_indices, :]
+    score = np.squeeze(score, 1)
+
+    # skip background
+    if i == 0:
+      continue
+
+    mask = score > ssd_constants.MIN_SCORE
+    if not np.any(mask):
+      continue
+
+    bboxes, score = bboxes[mask, :], score[mask]
+
+    # remain_list = []
+    # for r in range(bboxes.shape[0]):
+    #   if bboxes[r, 0] < 0 or bboxes[r, 1] < 0 or bboxes[r, 2] < 0 or bboxes[r, 3] < 0 or bboxes[r, 0] >= bboxes[r, 2] or \
+    #           bboxes[r, 1] >= bboxes[r, 3]:
+    #     continue
+    #   remain_list.append(r)
+    # bboxes = bboxes[remain_list, :]
+    # score = score[remain_list]
+
+    remain_list = []
+    for r in range(bboxes.shape[0]):
+      for j in range(4):
+        if bboxes[r, j] < 0:
+          bboxes[r, j] = 0.00001
+      if bboxes[r, 0] >= bboxes[r, 2]:
+        bboxes[r, 2] = bboxes[r, 0] + 0.00001
+      if bboxes[r, 1] >= bboxes[r, 3]:
+        bboxes[r, 3] = bboxes[r, 1] + 0.00001
+      remain_list.append(r)
+    bboxes = bboxes[remain_list, :]
+    score = score[remain_list]
+
+
+    score_idx_sorted = np.argsort(score)
+    score_sorted = score[score_idx_sorted]
+
+    score_idx_sorted = score_idx_sorted[-max_num:]
+    candidates = []
+
+    # perform non-maximum suppression
+    while len(score_idx_sorted):
+      idx = score_idx_sorted[-1]
+      bboxes_sorted = bboxes[score_idx_sorted, :]
+      bboxes_idx = bboxes[idx, :]
+      iou = calc_iou(bboxes_idx, bboxes_sorted)
+
+      score_idx_sorted = score_idx_sorted[iou < criteria]
+      candidates.append(idx)
+
+    bboxes_out.append(bboxes[candidates, :])
+    scores_out.append(score[candidates])
+    labels_out.extend([i]*len(candidates))
+
+  if len(scores_out) == 0:
+    tf.logging.info("No objects detected. Returning dummy values.")
+    return (
+        np.zeros(shape=(1, 4), dtype=np.float32),
+        np.zeros(shape=(1,), dtype=np.int32),
+        np.ones(shape=(1,), dtype=np.float32) * ssd_constants.DUMMY_SCORE,
+    )
+
+  bboxes_out = np.concatenate(bboxes_out, axis=0)
+  scores_out = np.concatenate(scores_out, axis=0)
+  labels_out = np.array(labels_out)
+
+  max_ids = np.argsort(scores_out)[-max_output:]
+
+  return bboxes_out[max_ids, :], labels_out[max_ids], scores_out[max_ids]
@@ -0,0 +1,369 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+r"""Convert raw COCO dataset to TFRecord for object_detection.
+
+Example usage:
+    python create_coco_tf_record.py --logtostderr \
+      --image_dir="${TRAIN_IMAGE_DIR}" \
+      --object_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
+      --caption_annotations_file="${CAPTION_ANNOTATIONS_FILE}" \
+      --output_file_prefix="${OUTPUT_DIR/FILE_PREFIX}" \
+      --num_shards=32
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import hashlib
+import io
+import json
+import logging
+import multiprocessing
+import os
+from absl import app
+from absl import flags
+import numpy as np
+import PIL.Image
+
+from pycocotools import mask
+from research.object_detection.utils import dataset_util
+from research.object_detection.utils import label_map_util
+
+import tensorflow.compat.v1 as tf
+# Command-line flags consumed by main(); each flag's help string documents it.
+flags.DEFINE_boolean(
+    'include_masks', False, 'Whether to include instance segmentations masks '
+    '(PNG encoded) in the result. default: False.')
+flags.DEFINE_string('image_dir', '', 'Directory containing images.')
+flags.DEFINE_string(
+    'image_info_file', '', 'File containing image information. '
+    'Tf Examples in the output files correspond to the image '
+    'info entries in this file. If this file is not provided '
+    'object_annotations_file is used if present. Otherwise, '
+    'caption_annotations_file is used to get image info.')
+flags.DEFINE_string(
+    'object_annotations_file', '', 'File containing object '
+    'annotations - boxes and instance masks.')
+flags.DEFINE_string('caption_annotations_file', '', 'File containing image '
+                    'captions.')
+flags.DEFINE_string('output_file_prefix', '/tmp/train', 'Path to output file')
+flags.DEFINE_integer('num_shards', 32, 'Number of shards for output file.')
+
+FLAGS = flags.FLAGS
+
+# Raise the TensorFlow logger to INFO at import time.
+logger = tf.get_logger()
+logger.setLevel(logging.INFO)
+
+
+def create_tf_example(image,
+                      image_dir,
+                      bbox_annotations=None,
+                      category_index=None,
+                      caption_annotations=None,
+                      include_masks=False):
+  """Converts image and annotations to a tf.Example proto.
+
+  Args:
+    image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
+      u'width', u'date_captured', u'flickr_url', u'id']. Only 'height',
+      'width', 'file_name' and 'id' are read here.
+    image_dir: directory containing the image files.
+    bbox_annotations:
+      list of dicts with keys: [u'segmentation', u'area', u'iscrowd',
+        u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box
+        coordinates in the official COCO dataset are given as [x, y, width,
+        height] tuples using absolute coordinates where x, y represent the
+        top-left (0-indexed) corner.  This function converts to the format
+        expected by the Tensorflow Object Detection API (which is
+        [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
+        size).
+    category_index: a dict containing COCO category information keyed by the
+      'id' field of each category.  See the label_map_util.create_category_index
+      function. It is indexed whenever bbox_annotations is non-empty.
+    caption_annotations:
+      list of dicts; the 'caption' entry of each one is stored.
+    include_masks: Whether to include instance segmentations masks
+      (PNG encoded) in the result. default: False.
+
+  Returns:
+    key: hex SHA-256 digest of the encoded image bytes.
+    example: The converted tf.Example
+    num_annotations_skipped: Number of (invalid) annotations that were ignored.
+
+  Raises:
+    Nothing is raised explicitly here. The encoded bytes are passed to
+    PIL.Image.open, presumably so that undecodable data fails early
+    - NOTE(review): confirm which exception PIL raises in that case.
+  """
+  image_height = image['height']
+  image_width = image['width']
+  filename = image['file_name']
+  image_id = image['id']
+
+  full_path = os.path.join(image_dir, filename)
+  with tf.gfile.GFile(full_path, 'rb') as fid:
+    encoded_jpg = fid.read()
+  encoded_jpg_io = io.BytesIO(encoded_jpg)
+  # NOTE: this rebinds the `image` parameter; the opened PIL image is not used
+  # again below.
+  image = PIL.Image.open(encoded_jpg_io)
+  key = hashlib.sha256(encoded_jpg).hexdigest()
+  feature_dict = {
+      'image/height':
+          dataset_util.int64_feature(image_height),
+      'image/width':
+          dataset_util.int64_feature(image_width),
+      'image/filename':
+          dataset_util.bytes_feature(filename.encode('utf8')),
+      'image/source_id':
+          dataset_util.bytes_feature(str(image_id).encode('utf8')),
+      'image/key/sha256':
+          dataset_util.bytes_feature(key.encode('utf8')),
+      'image/encoded':
+          dataset_util.bytes_feature(encoded_jpg),
+      'image/format':
+          dataset_util.bytes_feature('jpeg'.encode('utf8')),
+  }
+
+  num_annotations_skipped = 0
+  if bbox_annotations:
+    xmin = []
+    xmax = []
+    ymin = []
+    ymax = []
+    is_crowd = []
+    category_names = []
+    category_ids = []
+    area = []
+    encoded_mask_png = []
+    for object_annotations in bbox_annotations:
+      (x, y, width, height) = tuple(object_annotations['bbox'])
+      # Skip boxes with a non-positive size.
+      if width <= 0 or height <= 0:
+        num_annotations_skipped += 1
+        continue
+      # Skip boxes that extend beyond the right or bottom image border.
+      if x + width > image_width or y + height > image_height:
+        num_annotations_skipped += 1
+        continue
+      # Convert absolute [x, y, w, h] into corner coordinates normalized by
+      # the image size.
+      xmin.append(float(x) / image_width)
+      xmax.append(float(x + width) / image_width)
+      ymin.append(float(y) / image_height)
+      ymax.append(float(y + height) / image_height)
+      is_crowd.append(object_annotations['iscrowd'])
+      category_id = int(object_annotations['category_id'])
+      category_ids.append(category_id)
+      category_names.append(category_index[category_id]['name'].encode('utf8'))
+      area.append(object_annotations['area'])
+
+      if include_masks:
+        run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
+                                            image_height, image_width)
+        binary_mask = mask.decode(run_len_encoding)
+        # For non-crowd annotations the decoded mask is reduced over axis 2
+        # into a single 2-D mask.
+        if not object_annotations['iscrowd']:
+          binary_mask = np.amax(binary_mask, axis=2)
+        pil_image = PIL.Image.fromarray(binary_mask)
+        output_io = io.BytesIO()
+        pil_image.save(output_io, format='PNG')
+        encoded_mask_png.append(output_io.getvalue())
+    feature_dict.update({
+        'image/object/bbox/xmin':
+            dataset_util.float_list_feature(xmin),
+        'image/object/bbox/xmax':
+            dataset_util.float_list_feature(xmax),
+        'image/object/bbox/ymin':
+            dataset_util.float_list_feature(ymin),
+        'image/object/bbox/ymax':
+            dataset_util.float_list_feature(ymax),
+        'image/object/class/text':
+            dataset_util.bytes_list_feature(category_names),
+        'image/object/class/label':
+            dataset_util.int64_list_feature(category_ids),
+        'image/object/is_crowd':
+            dataset_util.int64_list_feature(is_crowd),
+        'image/object/area':
+            dataset_util.float_list_feature(area),
+    })
+    if include_masks:
+      feature_dict['image/object/mask'] = (
+          dataset_util.bytes_list_feature(encoded_mask_png))
+  if caption_annotations:
+    captions = []
+    for caption_annotation in caption_annotations:
+      captions.append(caption_annotation['caption'].encode('utf8'))
+    feature_dict.update(
+        {'image/caption': dataset_util.bytes_list_feature(captions)})
+
+  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
+  return key, example, num_annotations_skipped
+
+
+def _pool_create_tf_example(args):
+  """Unpacks one argument tuple and forwards it to create_tf_example.
+
+  This single-argument wrapper is the callable handed to pool.imap in
+  _create_tf_record_from_coco_annotations.
+  """
+  return create_tf_example(*args)
+
+
+def _load_object_annotations(object_annotations_file):
+  """Loads object annotation JSON file."""
+  with tf.gfile.GFile(object_annotations_file, 'r') as fid:
+    obj_annotations = json.load(fid)
+
+  images = obj_annotations['images']
+  category_index = label_map_util.create_category_index(
+      obj_annotations['categories'])
+
+  img_to_obj_annotation = collections.defaultdict(list)
+  logging.info('Building bounding box index.')
+  for annotation in obj_annotations['annotations']:
+    image_id = annotation['image_id']
+    img_to_obj_annotation[image_id].append(annotation)
+
+  missing_annotation_count = 0
+  for image in images:
+    image_id = image['id']
+    if image_id not in img_to_obj_annotation:
+      missing_annotation_count += 1
+
+  logging.info('%d images are missing bboxes.', missing_annotation_count)
+
+  return img_to_obj_annotation, category_index
+
+
+def _load_caption_annotations(caption_annotations_file):
+  """Loads caption annotation JSON file."""
+  with tf.gfile.GFile(caption_annotations_file, 'r') as fid:
+    caption_annotations = json.load(fid)
+
+  img_to_caption_annotation = collections.defaultdict(list)
+  logging.info('Building caption index.')
+  for annotation in caption_annotations['annotations']:
+    image_id = annotation['image_id']
+    img_to_caption_annotation[image_id].append(annotation)
+
+  missing_annotation_count = 0
+  images = caption_annotations['images']
+  for image in images:
+    image_id = image['id']
+    if image_id not in img_to_caption_annotation:
+      missing_annotation_count += 1
+
+  logging.info('%d images are missing captions.', missing_annotation_count)
+
+  return img_to_caption_annotation
+
+
+def _load_images_info(images_info_file):
+  with tf.gfile.GFile(images_info_file, 'r') as fid:
+    info_dict = json.load(fid)
+  return info_dict['images']
+
+
+def _create_tf_record_from_coco_annotations(images_info_file,
+                                            image_dir,
+                                            output_path,
+                                            num_shards,
+                                            object_annotations_file=None,
+                                            caption_annotations_file=None,
+                                            include_masks=False):
+  """Loads COCO annotation json files and converts to tf.Record format.
+
+  Args:
+    images_info_file: JSON file containing image info. The number of tf.Examples
+      in the output tf Record files is exactly equal to the number of image info
+      entries in this file. This can be any of train/val/test annotation json
+      files Eg. 'image_info_test-dev2017.json',
+      'instance_annotations_train2017.json',
+      'caption_annotations_train2017.json', etc.
+    image_dir: Directory containing the image files.
+    output_path: Path to output tf.Record file.
+    num_shards: Number of output files to create.
+    object_annotations_file: JSON file containing bounding box annotations.
+    caption_annotations_file: JSON file containing caption annotations.
+    include_masks: Whether to include instance segmentations masks
+      (PNG encoded) in the result. default: False.
+  """
+
+  logging.info('writing to output path: %s', output_path)
+  writers = [
+      tf.python_io.TFRecordWriter(
+          output_path + '-%05d-of-%05d.tfrecord' % (i, num_shards))
+      for i in range(num_shards)
+  ]
+  images = _load_images_info(images_info_file)
+
+  img_to_obj_annotation = None
+  img_to_caption_annotation = None
+  category_index = None
+  if object_annotations_file:
+    img_to_obj_annotation, category_index = (
+        _load_object_annotations(object_annotations_file))
+  if caption_annotations_file:
+    img_to_caption_annotation = (
+        _load_caption_annotations(caption_annotations_file))
+
+  def _get_object_annotation(image_id):
+    if img_to_obj_annotation:
+      return img_to_obj_annotation[image_id]
+    else:
+      return None
+
+  def _get_caption_annotation(image_id):
+    if img_to_caption_annotation:
+      return img_to_caption_annotation[image_id]
+    else:
+      return None
+
+  pool = multiprocessing.Pool()
+  total_num_annotations_skipped = 0
+  for idx, (_, tf_example, num_annotations_skipped) in enumerate(
+      pool.imap(_pool_create_tf_example,
+                [(image, image_dir, _get_object_annotation(image['id']),
+                  category_index, _get_caption_annotation(image['id']),
+                  include_masks) for image in images])):
+    if idx % 100 == 0:
+      logging.info('On image %d of %d', idx, len(images))
+
+    total_num_annotations_skipped += num_annotations_skipped
+    writers[idx % num_shards].write(tf_example.SerializeToString())
+
+  pool.close()
+  pool.join()
+
+  for writer in writers:
+    writer.close()
+
+  logging.info('Finished writing, skipped %d annotations.',
+               total_num_annotations_skipped)
+
+
+def main(_):
+  assert FLAGS.image_dir, '`image_dir` missing.'
+  assert (FLAGS.image_info_file or FLAGS.object_annotations_file or
+          FLAGS.caption_annotations_file), ('All annotation files are '
+                                            'missing.')
+  if FLAGS.image_info_file:
+    images_info_file = FLAGS.image_info_file
+  elif FLAGS.object_annotations_file:
+    images_info_file = FLAGS.object_annotations_file
+  else:
+    images_info_file = FLAGS.caption_annotations_file
+
+  directory = os.path.dirname(FLAGS.output_file_prefix)
+  if not tf.gfile.IsDirectory(directory):
+    tf.gfile.MakeDirs(directory)
+
+  _create_tf_record_from_coco_annotations(images_info_file, FLAGS.image_dir,
+                                          FLAGS.output_file_prefix,
+                                          FLAGS.num_shards,
+                                          FLAGS.object_annotations_file,
+                                          FLAGS.caption_annotations_file,
+                                          FLAGS.include_masks)
+
+
+# Script entry point: set the TensorFlow logger to INFO (the same two calls
+# also run at import time earlier in this file) and let absl parse the flags
+# before calling main.
+if __name__ == '__main__':
+  logger = tf.get_logger()
+  logger.setLevel(logging.INFO)
+  app.run(main)
@@ -0,0 +1,436 @@
+# Copyright 2018 Google. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Data loader and processing."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import itertools as it
+import math
+import os
+
+import numpy as np
+import tensorflow as tf
+
+from object_detection import argmax_matcher
+from object_detection import box_list
+from object_detection import faster_rcnn_box_coder
+from object_detection import preprocessor
+from object_detection import region_similarity_calculator
+from object_detection import target_assigner
+from object_detection import tf_example_decoder
+import ssd_constants
+
+
+def get_rank_size():
+    return int(os.environ['RANK_SIZE'])
+
+def get_rank_id():
+    return int(os.environ['DEVICE_ID'])
+
+class DefaultBoxes(object):
+  """Default bounding boxes for 300x300 5 layer SSD.
+
+  Default bounding boxes generation follows the order of (W, H, anchor_sizes).
+  Therefore, the tensor converted from DefaultBoxes has a shape of
+  [anchor_sizes, H, W, 4]. The last dimension is the box coordinates; 'ltrb'
+  is [ymin, xmin, ymax, xmax] while 'xywh' is [cy, cx, h, w].
+  """
+
+  def __init__(self):
+    fk = ssd_constants.IMAGE_SIZE / np.array(ssd_constants.STEPS)
+
+    self.default_boxes = []
+    # size of feature and number of feature
+    for idx, feature_size in enumerate(ssd_constants.FEATURE_SIZES):
+      sk1 = ssd_constants.SCALES[idx] / ssd_constants.IMAGE_SIZE
+      sk2 = ssd_constants.SCALES[idx+1] / ssd_constants.IMAGE_SIZE
+      sk3 = math.sqrt(sk1*sk2)
+      all_sizes = [(sk1, sk1), (sk3, sk3)]
+
+      for alpha in ssd_constants.ASPECT_RATIOS[idx]:
+        w, h = sk1 * math.sqrt(alpha), sk1 / math.sqrt(alpha)
+        all_sizes.append((w, h))
+        all_sizes.append((h, w))
+
+      assert len(all_sizes) == ssd_constants.NUM_DEFAULTS[idx]
+
+      for i, j in it.product(range(feature_size), repeat=2):
+        for w, h in all_sizes:
+          cx, cy = (j + 0.5) / fk[idx], (i + 0.5) / fk[idx]
+          box = tuple(np.clip(k, 0, 1) for k in (cy, cx, h, w))
+          self.default_boxes.append(box)
+
+    assert len(self.default_boxes) == ssd_constants.NUM_SSD_BOXES
+
+    def to_ltrb(cy, cx, h, w):
+      return cy - h / 2, cx - w / 2, cy + h / 2, cx + w / 2
+
+    # For IoU calculation
+    self.default_boxes_ltrb = tuple(to_ltrb(*i) for i in self.default_boxes)
+
+  def __call__(self, order='ltrb'):
+    if order == 'ltrb': return self.default_boxes_ltrb
+    if order == 'xywh': return self.default_boxes
+
+
+def calc_iou_tensor(box1, box2):
+  """ Calculation of IoU based on two boxes tensor,
+      Reference to https://github.com/kuangliu/pytorch-ssd
+      input:
+          box1 (N, 4)
+          box2 (M, 4)
+      output:
+          IoU (N, M)
+  """
+  N = tf.shape(box1)[0]
+  M = tf.shape(box2)[0]
+
+  be1 = tf.tile(tf.expand_dims(box1, axis=1), (1, M, 1))
+  be2 = tf.tile(tf.expand_dims(box2, axis=0), (N, 1, 1))
+
+  # Left Top & Right Bottom
+  lt = tf.maximum(be1[:,:,:2], be2[:,:,:2])
+
+  rb = tf.minimum(be1[:,:,2:], be2[:,:,2:])
+
+  delta = tf.maximum(rb - lt, 0)
+
+  intersect = delta[:,:,0]*delta[:,:,1]
+
+  delta1 = be1[:,:,2:] - be1[:,:,:2]
+  area1 = delta1[:,:,0]*delta1[:,:,1]
+  delta2 = be2[:,:,2:] - be2[:,:,:2]
+  area2 = delta2[:,:,0]*delta2[:,:,1]
+
+  iou = intersect/(area1 + area2 - intersect)
+  return iou
+
+
+def ssd_crop(image, boxes, classes):
+  """IoU-biased random crop.
+
+  Repeatedly draws crop proposals until one is accepted, then crops and resizes
+  the image to that region. Only boxes (and the matching rows of `classes`)
+  selected by the accepted proposal's mask are kept; they are clipped to the
+  crop and re-expressed relative to it.
+
+  Reference: https://github.com/chauhan-utk/ssd.DomainAdaptation
+
+  Args:
+    image: image tensor indexed with three axes here; a leading batch axis is
+      added before it is handed to tf.image.crop_and_resize.
+    boxes: tensor indexed as [num_boxes, 4]. Presumably normalized to [0, 1],
+      since it is compared against crop bounds sampled from [0, 1] -- TODO
+      confirm against the caller.
+    classes: tensor whose first axis is aligned with `boxes`.
+
+  Returns:
+    A tuple (cropped_image, cropped_boxes, cropped_classes). cropped_image is
+    the crop resized to ssd_constants.IMAGE_SIZE x ssd_constants.IMAGE_SIZE;
+    cropped_boxes and cropped_classes contain only the rows kept by the mask.
+  """
+
+  num_boxes = tf.shape(boxes)[0]
+
+  def no_crop_check():
+    """Returns a scalar bool: True with probability P_NO_CROP_PER_PASS."""
+    return (tf.random_uniform(shape=(), minval=0, maxval=1, dtype=tf.float32)
+            < ssd_constants.P_NO_CROP_PER_PASS)
+
+  def no_crop_proposal():
+    """Proposal that keeps the whole image and every box."""
+    return (
+        tf.ones((), tf.bool),
+        tf.convert_to_tensor([0, 0, 1, 1], dtype=tf.float32),
+        tf.ones((num_boxes,), tf.bool),
+    )
+
+  def crop_proposal():
+    """Samples NUM_CROP_PASSES candidate crops and selects a valid one, if any.
+
+    Returns:
+      (use_crop, output_ltrb, output_masks): whether any candidate was valid,
+      the selected crop bounds (all zeros when none was valid), and the
+      per-box keep mask for the selected crop.
+    """
+    # One random column vector of shape (NUM_CROP_PASSES, 1) per call.
+    rand_vec = lambda minval, maxval: tf.random_uniform(
+        shape=(ssd_constants.NUM_CROP_PASSES, 1), minval=minval, maxval=maxval,
+        dtype=tf.float32)
+
+    width, height = rand_vec(0.3, 1), rand_vec(0.3, 1)
+    # The upper bound keeps left + width and top + height within 1.
+    left, top = rand_vec(0, 1-width), rand_vec(0, 1-height)
+
+    right = left + width
+    bottom = top + height
+
+    # NOTE(review): columns 0/2 are named left/right here, but this 4-vector is
+    # later passed unchanged as `boxes` to tf.image.crop_and_resize. The
+    # sampling and the checks below treat both axes identically, so this looks
+    # like a naming question only -- confirm against the box layout in use.
+    ltrb = tf.concat([left, top, right, bottom], axis=1)
+
+    # Pick one IoU threshold at random for this set of candidates.
+    min_iou = tf.random_shuffle(ssd_constants.CROP_MIN_IOU_CHOICES)[0]
+    # Presumably one IoU per (candidate crop, box) pair -- verify against
+    # calc_iou_tensor.
+    ious = calc_iou_tensor(ltrb, boxes)
+
+    # discard any bboxes whose center not in the cropped image
+    xc, yc = [tf.tile(0.5 * (boxes[:, i + 0] + boxes[:, i + 2])[tf.newaxis, :],
+                      (ssd_constants.NUM_CROP_PASSES, 1)) for i in range(2)]
+
+    # masks[p, b] is True when the center of box b lies strictly inside
+    # candidate crop p.
+    masks = tf.reduce_all(tf.stack([
+        tf.greater(xc, tf.tile(left, (1, num_boxes))),
+        tf.less(xc, tf.tile(right, (1, num_boxes))),
+        tf.greater(yc, tf.tile(top, (1, num_boxes))),
+        tf.less(yc, tf.tile(bottom, (1, num_boxes))),
+    ], axis=2), axis=2)
+
+    # Checks of whether a crop is valid.
+    # Aspect ratio must stay strictly between 1/2 and 2.
+    valid_aspect = tf.logical_and(tf.less(height/width, 2),
+                                  tf.less(width/height, 2))
+    # Every box must exceed the chosen IoU threshold with the candidate.
+    valid_ious = tf.reduce_all(tf.greater(ious, min_iou), axis=1, keepdims=True)
+    # At least one box center must fall inside the candidate.
+    valid_masks = tf.reduce_any(masks, axis=1, keepdims=True)
+
+    valid_all = tf.cast(tf.reduce_all(tf.concat(
+        [valid_aspect, valid_ious, valid_masks], axis=1), axis=1), tf.int32)
+
+    # One indexed, as zero is needed for the case of no matches.
+    index = tf.range(1, 1 + ssd_constants.NUM_CROP_PASSES, dtype=tf.int32)
+
+    # Either one-hot, or zeros if there is no valid crop.
+    # The max picks the highest-indexed valid candidate.
+    selection = tf.equal(tf.reduce_max(index * valid_all), index)
+
+    use_crop = tf.reduce_any(selection)
+    # Multiplying by the one-hot selection and summing extracts the chosen row.
+    output_ltrb = tf.reduce_sum(tf.multiply(ltrb, tf.tile(tf.cast(
+        selection, tf.float32)[:, tf.newaxis], (1, 4))), axis=0)
+    output_masks = tf.reduce_any(tf.logical_and(masks, tf.tile(
+        selection[:, tf.newaxis], (1, num_boxes))), axis=0)
+
+    return use_crop, output_ltrb, output_masks
+
+  def proposal(*args):
+    """Loop body: ignores the loop state and draws a fresh proposal."""
+    return tf.cond(
+        pred=no_crop_check(),
+        true_fn=no_crop_proposal,
+        false_fn=crop_proposal,
+    )
+
+  # Keep proposing until the first loop variable (the "accepted" flag) is True.
+  _, crop_bounds, box_masks = tf.while_loop(
+      cond=lambda x, *_: tf.logical_not(x),
+      body=proposal,
+      loop_vars=[tf.zeros((), tf.bool), tf.zeros((4,), tf.float32), tf.zeros((num_boxes,), tf.bool)],
+  )
+
+  filtered_boxes = tf.boolean_mask(boxes, box_masks, axis=0)
+
+  # Clip boxes to the cropped region.
+  filtered_boxes = tf.stack([
+      tf.maximum(filtered_boxes[:, 0], crop_bounds[0]),
+      tf.maximum(filtered_boxes[:, 1], crop_bounds[1]),
+      tf.minimum(filtered_boxes[:, 2], crop_bounds[2]),
+      tf.minimum(filtered_boxes[:, 3], crop_bounds[3]),
+  ], axis=1)
+
+  left = crop_bounds[0]
+  top = crop_bounds[1]
+  width = crop_bounds[2] - left
+  height = crop_bounds[3] - top
+
+  # Re-express the clipped boxes relative to the crop's origin and size.
+  cropped_boxes = tf.stack([
+      (filtered_boxes[:, 0] - left) / width,
+      (filtered_boxes[:, 1] - top) / height,
+      (filtered_boxes[:, 2] - left) / width,
+      (filtered_boxes[:, 3] - top) / height,
+  ], axis=1)
+
+  # A batch axis of size 1 is added for crop_and_resize and removed afterwards.
+  cropped_image = tf.image.crop_and_resize(
+      image=image[tf.newaxis, :, :, :],
+      boxes=crop_bounds[tf.newaxis, :],
+      box_ind=tf.zeros((1,), tf.int32),
+      crop_size=(ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE),
+  )[0, :, :, :]
+
+  cropped_classes = tf.boolean_mask(classes, box_masks, axis=0)
+
+  return cropped_image, cropped_boxes, cropped_classes
+
+
+def color_jitter(image, brightness=0, contrast=0, saturation=0, hue=0):
+  """Distorts the color of the image.
+
+  Args:
+    image: The input image tensor.
+    brightness: A float, specifying the brightness for color jitter.
+    contrast: A float, specifying the contrast for color jitter.
+    saturation: A float, specifying the saturation for color jitter.
+    hue: A float, specifying the hue for color jitter.
+
+  Returns:
+    The distorted image tensor.
+  """
+  with tf.name_scope('distort_color'):
+    if brightness > 0:
+      image = tf.image.random_brightness(image, max_delta=brightness)
+    if contrast > 0:
+      image = tf.image.random_contrast(
+          image, lower=1-contrast, upper=1+contrast)
+    if saturation > 0:
+      image = tf.image.random_saturation(
+          image, lower=1-saturation, upper=1+saturation)
+    if hue > 0:
+      image = tf.image.random_hue(image, max_delta=hue)
+    return image
+
+
+def encode_labels(gt_boxes, gt_labels):
+  """Labels anchors with ground truth inputs.
+
+  Args:
+    gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
+      For each row, it stores [y0, x0, y1, x1] for four corners of a box.
+    gt_labels: A integer tensor with shape [N, 1] representing groundtruth
+      classes.
+  Returns:
+    encoded_classes: a tensor with shape [num_anchors, 1].
+    encoded_boxes: a tensor with shape [num_anchors, 4].
+    num_positives: scalar tensor storing number of positives in an image.
+  """
+  similarity_calc = region_similarity_calculator.IouSimilarity()
+  matcher = argmax_matcher.ArgMaxMatcher(
+      matched_threshold=ssd_constants.MATCH_THRESHOLD,
+      unmatched_threshold=ssd_constants.MATCH_THRESHOLD,
+      negatives_lower_than_unmatched=True,
+      force_match_for_each_row=True)
+
+  box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
+      scale_factors=ssd_constants.BOX_CODER_SCALES)
+
+  default_boxes = box_list.BoxList(tf.convert_to_tensor(DefaultBoxes()('ltrb')))
+  target_boxes = box_list.BoxList(gt_boxes)
+
+  assigner = target_assigner.TargetAssigner(
+      similarity_calc, matcher, box_coder)
+
+  encoded_classes, _, encoded_boxes, _, matches = assigner.assign(
+      default_boxes, target_boxes, gt_labels)
+  num_matched_boxes = tf.reduce_sum(
+      tf.cast(tf.not_equal(matches.match_results, -1), tf.float32))
+  return encoded_classes, encoded_boxes, num_matched_boxes
+
+class SSDInputReader(object):
+  """Input reader for dataset.
+
+  Calling an instance with a params dict builds a batched tf.data pipeline
+  over the TFRecord files matching `file_pattern`. In training mode each
+  element is (image, labels); otherwise each element is a dict of eval
+  features.
+  """
+
+  def __init__(self,
+               file_pattern,
+               transpose_input=False,
+               is_training=False,
+               distributed_eval=False,
+               count=-1):
+    """Stores the reader configuration.
+
+    Args:
+      file_pattern: file pattern handed to tf.data.Dataset.list_files.
+      transpose_input: stored, but not read anywhere in this class.
+      is_training: selects the training pipeline (crop/flip/jitter, shuffle,
+        repeat) instead of the evaluation pipeline.
+      distributed_eval: when True, the file list is sharded across ranks even
+        when not training.
+      count: stored, but not read anywhere in this class.
+    """
+    self._file_pattern = file_pattern
+    self._transpose_input = transpose_input
+    self._is_training = is_training
+    self._distributed_eval = distributed_eval
+    self._count = count
+
+  def __call__(self, params):
+    """Builds the input dataset.
+
+    Args:
+      params: dict; only params['batch_size'] is read here.
+
+    Returns:
+      A batched, prefetched dataset with threading options applied.
+    """
+    example_decoder = tf_example_decoder.TfExampleDecoder()
+
+    def _parse_example(data):
+      """Turns one decoded example into model inputs.
+
+      Returns (image, labels) when training, otherwise a dict keyed by the
+      ssd_constants feature names.
+      """
+      with tf.name_scope('augmentation'):
+        source_id = data['source_id']
+        image = data['image']  # dtype uint8
+        raw_shape = tf.shape(image)
+        boxes = data['groundtruth_boxes']
+        classes = tf.reshape(data['groundtruth_classes'], [-1, 1])
+
+        # Only 80 of the 90 COCO classes are used.
+        class_map = tf.convert_to_tensor(ssd_constants.CLASS_MAP)
+        classes = tf.gather(class_map, classes)
+        classes = tf.cast(classes, dtype=tf.float32)
+
+        if self._is_training:
+          image, boxes, classes = ssd_crop(image, boxes, classes)
+          # ssd_crop resizes and returns image of dtype float32 and does not
+          # change its range (i.e., value in between 0--255). Divide by 255.
+          # converts it to [0, 1] range. Not doing this before cropping to
+          # avoid dtype cast (which incurs additional memory copy).
+          image /= 255.0
+
+          # random_horizontal_flip() is hard coded to flip with 50% chance.
+          image, boxes = preprocessor.random_horizontal_flip(
+              image=image, boxes=boxes)
+
+          # TODO(shibow): Investigate the parameters for color jitter.
+          image = color_jitter(
+              image, brightness=0.125, contrast=0.5, saturation=0.5, hue=0.05)
+
+
+          encoded_classes, encoded_boxes, num_matched_boxes = encode_labels(
+              boxes, classes)
+
+          # TODO(taylorrobie): Check that this cast is valid.
+          encoded_classes = tf.cast(encoded_classes, tf.int32)
+
+          labels = {
+              ssd_constants.NUM_MATCHED_BOXES: num_matched_boxes,
+              ssd_constants.BOXES: encoded_boxes,
+              ssd_constants.CLASSES: tf.squeeze(encoded_classes, axis=1),
+          }
+
+          return image, labels
+
+        else:
+          image = tf.image.resize_images(
+              image, size=(ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE))
+          # resize_image returns image of dtype float32 and does not change its
+          # range. Divide by 255 to convert image to [0, 1] range.
+          image /= 255.
+
+          def trim_and_pad(inp_tensor, dim_1):
+            """Limit the number of boxes, and pad if necessary."""
+            inp_tensor = inp_tensor[:ssd_constants.MAX_NUM_EVAL_BOXES]
+            num_pad = ssd_constants.MAX_NUM_EVAL_BOXES - tf.shape(inp_tensor)[0]
+            inp_tensor = tf.pad(inp_tensor, [[0, num_pad], [0, 0]])
+            # The reshape pins the static shape to [MAX_NUM_EVAL_BOXES, dim_1].
+            return tf.reshape(
+                inp_tensor, [ssd_constants.MAX_NUM_EVAL_BOXES, dim_1])
+
+          boxes, classes = trim_and_pad(boxes, 4), trim_and_pad(classes, 1)
+
+          sample = {
+              ssd_constants.IMAGE: image,
+              ssd_constants.BOXES: boxes,
+              ssd_constants.CLASSES: classes,
+              ssd_constants.SOURCE_ID: tf.string_to_number(source_id, tf.int32),
+              ssd_constants.RAW_SHAPE: raw_shape,
+          }
+
+          return sample
+
+    batch_size = params['batch_size']
+    # shuffle=False keeps the file order deterministic before sharding.
+    dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False)
+
+    if self._is_training or self._distributed_eval:
+      # Shard the file list across ranks. get_rank_size/get_rank_id are defined
+      # outside this class -- presumably the device count and this device's
+      # index; verify against their definitions.
+      if get_rank_size() == 1:
+          dataset = dataset.shard(1, 0)
+      else:
+          dataset = dataset.shard(get_rank_size(), get_rank_id())
+      if self._is_training:
+        # File-level shuffle with a buffer of 256 file names.
+        dataset = dataset.shuffle( tf.to_int64(256))
+
+    # Prefetch data from files.
+    def _prefetch_dataset(filename):
+      """Opens one TFRecord file as a dataset with a prefetch buffer of 1."""
+      dataset = tf.data.TFRecordDataset(filename).prefetch(1)
+      return dataset
+    # sloppy is enabled only for training.
+    dataset = dataset.apply(
+        tf.data.experimental.parallel_interleave(
+            _prefetch_dataset, cycle_length=32, sloppy=self._is_training))
+
+    # Parse the fetched records to input tensors for model function.
+    dataset = dataset.map(example_decoder.decode, num_parallel_calls=64)
+
+    if self._is_training:
+      # Pair every example with a "has at least one groundtruth box" flag and
+      # drop the examples for which the flag is False.
+      dataset = dataset.map(
+          # pylint: disable=g-long-lambda
+          lambda data: (data,
+                        tf.greater(tf.shape(data['groundtruth_boxes'])[0], 0)),
+          num_parallel_calls=64)
+      dataset = dataset.filter(lambda data, pred: pred)
+
+      # Example-level shuffle (buffer of 64), then repeat indefinitely.
+      dataset = dataset.shuffle(64).repeat()
+
+      dataset = dataset.map(lambda data, pred: data) # use the first value
+      dataset = dataset.map(_parse_example, num_parallel_calls=64)
+      dataset = dataset.batch(batch_size=batch_size, drop_remainder=True)
+    else:
+      dataset = dataset.prefetch(batch_size * 64)
+      dataset = dataset.map(_parse_example, num_parallel_calls=64)
+      # NOTE(review): drop_remainder=True discards a final partial batch in
+      # evaluation as well -- confirm this is intended.
+      dataset = dataset.batch(batch_size=batch_size, drop_remainder=True)
+
+    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
+    # Threading options for the whole pipeline.
+    options = tf.data.Options()
+    options.experimental_threading.max_intra_op_parallelism = 1
+    options.experimental_threading.private_threadpool_size = 48
+    dataset = dataset.with_options(options)
+
+    return dataset
@@ -0,0 +1,24 @@
+#!/bin/bash
+export RANK_ID=$1
+export RANK_SIZE=$2
+export DEVICE_ID=$RANK_ID
+export DEVICE_INDEX=$RANK_ID
+export JOB_ID=990
+export FUSION_TENSOR_SIZE=1000000000
+
+python3 ${3}/ssd_main.py --mode=train_and_eval \
+                     --train_batch_size=32 \
+                     --training_file_pattern="train_tfrecord_path/train2017*" \
+                     --resnet_checkpoint=resnet34_path/model.ckpt-28152 \
+                     --validation_file_pattern="val_tfrecord_path/val2017*" \
+                     --val_json_file="annotations_patah/instances_val2017.json" \
+                     --eval_batch_size=32 \
+                     --model_dir=result_npu
+
+
+sleep 2
+echo "**************** train finished ***************"
+cp /var/log/npu/slog/host-0/* ./slog
+cp /var/log/npu/slog/device-$DEVICE_ID/* ./slog
+cp /var/log/npu/slog/device-os-$DEVICE_ID/* ./slog
+
@@ -0,0 +1,14 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
@@ -0,0 +1,199 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Argmax matcher implementation.
+
+This class takes a similarity matrix and matches columns to rows based on the
+maximum value per column. One can specify matched_threshold to prevent
+columns from matching to rows (generally resulting in a negative training
+example) and unmatched_threshold to ignore the match (generally resulting in
+neither a positive nor a negative training example).
+
+This matcher is used in Fast(er)-RCNN.
+
+Note: matchers are used in TargetAssigners. There is a create_target_assigner
+factory function for popular implementations.
+"""
+import tensorflow as tf
+
+from object_detection import matcher
+from object_detection import shape_utils
+
+
+class ArgMaxMatcher(matcher.Matcher):
+  """Matcher based on highest value.
+
+  This class computes matches from a similarity matrix. Each column is matched
+  to a single row.
+
+  To support object detection target assignment this class enables setting both
+  matched_threshold (upper threshold) and unmatched_threshold (lower thresholds)
+  defining three categories of similarity which define whether examples are
+  positive, negative, or ignored:
+  (1) similarity >= matched_threshold: Highest similarity. Matched/Positive!
+  (2) matched_threshold > similarity >= unmatched_threshold: Medium similarity.
+          Depending on negatives_lower_than_unmatched, this is either
+          Unmatched/Negative OR Ignore.
+  (3) unmatched_threshold > similarity: Lowest similarity. Depending on flag
+          negatives_lower_than_unmatched, either Unmatched/Negative OR Ignore.
+  For ignored matches this class sets the values in the Match object to -2.
+  """
+
+  def __init__(self,
+               matched_threshold,
+               unmatched_threshold=None,
+               negatives_lower_than_unmatched=True,
+               force_match_for_each_row=False):
+    """Construct ArgMaxMatcher.
+
+    Args:
+      matched_threshold: Threshold for positive matches. Positive if
+        sim >= matched_threshold, where sim is the maximum value of the
+        similarity matrix for a given column. Set to None for no threshold.
+      unmatched_threshold: Threshold for negative matches. Negative if
+        sim < unmatched_threshold. Defaults to matched_threshold
+        when set to None.
+      negatives_lower_than_unmatched: Boolean which defaults to True. If True
+        then negative matches are the ones below the unmatched_threshold,
+        whereas ignored matches are in between the matched and umatched
+        threshold. If False, then negative matches are in between the matched
+        and unmatched threshold, and everything lower than unmatched is ignored.
+      force_match_for_each_row: If True, ensures that each row is matched to
+        at least one column (which is not guaranteed otherwise if the
+        matched_threshold is high). Defaults to False. See
+        argmax_matcher_test.testMatcherForceMatch() for an example.
+
+    Raises:
+      ValueError: if unmatched_threshold is set but matched_threshold is not set
+        or if unmatched_threshold > matched_threshold.
+    """
+    if (matched_threshold is None) and (unmatched_threshold is not None):
+      raise ValueError('Need to also define matched_threshold when'
+                       'unmatched_threshold is defined')
+    self._matched_threshold = matched_threshold
+    if unmatched_threshold is None:
+      self._unmatched_threshold = matched_threshold
+    else:
+      if unmatched_threshold > matched_threshold:
+        raise ValueError('unmatched_threshold needs to be smaller or equal'
+                         'to matched_threshold')
+      self._unmatched_threshold = unmatched_threshold
+    if not negatives_lower_than_unmatched:
+      if self._unmatched_threshold == self._matched_threshold:
+        raise ValueError('When negatives are in between matched and '
+                         'unmatched thresholds, these cannot be of equal '
+                         'value. matched: %s, unmatched: %s',
+                         self._matched_threshold, self._unmatched_threshold)
+    self._force_match_for_each_row = force_match_for_each_row
+    self._negatives_lower_than_unmatched = negatives_lower_than_unmatched
+
+  def _match(self, similarity_matrix):
+    """Tries to match each column of the similarity matrix to a row.
+
+    Args:
+      similarity_matrix: tensor of shape [N, M] representing any similarity
+        metric.
+
+    Returns:
+      Match object with corresponding matches for each of M columns.
+    """
+
+    def _match_when_rows_are_empty():
+      """Performs matching when the rows of similarity matrix are empty.
+
+      When the rows are empty, all detections are false positives. So we return
+      a tensor of -1's to indicate that the columns do not match to any rows.
+
+      Returns:
+        matches:  int32 tensor indicating the row each column matches to.
+      """
+      similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape(
+          similarity_matrix)
+      return -1 * tf.ones([similarity_matrix_shape[1]], dtype=tf.int32)
+
+    def _match_when_rows_are_non_empty():
+      """Performs matching when the rows of similarity matrix are non empty.
+
+      Returns:
+        matches:  int32 tensor indicating the row each column matches to.
+      """
+      # Matches for each column
+      matches = tf.argmax(similarity_matrix, 0, output_type=tf.int32)
+
+      # Deal with matched and unmatched threshold
+      if self._matched_threshold is not None:
+        # Get logical indices of ignored and unmatched columns as tf.int64
+        matched_vals = tf.reduce_max(similarity_matrix, 0)
+        below_unmatched_threshold = tf.greater(self._unmatched_threshold,
+                                               matched_vals)
+        between_thresholds = tf.logical_and(
+            tf.greater_equal(matched_vals, self._unmatched_threshold),
+            tf.greater(self._matched_threshold, matched_vals))
+
+        if self._negatives_lower_than_unmatched:
+          matches = self._set_values_using_indicator(matches,
+                                                     below_unmatched_threshold,
+                                                     -1)
+          matches = self._set_values_using_indicator(matches,
+                                                     between_thresholds,
+                                                     -2)
+        else:
+          matches = self._set_values_using_indicator(matches,
+                                                     below_unmatched_threshold,
+                                                     -2)
+          matches = self._set_values_using_indicator(matches,
+                                                     between_thresholds,
+                                                     -1)
+
+      if self._force_match_for_each_row:
+        similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape(
+            similarity_matrix)
+        force_match_column_ids = tf.argmax(similarity_matrix, 1,
+                                           output_type=tf.int32)
+        force_match_column_indicators = tf.one_hot(
+            force_match_column_ids, depth=similarity_matrix_shape[1])
+        force_match_row_ids = tf.argmax(force_match_column_indicators, 0,
+                                        output_type=tf.int32)
+        force_match_column_mask = tf.cast(
+            tf.reduce_max(force_match_column_indicators, 0), tf.bool)
+        final_matches = tf.where(force_match_column_mask,
+                                 force_match_row_ids, matches)
+        return final_matches
+      else:
+        return matches
+
+    if similarity_matrix.shape.is_fully_defined():
+      if similarity_matrix.shape[0].value == 0:
+        return _match_when_rows_are_empty()
+      else:
+        return _match_when_rows_are_non_empty()
+    else:
+      return tf.cond(
+          tf.greater(tf.shape(similarity_matrix)[0], 0),
+          _match_when_rows_are_non_empty, _match_when_rows_are_empty)
+
+  def _set_values_using_indicator(self, x, indicator, val):
+    """Set the indicated fields of x to val.
+
+    Args:
+      x: tensor.
+      indicator: boolean with same shape as x.
+      val: scalar with value to set.
+
+    Returns:
+      modified tensor.
+    """
+    indicator = tf.cast(indicator, x.dtype)
+    return tf.add(tf.multiply(x, 1 - indicator), val * indicator)
@@ -0,0 +1,151 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Base box coder.
+
+Box coders convert between coordinate frames, namely image-centric
+(with (0,0) on the top left of image) and anchor-centric (with (0,0) being
+defined by a specific anchor).
+
+Users of a BoxCoder can call two methods:
+ encode: which encodes a box with respect to a given anchor
+  (or rather, a tensor of boxes wrt a corresponding tensor of anchors) and
+ decode: which inverts this encoding with a decode operation.
+In both cases, the arguments are assumed to be in 1-1 correspondence already;
+it is not the job of a BoxCoder to perform matching.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+from abc import abstractproperty
+
+import tensorflow as tf
+
+
+# Box coder types.
+FASTER_RCNN = 'faster_rcnn'
+KEYPOINT = 'keypoint'
+MEAN_STDDEV = 'mean_stddev'
+SQUARE = 'square'
+
+
+class BoxCoder(object):
+  """Abstract base class for box coder."""
+  __metaclass__ = ABCMeta
+
+  @abstractproperty
+  def code_size(self):
+    """Return the size of each code.
+
+    This number is a constant and should agree with the output of the `encode`
+    op (e.g. if rel_codes is the output of self.encode(...), then it should have
+    shape [N, code_size()]).  This abstractproperty should be overridden by
+    implementations.
+
+    Returns:
+      an integer constant
+    """
+    pass
+
+  def encode(self, boxes, anchors):
+    """Encode a box list relative to an anchor collection.
+
+    Args:
+      boxes: BoxList holding N boxes to be encoded
+      anchors: BoxList of N anchors
+
+    Returns:
+      a tensor representing N relative-encoded boxes
+    """
+    with tf.name_scope('Encode'):
+      return self._encode(boxes, anchors)
+
+  def decode(self, rel_codes, anchors):
+    """Decode boxes that are encoded relative to an anchor collection.
+
+    Args:
+      rel_codes: a tensor representing N relative-encoded boxes
+      anchors: BoxList of anchors
+
+    Returns:
+      boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
+        with corners y_min, x_min, y_max, x_max)
+    """
+    with tf.name_scope('Decode'):
+      return self._decode(rel_codes, anchors)
+
+  @abstractmethod
+  def _encode(self, boxes, anchors):
+    """Method to be overriden by implementations.
+
+    Args:
+      boxes: BoxList holding N boxes to be encoded
+      anchors: BoxList of N anchors
+
+    Returns:
+      a tensor representing N relative-encoded boxes
+    """
+    pass
+
+  @abstractmethod
+  def _decode(self, rel_codes, anchors):
+    """Method to be overriden by implementations.
+
+    Args:
+      rel_codes: a tensor representing N relative-encoded boxes
+      anchors: BoxList of anchors
+
+    Returns:
+      boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
+        with corners y_min, x_min, y_max, x_max)
+    """
+    pass
+
+
+def batch_decode(encoded_boxes, box_coder, anchors):
+  """Decode a batch of encoded boxes.
+
+  This op takes a batch of encoded bounding boxes and transforms
+  them to a batch of bounding boxes specified by their corners in
+  the order of [y_min, x_min, y_max, x_max].
+
+  Args:
+    encoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
+      code_size] representing the location of the objects.
+    box_coder: a BoxCoder object.
+    anchors: a BoxList of anchors used to encode `encoded_boxes`.
+
+  Returns:
+    decoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
+      coder_size] representing the corners of the objects in the order
+      of [y_min, x_min, y_max, x_max].
+
+  Raises:
+    ValueError: if batch sizes of the inputs are inconsistent, or if
+    the number of anchors inferred from encoded_boxes and anchors are
+    inconsistent.
+  """
+  encoded_boxes.get_shape().assert_has_rank(3)
+  if encoded_boxes.get_shape()[1].value != anchors.num_boxes_static():
+    raise ValueError('The number of anchors inferred from encoded_boxes'
+                     ' and anchors are inconsistent: shape[1] of encoded_boxes'
+                     ' %s should be equal to the number of anchors: %s.' %
+                     (encoded_boxes.get_shape()[1].value,
+                      anchors.num_boxes_static()))
+
+  decoded_boxes = tf.stack([
+      box_coder.decode(boxes, anchors).get()
+      for boxes in tf.unstack(encoded_boxes)
+  ])
+  return decoded_boxes
@@ -0,0 +1,207 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Bounding Box List definition.
+
+BoxList represents a list of bounding boxes as tensorflow
+tensors, where each bounding box is represented as a row of 4 numbers,
+[y_min, x_min, y_max, x_max].  It is assumed that all bounding boxes
+within a given list correspond to a single image.  See also
+box_list_ops.py for common box related operations (such as area, iou, etc).
+
+Optionally, users can add additional related fields (such as weights).
+We assume the following things to be true about fields:
+* they correspond to boxes in the box_list along the 0th dimension
+* they have inferrable rank at graph construction time
+* all dimensions except for possibly the 0th can be inferred
+  (i.e., not None) at graph construction time.
+
+Some other notes:
+  * Following tensorflow conventions, we use height, width ordering,
+  and correspondingly, y,x (or ymin, xmin, ymax, xmax) ordering
+  * Tensors are always provided as (flat) [N, 4] tensors.
+"""
+
+import tensorflow as tf
+
+
+class BoxList(object):
+  """Box collection."""
+
+  def __init__(self, boxes):
+    """Constructs box collection.
+
+    Args:
+      boxes: a tensor of shape [N, 4] representing box corners
+
+    Raises:
+      ValueError: if invalid dimensions for bbox data or if bbox data is not in
+          float32 format.
+    """
+    if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
+      raise ValueError('Invalid dimensions for box data.')
+    if boxes.dtype != tf.float32:
+      raise ValueError('Invalid tensor type: should be tf.float32')
+    self.data = {'boxes': boxes}
+
+  def num_boxes(self):
+    """Returns number of boxes held in collection.
+
+    Returns:
+      a tensor representing the number of boxes held in the collection.
+    """
+    return tf.shape(self.data['boxes'])[0]
+
+  def num_boxes_static(self):
+    """Returns number of boxes held in collection.
+
+    This number is inferred at graph construction time rather than run-time.
+
+    Returns:
+      Number of boxes held in collection (integer) or None if this is not
+        inferrable at graph construction time.
+    """
+    return self.data['boxes'].get_shape()[0].value
+
+  def get_all_fields(self):
+    """Returns all fields."""
+    return self.data.keys()
+
+  def get_extra_fields(self):
+    """Returns all non-box fields (i.e., everything not named 'boxes')."""
+    return [k for k in self.data.keys() if k != 'boxes']
+
+  def add_field(self, field, field_data):
+    """Add field to box list.
+
+    This method can be used to add related box data such as
+    weights/labels, etc.
+
+    Args:
+      field: a string key to access the data via `get`
+      field_data: a tensor containing the data to store in the BoxList
+    """
+    self.data[field] = field_data
+
+  def has_field(self, field):
+    return field in self.data
+
+  def get(self):
+    """Convenience function for accessing box coordinates.
+
+    Returns:
+      a tensor with shape [N, 4] representing box coordinates.
+    """
+    return self.get_field('boxes')
+
+  def set(self, boxes):
+    """Convenience function for setting box coordinates.
+
+    Args:
+      boxes: a tensor of shape [N, 4] representing box corners
+
+    Raises:
+      ValueError: if invalid dimensions for bbox data
+    """
+    if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
+      raise ValueError('Invalid dimensions for box data.')
+    self.data['boxes'] = boxes
+
+  def get_field(self, field):
+    """Accesses a box collection and associated fields.
+
+    This function returns specified field with object; if no field is specified,
+    it returns the box coordinates.
+
+    Args:
+      field: this optional string parameter can be used to specify
+        a related field to be accessed.
+
+    Returns:
+      a tensor representing the box collection or an associated field.
+
+    Raises:
+      ValueError: if invalid field
+    """
+    if not self.has_field(field):
+      raise ValueError('field ' + str(field) + ' does not exist')
+    return self.data[field]
+
+  def set_field(self, field, value):
+    """Sets the value of a field.
+
+    Updates the field of a box_list with a given value.
+
+    Args:
+      field: (string) name of the field to set value.
+      value: the value to assign to the field.
+
+    Raises:
+      ValueError: if the box_list does not have specified field.
+    """
+    if not self.has_field(field):
+      raise ValueError('field %s does not exist' % field)
+    self.data[field] = value
+
+  def get_center_coordinates_and_sizes(self, scope=None):
+    """Computes the center coordinates, height and width of the boxes.
+
+    Args:
+      scope: name scope of the function.
+
+    Returns:
+      a list of 4 1-D tensors [ycenter, xcenter, height, width].
+    """
+    with tf.name_scope(scope, 'get_center_coordinates_and_sizes'):
+      box_corners = self.get()
+      ymin, xmin, ymax, xmax = tf.unstack(tf.transpose(box_corners))
+      width = xmax - xmin
+      height = ymax - ymin
+      ycenter = ymin + height / 2.
+      xcenter = xmin + width / 2.
+      return [ycenter, xcenter, height, width]
+
+  def transpose_coordinates(self, scope=None):
+    """Transpose the coordinate representation in a boxlist.
+
+    Args:
+      scope: name scope of the function.
+    """
+    with tf.name_scope(scope, 'transpose_coordinates'):
+      y_min, x_min, y_max, x_max = tf.split(
+          value=self.get(), num_or_size_splits=4, axis=1)
+      self.set(tf.concat([x_min, y_min, x_max, y_max], 1))
+
+  def as_tensor_dict(self, fields=None):
+    """Retrieves specified fields as a dictionary of tensors.
+
+    Args:
+      fields: (optional) list of fields to return in the dictionary.
+        If None (default), all fields are returned.
+
+    Returns:
+      tensor_dict: A dictionary of tensors specified by fields.
+
+    Raises:
+      ValueError: if specified field is not contained in boxlist.
+    """
+    tensor_dict = {}
+    if fields is None:
+      fields = self.get_all_fields()
+    for field in fields:
+      if not self.has_field(field):
+        raise ValueError('boxlist must contain all specified fields')
+      tensor_dict[field] = self.get_field(field)
+    return tensor_dict
@@ -0,0 +1,118 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Faster RCNN box coder.
+
+Faster RCNN box coder follows the coding schema described below:
+  ty = (y - ya) / ha
+  tx = (x - xa) / wa
+  th = log(h / ha)
+  tw = log(w / wa)
+  where x, y, w, h denote the box's center coordinates, width and height
+  respectively. Similarly, xa, ya, wa, ha denote the anchor's center
+  coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
+  center, width and height respectively.
+
+  See http://arxiv.org/abs/1506.01497 for details.
+"""
+
+import tensorflow as tf
+
+from object_detection import box_coder
+from object_detection import box_list
+
+# Small constant added to the box and anchor sizes in
+# FasterRcnnBoxCoder._encode so its divisions and logarithms avoid NaN.
+EPSILON = 1e-8
+
+
+class FasterRcnnBoxCoder(box_coder.BoxCoder):
+  """Faster RCNN box coder."""
+
+  def __init__(self, scale_factors=None):
+    """Constructor for FasterRcnnBoxCoder.
+
+    Args:
+      scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
+        If set to None, does not perform scaling. For Faster RCNN,
+        the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0].
+    """
+    if scale_factors:
+      assert len(scale_factors) == 4
+      for scalar in scale_factors:
+        assert scalar > 0
+    self._scale_factors = scale_factors
+
+  @property
+  def code_size(self):
+    return 4
+
+  def _encode(self, boxes, anchors):
+    """Encode a box collection with respect to anchor collection.
+
+    Args:
+      boxes: BoxList holding N boxes to be encoded.
+      anchors: BoxList of anchors.
+
+    Returns:
+      a tensor representing N anchor-encoded boxes of the format
+      [ty, tx, th, tw].
+    """
+    # Convert anchors to the center coordinate representation.
+    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
+    ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
+    # Avoid NaN in division and log below.
+    ha += EPSILON
+    wa += EPSILON
+    h += EPSILON
+    w += EPSILON
+
+    tx = (xcenter - xcenter_a) / wa
+    ty = (ycenter - ycenter_a) / ha
+    tw = tf.log(w / wa)
+    th = tf.log(h / ha)
+    # Scales location targets as used in paper for joint training.
+    if self._scale_factors:
+      ty *= self._scale_factors[0]
+      tx *= self._scale_factors[1]
+      th *= self._scale_factors[2]
+      tw *= self._scale_factors[3]
+    return tf.transpose(tf.stack([ty, tx, th, tw]))
+
+  def _decode(self, rel_codes, anchors):
+    """Decode relative codes to boxes.
+
+    Args:
+      rel_codes: a tensor representing N anchor-encoded boxes.
+      anchors: BoxList of anchors.
+
+    Returns:
+      boxes: BoxList holding N bounding boxes.
+    """
+    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
+
+    ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))
+    if self._scale_factors:
+      ty /= self._scale_factors[0]
+      tx /= self._scale_factors[1]
+      th /= self._scale_factors[2]
+      tw /= self._scale_factors[3]
+    w = tf.exp(tw) * wa
+    h = tf.exp(th) * ha
+    ycenter = ty * ha + ycenter_a
+    xcenter = tx * wa + xcenter_a
+    ymin = ycenter - h / 2.
+    xmin = xcenter - w / 2.
+    ymax = ycenter + h / 2.
+    xmax = xcenter + w / 2.
+    return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
@@ -0,0 +1,241 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Matcher interface and Match class.
+
+This module defines the Matcher interface and the Match object. The job of the
+matcher is to match row and column indices based on the similarity matrix and
+other optional parameters. Each column is matched to at most one row. There
+are three possibilities for the matching:
+
+1) match: A column matches a row.
+2) no_match: A column does not match any row.
+3) ignore: A column that is neither 'match' nor no_match.
+
+The ignore case is regularly encountered in object detection: when an anchor has
+a relatively small overlap with a ground-truth box, one neither wants to
+consider this box a positive example (match) nor a negative example (no match).
+
+The Match class is used to store the match results and it provides simple apis
+to query the results.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow as tf
+
+
+class Match(object):
+  """Class to store results from the matcher.
+
+  This class is used to store the results from the matcher. It provides
+  convenient methods to query the matching results.
+  """
+
+  def __init__(self, match_results):
+    """Constructs a Match object.
+
+    Args:
+      match_results: Integer tensor of shape [N] with (1) match_results[i]>=0,
+        meaning that column i is matched with row match_results[i].
+        (2) match_results[i]=-1, meaning that column i is not matched.
+        (3) match_results[i]=-2, meaning that column i is ignored.
+
+    Raises:
+      ValueError: if match_results does not have rank 1 or is not an
+        integer int32 scalar tensor
+    """
+    if match_results.shape.ndims != 1:
+      raise ValueError('match_results should have rank 1')
+    if match_results.dtype != tf.int32:
+      raise ValueError('match_results should be an int32 or int64 scalar '
+                       'tensor')
+    self._match_results = match_results
+
+  @property
+  def match_results(self):
+    """The accessor for match results.
+
+    Returns:
+      the tensor which encodes the match results.
+    """
+    return self._match_results
+
+  def matched_column_indices(self):
+    """Returns column indices that match to some row.
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(tf.greater(self._match_results, -1)))
+
+  def matched_column_indicator(self):
+    """Returns column indices that are matched.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return tf.greater_equal(self._match_results, 0)
+
+  def num_matched_columns(self):
+    """Returns number (int32 scalar tensor) of matched columns."""
+    return tf.size(self.matched_column_indices())
+
+  def unmatched_column_indices(self):
+    """Returns column indices that do not match any row.
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(tf.equal(self._match_results, -1)))
+
+  def unmatched_column_indicator(self):
+    """Returns column indices that are unmatched.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return tf.equal(self._match_results, -1)
+
+  def num_unmatched_columns(self):
+    """Returns number (int32 scalar tensor) of unmatched columns."""
+    return tf.size(self.unmatched_column_indices())
+
+  def ignored_column_indices(self):
+    """Returns column indices that are ignored (neither Matched nor Unmatched).
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(self.ignored_column_indicator()))
+
+  def ignored_column_indicator(self):
+    """Returns boolean column indicator where True means the colum is ignored.
+
+    Returns:
+      column_indicator: boolean vector which is True for all ignored column
+      indices.
+    """
+    return tf.equal(self._match_results, -2)
+
+  def num_ignored_columns(self):
+    """Returns number (int32 scalar tensor) of matched columns."""
+    return tf.size(self.ignored_column_indices())
+
+  def unmatched_or_ignored_column_indices(self):
+    """Returns column indices that are unmatched or ignored.
+
+    The indices returned by this op are always sorted in increasing order.
+
+    Returns:
+      column_indices: int32 tensor of shape [K] with column indices.
+    """
+    return self._reshape_and_cast(tf.where(tf.greater(0, self._match_results)))
+
+  def matched_row_indices(self):
+    """Returns row indices that match some column.
+
+    The indices returned by this op are ordered so as to be in correspondence
+    with the output of matched_column_indicator().  For example if
+    self.matched_column_indicator() is [0,2], and self.matched_row_indices() is
+    [7, 3], then we know that column 0 was matched to row 7 and column 2 was
+    matched to row 3.
+
+    Returns:
+      row_indices: int32 tensor of shape [K] with row indices.
+    """
+    return self._reshape_and_cast(
+        tf.gather(self._match_results, self.matched_column_indices()))
+
+  def _reshape_and_cast(self, t):
+    return tf.cast(tf.reshape(t, [-1]), tf.int32)
+
+  def gather_based_on_match(self, input_tensor, unmatched_value,
+                            ignored_value):
+    """Gathers elements from `input_tensor` based on match results.
+
+    For columns that are matched to a row, gathered_tensor[col] is set to
+    input_tensor[match_results[col]]. For columns that are unmatched,
+    gathered_tensor[col] is set to unmatched_value. Finally, for columns that
+    are ignored gathered_tensor[col] is set to ignored_value.
+
+    Note that the input_tensor.shape[1:] must match with unmatched_value.shape
+    and ignored_value.shape
+
+    Args:
+      input_tensor: Tensor to gather values from.
+      unmatched_value: Constant tensor value for unmatched columns.
+      ignored_value: Constant tensor value for ignored columns.
+
+    Returns:
+      gathered_tensor: A tensor containing values gathered from input_tensor.
+        The shape of the gathered tensor is [match_results.shape[0]] +
+        input_tensor.shape[1:].
+    """
+    input_tensor = tf.concat([tf.stack([ignored_value, unmatched_value]),
+                              input_tensor], axis=0)
+    gather_indices = tf.maximum(self.match_results + 2, 0)
+    gathered_tensor = tf.gather(input_tensor, gather_indices)
+    return gathered_tensor
+
+
+class Matcher(object):
+  """Abstract base class for matcher.
+  """
+  __metaclass__ = ABCMeta
+
+  def match(self, similarity_matrix, scope=None, **params):
+    """Computes matches among row and column indices and returns the result.
+
+    Computes matches among the row and column indices based on the similarity
+    matrix and optional arguments.
+
+    Args:
+      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
+        where higher value means more similar.
+      scope: Op scope name. Defaults to 'Match' if None.
+      **params: Additional keyword arguments for specific implementations of
+        the Matcher.
+
+    Returns:
+      A Match object with the results of matching.
+    """
+    with tf.name_scope(scope, 'Match', [similarity_matrix, params]) as scope:
+      return Match(self._match(similarity_matrix, **params))
+
+  @abstractmethod
+  def _match(self, similarity_matrix, **params):
+    """Method to be overridden by implementations.
+
+    Args:
+      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
+        where higher value means more similar.
+      **params: Additional keyword arguments for specific implementations of
+        the Matcher.
+
+    Returns:
+      match_results: Integer tensor of shape [M]: match_results[i]>=0 means
+        that column i is matched to row match_results[i], match_results[i]=-1
+        means that the column is not matched. match_results[i]=-2 means that
+        the column is ignored (usually this happens when there is a very weak
+        match which one neither wants as positive nor negative example).
+    """
+    pass
@@ -0,0 +1,442 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Preprocess images and bounding boxes for detection.
+
+We perform two sets of operations in preprocessing stage:
+(a) operations that are applied to both training and testing data,
+(b) operations that are applied only to training data for the purpose of
+    data augmentation.
+
+A preprocessing function receives a set of inputs,
+e.g. an image and bounding boxes,
+performs an operation on them, and returns them.
+Some examples are: randomly cropping the image, randomly mirroring the image,
+                   randomly changing the brightness, contrast, hue and
+                   randomly jittering the bounding boxes.
+
+The image is a rank 4 tensor: [1, height, width, channels] with
+dtype=tf.float32. The groundtruth_boxes is a rank 2 tensor: [N, 4] where
+in each row there is a box with [ymin xmin ymax xmax].
+Boxes are in normalized coordinates meaning
+their coordinate values range in [0, 1]
+
+Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing
+functions receive a rank 3 tensor for processing the image. Thus, inside the
+preprocess function we squeeze the image to become a rank 3 tensor and then
+we pass it to the functions. At the end of the preprocess we expand the image
+back to rank 4.
+"""
+
+import tensorflow as tf
+
+from object_detection import box_list
+
+
+def _flip_boxes_left_right(boxes):
+  """Left-right flip the boxes.
+
+  Args:
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+
+  Returns:
+    Flipped boxes.
+  """
+  ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
+  flipped_xmin = tf.subtract(1.0, xmax)
+  flipped_xmax = tf.subtract(1.0, xmin)
+  flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], 1)
+  return flipped_boxes
+
+
+def _flip_masks_left_right(masks):
+  """Left-right flip masks.
+
+  Args:
+    masks: rank 3 float32 tensor with shape
+      [num_instances, height, width] representing instance masks.
+
+  Returns:
+    flipped masks: rank 3 float32 tensor with shape
+      [num_instances, height, width] representing instance masks.
+  """
+  return masks[:, :, ::-1]
+
+
+def keypoint_flip_horizontal(keypoints, flip_point, flip_permutation,
+                             scope=None):
+  """Flips the keypoints horizontally around the flip_point.
+
+  This operation flips the x coordinate for each keypoint around the flip_point
+  and also permutes the keypoints in a manner specified by flip_permutation.
+
+  Args:
+    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+    flip_point:  (float) scalar tensor representing the x coordinate to flip the
+      keypoints around.
+    flip_permutation: rank 1 int32 tensor containing the keypoint flip
+      permutation. This specifies the mapping from original keypoint indices
+      to the flipped keypoint indices. This is used primarily for keypoints
+      that are not reflection invariant. E.g. Suppose there are 3 keypoints
+      representing ['head', 'right_eye', 'left_eye'], then a logical choice for
+      flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
+      and 'right_eye' after a horizontal flip.
+    scope: name scope.
+
+  Returns:
+    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+  """
+  with tf.name_scope(scope, 'FlipHorizontal'):
+    keypoints = tf.transpose(keypoints, [1, 0, 2])
+    keypoints = tf.gather(keypoints, flip_permutation)
+    v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
+    u = flip_point * 2.0 - u
+    new_keypoints = tf.concat([v, u], 2)
+    new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
+    return new_keypoints
+
+
+def random_horizontal_flip(image,
+                           boxes=None,
+                           masks=None,
+                           keypoints=None,
+                           keypoint_flip_permutation=None,
+                           seed=None):
+  """Randomly flips the image and detections horizontally.
+
+  The probability of flipping the image is 50%.
+
+  Args:
+    image: rank 3 float32 tensor with shape [height, width, channels].
+    boxes: (optional) rank 2 float32 tensor with shape [N, 4]
+           containing the bounding boxes.
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+           Each row is in the form of [ymin, xmin, ymax, xmax].
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks. The masks
+           are of the same height, width as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]. The keypoints are in y-x
+               normalized coordinates.
+    keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
+                               permutation.
+    seed: random seed
+
+  Returns:
+    image: image which is the same shape as input image.
+
+    If boxes, masks, keypoints, and keypoint_flip_permutation are not None,
+    the function also returns the following tensors.
+
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+           Boxes are in normalized form meaning their coordinates vary
+           between [0, 1].
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+    keypoints: rank 3 float32 tensor with shape
+               [num_instances, num_keypoints, 2]
+
+  Raises:
+    ValueError: if keypoints are provided but keypoint_flip_permutation is not.
+  """
+
+  def _flip_image(image):
+    # flip image
+    image_flipped = tf.image.flip_left_right(image)
+    return image_flipped
+
+  if keypoints is not None and keypoint_flip_permutation is None:
+    raise ValueError(
+        'keypoints are provided but keypoints_flip_permutation is not provided')
+
+  with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]):
+    result = []
+    # random variable defining whether to do flip or not
+    do_a_flip_random = tf.greater(tf.random_uniform([], seed=seed), 0.5)
+
+    # flip image
+    image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
+    result.append(image)
+
+    # flip boxes
+    if boxes is not None:
+      boxes = tf.cond(do_a_flip_random, lambda: _flip_boxes_left_right(boxes),
+                      lambda: boxes)
+      result.append(boxes)
+
+    # flip masks
+    if masks is not None:
+      masks = tf.cond(do_a_flip_random, lambda: _flip_masks_left_right(masks),
+                      lambda: masks)
+      result.append(masks)
+
+    # flip keypoints
+    if keypoints is not None and keypoint_flip_permutation is not None:
+      permutation = keypoint_flip_permutation
+      keypoints = tf.cond(
+          do_a_flip_random,
+          lambda: keypoint_flip_horizontal(keypoints, 0.5, permutation),
+          lambda: keypoints)
+      result.append(keypoints)
+
+    return tuple(result)
+
+
+def _compute_new_static_size(image, min_dimension, max_dimension):
+  """Compute new static shape for resize_to_range method."""
+  image_shape = image.get_shape().as_list()
+  orig_height = image_shape[0]
+  orig_width = image_shape[1]
+  num_channels = image_shape[2]
+  orig_min_dim = min(orig_height, orig_width)
+  # Calculates the larger of the possible sizes
+  large_scale_factor = min_dimension / float(orig_min_dim)
+  # Scaling orig_(height|width) by large_scale_factor will make the smaller
+  # dimension equal to min_dimension, save for floating point rounding errors.
+  # For reasonably-sized images, taking the nearest integer will reliably
+  # eliminate this error.
+  large_height = int(round(orig_height * large_scale_factor))
+  large_width = int(round(orig_width * large_scale_factor))
+  large_size = [large_height, large_width]
+  if max_dimension:
+    # Calculates the smaller of the possible sizes, use that if the larger
+    # is too big.
+    orig_max_dim = max(orig_height, orig_width)
+    small_scale_factor = max_dimension / float(orig_max_dim)
+    # Scaling orig_(height|width) by small_scale_factor will make the larger
+    # dimension equal to max_dimension, save for floating point rounding
+    # errors. For reasonably-sized images, taking the nearest integer will
+    # reliably eliminate this error.
+    small_height = int(round(orig_height * small_scale_factor))
+    small_width = int(round(orig_width * small_scale_factor))
+    small_size = [small_height, small_width]
+    new_size = large_size
+    if max(large_size) > max_dimension:
+      new_size = small_size
+  else:
+    new_size = large_size
+  return tf.constant(new_size + [num_channels])
+
+
+def _compute_new_dynamic_size(image, min_dimension, max_dimension):
+  """Compute new dynamic shape for resize_to_range method."""
+  image_shape = tf.shape(image)
+  orig_height = tf.to_float(image_shape[0])
+  orig_width = tf.to_float(image_shape[1])
+  num_channels = image_shape[2]
+  orig_min_dim = tf.minimum(orig_height, orig_width)
+  # Calculates the larger of the possible sizes
+  min_dimension = tf.constant(min_dimension, dtype=tf.float32)
+  large_scale_factor = min_dimension / orig_min_dim
+  # Scaling orig_(height|width) by large_scale_factor will make the smaller
+  # dimension equal to min_dimension, save for floating point rounding errors.
+  # For reasonably-sized images, taking the nearest integer will reliably
+  # eliminate this error.
+  large_height = tf.to_int32(tf.round(orig_height * large_scale_factor))
+  large_width = tf.to_int32(tf.round(orig_width * large_scale_factor))
+  large_size = tf.stack([large_height, large_width])
+  if max_dimension:
+    # Calculates the smaller of the possible sizes, use that if the larger
+    # is too big.
+    orig_max_dim = tf.maximum(orig_height, orig_width)
+    max_dimension = tf.constant(max_dimension, dtype=tf.float32)
+    small_scale_factor = max_dimension / orig_max_dim
+    # Scaling orig_(height|width) by small_scale_factor will make the larger
+    # dimension equal to max_dimension, save for floating point rounding
+    # errors. For reasonably-sized images, taking the nearest integer will
+    # reliably eliminate this error.
+    small_height = tf.to_int32(tf.round(orig_height * small_scale_factor))
+    small_width = tf.to_int32(tf.round(orig_width * small_scale_factor))
+    small_size = tf.stack([small_height, small_width])
+    new_size = tf.cond(
+        tf.to_float(tf.reduce_max(large_size)) > max_dimension,
+        lambda: small_size, lambda: large_size)
+  else:
+    new_size = large_size
+  return tf.stack(tf.unstack(new_size) + [num_channels])
+
+
+def resize_to_range(image,
+                    masks=None,
+                    min_dimension=None,
+                    max_dimension=None,
+                    method=tf.image.ResizeMethod.BILINEAR,
+                    align_corners=False,
+                    pad_to_max_dimension=False):
+  """Resizes an image so its dimensions are within the provided value.
+
+  The output size can be described by two cases:
+  1. If the image can be rescaled so its minimum dimension is equal to the
+     provided value without the other dimension exceeding max_dimension,
+     then do so.
+  2. Otherwise, resize so the largest dimension is equal to max_dimension.
+
+  Args:
+    image: A 3D tensor of shape [height, width, channels]
+    masks: (optional) rank 3 float32 tensor with shape
+           [num_instances, height, width] containing instance masks.
+    min_dimension: (optional) (scalar) desired size of the smaller image
+                   dimension.
+    max_dimension: (optional) (scalar) maximum allowed size
+                   of the larger image dimension.
+    method: (optional) interpolation method used in resizing. Defaults to
+            BILINEAR.
+    align_corners: bool. If true, exactly align all 4 corners of the input
+                   and output. Defaults to False.
+    pad_to_max_dimension: Whether to resize the image and pad it with zeros
+      so the resulting image is of the spatial size
+      [max_dimension, max_dimension]. If masks are included they are padded
+      similarly.
+
+  Returns:
+    Note that the position of the resized_image_shape changes based on whether
+    masks are present.
+    resized_image: A 3D tensor of shape [new_height, new_width, channels],
+      where the image has been resized (with bilinear interpolation) so that
+      min(new_height, new_width) == min_dimension or
+      max(new_height, new_width) == max_dimension.
+    resized_masks: If masks is not None, also outputs masks. A 3D tensor of
+      shape [num_instances, new_height, new_width].
+    resized_image_shape: A 1D tensor of shape [3] containing shape of the
+      resized image.
+
+  Raises:
+    ValueError: if the image is not a 3D tensor.
+  """
+  if len(image.get_shape()) != 3:
+    raise ValueError('Image should be 3D tensor')
+
+  with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
+    if image.get_shape().is_fully_defined():
+      new_size = _compute_new_static_size(image, min_dimension, max_dimension)
+    else:
+      new_size = _compute_new_dynamic_size(image, min_dimension, max_dimension)
+    new_image = tf.image.resize_images(
+        image, new_size[:-1], method=method, align_corners=align_corners)
+
+    if pad_to_max_dimension:
+      new_image = tf.image.pad_to_bounding_box(
+          new_image, 0, 0, max_dimension, max_dimension)
+
+    result = [new_image]
+    if masks is not None:
+      new_masks = tf.expand_dims(masks, 3)
+      new_masks = tf.image.resize_images(
+          new_masks,
+          new_size[:-1],
+          method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
+          align_corners=align_corners)
+      new_masks = tf.squeeze(new_masks, 3)
+      if pad_to_max_dimension:
+        new_masks = tf.image.pad_to_bounding_box(
+            new_masks, 0, 0, max_dimension, max_dimension)
+      result.append(new_masks)
+
+    result.append(new_size)
+    return result
+
+
+def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
+  """Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to.
+
+  Args:
+    boxlist_to_copy_to: BoxList to which extra fields are copied.
+    boxlist_to_copy_from: BoxList from which fields are copied.
+
+  Returns:
+    boxlist_to_copy_to with extra fields.
+  """
+  for field in boxlist_to_copy_from.get_extra_fields():
+    boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field))
+  return boxlist_to_copy_to
+
+
+def box_list_scale(boxlist, y_scale, x_scale, scope=None):
+  """scale box coordinates in x and y dimensions.
+
+  Args:
+    boxlist: BoxList holding N boxes
+    y_scale: (float) scalar tensor
+    x_scale: (float) scalar tensor
+    scope: name scope.
+
+  Returns:
+    boxlist: BoxList holding N boxes
+  """
+  with tf.name_scope(scope, 'Scale'):
+    y_scale = tf.cast(y_scale, tf.float32)
+    x_scale = tf.cast(x_scale, tf.float32)
+    y_min, x_min, y_max, x_max = tf.split(
+        value=boxlist.get(), num_or_size_splits=4, axis=1)
+    y_min = y_scale * y_min
+    y_max = y_scale * y_max
+    x_min = x_scale * x_min
+    x_max = x_scale * x_max
+    scaled_boxlist = box_list.BoxList(
+        tf.concat([y_min, x_min, y_max, x_max], 1))
+    return _copy_extra_fields(scaled_boxlist, boxlist)
+
+
+def keypoint_scale(keypoints, y_scale, x_scale, scope=None):
+  """Scales keypoint coordinates in x and y dimensions.
+
+  Args:
+    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+    y_scale: (float) scalar tensor
+    x_scale: (float) scalar tensor
+    scope: name scope.
+
+  Returns:
+    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+  """
+  with tf.name_scope(scope, 'Scale'):
+    y_scale = tf.cast(y_scale, tf.float32)
+    x_scale = tf.cast(x_scale, tf.float32)
+    new_keypoints = keypoints * [[[y_scale, x_scale]]]
+    return new_keypoints
+
+
+def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
+  """Scales boxes from normalized to pixel coordinates.
+
+  Args:
+    image: A 3D float32 tensor of shape [height, width, channels].
+    boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding
+      boxes in normalized coordinates. Each row is of the form
+      [ymin, xmin, ymax, xmax].
+    keypoints: (optional) rank 3 float32 tensor with shape
+      [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
+      coordinates.
+
+  Returns:
+    image: unchanged input image.
+    scaled_boxes: a 2D float32 tensor of shape [num_boxes, 4] containing the
+      bounding boxes in pixel coordinates.
+    scaled_keypoints: a 3D float32 tensor with shape
+      [num_instances, num_keypoints, 2] containing the keypoints in pixel
+      coordinates.
+  """
+  boxlist = box_list.BoxList(boxes)
+  image_height = tf.shape(image)[0]
+  image_width = tf.shape(image)[1]
+  scaled_boxes = box_list_scale(boxlist, image_height, image_width).get()
+  result = [image, scaled_boxes]
+  if keypoints is not None:
+    scaled_keypoints = keypoint_scale(keypoints, image_height, image_width)
+    result.append(scaled_keypoints)
+  return tuple(result)
@@ -0,0 +1,135 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Region Similarity Calculators for BoxLists.
+
+Region Similarity Calculators compute a pairwise measure of similarity
+between the boxes in two BoxLists.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow as tf
+
+
+def area(boxlist, scope=None):
+  """Computes area of boxes.
+
+  Args:
+    boxlist: BoxList holding N boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N] representing box areas.
+  """
+  with tf.name_scope(scope, 'Area'):
+    y_min, x_min, y_max, x_max = tf.split(
+        value=boxlist.get(), num_or_size_splits=4, axis=1)
+    return tf.squeeze((y_max - y_min) * (x_max - x_min), [1])
+
+
+def intersection(boxlist1, boxlist2, scope=None):
+  """Compute pairwise intersection areas between boxes.
+
+  Args:
+    boxlist1: BoxList holding N boxes
+    boxlist2: BoxList holding M boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N, M] representing pairwise intersections
+  """
+  with tf.name_scope(scope, 'Intersection'):
+    y_min1, x_min1, y_max1, x_max1 = tf.split(
+        value=boxlist1.get(), num_or_size_splits=4, axis=1)
+    y_min2, x_min2, y_max2, x_max2 = tf.split(
+        value=boxlist2.get(), num_or_size_splits=4, axis=1)
+    all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(y_max2))
+    all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(y_min2))
+    intersect_heights = tf.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin)
+    all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(x_max2))
+    all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(x_min2))
+    intersect_widths = tf.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin)
+    return intersect_heights * intersect_widths
+
+
+def iou(boxlist1, boxlist2, scope=None):
+  """Computes pairwise intersection-over-union between box collections.
+
+  Args:
+    boxlist1: BoxList holding N boxes
+    boxlist2: BoxList holding M boxes
+    scope: name scope.
+
+  Returns:
+    a tensor with shape [N, M] representing pairwise iou scores.
+  """
+  with tf.name_scope(scope, 'IOU'):
+    intersections = intersection(boxlist1, boxlist2)
+    areas1 = area(boxlist1)
+    areas2 = area(boxlist2)
+    unions = (
+        tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections)
+    return tf.where(
+        tf.equal(intersections, 0.0),
+        tf.zeros_like(intersections), tf.truediv(intersections, unions))
+
+
+class RegionSimilarityCalculator(object):
+  """Abstract base class for region similarity calculator."""
+  __metaclass__ = ABCMeta
+
+  def compare(self, boxlist1, boxlist2, scope=None):
+    """Computes matrix of pairwise similarity between BoxLists.
+
+    This op (to be overriden) computes a measure of pairwise similarity between
+    the boxes in the given BoxLists. Higher values indicate more similarity.
+
+    Note that this method simply measures similarity and does not explicitly
+    perform a matching.
+
+    Args:
+      boxlist1: BoxList holding N boxes.
+      boxlist2: BoxList holding M boxes.
+      scope: Op scope name. Defaults to 'Compare' if None.
+
+    Returns:
+      a (float32) tensor of shape [N, M] with pairwise similarity score.
+    """
+    with tf.name_scope(scope, 'Compare', [boxlist1, boxlist2]) as scope:
+      return self._compare(boxlist1, boxlist2)
+
+  @abstractmethod
+  def _compare(self, boxlist1, boxlist2):
+    pass
+
+
+class IouSimilarity(RegionSimilarityCalculator):
+  """Class to compute similarity based on Intersection over Union (IOU) metric.
+
+  This class computes pairwise similarity between two BoxLists based on IOU.
+  """
+
+  def _compare(self, boxlist1, boxlist2):
+    """Compute pairwise IOU similarity between the two BoxLists.
+
+    Args:
+      boxlist1: BoxList holding N boxes.
+      boxlist2: BoxList holding M boxes.
+
+    Returns:
+      A tensor with shape [N, M] representing pairwise iou scores.
+    """
+    return iou(boxlist1, boxlist2)
@@ -0,0 +1,70 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Utils used to manipulate tensor shapes."""
+
+import tensorflow as tf
+
+
+def assert_shape_equal(shape_a, shape_b):
+  """Asserts that shape_a and shape_b are equal.
+
+  If the shapes are static, raises a ValueError when the shapes
+  mismatch.
+
+  If the shapes are dynamic, raises a tf InvalidArgumentError when the shapes
+  mismatch.
+
+  Args:
+    shape_a: a list containing shape of the first tensor.
+    shape_b: a list containing shape of the second tensor.
+
+  Returns:
+    Either a tf.no_op() when shapes are all static and a tf.assert_equal() op
+    when the shapes are dynamic.
+
+  Raises:
+    ValueError: When shapes are both static and unequal.
+  """
+  if (all(isinstance(dim, int) for dim in shape_a) and
+      all(isinstance(dim, int) for dim in shape_b)):
+    if shape_a != shape_b:
+      raise ValueError('Unequal shapes {}, {}'.format(shape_a, shape_b))
+    else: return tf.no_op()
+  else:
+    return tf.assert_equal(shape_a, shape_b)
+
+
+def combined_static_and_dynamic_shape(tensor):
+  """Returns a list containing static and dynamic values for the dimensions.
+
+  Returns a list of static and dynamic values for shape dimensions. This is
+  useful to preserve static shapes when available in reshape operation.
+
+  Args:
+    tensor: A tensor of any type.
+
+  Returns:
+    A list of size tensor.shape.ndims containing integers or a scalar tensor.
+  """
+  static_tensor_shape = tensor.shape.as_list()
+  dynamic_tensor_shape = tf.shape(tensor)
+  combined_shape = []
+  for index, dim in enumerate(static_tensor_shape):
+    if dim is not None:
+      combined_shape.append(dim)
+    else:
+      combined_shape.append(dynamic_tensor_shape[index])
+  return combined_shape
@@ -0,0 +1,310 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Base target assigner module.
+
+The job of a TargetAssigner is, for a given set of anchors (bounding boxes) and
+groundtruth detections (bounding boxes), to assign classification and regression
+targets to each anchor as well as weights to each anchor (specifying, e.g.,
+which anchors should not contribute to training loss).
+
+It assigns classification/regression targets by performing the following steps:
+1) Computing pairwise similarity between anchors and groundtruth boxes using a
+  provided RegionSimilarity Calculator
+2) Computing a matching based on the similarity matrix using a provided Matcher
+3) Assigning regression targets based on the matching and a provided BoxCoder
+4) Assigning classification targets based on the matching and groundtruth labels
+
+Note that TargetAssigners only operate on detections from a single
+image at a time, so any logic for applying a TargetAssigner to multiple
+images must be handled externally.
+"""
+import tensorflow as tf
+
+from object_detection import box_list
+from object_detection import shape_utils
+
+
+KEYPOINTS_FIELD_NAME = 'keypoints'
+
+
+class TargetAssigner(object):
+  """Target assigner to compute classification and regression targets.
+
+  Combines a similarity calculator, a matcher and a box coder: `assign`
+  computes a similarity matrix between groundtruth boxes and anchors, matches
+  on it, and derives per-anchor targets and weights from the match.
+  """
+
+  def __init__(self, similarity_calc, matcher, box_coder,
+               negative_class_weight=1.0, unmatched_cls_target=None):
+    """Construct Object Detection Target Assigner.
+
+    Args:
+      similarity_calc: a RegionSimilarityCalculator
+      matcher: Matcher used to match groundtruth to anchors.
+      box_coder: BoxCoder used to encode matching groundtruth boxes with
+        respect to anchors.
+      negative_class_weight: classification weight to be associated to negative
+        anchors (default: 1.0). The weight must be in [0., 1.].
+      unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
+        which is consistent with the classification target for each
+        anchor (and can be empty for scalar targets).  This shape must thus be
+        compatible with the groundtruth labels that are passed to the "assign"
+        function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
+        If set to None, unmatched_cls_target is set to be [0] for each anchor.
+
+    Note:
+      The arguments are stored as given; this constructor performs no type
+      or range validation and raises no ValueError itself.
+    """
+    self._similarity_calc = similarity_calc
+    self._matcher = matcher
+    self._box_coder = box_coder
+    self._negative_class_weight = negative_class_weight
+    if unmatched_cls_target is None:
+      # Default target for unmatched anchors: a single float32 zero.
+      self._unmatched_cls_target = tf.constant([0], tf.float32)
+    else:
+      self._unmatched_cls_target = unmatched_cls_target
+
+  @property
+  def box_coder(self):
+    """The BoxCoder passed to the constructor."""
+    return self._box_coder
+
+  def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None,
+             groundtruth_weights=None, **params):
+    """Assign classification and regression targets to each anchor.
+
+    For a given set of anchors and groundtruth detections, match anchors
+    to groundtruth_boxes and assign classification and regression targets to
+    each anchor as well as weights based on the resulting match (specifying,
+    e.g., which anchors should not contribute to training loss).
+
+    Anchors that are not matched to anything are given a classification target
+    of self._unmatched_cls_target which can be specified via the constructor.
+
+    Args:
+      anchors: a BoxList representing N anchors
+      groundtruth_boxes: a BoxList representing M groundtruth boxes
+      groundtruth_labels:  a tensor of shape [M, d_1, ... d_k]
+        with labels for each of the ground_truth boxes. The subshape
+        [d_1, ... d_k] can be empty (corresponding to scalar inputs).  When set
+        to None, groundtruth_labels assumes a binary problem where all
+        ground_truth boxes get a positive label (of 1).
+      groundtruth_weights: a float tensor of shape [M] indicating the weight to
+        assign to all anchors match to a particular groundtruth box. The weights
+        must be in [0., 1.]. If None, all weights are set to 1.
+      **params: Additional keyword arguments for specific implementations of
+              the Matcher.
+
+    Returns:
+      cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
+        where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
+        which has shape [num_gt_boxes, d_1, d_2, ... d_k].
+      cls_weights: a float32 tensor with shape [num_anchors]
+      reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
+      reg_weights: a float32 tensor with shape [num_anchors]
+      match: a matcher.Match object encoding the match between anchors and
+        groundtruth boxes, with rows corresponding to groundtruth boxes
+        and columns corresponding to anchors.
+
+    Raises:
+      ValueError: if anchors or groundtruth_boxes are not of type
+        box_list.BoxList
+    """
+    if not isinstance(anchors, box_list.BoxList):
+      raise ValueError('anchors must be an BoxList')
+    if not isinstance(groundtruth_boxes, box_list.BoxList):
+      raise ValueError('groundtruth_boxes must be an BoxList')
+
+    if groundtruth_labels is None:
+      # Binary case: one label of 1 per groundtruth box, shaped [M, 1] so its
+      # trailing shape matches the default unmatched target of shape [1].
+      groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(),
+                                                  0))
+      groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
+    # The per-box label shape must equal the shape of the unmatched target.
+    unmatched_shape_assert = shape_utils.assert_shape_equal(
+        shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[1:],
+        shape_utils.combined_static_and_dynamic_shape(
+            self._unmatched_cls_target))
+    # The number of labels must equal the number of groundtruth boxes.
+    labels_and_box_shapes_assert = shape_utils.assert_shape_equal(
+        shape_utils.combined_static_and_dynamic_shape(
+            groundtruth_labels)[:1],
+        shape_utils.combined_static_and_dynamic_shape(
+            groundtruth_boxes.get())[:1])
+
+    if groundtruth_weights is None:
+      num_gt_boxes = groundtruth_boxes.num_boxes_static()
+      # Falls back to the dynamic count whenever the static count is falsy
+      # (None, and also a static count of 0).
+      if not num_gt_boxes:
+        num_gt_boxes = groundtruth_boxes.num_boxes()
+      groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)
+    # Ops created below only run after both shape assertions have passed.
+    with tf.control_dependencies(
+        [unmatched_shape_assert, labels_and_box_shapes_assert]):
+      # Groundtruth boxes are passed first, anchors second.
+      match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
+                                                           anchors)
+      match = self._matcher.match(match_quality_matrix, **params)
+      reg_targets = self._create_regression_targets(anchors,
+                                                    groundtruth_boxes,
+                                                    match)
+      cls_targets = self._create_classification_targets(groundtruth_labels,
+                                                        match)
+      reg_weights = self._create_regression_weights(match, groundtruth_weights)
+      cls_weights = self._create_classification_weights(match,
+                                                        groundtruth_weights)
+
+    # When the anchor count is statically known, stamp it onto the outputs'
+    # first dimension.
+    num_anchors = anchors.num_boxes_static()
+    if num_anchors is not None:
+      reg_targets = self._reset_target_shape(reg_targets, num_anchors)
+      cls_targets = self._reset_target_shape(cls_targets, num_anchors)
+      reg_weights = self._reset_target_shape(reg_weights, num_anchors)
+      cls_weights = self._reset_target_shape(cls_weights, num_anchors)
+
+    return cls_targets, cls_weights, reg_targets, reg_weights, match
+
+  def _reset_target_shape(self, target, num_anchors):
+    """Sets the static shape of the target.
+
+    Args:
+      target: the target tensor. Its first dimension will be overwritten.
+      num_anchors: the number of anchors, which is used to override the target's
+        first dimension.
+
+    Returns:
+      A tensor with the shape info filled in.
+    """
+    target_shape = target.get_shape().as_list()
+    target_shape[0] = num_anchors
+    # set_shape updates the static shape of `target` in place; the same tensor
+    # object is returned.
+    target.set_shape(target_shape)
+    return target
+
+  def _create_regression_targets(self, anchors, groundtruth_boxes, match):
+    """Returns a regression target for each anchor.
+
+    Args:
+      anchors: a BoxList representing N anchors
+      groundtruth_boxes: a BoxList representing M groundtruth_boxes
+      match: a matcher.Match object
+
+    Returns:
+      reg_targets: a float32 tensor with shape [N, box_code_dimension]
+    """
+    # Unmatched and ignored anchors get an all-zero placeholder box here; their
+    # encoded targets are replaced by the default target further down.
+    matched_gt_boxes = match.gather_based_on_match(
+        groundtruth_boxes.get(),
+        unmatched_value=tf.zeros(4),
+        ignored_value=tf.zeros(4))
+    matched_gt_boxlist = box_list.BoxList(matched_gt_boxes)
+    if groundtruth_boxes.has_field(KEYPOINTS_FIELD_NAME):
+      # Carry keypoints through the same gather so they are available to the
+      # box coder alongside the matched boxes.
+      groundtruth_keypoints = groundtruth_boxes.get_field(KEYPOINTS_FIELD_NAME)
+      matched_keypoints = match.gather_based_on_match(
+          groundtruth_keypoints,
+          unmatched_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]),
+          ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]))
+      matched_gt_boxlist.add_field(KEYPOINTS_FIELD_NAME, matched_keypoints)
+    matched_reg_targets = self._box_coder.encode(matched_gt_boxlist, anchors)
+    match_results_shape = shape_utils.combined_static_and_dynamic_shape(
+        match.match_results)
+
+    # Zero out the unmatched and ignored regression targets.
+    unmatched_ignored_reg_targets = tf.tile(
+        self._default_regression_target(), [match_results_shape[0], 1])
+    matched_anchors_mask = match.matched_column_indicator()
+    reg_targets = tf.where(matched_anchors_mask,
+                           matched_reg_targets,
+                           unmatched_ignored_reg_targets)
+    return reg_targets
+
+  def _default_regression_target(self):
+    """Returns the default target for anchors to regress to.
+
+    Default regression targets are set to zero (though in
+    this implementation what these targets are set to should
+    not matter as the regression weight of any box set to
+    regress to the default target is zero).
+
+    Returns:
+      default_target: a float32 tensor with shape [1, box_code_dimension]
+    """
+    return tf.constant([self._box_coder.code_size*[0]], tf.float32)
+
+  def _create_classification_targets(self, groundtruth_labels, match):
+    """Create classification targets for each anchor.
+
+    Assign a classification target for each anchor to the matching
+    groundtruth label that is provided by match.  Anchors that are not matched
+    to anything, or that are ignored, are given the target
+    self._unmatched_cls_target.
+
+    Args:
+      groundtruth_labels:  a tensor of shape [num_gt_boxes, d_1, ... d_k]
+        with labels for each of the ground_truth boxes. The subshape
+        [d_1, ... d_k] can be empty (corresponding to scalar labels).
+      match: a matcher.Match object that provides a matching between anchors
+        and groundtruth boxes.
+
+    Returns:
+      a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], where the
+      subshape [d_1, ..., d_k] is compatible with groundtruth_labels which has
+      shape [num_gt_boxes, d_1, d_2, ... d_k].
+    """
+    return match.gather_based_on_match(
+        groundtruth_labels,
+        unmatched_value=self._unmatched_cls_target,
+        ignored_value=self._unmatched_cls_target)
+
+  def _create_regression_weights(self, match, groundtruth_weights):
+    """Set regression weight for each anchor.
+
+    Only positive anchors are set to contribute to the regression loss: the
+    weights are gathered from groundtruth_weights through the match, with 0
+    supplied for both ignored and unmatched anchors. (With the default
+    all-ones groundtruth_weights built in `assign`, the gathered source values
+    are all 1.)
+
+    Args:
+      match: a matcher.Match object that provides a matching between anchors
+        and groundtruth boxes.
+      groundtruth_weights: a float tensor of shape [M] indicating the weight to
+        assign to all anchors match to a particular groundtruth box.
+
+    Returns:
+      a float32 tensor with shape [num_anchors] representing regression weights.
+    """
+    return match.gather_based_on_match(
+        groundtruth_weights, ignored_value=0., unmatched_value=0.)
+
+  def _create_classification_weights(self,
+                                     match,
+                                     groundtruth_weights):
+    """Create classification weights for each anchor.
+
+    The weights are gathered from groundtruth_weights through the match.
+    Negative (unmatched) anchors are given self._negative_class_weight and
+    ignored anchors are given zero. The negative weight can be adjusted via
+    the constructor to handle class imbalance (which is almost always the case
+    in object detection).
+
+    Args:
+      match: a matcher.Match object that provides a matching between anchors
+        and groundtruth boxes.
+      groundtruth_weights: a float tensor of shape [M] indicating the weight to
+        assign to all anchors match to a particular groundtruth box.
+
+    Returns:
+      a float32 tensor with shape [num_anchors] representing classification
+      weights.
+    """
+    return match.gather_based_on_match(
+        groundtruth_weights,
+        ignored_value=0.,
+        unmatched_value=self._negative_class_weight)
+
+  def get_box_coder(self):
+    """Get BoxCoder of this TargetAssigner.
+
+    Returns the same object as the `box_coder` property.
+
+    Returns:
+      BoxCoder object.
+    """
+    return self._box_coder
@@ -0,0 +1,210 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tensorflow Example proto decoder for object detection.
+
+A decoder to decode string tensors containing serialized tensorflow.Example
+protos for object detection.
+"""
+import tensorflow as tf
+
+
+slim_example_decoder = tf.contrib.slim.tfexample_decoder
+
+
+class TfExampleDecoder(object):
+  """Tensorflow Example proto decoder.
+
+  Holds the feature spec (`keys_to_features`) and the slim item handlers
+  (`items_to_handlers`) used by `decode` to turn one serialized
+  tensorflow.Example into a dictionary of tensors.
+  """
+
+  def __init__(self):
+    """Constructor sets keys_to_features and items_to_handlers."""
+    # Feature spec: scalar image-level fields are FixedLenFeature with a
+    # default value; per-object fields are VarLenFeature.
+    self.keys_to_features = {
+        'image/encoded':
+            tf.FixedLenFeature((), tf.string, default_value=''),
+        'image/format':
+            tf.FixedLenFeature((), tf.string, default_value='jpeg'),
+        'image/filename':
+            tf.FixedLenFeature((), tf.string, default_value=''),
+        'image/key/sha256':
+            tf.FixedLenFeature((), tf.string, default_value=''),
+        'image/source_id':
+            tf.FixedLenFeature((), tf.string, default_value=''),
+        'image/height':
+            tf.FixedLenFeature((), tf.int64, 1),
+        'image/width':
+            tf.FixedLenFeature((), tf.int64, 1),
+        # Object boxes and classes.
+        'image/object/bbox/xmin':
+            tf.VarLenFeature(tf.float32),
+        'image/object/bbox/xmax':
+            tf.VarLenFeature(tf.float32),
+        'image/object/bbox/ymin':
+            tf.VarLenFeature(tf.float32),
+        'image/object/bbox/ymax':
+            tf.VarLenFeature(tf.float32),
+        'image/object/class/label':
+            tf.VarLenFeature(tf.int64),
+        'image/object/class/text':
+            tf.VarLenFeature(tf.string),
+        'image/object/area':
+            tf.VarLenFeature(tf.float32),
+        'image/object/is_crowd':
+            tf.VarLenFeature(tf.int64),
+        'image/object/difficult':
+            tf.VarLenFeature(tf.int64),
+        'image/object/group_of':
+            tf.VarLenFeature(tf.int64),
+        'image/object/weight':
+            tf.VarLenFeature(tf.float32),
+    }
+    # Maps each output item name to the handler that builds it from the
+    # parsed features above.
+    self.items_to_handlers = {
+        'image': slim_example_decoder.Image(
+            image_key='image/encoded', format_key='image/format', channels=3),
+        'source_id': (
+            slim_example_decoder.Tensor('image/source_id')),
+        'key': (
+            slim_example_decoder.Tensor('image/key/sha256')),
+        'filename': (
+            slim_example_decoder.Tensor('image/filename')),
+        # Object boxes and classes.
+        'groundtruth_boxes': (
+            slim_example_decoder.BoundingBox(
+                ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/')),
+        'groundtruth_area': slim_example_decoder.Tensor(
+            'image/object/area'),
+        'groundtruth_is_crowd': (
+            slim_example_decoder.Tensor('image/object/is_crowd')),
+        'groundtruth_difficult': (
+            slim_example_decoder.Tensor('image/object/difficult')),
+        'groundtruth_group_of': (
+            slim_example_decoder.Tensor('image/object/group_of')),
+        'groundtruth_weights': (
+            slim_example_decoder.Tensor('image/object/weight')),
+    }
+    label_handler = slim_example_decoder.Tensor('image/object/class/label')
+    self.items_to_handlers['groundtruth_classes'] = label_handler
+
+  def decode(self, tf_example_string_tensor):
+    """Decodes serialized tensorflow example and returns a tensor dictionary.
+
+    Args:
+      tf_example_string_tensor: a string tensor holding a serialized tensorflow
+        example proto.
+
+    Returns:
+      A dictionary of the following tensors.
+      image - 3D uint8 tensor of shape [None, None, 3]
+        containing image.
+      source_id - string tensor containing original
+        image id.
+      key - string tensor with unique sha256 hash key.
+      filename - string tensor with original dataset
+        filename.
+      groundtruth_boxes - 2D float32 tensor of shape
+        [None, 4] containing box corners.
+      groundtruth_classes - 1D int64 tensor of shape
+        [None] containing classes for the boxes.
+      groundtruth_weights - 1D float32 tensor of
+        shape [None] indicating the weights of groundtruth boxes. When the
+        example carries no weights, this is all ones with one entry per
+        groundtruth box.
+      groundtruth_area - 1D float32 tensor of shape
+        [None] containing object mask area in pixel squared.
+      groundtruth_is_crowd - 1D bool tensor of shape
+        [None] indicating if the boxes enclose a crowd.
+
+    Optional:
+      groundtruth_difficult - 1D tensor of shape
+        [None] indicating if the boxes represent `difficult` instances.
+        Parsed from an int64 feature and not cast in this method.
+      groundtruth_group_of - 1D tensor of shape
+        [None] indicating if the boxes represent `group_of` instances.
+        Parsed from an int64 feature and not cast in this method.
+
+    Note:
+      This decoder registers no handler for instance masks, so the returned
+      dictionary has no `groundtruth_instance_masks` entry.
+    """
+    # Reshape to a scalar string tensor before parsing.
+    serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
+    decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features,
+                                                    self.items_to_handlers)
+    # Sort the item names so the keys/tensors pairing below is deterministic.
+    keys = sorted(decoder.list_items())
+
+    tensors = decoder.decode(serialized_example, items=keys)
+    tensor_dict = dict(zip(keys, tensors))
+    is_crowd = 'groundtruth_is_crowd'
+    tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
+    tensor_dict['image'].set_shape([None, None, 3])
+
+    def default_groundtruth_weights():
+      # One weight of 1.0 per decoded groundtruth box.
+      return tf.ones(
+          tf.shape(tensor_dict['groundtruth_boxes'])[0],
+          dtype=tf.float32)
+
+    # Keep the decoded weights when there are any; otherwise substitute the
+    # all-ones default.
+    tensor_dict['groundtruth_weights'] = tf.cond(
+        tf.greater(
+            tf.shape(
+                tensor_dict['groundtruth_weights'])[0],
+            0), lambda: tensor_dict['groundtruth_weights'],
+        default_groundtruth_weights)
+    return tensor_dict
+
+
+class TfExampleSegmentationDecoder(object):
+  """Tensorflow Example proto decoder."""
+
+  def __init__(self):
+    """Constructor sets keys_to_features and items_to_handlers."""
+    self.keys_to_features = {
+        'image/encoded':
+            tf.FixedLenFeature((), tf.string, default_value=''),
+        'image/filename':
+            tf.FixedLenFeature((), tf.string, default_value=''),
+        'image/format':
+            tf.FixedLenFeature((), tf.string, default_value='jpeg'),
+        'image/height':
+            tf.FixedLenFeature((), tf.int64, default_value=0),
+        'image/width':
+            tf.FixedLenFeature((), tf.int64, default_value=0),
+        'image/segmentation/class/encoded':
+            tf.FixedLenFeature((), tf.string, default_value=''),
+        'image/segmentation/class/format':
+            tf.FixedLenFeature((), tf.string, default_value='png'),
+    }
+    self.items_to_handlers = {
+        'image': slim_example_decoder.Image(
+            image_key='image/encoded', format_key='image/format', channels=3),
+        'labels_class': slim_example_decoder.Image(
+            image_key='image/segmentation/class/encoded',
+            format_key='image/segmentation/class/format',
+            channels=1)
+    }
+
+  def decode(self, tf_example_string_tensor):
+    """Decodes serialized tensorflow example and returns a tensor dictionary.
+
+    Args:
+      tf_example_string_tensor: a string tensor holding a serialized tensorflow
+        example proto.
+
+    Returns:
+      A dictionary of the following tensors.
+      image - 3D uint8 tensor of shape [None, None, 3] containing image.
+      labels_class - 2D unit8 tensor of shape [None, None] containing
+        pixel-wise class labels.
+    """
+    serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
+    decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features,
+                                                    self.items_to_handlers)
+    keys = sorted(decoder.list_items())
+    keys = ['image', 'labels_class']
+
+    tensors = decoder.decode(serialized_example, items=keys)
+    tensor_dict = dict(zip(keys, tensors))
+    tensor_dict['image'].set_shape([None, None, 3])
+    return tensor_dict
@@ -0,0 +1,44 @@
+# Launches RANK_SIZE (here 1) background training process(es), one per device
+# id, each in its own working directory under $SAVE_PATH.
+
+# Remove slog files left over from earlier runs (host and all devices).
+rm -rf /var/log/npu/slog/host-0/*.log
+rm -rf /var/log/npu/slog/device-*/*.log
+
+# Environment for the Ascend toolchain.
+# NOTE: PYTHONPATH and LD_LIBRARY_PATH are overwritten, not appended to.
+export PYTHONPATH=/usr/local/Ascend/ops/op_impl/built-in/ai_core/tbe
+export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu
+PATH=$PATH:$HOME/bin
+# NOTE(review): $PATH is expanded at both ends, so existing entries end up
+# listed twice.
+export PATH=$PATH:/usr/local/Ascend/fwkacllib/ccec_compiler/bin:$PATH
+export ASCEND_OPP_PATH=/usr/local/Ascend/opp
+export DDK_VERSION_FLAG=1.71.T5.0.B060
+export NEW_GE_FE_ID=1
+export GE_AICPU_FLAG=1
+export SOC_VERSION=Ascend910
+# Graph-dump / model-print / log-to-stdout switches (presumably debug aids;
+# confirm against the Ascend documentation).
+export DUMP_GE_GRAPH=1
+export DUMP_GRAPH_LEVEL=3
+export PRINT_MODEL=1
+export SLOG_PRINT_TO_STDOUT=1
+
+
+# Number of ranks to launch and the first device id to use.
+# NOTE(review): with RANK_SIZE=1 and RANK_ID_START=1 the loop below runs once
+# with RANK_ID=1 (not 0) - confirm this is the intended device.
+export RANK_SIZE=1
+RANK_ID_START=1
+
+# Per-rank working directories are created under $SAVE_PATH, relative to the
+# directory this script is started from.
+SAVE_PATH=training
+BASE_PATH=`pwd`
+echo $BASE_PATH
+
+# One iteration per rank: RANK_ID_START .. RANK_ID_START + RANK_SIZE - 1.
+for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
+do
+echo
+# Run adc as HwHiAiUser for this device id (presumably sets the device log
+# level to error - confirm).
+su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device "$RANK_ID
+TMP_PATH=$SAVE_PATH/D$RANK_ID
+mkdir -p $TMP_PATH
+cp exec_main.sh $TMP_PATH/
+cd $TMP_PATH
+# Start training in the background; only stdout goes to the per-rank log.
+bash exec_main.sh $RANK_ID $RANK_SIZE $BASE_PATH > train_$RANK_ID.log &
+# Return to the previous directory for the next iteration.
+cd -
+done
+
+
+
+
+
+
@@ -0,0 +1,41 @@
+
+# Launch script for 8-device (RANK_SIZE=8) training.
+# For each rank it creates training/D<rank>/, copies exec_main.sh into it and
+# starts it in the background (nohup) with stdout redirected to
+# train_<rank>.log.
+
+# Remove slog files left over from previous runs.
+rm -rf /var/log/npu/slog/host-0/*.log
+rm -rf /var/log/npu/slog/device-*/*.log
+
+# set env
+export PYTHONPATH=/usr/local/Ascend/ops/op_impl/built-in/ai_core/tbe/
+export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu
+PATH=$PATH:$HOME/bin
+# Append the compiler directory once; the previous value is already in $PATH.
+export PATH=$PATH:/usr/local/Ascend/fwkacllib/ccec_compiler/bin
+export ASCEND_OPP_PATH=/usr/local/Ascend/opp
+export DDK_VERSION_FLAG=1.71.T5.0.B060
+export NEW_GE_FE_ID=1
+export GE_AICPU_FLAG=1
+export SOC_VERSION=Ascend910
+export DUMP_GE_GRAPH=1
+export DUMP_GRAPH_LEVEL=3
+export PRINT_MODEL=1
+export SLOG_PRINT_TO_STDOUT=1
+
+
+export RANK_SIZE=8
+# Rank table describing the devices, resolved relative to the launch directory.
+export RANK_TABLE_FILE="${PWD}/npu_config/${RANK_SIZE}p.json"
+RANK_ID_START=0
+
+BASE_PATH=$(pwd)
+SAVE_PATH=training
+for ((RANK_ID = RANK_ID_START; RANK_ID < RANK_SIZE + RANK_ID_START; RANK_ID++))
+do
+    echo
+    # presumably sets the log level of this device to "error" - TODO confirm
+    su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device $RANK_ID"
+    TMP_PATH="$SAVE_PATH/D$RANK_ID"
+    mkdir -p "$TMP_PATH"
+    cp exec_main.sh "$TMP_PATH/"
+    # Abort instead of launching the training from the wrong directory.
+    cd "$TMP_PATH" || exit 1
+    nohup bash exec_main.sh "$RANK_ID" "$RANK_SIZE" "$BASE_PATH" > "train_$RANK_ID.log" &
+    cd - || exit 1
+done
+
+
+
@@ -0,0 +1,484 @@
+# Copyright 2018 Google. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""SSD (via ResNet34) model definition.
+
+Defines the SSD model and loss functions from this paper:
+
+https://arxiv.org/pdf/1708.02002
+
+Uses the ResNet model as a basis.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+import ssd_constants
+
+def batch_norm_relu(inputs,
+                    is_training_bn,
+                    params,
+                    relu=True,
+                    init_zero=False,
+                    data_format='channels_last',
+                    name=None):
+  """Performs a batch normalization followed by a ReLU.
+
+  Args:
+    inputs: `Tensor` of shape `[batch, channels, ...]`.
+    is_training_bn: `bool` for whether the model is training.
+    params: params of the model, a dict including `distributed_group_size`
+        and `num_shards`.
+    relu: `bool` if False, omits the ReLU operation.
+    init_zero: `bool` if True, initializes scale parameter of batch
+        normalization with 0 instead of 1 (default).
+    data_format: `str` either "channels_first" for `[batch, channels, height,
+        width]` or "channels_last for `[batch, height, width, channels]`.
+    name: the name of the batch normalization layer
+
+  Returns:
+    A normalized `Tensor` with the same `data_format`.
+  """
+  if init_zero:
+    gamma_initializer = tf.zeros_initializer()
+  else:
+    gamma_initializer = tf.ones_initializer()
+
+  if data_format == 'channels_first':
+    axis = 1
+  else:
+    axis = 3
+
+
+  inputs = tf.layers.batch_normalization(
+        inputs=inputs,
+        axis=axis,
+        momentum=ssd_constants.BATCH_NORM_DECAY,
+        epsilon=ssd_constants.BATCH_NORM_EPSILON,
+        center=True,
+        scale=True,
+        training=is_training_bn,
+        fused=True,
+        gamma_initializer=gamma_initializer,
+        name=name)
+
+  if relu:
+    inputs = tf.nn.relu(inputs)
+  return inputs
+
+
+def fixed_padding(inputs, kernel_size, data_format='channels_last'):
+  """Pads the input along the spatial dimensions independently of input size.
+
+  Args:
+    inputs: `Tensor` of size `[batch, channels, height, width]` or
+        `[batch, height, width, channels]` depending on `data_format`.
+    kernel_size: `int` kernel size to be used for `conv2d` or max_pool2d`
+        operations. Should be a positive integer.
+    data_format: `str` either "channels_first" for `[batch, channels, height,
+        width]` or "channels_last for `[batch, height, width, channels]`.
+
+  Returns:
+    A padded `Tensor` of the same `data_format` with size either intact
+    (if `kernel_size == 1`) or padded (if `kernel_size > 1`).
+  """
+  pad_total = kernel_size - 1
+  pad_beg = pad_total // 2
+  pad_end = pad_total - pad_beg
+  if data_format == 'channels_first':
+    padded_inputs = tf.pad(
+        inputs, [[0, 0], [0, 0], [pad_beg, pad_end], [pad_beg, pad_end]])
+  else:
+    padded_inputs = tf.pad(
+        inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
+
+  return padded_inputs
+
+
+def conv2d_fixed_padding(inputs,
+                         filters,
+                         kernel_size,
+                         strides,
+                         data_format='channels_last'):
+  """Strided 2-D convolution with explicit padding.
+
+  The padding is consistent and is based only on `kernel_size`, not on the
+  dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).
+
+  Args:
+    inputs: `Tensor` of size `[batch, channels, height_in, width_in]`.
+    filters: `int` number of filters in the convolution.
+    kernel_size: `int` size of the kernel to be used in the convolution.
+    strides: `int` strides of the convolution.
+    data_format: `str` either "channels_first" for `[batch, channels, height,
+        width]` or "channels_last for `[batch, height, width, channels]`.
+
+  Returns:
+    A `Tensor` of shape `[batch, filters, height_out, width_out]`.
+  """
+  if strides > 1:
+    inputs = fixed_padding(inputs, kernel_size, data_format=data_format)
+
+  return tf.layers.conv2d(
+      inputs=inputs,
+      filters=filters,
+      kernel_size=kernel_size,
+      strides=strides,
+      padding=('SAME' if strides == 1 else 'VALID'),
+      use_bias=False,
+      kernel_initializer=tf.variance_scaling_initializer(),
+      data_format=data_format)
+
+
+def residual_block(inputs,
+                   filters,
+                   is_training_bn,
+                   strides,
+                   params,
+                   use_projection=False,
+                   data_format='channels_last'):
+  """Standard building block for residual networks with BN after convolutions.
+
+  Args:
+    inputs: `Tensor` of size `[batch, channels, height, width]`.
+    filters: `int` number of filters for the first two convolutions. Note that
+        the third and final convolution will use 4 times as many filters.
+    is_training_bn: `bool` for whether the model is in training.
+    strides: `int` block stride. If greater than 1, this block will ultimately
+        downsample the input.
+    params: params of the model, a dict.
+    use_projection: `bool` for whether this block should use a projection
+        shortcut (versus the default identity shortcut). This is usually `True`
+        for the first block of a block group, which may change the number of
+        filters and the resolution.
+    data_format: `str` either "channels_first" for `[batch, channels, height,
+        width]` or "channels_last for `[batch, height, width, channels]`.
+
+  Returns:
+    The output `Tensor` of the block.
+  """
+  shortcut = inputs
+  if use_projection:
+    # Projection shortcut in first layer to match filters and strides
+    shortcut = conv2d_fixed_padding(
+        inputs=inputs,
+        filters=filters,
+        kernel_size=1,
+        strides=strides,
+        data_format=data_format)
+    shortcut = batch_norm_relu(
+        shortcut, is_training_bn, params, relu=False, data_format=data_format)
+
+  inputs = conv2d_fixed_padding(
+      inputs=inputs,
+      filters=filters,
+      kernel_size=3,
+      strides=strides,
+      data_format=data_format)
+  inputs = batch_norm_relu(
+      inputs, is_training_bn, params, data_format=data_format)
+
+  inputs = conv2d_fixed_padding(
+      inputs=inputs,
+      filters=filters,
+      kernel_size=3,
+      strides=1,
+      data_format=data_format)
+  inputs = batch_norm_relu(
+      inputs,
+      is_training_bn,
+      params,
+      relu=False,
+      init_zero=True,
+      data_format=data_format)
+
+  return tf.nn.relu(inputs + shortcut)
+
+
+def block_group(inputs,
+                filters,
+                block_fn,
+                blocks,
+                strides,
+                is_training_bn,
+                name,
+                params,
+                data_format='channels_last',
+                use_projection=True):
+  """Creates one group of blocks for the ResNet model.
+
+  Args:
+    inputs: `Tensor` of size `[batch, channels, height, width]`.
+    filters: `int` number of filters for the first convolution of the layer.
+    block_fn: `function` for the block to use within the model
+    blocks: `int` number of blocks contained in the layer.
+    strides: `int` stride to use for the first convolution of the layer. If
+        greater than 1, this layer will downsample the input.
+    is_training_bn: `bool` for whether the model is training.
+    name: `str`name for the Tensor output of the block layer.
+    params: params of the model, a dict.
+    data_format: `str` either "channels_first" for `[batch, channels, height,
+        width]` or "channels_last for `[batch, height, width, channels]`.
+    use_projection: `bool` for whether this block should use a projection
+        shortcut (versus the default identity shortcut). This is usually `True`
+        for the first block of a block group, which may change the number of
+        filters and the resolution.
+
+  Returns:
+    The output `Tensor` of the block layer.
+  """
+  # Only the first block per block_group uses projection shortcut and strides.
+  inputs = block_fn(
+      inputs,
+      filters,
+      is_training_bn,
+      strides,
+      params,
+      use_projection=use_projection,
+      data_format=data_format)
+
+  for _ in range(1, blocks):
+    inputs = block_fn(
+        inputs, filters, is_training_bn, 1, params, data_format=data_format)
+
+  return tf.identity(inputs, name)
+
+
+def resnet_v1_generator(block_fn, layers, params, data_format='channels_last'):
+  """Generator of ResNet v1 model with classification layers removed.
+
+    Our actual ResNet network.  We return the output of c2, c3,c4,c5
+    N.B. batch norm is always run with trained parameters, as we use very small
+    batches when training the object layers.
+
+  Args:
+    block_fn: `function` for the block to use within the model. Either
+        `residual_block` or `bottleneck_block`.
+    layers: list of 4 `int`s denoting the number of blocks to include in each
+      of the 4 block groups. Each group consists of blocks that take inputs of
+      the same resolution.
+    params: params of the model, a dict.
+    data_format: `str` either "channels_first" for `[batch, channels, height,
+        width]` or "channels_last for `[batch, height, width, channels]`.
+
+  Returns:
+    Model `function` that takes in `inputs` and `is_training` and returns the
+    output `Tensor` of the ResNet model.
+  """
+  def model(inputs, is_training_bn=False):
+    """Creation of the model graph."""
+    inputs = conv2d_fixed_padding(
+          inputs=inputs,
+          filters=64,
+          kernel_size=7,
+          strides=2,
+          data_format=data_format)
+    inputs = tf.identity(inputs, 'initial_conv')
+    inputs = batch_norm_relu(
+        inputs, is_training_bn, params, data_format=data_format)
+
+    inputs = tf.layers.max_pooling2d(
+        inputs=inputs,
+        pool_size=3,
+        strides=2,
+        padding='SAME',
+        data_format=data_format)
+    inputs = tf.identity(inputs, 'initial_max_pool')
+
+    c2 = block_group(
+        inputs=inputs,
+        filters=64,
+        blocks=layers[0],
+        strides=1,
+        block_fn=block_fn,
+        is_training_bn=is_training_bn,
+        params=params,
+        name='block_group1',
+        data_format=data_format,
+        use_projection=False)
+    c3 = block_group(
+        inputs=c2,
+        filters=128,
+        blocks=layers[1],
+        strides=2,
+        block_fn=block_fn,
+        is_training_bn=is_training_bn,
+        params=params,
+        name='block_group2',
+        data_format=data_format)
+    c4 = block_group(
+        inputs=c3,
+        filters=256,
+        blocks=layers[2],
+        strides=1,
+        block_fn=block_fn,
+        is_training_bn=is_training_bn,
+        params=params,
+        name='block_group3',
+        data_format=data_format)
+    return c2, c3, c4
+
+  return model
+
+
+def resnet_v1(resnet_depth, params, data_format='channels_last'):
+  """Returns the ResNet model for a given size and number of output classes."""
+  model_params = {
+      34: {'block': residual_block, 'layers': [3, 4, 6, 3]}
+  }
+
+  if resnet_depth not in model_params:
+    raise ValueError('Not a valid resnet_depth:', resnet_depth)
+
+  resnet_params = model_params[resnet_depth]
+  return resnet_v1_generator(resnet_params['block'], resnet_params['layers'],
+                             params, data_format)
+
+
+def class_net(images, level, num_classes):
+  """Class prediction network for SSD."""
+  return tf.layers.conv2d(
+      images,
+      num_classes * ssd_constants.NUM_DEFAULTS_BY_LEVEL[level],
+      kernel_size=(3, 3),
+      padding='same',
+      activation=None,
+      name='class-%d' % (level),
+  )
+
+
+def box_net(images, level):
+  """Box regression network for SSD."""
+  return tf.layers.conv2d(
+      images,
+      4 * ssd_constants.NUM_DEFAULTS_BY_LEVEL[level],
+      kernel_size=(3, 3),
+      padding='same',
+      activation=None,
+      name='box-%d' % (level),
+  )
+
+
+def ssd(features, params, is_training_bn=False):
+  """SSD classification and regression model."""
+  # upward layers
+  with tf.variable_scope(
+      'resnet%s' % ssd_constants.RESNET_DEPTH, reuse=tf.AUTO_REUSE):
+    resnet_fn = resnet_v1(ssd_constants.RESNET_DEPTH, params)
+    _, _, u4 = resnet_fn(features, is_training_bn)
+
+  with tf.variable_scope('ssd', reuse=tf.AUTO_REUSE):
+    feats = {}
+    # output channels for mlperf logging.
+    out_channels = [256]
+    feats[3] = u4
+    feats[4] = tf.layers.conv2d(
+        feats[3],
+        filters=256,
+        kernel_size=(1, 1),
+        padding='same',
+        activation=tf.nn.relu,
+        name='block7-conv1x1')
+    feats[4] = tf.layers.conv2d(
+        feats[4],
+        filters=512,
+        strides=(2, 2),
+        kernel_size=(3, 3),
+        padding='same',
+        activation=tf.nn.relu,
+        name='block7-conv3x3')
+    out_channels.append(512)
+    feats[5] = tf.layers.conv2d(
+        feats[4],
+        filters=256,
+        kernel_size=(1, 1),
+        padding='same',
+        activation=tf.nn.relu,
+        name='block8-conv1x1')
+    feats[5] = tf.layers.conv2d(
+        feats[5],
+        filters=512,
+        strides=(2, 2),
+        kernel_size=(3, 3),
+        padding='same',
+        activation=tf.nn.relu,
+        name='block8-conv3x3')
+    out_channels.append(512)
+    feats[6] = tf.layers.conv2d(
+        feats[5],
+        filters=128,
+        kernel_size=(1, 1),
+        padding='same',
+        activation=tf.nn.relu,
+        name='block9-conv1x1')
+    feats[6] = tf.layers.conv2d(
+        feats[6],
+        filters=256,
+        strides=(2, 2),
+        kernel_size=(3, 3),
+        padding='same',
+        activation=tf.nn.relu,
+        name='block9-conv3x3')
+    out_channels.append(256)
+    feats[7] = tf.layers.conv2d(
+        feats[6],
+        filters=128,
+        kernel_size=(1, 1),
+        padding='same',
+        activation=tf.nn.relu,
+        name='block10-conv1x1')
+    feats[7] = tf.layers.conv2d(
+        feats[7],
+        filters=256,
+        kernel_size=(3, 3),
+        padding='valid',
+        activation=tf.nn.relu,
+        name='block10-conv3x3')
+    out_channels.append(256)
+    feats[8] = tf.layers.conv2d(
+        feats[7],
+        filters=128,
+        kernel_size=(1, 1),
+        padding='same',
+        activation=tf.nn.relu,
+        name='block11-conv1x1')
+    feats[8] = tf.layers.conv2d(
+        feats[8],
+        filters=256,
+        kernel_size=(3, 3),
+        padding='valid',
+        activation=tf.nn.relu,
+        name='block11-conv3x3')
+    out_channels.append(256)
+
+    class_outputs = {}
+    box_outputs = {}
+    min_level = ssd_constants.MIN_LEVEL
+    max_level = ssd_constants.MAX_LEVEL
+    num_classes = ssd_constants.NUM_CLASSES
+
+    with tf.variable_scope('class_net', reuse=tf.AUTO_REUSE):
+      for level in range(min_level, max_level + 1):
+        class_outputs[level] = class_net(
+            feats[level], level, num_classes)
+
+    with tf.variable_scope('box_net', reuse=tf.AUTO_REUSE):
+      for level in range(min_level, max_level + 1):
+        box_outputs[level] = box_net(
+            feats[level], level)
+
+  return class_outputs, box_outputs
@@ -0,0 +1,122 @@
+# Copyright 2018 Google. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Central location for all constants related to MLPerf SSD."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# ==============================================================================
+# == Model =====================================================================
+# ==============================================================================
+IMAGE_SIZE = 300
+SPACE_TO_DEPTH_BLOCK_SIZE = 2
+
+# TODO(taylorrobie): MLPerf uses 80, but COCO documents 90. (RetinaNet uses 90)
+# Update(taylorrobie): Labels > 81 show up in the pipeline. This will need to
+#                      be resolved.
+NUM_CLASSES = 81  # Including "no class". Not all COCO classes are used.
+
+# Note: Zero is special. (Background class) CLASS_INV_MAP[0] must be zero.
+CLASS_INV_MAP = (
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21,
+    22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
+    44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+    64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87,
+    88, 89, 90)
+# _MAP inverts CLASS_INV_MAP (value -> position in the tuple). CLASS_MAP is the
+# same lookup as a dense tuple indexed by value, holding -1 for every value
+# that does not appear in CLASS_INV_MAP.
+_MAP = {j: i for i, j in enumerate(CLASS_INV_MAP)}
+CLASS_MAP = tuple(_MAP.get(i, -1) for i in range(max(CLASS_INV_MAP) + 1))
+
+NUM_SSD_BOXES = 8732
+
+# Depth of the ResNet backbone.
+RESNET_DEPTH = 34
+
+"""SSD specific"""
+# Range of feature levels (inclusive) that get a class and a box predictor.
+MIN_LEVEL = 3
+MAX_LEVEL = 8
+
+FEATURE_SIZES = (38, 19, 10, 5, 3, 1)
+STEPS = (8, 16, 32, 64, 100, 300)
+
+# https://github.com/amdegroot/ssd.pytorch/blob/master/data/config.py
+SCALES = (21, 45, 99, 153, 207, 261, 315)
+ASPECT_RATIOS = ((2,), (2, 3), (2, 3), (2, 3), (2,), (2,))
+NUM_DEFAULTS = (4, 6, 6, 6, 4, 4)
+# Same counts as NUM_DEFAULTS, keyed by feature level.
+NUM_DEFAULTS_BY_LEVEL = {3: 4, 4: 6, 5: 6, 6: 6, 7: 4, 8: 4}
+SCALE_XY = 0.1
+SCALE_HW = 0.2
+# Reciprocals of the scales above: two entries from SCALE_XY, two from SCALE_HW.
+BOX_CODER_SCALES = (1 / SCALE_XY, 1 / SCALE_XY, 1 / SCALE_HW, 1 / SCALE_HW)
+MATCH_THRESHOLD = 0.5
+
+# https://discuss.pytorch.org/t/how-to-preprocess-input-for-pre-trained-networks/683
+NORMALIZATION_MEAN = (0.485, 0.456, 0.406)
+NORMALIZATION_STD = (0.229, 0.224, 0.225)
+
+# SSD Cropping
+NUM_CROP_PASSES = 50
+CROP_MIN_IOU_CHOICES = (0, 0.1, 0.3, 0.5, 0.7, 0.9)
+# One extra "no crop" outcome next to the IoU choices, all equally weighted.
+P_NO_CROP_PER_PASS = 1 / (len(CROP_MIN_IOU_CHOICES) + 1)
+
+# Hard example mining
+NEGS_PER_POSITIVE = 3
+
+# Batch normalization
+BATCH_NORM_DECAY = 0.9
+BATCH_NORM_EPSILON = 1e-5
+
+
+# ==============================================================================
+# == Optimizer =================================================================
+# ==============================================================================
+BASE_LEARNING_RATE = 3.0e-3
+FIRST_LR_DROP_STEP = 160000  # This parameter has no effect.
+SECOND_LR_DROP_STEP = 200000 # This parameter has no effect.
+MOMENTUM = 0.9
+WEIGHT_DECAY = 5e-4
+# Reference batch size (kept as a float).
+DEFAULT_BATCH_SIZE = 32.0
+
+# ==============================================================================
+# == Keys ======================================================================
+# ==============================================================================
+# String keys shared across modules.
+# NOTE(review): presumably the keys of the feature/label dicts - confirm
+# against the data loader.
+BOXES = "boxes"
+CLASSES = "classes"
+NUM_MATCHED_BOXES = "num_matched_boxes"
+IMAGE = "image"
+SOURCE_ID = "source_id"
+RAW_SHAPE = "raw_shape"
+IS_PADDED = "is_padded"
+
+
+# ==============================================================================
+# == Evaluation ================================================================
+# ==============================================================================
+
+# Note: This is based on a batch size of 32
+# https://github.com/mlperf/reference/blob/master/single_stage_detector/ssd/train.py#L21-L37  # pylint: disable=line-too-long
+EVAL_SAMPLES = 5000
+# Number of steps between two saved checkpoints.
+CHECKPOINT_FREQUENCY = 5000
+MAX_NUM_EVAL_BOXES = 200
+OVERLAP_CRITERIA = 0.5  # Used for non-max suppression
+MIN_SCORE = 0.05  # Minimum score to be considered during evaluation.
+DUMMY_SCORE = -1e5  # If no boxes are matched.
+# Eval step intervals starting from 0
+#EVAL_STEPS = (24000, 24000, 24000, 24000, 24000,24000, 24000, 24000, 24000, 24000)
+# A single entry means one training cycle followed by one evaluation.
+EVAL_STEPS = (432000,)
+# Target COCO/AP for mlperf.
+EVAL_TARGET = 0.24
+
+# For multiprocessing.
+QUEUE_SIZE = 24
+WORKER_COUNT = 10
@@ -0,0 +1,309 @@
+# Copyright 2018 Google. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Training script for SSD.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import multiprocessing
+import os
+
+import sys
+sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)),'../../../../utils/atlasboost'))
+
+import threading
+from absl import app
+import numpy as np
+import tensorflow as tf
+
+from npu_bridge.estimator import npu_ops
+from tensorflow.core.protobuf import rewriter_config_pb2
+from npu_bridge.estimator.npu.npu_config import NPURunConfig
+from npu_bridge.estimator.npu.npu_estimator  import NPUEstimator
+
+import coco_metric
+import dataloader
+import ssd_constants
+import ssd_model
+
+
+def get_rank_size():
+    return int(os.environ['RANK_SIZE'])
+from benchmark_log import hwlog
+from benchmark_log.basic_utils import get_environment_info
+from benchmark_log.basic_utils import get_model_parameter
+# Command-line flags of the training script. The data-path flags default to
+# None and are checked against the selected --mode before a run starts.
+tf.flags.DEFINE_string('model_dir', None, 'Location of model_dir')
+tf.flags.DEFINE_string('resnet_checkpoint', '',
+                       'Location of the ResNet checkpoint to use for model '
+                       'initialization.')
+tf.flags.DEFINE_integer('train_batch_size', 64, 'training batch size')
+tf.flags.DEFINE_integer('eval_batch_size', 1, 'evaluation batch size')
+tf.flags.DEFINE_integer('eval_samples', 5000, 'The number of samples for '
+                                              'evaluation.')
+tf.flags.DEFINE_string(
+    'training_file_pattern', None,
+    'Glob for training data files (e.g., COCO train - minival set)')
+tf.flags.DEFINE_string(
+    'validation_file_pattern', None,
+    'Glob for evaluation tfrecords (e.g., COCO val2017 set)')
+tf.flags.DEFINE_string(
+    'val_json_file',
+    None,
+    'COCO validation JSON containing golden bounding boxes.')
+tf.flags.DEFINE_integer('num_examples_per_epoch', 120000,
+                        'Number of examples in one epoch')
+# Float flag, so fractional epoch counts are accepted.
+tf.flags.DEFINE_float('num_epochs', 58, 'Number of epochs for training')
+
+tf.flags.DEFINE_string('mode', 'train_and_eval',
+                       'Mode to run: train_and_eval, train, eval')
+
+tf.flags.DEFINE_integer(
+    'keep_checkpoint_max', 32,
+    'Maximum number of checkpoints to keep.')
+
+
+FLAGS = tf.flags.FLAGS
+
+# Set to True by coco_eval() once the evaluated COCO/AP reaches
+# ssd_constants.EVAL_TARGET; read by the train_and_eval loop to stop early.
+SUCCESS = False
+
+
+def construct_run_config():
+    """Construct the run config."""
+
+    # Parse hparams
+    hparams = ssd_model.default_hparams()
+
+    params = dict(
+        hparams.values(),
+        num_examples_per_epoch=FLAGS.num_examples_per_epoch,
+        resnet_checkpoint=FLAGS.resnet_checkpoint,
+        val_json_file=FLAGS.val_json_file,
+        mode=FLAGS.mode,
+        model_dir=FLAGS.model_dir,
+        eval_samples=FLAGS.eval_samples,
+    )
+
+    return NPURunConfig(
+        model_dir=FLAGS.model_dir,
+        session_config=tf.ConfigProto(),
+        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
+        save_checkpoints_steps=ssd_constants.CHECKPOINT_FREQUENCY,
+        enable_data_pre_proc=True,
+        save_summary_steps=100,
+        iterations_per_loop=100,
+        precision_mode='allow_mix_precision'
+      ), params
+
+def coco_eval(predictions,
+              current_step,
+              summary_writer,
+              coco_gt,
+              use_cpp_extension=True,
+              nms_on_tpu=True):
+    """Call the coco library to get the eval metrics."""
+    global SUCCESS
+    eval_results = coco_metric.compute_map(
+        predictions,
+        coco_gt,
+        use_cpp_extension=use_cpp_extension,
+        nms_on_tpu=nms_on_tpu)
+    if eval_results['COCO/AP'] >= ssd_constants.EVAL_TARGET and not SUCCESS:
+        SUCCESS = True
+    tf.logging.info('Eval results: %s' % eval_results)
+    hwlog.remark_print(key=hwlog.EVAL_RESULTS, value=eval_results)
+    # Write out eval results for the checkpoint.
+    with tf.Graph().as_default():
+        summaries = []
+        for metric in eval_results:
+            summaries.append(
+                tf.Summary.Value(tag=metric, simple_value=eval_results[metric]))
+        tf_summary = tf.Summary(value=list(summaries))
+        summary_writer.add_summary(tf_summary, current_step)
+
+def init_npu():
+   """Initialize npu manually.
+   Returns:
+     `init_sess` npu  init session config.
+     `npu_init` npu  init ops.
+   """
+   npu_init = npu_ops.initialize_system()
+   config = tf.ConfigProto()
+
+   #npu mix precision attribute set to true when using mix precision
+   config.graph_options.rewrite_options.remapping = rewriter_config_pb2.RewriterConfig.OFF
+   custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
+   custom_op.name = "NpuOptimizer"
+   custom_op.parameter_map["use_off_line"].b = True
+
+   init_sess = tf.Session(config=config)
+   return init_sess,npu_init
+
+def main(argv):
+    init_sess, npu_init = init_npu()
+    init_sess.run(npu_init)
+
+    del argv  # Unused.
+    global SUCCESS
+
+    # Check data path
+    if FLAGS.mode in ('train',
+                      'train_and_eval') and FLAGS.training_file_pattern is None:
+        raise RuntimeError('You must specify --training_file_pattern for training.')
+    if FLAGS.mode in ('train_and_eval', 'eval'):
+        if FLAGS.validation_file_pattern is None:
+            raise RuntimeError('You must specify --validation_file_pattern '
+                               'for evaluation.')
+        if FLAGS.val_json_file is None:
+            raise RuntimeError('You must specify --val_json_file for evaluation.')
+
+    run_config, params = construct_run_config()
+
+    if FLAGS.mode == 'train':
+        train_params = dict(params)
+        hwlog.remark_print(key=hwlog.CURRENT_EPOCH, value=train_params['num_examples_per_epoch'])
+        train_params['batch_size'] = FLAGS.train_batch_size
+        train_estimator = NPUEstimator(
+            model_fn=ssd_model.ssd_model_fn,
+            model_dir=FLAGS.model_dir,
+            config=run_config,
+            params=train_params)
+
+        tf.logging.info(params)
+
+        train_estimator.train(
+            input_fn=dataloader.SSDInputReader(
+                FLAGS.training_file_pattern,
+                params['transpose_input'],
+                is_training=True),
+            steps=int((FLAGS.num_epochs * FLAGS.num_examples_per_epoch) /
+                      FLAGS.train_batch_size / get_rank_size()))
+
+    elif FLAGS.mode == 'train_and_eval':
+        output_dir = os.path.join(FLAGS.model_dir, 'eval')
+        tf.gfile.MakeDirs(output_dir)
+        # Summary writer writes out eval metrics.
+        summary_writer = tf.summary.FileWriter(output_dir)
+
+        current_step = 0
+
+        coco_gt = coco_metric.create_coco(
+            FLAGS.val_json_file, use_cpp_extension=params['use_cocoeval_cc'])
+        for eval_step in ssd_constants.EVAL_STEPS:
+            # Compute the actual eval steps based on the actural train_batch_size
+            steps = int(eval_step / get_rank_size() * ssd_constants.DEFAULT_BATCH_SIZE /
+                        FLAGS.train_batch_size)
+            print('###################################', steps)
+
+            tf.logging.info('Starting training cycle for %d steps.' % steps)
+            run_config, params = construct_run_config()
+
+            train_params = dict(params)
+            hwlog.remark_print(key=hwlog.CURRENT_EPOCH, value=train_params['num_examples_per_epoch'])
+            train_params['batch_size'] = FLAGS.train_batch_size
+            train_estimator = NPUEstimator(
+                model_fn=ssd_model.ssd_model_fn,
+                model_dir=FLAGS.model_dir,
+                config=run_config,
+                params=train_params)
+            tf.logging.info(params)
+            train_estimator.train(
+                input_fn=dataloader.SSDInputReader(
+                    FLAGS.training_file_pattern,
+                    params['transpose_input'],
+                    is_training=True),
+                steps=steps)
+
+            if SUCCESS:
+                break
+
+            current_step = current_step + steps
+
+            tf.logging.info('Starting evaluation cycle at step %d.' % current_step)
+            # Run evaluation at the given step.
+            eval_params = dict(params)
+            eval_params['batch_size'] = FLAGS.eval_batch_size
+            eval_estimator = NPUEstimator(
+                model_fn=ssd_model.ssd_model_fn,
+                model_dir=FLAGS.model_dir,
+                config=run_config,
+                params=eval_params)
+
+            predictions = list(
+                eval_estimator.predict(
+                    input_fn=dataloader.SSDInputReader(
+                        FLAGS.validation_file_pattern,
+                        is_training=False)))
+
+            coco_eval(predictions, current_step, summary_writer, coco_gt, params['use_cocoeval_cc'], False)
+        summary_writer.close()
+
+    elif FLAGS.mode == 'eval':
+        coco_gt = coco_metric.create_coco(
+            FLAGS.val_json_file, use_cpp_extension=params['use_cocoeval_cc'])
+        eval_params = dict(params)
+        eval_params['batch_size'] = FLAGS.eval_batch_size
+        eval_estimator = NPUEstimator(
+            model_fn=ssd_model.ssd_model_fn,
+            model_dir=FLAGS.model_dir,
+            config=run_config,
+            params=eval_params)
+
+        output_dir = os.path.join(FLAGS.model_dir, 'eval')
+        tf.gfile.MakeDirs(output_dir)
+        # Summary writer writes out eval metrics.
+        summary_writer = tf.summary.FileWriter(output_dir)
+        ckpt = tf.train.latest_checkpoint(FLAGS.model_dir)
+        tf.logging.info('Starting to evaluate on newest checkpoint.')
+        predictions = list(
+            eval_estimator.predict(
+                checkpoint_path=ckpt,
+                input_fn=dataloader.SSDInputReader(
+                    FLAGS.validation_file_pattern,
+                    is_training=False)))
+        tf.logging.info('Starting to cal coco ap.')
+        current_step = int(os.path.basename(ckpt).split('-')[1])
+
+        coco_eval(predictions, current_step, summary_writer, coco_gt,
+                  params['use_cocoeval_cc'], False)
+
+        tf.logging.info('end to evaluate.')
+
+        summary_writer.close()
+
+    npu_shutdown = npu_ops.shutdown_system()
+    init_sess.run(npu_shutdown)
+    init_sess.close()
+
+if __name__ == '__main__':
+    # Report environment, configuration and run metadata through hwlog, then
+    # hand over to absl, which parses the flags and calls main().
+    hwlog.ROOT_DIR = os.path.split(os.path.abspath(__file__))[0]
+    cpu_info, npu_info, framework_info, os_info, benchmark_version = get_environment_info("tensorflow")
+    config_info = get_model_parameter("tensorflow_config")
+    # NOTE(review): these values look copied from another benchmark (dataset
+    # "imagenet1024"; base_lr differs from ssd_constants.BASE_LEARNING_RATE) -
+    # confirm they are what should be reported for SSD.
+    initinal_data = {"base_lr": 0.01, "dataset": "imagenet1024", "optimizer": "SGD", "loss_scale": 512}
+    hwlog.remark_print(key=hwlog.CPU_INFO, value=cpu_info)
+    hwlog.remark_print(key=hwlog.NPU_INFO, value=npu_info)
+    hwlog.remark_print(key=hwlog.OS_INFO, value=os_info)
+    hwlog.remark_print(key=hwlog.FRAMEWORK_INFO, value=framework_info)
+    hwlog.remark_print(key=hwlog.BENCHMARK_VERSION, value=benchmark_version)
+    hwlog.remark_print(key=hwlog.CONFIG_INFO, value=config_info)
+    hwlog.remark_print(key=hwlog.BASE_LR, value=initinal_data.get("base_lr"))
+    hwlog.remark_print(key=hwlog.DATASET, value=initinal_data.get("dataset"))
+    hwlog.remark_print(key=hwlog.OPT_NAME, value=initinal_data.get("optimizer"))
+    hwlog.remark_print(key=hwlog.LOSS_SCALE, value=initinal_data.get("loss_scale"))
+    # NOTE(review): "batchsize" is not a key of initinal_data, so .get()
+    # returns None and None is what gets reported here.
+    hwlog.remark_print(key=hwlog.INPUT_BATCH_SIZE, value=initinal_data.get("batchsize"))
+    tf.logging.set_verbosity(tf.logging.INFO)
+    app.run(main)
@@ -0,0 +1,500 @@
+# Copyright 2018 Google. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Model defination for the SSD Model.
+
+Defines model_fn of SSD for TF Estimator. The model_fn includes SSD
+model architecture, loss function, learning rate schedule, and evaluation
+procedure.
+
+T.-Y. Lin, P. Goyal, R. Girshick, K. He, and P. Dollar
+Focal Loss for Dense Object Detection. arXiv:1708.02002
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import os
+import tensorflow as tf
+
+from object_detection import box_coder
+from object_detection import box_list
+from object_detection import faster_rcnn_box_coder
+
+from tensorflow.python.estimator import model_fn as model_fn_lib
+
+import dataloader
+import ssd_architecture
+import ssd_constants
+from benchmark_log import hwlog
+from benchmark_log.basic_utils import get_environment_info
+from benchmark_log.basic_utils import get_model_parameter
+
+
+def get_rank_size():
+    return int(os.environ['RANK_SIZE'])
+
+def select_top_k_scores(scores_in, pre_nms_num_detections=5000):
+  """Select top_k scores and indices for each class.
+
+  Args:
+    scores_in: a Tensor with shape [batch_size, N, num_classes], which stacks
+      class logit outputs on all feature levels. The N is the number of total
+      anchors on all levels. The num_classes is the number of classes predicted
+      by the model.
+    pre_nms_num_detections: Number of candidates before NMS.
+
+  Returns:
+    scores and indices: Tensors with shape [batch_size, pre_nms_num_detections,
+      num_classes].
+  """
+  scores_trans = tf.transpose(scores_in, perm=[0, 2, 1])
+
+  top_k_scores, top_k_indices = tf.nn.top_k(
+      scores_trans, k=pre_nms_num_detections, sorted=True)
+
+  return tf.transpose(top_k_scores, [0, 2, 1]), tf.transpose(
+      top_k_indices, [0, 2, 1])
+
+
+def concat_outputs(cls_outputs, box_outputs):
+  """Concatenate predictions into a single tensor.
+
+  This function takes the dicts of class and box prediction tensors and
+  concatenates them into a single tensor for comparison with the ground truth
+  boxes and class labels.
+  Args:
+    cls_outputs: an OrderDict with keys representing levels and values
+      representing logits in [batch_size, height, width,
+      num_anchors * num_classses].
+    box_outputs: an OrderDict with keys representing levels and values
+      representing box regression targets in
+      [batch_size, height, width, num_anchors * 4].
+  Returns:
+    concatenanted cls_outputs and box_outputs.
+  """
+  assert set(cls_outputs.keys()) == set(box_outputs.keys())
+
+  # This sort matters. The labels assume a certain order based on
+  # ssd_constants.FEATURE_SIZES, and this sort matches that convention.
+  keys = sorted(cls_outputs.keys())
+  batch_size = int(cls_outputs[keys[0]].shape[0])
+
+  flat_cls = []
+  flat_box = []
+
+  for i, k in enumerate(keys):
+    # TODO(taylorrobie): confirm that this reshape, transpose,
+    # reshape is correct.
+    scale = ssd_constants.FEATURE_SIZES[i] # 不同特征尺度, 38,19,10,5,3,1
+    split_shape = (ssd_constants.NUM_DEFAULTS[i], ssd_constants.NUM_CLASSES) # （4,81）、（6,81）...
+    assert cls_outputs[k].shape[3] == split_shape[0] * split_shape[1]
+    intermediate_shape = (batch_size, scale, scale) + split_shape # (32,38,38)+ (4,81)=(32,38,38,4,81)
+    final_shape = (batch_size, scale ** 2 * split_shape[0], split_shape[1]) # (32, 38^2 * 4, 81)
+    flat_cls.append(tf.reshape(tf.reshape(
+        cls_outputs[k], intermediate_shape), final_shape))
+
+    split_shape = (ssd_constants.NUM_DEFAULTS[i], 4) # (4,4), (6,4)...
+    assert box_outputs[k].shape[3] == split_shape[0] * split_shape[1]
+    intermediate_shape = (batch_size, scale, scale) + split_shape # (32, 19,19) + (6,4) 为避免歧义，以第二个default box为例
+    final_shape = (batch_size, scale ** 2 * split_shape[0], split_shape[1]) # (32, 19^2 * 6, 4)
+    flat_box.append(tf.reshape(tf.reshape(
+        box_outputs[k], intermediate_shape), final_shape))
+
+  return tf.concat(flat_cls, axis=1), tf.concat(flat_box, axis=1)
+
+
+def _localization_loss(pred_locs, gt_locs, gt_labels, num_matched_boxes):
+  """Computes the localization loss.
+
+  Computes the localization loss using smooth l1 loss.
+  Args:
+    pred_locs: a dict from index to tensor of predicted locations. The shape
+      of each tensor is [batch_size, num_anchors, 4].
+    gt_locs: a list of tensors representing box regression targets in
+      [batch_size, num_anchors, 4].
+    gt_labels: a list of tensors that represents the classification groundtruth
+      targets. The shape is [batch_size, num_anchors, 1].
+    num_matched_boxes: the number of anchors that are matched to a groundtruth
+      targets, used as the loss normalizater. The shape is [batch_size].
+  Returns:
+    box_loss: a float32 representing total box regression loss.
+  """
+  keys = sorted(pred_locs.keys())
+  box_loss = 0
+  for i, k in enumerate(keys):
+    gt_label = gt_labels[i]
+    gt_loc = gt_locs[i]
+    pred_loc = tf.reshape(pred_locs[k], gt_loc.shape)
+    mask = tf.greater(gt_label, 0)
+    float_mask = tf.cast(mask, tf.float32)
+
+    smooth_l1 = tf.reduce_sum(
+        tf.losses.huber_loss(
+            gt_loc, pred_loc, reduction=tf.losses.Reduction.NONE),
+        axis=-1)
+    smooth_l1 = tf.multiply(smooth_l1, float_mask)
+    box_loss = box_loss + tf.reduce_sum(
+        smooth_l1, axis=list(range(1, smooth_l1.shape.ndims)))
+
+  # TODO(taylorrobie): Confirm that normalizing by the number of boxes matches
+  # reference
+  return tf.reduce_mean(box_loss / num_matched_boxes)
+
+
+@tf.custom_gradient
+def _softmax_cross_entropy(logits, label):
+  """Helper function to compute softmax cross entropy loss.
+
+  The loss is computed as log-sum-exp of the logits minus the logit of the
+  labelled class, with a hand-written gradient registered through
+  tf.custom_gradient.
+
+  Args:
+    logits: class logits; the last axis is the class axis.
+    label: integer class ids, one-hot encoded here with depth
+      ssd_constants.NUM_CLASSES.
+
+  Returns:
+    The per-element loss (logits' shape without the last axis) and, as
+    required by tf.custom_gradient, the gradient function.
+  """
+  # Subtract the per-row maximum before exponentiating.
+  shifted_logits = logits - tf.expand_dims(tf.reduce_max(logits, -1), -1)
+  exp_shifted_logits = tf.math.exp(shifted_logits)
+  sum_exp = tf.reduce_sum(exp_shifted_logits, -1)
+  log_sum_exp = tf.math.log(sum_exp)
+  one_hot_label = tf.one_hot(label, ssd_constants.NUM_CLASSES)
+  # Keep only the shifted logit that belongs to the labelled class.
+  shifted_logits = tf.reduce_sum(shifted_logits * one_hot_label, -1)
+  loss = log_sum_exp - shifted_logits
+
+  def grad(dy):
+    # First element: (softmax - one_hot) scaled by the upstream gradient, i.e.
+    # the gradient with respect to `logits`.
+    # Second element: `dy` is returned as the entry for `label`.
+    # NOTE(review): presumably a placeholder since labels are not trained --
+    # confirm.
+    return (exp_shifted_logits / tf.expand_dims(sum_exp, -1) -
+            one_hot_label) * tf.expand_dims(dy, -1), dy
+
+  return loss, grad
+
+
+def _classification_loss(pred_labels, gt_labels, num_matched_boxes):
+  """Computes the classification loss.
+
+  Computes the classification loss with hard negative mining: all positive
+  anchors (label > 0) contribute, plus the negative anchors whose cross
+  entropy is at least a per-image threshold picked from the sorted negative
+  losses.
+
+  Args:
+    pred_labels: a dict from index to tensor of predicted class. The shape
+      of the tensor is [batch_size, num_anchors, num_classes].
+    gt_labels: a list of tensor that represents the classification groundtruth
+      targets. The shape is [batch_size, num_anchors, 1].
+    num_matched_boxes: the number of anchors that are matched to a groundtruth
+      targets. This is used as the loss normalizer.
+
+  Returns:
+    class_loss: a float32 representing the classification loss, i.e. the
+      batch mean of the per-image loss divided by num_matched_boxes.
+  """
+  keys = sorted(pred_labels.keys())
+  batch_size = gt_labels[0].shape[0]
+  cross_entropy = []
+  for i, k in enumerate(keys):
+    gt_label = gt_labels[i]
+    pred_label = tf.reshape(
+        pred_labels[k],
+        gt_label.get_shape().as_list() + [ssd_constants.NUM_CLASSES])
+    cross_entropy.append(
+        tf.reshape(
+            _softmax_cross_entropy(pred_label, gt_label), [batch_size, -1]))
+
+
+  # Put the rest of the loss computation on one device to avoid excessive
+  # communication inside topk_mask with spatial partition
+  #with tf.device(tf.contrib.tpu.core(0)):
+  cross_entropy = tf.concat(cross_entropy, 1)
+  gt_label = tf.concat([tf.reshape(l, [batch_size, -1]) for l in gt_labels],
+                         1)
+  mask = tf.greater(gt_label, 0)
+  float_mask = tf.cast(mask, tf.float32)
+
+  # Hard example mining: zero out the loss of positive anchors so only
+  # negatives compete in the top-k selections below.
+  neg_masked_cross_entropy = cross_entropy * (1 - float_mask)
+
+
+  # The sorted negative losses are collected in three top_k stages
+  # (4096 + 4096 + 540 = 8732 values per image).
+  # NOTE(review): presumably staged because a single top_k over all anchors
+  # is a problem on the target device -- confirm.
+  value1, _ = tf.math.top_k(neg_masked_cross_entropy, k=4096)
+  kth1 = tf.reduce_min(value1, 1, keepdims=True)
+  # Keep only entries strictly below the smallest value of stage one.
+  mask1 = tf.cast(tf.less(neg_masked_cross_entropy, kth1), tf.float32)
+
+  value2, _ = tf.math.top_k(tf.multiply(neg_masked_cross_entropy, mask1), k=4096)
+  kth2 = tf.reduce_min(value2, 1, keepdims=True)
+  # Keep only entries strictly below the smallest value of stage two.
+  mask2 = tf.cast(tf.less(neg_masked_cross_entropy, kth2), tf.float32)
+
+  value3, _ = tf.math.top_k(tf.multiply(neg_masked_cross_entropy, mask2), k=540)
+
+  value = tf.concat([value1, value2, value3], axis=1)
+
+  # Index of the threshold inside `value`; clamped to 8731, the last valid
+  # column of the 8732-wide tensor built above.
+  num_neg_boxes = tf.minimum(
+          tf.to_int32(num_matched_boxes) * ssd_constants.NEGS_PER_POSITIVE, 8731)
+  # Per-image threshold loss, then the mask of entries at or above it.
+  large_neg_ce = tf.batch_gather(value, num_neg_boxes[:, tf.newaxis])
+  top_k_neg_mask = tf.cast(tf.greater_equal(neg_masked_cross_entropy, large_neg_ce), tf.float32)
+
+
+
+  # Sum the loss over positives and the selected hard negatives per image.
+  class_loss = tf.reduce_sum(
+        tf.multiply(cross_entropy, float_mask + top_k_neg_mask), axis=1)
+
+
+  # TODO(taylorrobie): Confirm that normalizing by the number of boxes matches
+  # reference
+  return tf.reduce_mean(class_loss / num_matched_boxes)
+
+
+def detection_loss(cls_outputs, box_outputs, labels):
+  """Computes total detection loss.
+
+  Computes total detection loss including box and class loss from all levels.
+  Args:
+    cls_outputs: an OrderDict with keys representing levels and values
+      representing logits in [batch_size, height, width, num_anchors].
+    box_outputs: an OrderDict with keys representing levels and values
+      representing box regression targets in
+      [batch_size, height, width, num_anchors * 4].
+    labels: the dictionary that returned from dataloader that includes
+      groundturth targets.
+  Returns:
+    total_loss: a float32 representing total loss reducing from class and box
+      losses from all levels.
+    cls_loss: a float32 representing total class loss.
+    box_loss: a float32 representing total box regression loss.
+  """
+  if isinstance(labels[ssd_constants.BOXES], dict):
+    gt_boxes = list(labels[ssd_constants.BOXES].values())
+    gt_classes = list(labels[ssd_constants.CLASSES].values())
+  else:
+    gt_boxes = [labels[ssd_constants.BOXES]]
+    gt_classes = [labels[ssd_constants.CLASSES]]
+    cls_outputs, box_outputs = concat_outputs(cls_outputs, box_outputs)
+    cls_outputs = {'flatten': cls_outputs}
+    box_outputs = {'flatten': box_outputs}
+
+  box_loss = _localization_loss(box_outputs, gt_boxes, gt_classes,
+                                labels[ssd_constants.NUM_MATCHED_BOXES])
+  class_loss = _classification_loss(cls_outputs, gt_classes,
+                                    labels[ssd_constants.NUM_MATCHED_BOXES])
+
+  return class_loss + box_loss, class_loss, box_loss
+
+
+def update_learning_rate_schedule_parameters(params):
+  """Updates params that are related to the learning rate schedule.
+
+  Args:
+    params: a parameter dictionary that includes learning_rate, lr_warmup_epoch,
+      first_lr_drop_epoch, and second_lr_drop_epoch.
+  """
+  batch_size = params['batch_size']
+  # Learning rate is proportional to the batch size
+  steps_per_epoch = params['num_examples_per_epoch'] / batch_size // get_rank_size()
+  params['lr_warmup_step'] = int(params['lr_warmup_epoch'] * steps_per_epoch)
+  params['cos_decay_step'] = int(
+      params['cos_decay_epoch'] * steps_per_epoch)
+
+
+def learning_rate_schedule(params, global_step):
+  """Handles learning rate scaling, linear warmup, and learning rate decay.
+
+  Args:
+    params: A dictionary that defines hyperparameters of model.
+    global_step: A tensor representing current global step.
+
+  Returns:
+    A tensor representing current learning rate.
+  """
+  base_learning_rate = params['base_learning_rate']
+  lr_warmup_step = params['lr_warmup_step']
+  cos_decay_step = params['cos_decay_step']
+  batch_size = params['batch_size']
+  scaling_factor = get_rank_size() * batch_size / ssd_constants.DEFAULT_BATCH_SIZE
+  adjusted_learning_rate = base_learning_rate * scaling_factor
+  learning_rate = (tf.cast(global_step, dtype=tf.float32) /
+                   lr_warmup_step) * adjusted_learning_rate
+
+  learning_rate = tf.where(global_step < lr_warmup_step, learning_rate,
+                           tf.train.cosine_decay(adjusted_learning_rate, global_step, cos_decay_step, alpha=0.01))
+
+  return learning_rate
+
+
+class ExamplesPerSecondHook(tf.train.SessionRunHook):
+  def __init__(
+      self,
+      batch_size,
+      lr=0,
+      loss=0,
+      every_n_steps=100,
+      every_n_secs=None,):
+
+
+    if (every_n_steps is None) == (every_n_secs is None):
+      raise ValueError('exactly one of every_n_steps'
+                       ' and every_n_secs should be provided.')
+
+    self._timer = tf.train.SecondOrStepTimer(
+        every_steps=every_n_steps, every_secs=every_n_secs)
+
+    self._step_train_time = 0
+    self._total_steps = 0
+    self._batch_size = batch_size
+    self._lr = lr
+    self._loss = loss
+
+  def begin(self):
+    self._global_step_tensor = tf.compat.v1.train.get_global_step()
+    if self._global_step_tensor is None:
+      raise RuntimeError(
+          'Global step should be created to use StepCounterHook.')
+
+  def before_run(self, run_context):  # pylint: disable=unused-argument
+    return tf.train.SessionRunArgs([self._global_step_tensor, self._lr, self._loss])
+
+  def after_run(self, run_context, run_values):
+    _ = run_context
+
+    global_step, lr, loss = run_values.results
+    if self._timer.should_trigger_for_step(global_step):
+
+      elapsed_time, elapsed_steps = self._timer.update_last_triggered_step(
+          global_step)
+      if elapsed_time is not None:
+        steps_per_sec = elapsed_steps / elapsed_time
+        self._step_train_time += elapsed_time
+        self._total_steps += elapsed_steps
+
+        current_examples_per_sec = steps_per_sec * self._batch_size
+        tf.logging.info('%s: %g, %s: %s, %s: %s', 'FPS', current_examples_per_sec, 'learning rate', lr, 'loss', loss)
+        hwlog.remark_print(key=hwlog.FPS, value='%7.1f' % current_examples_per_sec)
+
+
+
+def _model_fn(features, labels, mode, params, model):
+  """Model defination for the SSD model based on ResNet-50.
+
+  Args:
+    features: the input image tensor with shape [batch_size, height, width, 3].
+      The height and width are fixed and equal.
+    labels: the input labels in a dictionary. The labels include class targets
+      and box targets which are dense label maps. The labels are generated from
+      get_input_fn function in data/dataloader.py
+    mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
+    params: the dictionary defines hyperparameters of model. The default
+      settings are in default_hparams function in this file.
+    model: the SSD model outputs class logits and box regression outputs.
+
+  Returns:
+    spec: the EstimatorSpec or TPUEstimatorSpec to run training, evaluation,
+      or prediction.
+  """
+  if mode == tf.estimator.ModeKeys.PREDICT:
+    labels = features
+    features = labels.pop('image')
+
+  features -= tf.constant(
+        ssd_constants.NORMALIZATION_MEAN, shape=[1, 1, 3], dtype=features.dtype)
+
+  features /= tf.constant(
+        ssd_constants.NORMALIZATION_STD, shape=[1, 1, 3], dtype=features.dtype)
+
+  def _model_outputs():
+    return model(
+        features, params, is_training_bn=(mode == tf.estimator.ModeKeys.TRAIN))
+
+
+  cls_outputs, box_outputs = _model_outputs()
+
+  # First check if it is in PREDICT mode.
+  if mode == tf.estimator.ModeKeys.PREDICT:
+    flattened_cls, flattened_box = concat_outputs(cls_outputs, box_outputs)
+    ssd_box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
+        scale_factors=ssd_constants.BOX_CODER_SCALES)
+
+    anchors = box_list.BoxList(
+        tf.convert_to_tensor(dataloader.DefaultBoxes()('ltrb')))
+
+    decoded_boxes = box_coder.batch_decode(
+        encoded_boxes=flattened_box, box_coder=ssd_box_coder, anchors=anchors)
+
+    pred_scores = tf.nn.softmax(flattened_cls, axis=2)
+
+    pred_scores, indices = select_top_k_scores(pred_scores,
+                                               ssd_constants.MAX_NUM_EVAL_BOXES)
+
+    predictions = dict(
+          labels,
+          indices=indices,
+          pred_scores=pred_scores,
+          pred_box=decoded_boxes,
+    )
+
+    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
+
+  # Load pretrained model from checkpoint.
+  if params['resnet_checkpoint'] and mode == tf.estimator.ModeKeys.TRAIN:
+
+    def scaffold_fn():
+      """Loads pretrained model through scaffold function."""
+      tf.train.init_from_checkpoint(params['resnet_checkpoint'], {
+          '/': 'resnet%s/' % ssd_constants.RESNET_DEPTH,
+      })
+      return tf.train.Scaffold()
+  else:
+    scaffold_fn = None
+
+  # Set up training loss and learning rate.
+  update_learning_rate_schedule_parameters(params)
+  global_step = tf.train.get_or_create_global_step()
+  learning_rate = learning_rate_schedule(params, global_step)
+  # cls_loss and box_loss are for logging. only total_loss is optimized.
+  total_loss, cls_loss, box_loss = detection_loss(
+      cls_outputs, box_outputs, labels)
+
+  total_loss += params['weight_decay'] * tf.add_n(
+      [tf.nn.l2_loss(v) for v in tf.trainable_variables()])
+
+  if mode == tf.estimator.ModeKeys.TRAIN:
+    total_loss_t = tf.reduce_mean(tf.reshape(total_loss, [1]))
+    cls_loss_t = tf.reduce_mean(tf.reshape(cls_loss, [1]))
+    box_loss_t = tf.reduce_mean(tf.reshape(box_loss, [1]))
+    learning_rate_t = tf.reduce_mean(tf.reshape(learning_rate, [1]))
+    tf.summary.scalar('total_loss', total_loss_t)
+    tf.summary.scalar('cls_loss_t', cls_loss_t)
+    tf.summary.scalar('box_loss_t', box_loss_t)
+    tf.summary.scalar('learning_rate_t', learning_rate_t)
+
+    optimizer = tf.train.MomentumOptimizer(
+        learning_rate, momentum=ssd_constants.MOMENTUM)
+    from npu_bridge.estimator.npu.npu_optimizer import NPUDistributedOptimizer
+    optimizer = NPUDistributedOptimizer(optimizer)  # 使用NPU分布式计算，更新梯度
+
+    # Batch norm requires update_ops to be added as a train_op dependency.
+    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
+
+    examples_sec_hook = ExamplesPerSecondHook(get_rank_size() * 32, learning_rate, total_loss)
+
+    train_op = tf.group(optimizer.minimize(total_loss, global_step),
+                        update_ops)
+    return model_fn_lib.EstimatorSpec(
+        mode=mode, loss=total_loss, train_op=train_op, scaffold=scaffold_fn(),
+        training_hooks=[examples_sec_hook])
+
+  if mode == tf.estimator.ModeKeys.EVAL:
+    raise NotImplementedError
+
+
+def ssd_model_fn(features, labels, mode, params):
+  """SSD model."""
+  return _model_fn(features, labels, mode, params, model=ssd_architecture.ssd)
+
+
+def default_hparams():
+  # TODO(taylorrobie): replace params useages with global constants.
+  return tf.contrib.training.HParams(
+
+      num_examples_per_epoch=120000,
+      lr_warmup_epoch=0.8,
+      cos_decay_epoch=106,
+      weight_decay=ssd_constants.WEIGHT_DECAY,
+      base_learning_rate=ssd_constants.BASE_LEARNING_RATE,
+      eval_every_checkpoint=False,
+      transpose_input=False,
+      use_cocoeval_cc=False
+  )
@@ -0,0 +1,197 @@
+# Alexnet for Tensorflow 
+
+This repository provides a script and recipe to train the AlexNet model.
+
+## Table Of Contents
+
+* [Model overview](#model-overview)
+  * [Model Architecture](#model-architecture)  
+  * [Default configuration](#default-configuration)
+* [Data augmentation](#data-augmentation)
+* [Setup](#setup)
+  * [Requirements](#requirements)
+* [Quick start guide](#quick-start-guide)
+* [Advanced](#advanced)
+  * [Command line arguments](#command-line-arguments)
+  * [Training process](#training-process)
+* [Performance](#performance)
+  * [Results](#results)
+    * [Training accuracy results](#training-accuracy-results)
+    * [Training performance results](#training-performance-results)
+
+
+    
+
+## Model overview
+
+AlexNet model from
+`Alex Krizhevsky. "One weird trick for parallelizing convolutional neural networks". <https://arxiv.org/abs/1404.5997>.`
+reference implementation:  <https://pytorch.org/docs/stable/_modules/torchvision/models/alexnet.html#alexnet>
+### Model architecture
+
+
+
+### Default configuration
+
+The following sections introduce the default configurations and hyperparameters for AlexNet model.
+
+#### Optimizer
+
+This model uses Momentum optimizer from Tensorflow with the following hyperparameters:
+
+- Momentum : 0.9
+- Learning rate (LR) : 0.06
+- LR schedule: cosine_annealing
+- Batch size : 128 
+- Weight decay :  0.0001. 
+- Label smoothing = 0.1
+- We train for:
+  - 150 epochs ->  60.1% top1 accuracy
+
+#### Data augmentation
+
+This model uses the following data augmentation:
+
+- For training:
+  - RandomResizeCrop, scale=(0.08, 1.0), ratio=(0.75, 1.333)
+  - RandomHorizontalFlip, prob=0.5
+  - Normalize, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)
+- For inference:
+  - Resize to (256, 256)
+  - CenterCrop to (224, 224)
+  - Normalize, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)
+
+## Setup
+The following section lists the requirements to start training the Alexnet model.
+### Requirements
+
+Tensorflow
+NPU environment
+
+## Quick Start Guide
+
+### 1. Clone the repository
+
+```shell
+git clone xxx
+cd  Model_zoo_Alexnet_HARD
+```
+
+### 2. Download and preprocess the dataset
+
+1. Download the ImageNet dataset
+2. Extract the training data
+3. The train and val images are under the train/ and val/ directories, respectively. All images within one folder have the same label.
+
+### 3. Train
+- train on single NPU
+    - **edit** *scripts/train_alexnet_1p.sh*( see example below)
+    - bash scripts/run_npu_1p.sh
+- train on 8 NPUs
+    - **edit** *scripts/train_alexnet_8p.sh*(see example below)
+    - bash scripts/run_npu_8p.sh 
+
+
+for example:
+- case for single NPU
+    - In scripts/train_alexnet_1p.sh, the python command should look as follows. For more detailed command line arguments, please refer to [Command line arguments](#command-line-arguments)
+```shell
+python3.7 ${EXEC_DIR}/train.py --rank_size=1 \
+	--iterations_per_loop=100 \
+	--batch_size=256 \
+	--data_dir=/path/to/dataset \
+	--mode=train \
+	--lr=0.015 \
+	--log_dir=./model_1p > ./train_${device_id}.log 2>&1 
+```
+run the program  
+```
+bash scripts/run_npu_1p.sh
+```
+- case for 8 NPUs
+    - In scripts/train_alexnet_8p.sh, the python command should look as follows.
+```shell 
+python3.7 ${EXEC_DIR}/train.py --rank_size=8 \
+	--iterations_per_loop=100 \
+	--batch_size=128 \
+	--data_dir=/path/to/dataset \
+	--mode=train \
+	--lr=0.06 \
+	--log_dir=./model_8p > ./train_${device_id}.log 2>&1 
+```
+run the program  
+```
+bash scripts/run_npu_8p.sh
+```
+
+### 4. Test
+- same procedure as training except 2 following modifications
+    - change `--mode=train` to `--mode=evaluate`
+    - add `--checkpoint_dir=/path/to/checkpoints`
+
+
+## Advanced
+### Command line arguments
+
+```
+  --data_dir                        train data dir
+  --num_classes                     num of classes in ImageNet（default:1000)
+  --image_size                      image size of the dataset
+  --batch_size                      mini-batch size (default: 128) per npu
+  --pretrained                      path of pretrained model
+  --lr                              initial learning rate
+  --max_epochs                      max epoch num to train the model
+  --warmup_epochs                   warmup epoch(when batchsize is large)
+  --weight_decay                    weight decay (default: 1e-4)
+  --momentum                        momentum(default: 0.9)
+  --label_smoothing                 use label smooth in CE, default 0.1
+  --save_summary_steps              logging interval(default:100)
+  --log_dir                         path to save checkpoint and log
+  --log_name                        name of log file
+  --save_checkpoints_steps          the interval to save checkpoint
+  --mode                            mode to run the program (train, evaluate)
+  --checkpoint_dir                  path to checkpoint for evaluation
+  --max_train_steps                 max number of training steps 
+  --synthetic                       whether to use synthetic data or not
+  --version                         weight initialization for model
+  --do_checkpoint                   whether to save checkpoint or not 
+  --rank_size                       local rank of distributed(default: 0)
+  --group_size                      world size of distributed(default: 1)
+  --max_train_steps                 number of training step , default : None, when set ,it will override the max_epoch
+```
+for a complete list of options, please refer to `train.py`
+### Training process
+
+All the results of the training will be stored in the directory `results`.
+Script will store:
+ - checkpoints.
+ - log.
+ 
+## Performance
+
+### Results
+
+Our results were obtained by running the applicable training script. To achieve the same results, follow the steps in the Quick Start Guide.
+
+#### Training accuracy results
+
+| **epochs** |   Top1/Top5   |
+| :--------: | :-----------: |
+|    150     | 60.12%/82.06% |
+
+#### Training performance results
+
+| **NPUs** | train performance |
+| :------: | :---------------: |
+|    8     |   30000+  img/s   |
+
+
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,9 @@
+{
+    "server_count": "1",
+    "server_list": [{
+        "device": [{devices}],
+        "server_id": "127.0.0.1"
+    }],
+    "status": "completed",
+    "version": "1.0"
+}
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Environment setup for NPU training: exports library, Python and tool paths
+# for whichever Ascend package is installed, then a set of runtime switches.
+
+# Remove everything under the host-0 slog directory each time this file runs.
+rm -rf /var/log/npu/slog/host-0/*
+# main env
+# Use the nnae install when its directory exists, otherwise ascend-toolkit.
+# LD_LIBRARY_PATH is assigned outright in both branches (any previous value
+# is dropped), while PYTHONPATH and PATH are appended to.
+if [ -d /usr/local/Ascend/nnae/latest ];then
+
+	export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib/:/usr/local/Ascend/nnae/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/local/Ascend/driver/tools/hccn_tool/:/usr/local/mpirun4.0/lib
+	export PYTHONPATH=$PYTHONPATH:/usr/local/Ascend/tfplugin/latest/tfplugin/python/site-packages:/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/tfplugin/latest/tfplugin/python/site-packages
+	export PATH=$PATH:/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin:/usr/local/mpirun4.0/bin
+	export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp
+else
+	export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/local/mpirun4.0/lib
+	# NOTE(review): $projectDir is appended only in this branch and is not set
+	# anywhere in this file -- confirm the caller defines it.
+	export PYTHONPATH=$PYTHONPATH:/usr/local/Ascend/tfplugin/latest/tfplugin/python/site-packages:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest//fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/tfplugin/python/site-packages:$projectDir
+	export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:/usr/local/mpirun4.0/bin
+	export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
+	
+fi
+
+# Job identification and collective-communication timeout.
+export DDK_VERSION_FLAG=1.60.T17.B830
+export HCCL_CONNECT_TIMEOUT=600
+export JOB_ID=9999001
+
+# Runtime switches for the Ascend910 target.
+# NOTE(review): the DUMP_* / PRINT_MODEL flags presumably enable graph dumps
+# and model printing, which can be verbose -- confirm they are intended for
+# benchmark runs.
+export NEW_GE_FE_ID=1
+export GE_AICPU_FLAG=1
+export SOC_VERSION=Ascend910
+export DUMP_GE_GRAPH=1
+export DUMP_GRAPH_LEVEL=3
+export PRINT_MODEL=1
+export SLOG_PRINT_TO_STDOUT=1
+
+
+# Profiling is switched off; FP_POINT / BP_POINT name graph nodes that are
+# presumably the forward/backward trace anchors when it is on -- TODO confirm.
+export PROFILING_MODE=false
+export PROFILING_OPTIONS=training_trace
+export FP_POINT=ssd/block7-conv1x1/Relu
+export BP_POINT=gradients/resnet34/Relu_grad/ReluGrad
+export AICPU_PROFILING_MODE=false
@@ -0,0 +1,70 @@
+#!/bin/bash
+# Launcher for SSD-Resnet34 training: creates the job directory, patches
+# EVAL_STEPS in ssd_constants.py, then starts scripts/train.sh either through
+# mpirun (cluster mode) or once per device in the background.
+#
+# Arguments:
+#   $1 rank_size   number of devices to train on
+#   $2 yamlPath    path of the yaml configuration file
+#   $3 toolsPath   directory that holds get_params_for_yaml.sh
+#   $4 CLUSTER     cluster flag (read only inside a docker container)
+#   $5 MPIRUN_ALL_IP  list of node IPs (read only inside a docker container)
+
+rank_size=$1
+yamlPath=$2
+
+toolsPath=$3
+# The cluster arguments are only read when /.dockerenv exists.
+if [ -f /.dockerenv ];then
+        CLUSTER=$4
+        MPIRUN_ALL_IP="$5"
+        export CLUSTER=${CLUSTER}
+fi
+
+# currentDir is the parent directory of the directory holding this script.
+currentDir=$(cd "$(dirname "$0")/.."; pwd)
+model_name=$(cd $currentDir/..;basename `pwd`)
+
+# Load the configuration from the yaml file; the helper's output is eval'ed.
+# NOTE(review): presumably this defines max_steps, mpirun_ip and the
+# device_group_Np variables used below -- confirm against the helper.
+eval $(${toolsPath}/get_params_for_yaml.sh ${yamlPath} "tensorflow_config")
+
+#mkdir train job path
+# ${currentDir%train*} strips the shortest trailing match of "train*".
+currtime=`date +%Y%m%d%H%M%S`
+mkdir -p ${currentDir%train*}/train/result/tf_ssd_resnet34/training_job_${currtime}/
+train_job_dir=${currentDir%train*}/train/result/tf_ssd_resnet34/training_job_${currtime}/
+echo "[`date +%Y%m%d-%H:%M:%S`] [INFO] ${train_job_dir} &"
+# Despite the variable name, this points at the Python constants file.
+jsonFilePath=${currentDir}/code/ssd_constants.py
+
+echo "start to modify inner config file"
+echo "jsonfilepath is "${jsonFilePath}
+
+# Rewrite the EVAL_STEPS tuple in ssd_constants.py in place with max_steps.
+sed -i "s/EVAL_STEPS = (.*,)$/EVAL_STEPS = (${max_steps},)/g" ${jsonFilePath}
+
+# Device list: when no device group is configured for this rank_size, or
+# rank_size is 8 or more, use the ids 0..rank_size-1 in order.
+eval device_group=\$device_group_${rank_size}p
+if [ x"${device_group}" == x"" ] || [ ${rank_size} -ge 8 ];then
+    device_group="$(seq 0 "$(expr $rank_size - 1)")"
+fi
+
+# Take the first character of the space-stripped device list as the first
+# device id; the hw log is linked from the directory named after it.
+# NOTE(review): only a single character is taken, so a first id of 10 or
+# more would be truncated.
+device_group_str=`echo ${device_group} | sed 's/ //g'`
+first_device_id=`echo ${device_group_str: 0:1}`
+
+if [ x"${CLUSTER}" == x"True" ];then
+    # ln hw log
+    ln -snf ${train_job_dir}/0/hw_SSD-Resnet34.log ${train_job_dir}
+    # Copy the yaml and the patched constants file to every other node.
+    this_ip=$(hostname -I |awk '{print $1}')
+    for ip in $MPIRUN_ALL_IP;do
+        if [ x"$ip" != x"$this_ip" ];then
+            scp $yamlPath root@$ip:$yamlPath
+            scp $jsonFilePath root@$ip:$jsonFilePath
+        fi
+    done
+    export PATH=$PATH:/usr/local/mpirun4.0/bin
+    # In cluster mode train.sh gets device id 0 and ${CLUSTER} as its sixth
+    # argument.
+    mpirun -H ${mpirun_ip} \
+    --bind-to none -map-by slot\
+    --allow-run-as-root \
+    --mca btl_tcp_if_exclude lo,docker0,endvnic,virbr0,vethf40501b,docker_gwbridge,br-f42ac38052b4\
+    --prefix /usr/local/mpirun4.0/ \
+    ${currentDir}/scripts/train.sh 0 $rank_size $yamlPath $currtime ${toolsPath} ${CLUSTER}
+else
+    # ln hw log
+    ln -snf ${train_job_dir}/${first_device_id}/hw_SSD-Resnet34.log ${train_job_dir}
+    # Start one background train.sh per device; the sixth argument is the
+    # rank id, counted up from 0.
+    rank_id=0
+    for device_id in $device_group;do
+      ${currentDir}/scripts/train.sh $device_id $rank_size $yamlPath $currtime ${toolsPath} $rank_id &
+      let rank_id++
+    done
+fi
+# Block until every background training process has exited.
+wait
+
+#echo "[`date +%Y%m%d-%H:%M:%S`] [INFO] all train exit " >> ${currentDir}/result/main.log
+
@@ -0,0 +1,104 @@
+#!/usr/bin/env bash
+
+device_id=$1
+rank_size=$2
+yamlPath=$3
+
+currentDir=$(cd "$(dirname "$0")/.."; pwd)
+#model_name="SSD-Resnet34"
+currtime=$4
+toolsPath=$5
+export YAML_PATH=$3
+
+mkdir -p ${currentDir%train*}/train/result/tf_ssd_resnet34/training_job_${currtime}/
+train_job_dir=${currentDir%train*}/train/result/tf_ssd_resnet34/training_job_${currtime}/
+
+
+# 从 yaml 获取配置
+
+eval $(${toolsPath}/get_params_for_yaml.sh ${yamlPath} "tensorflow_config")
+
+source ${currentDir}/config/npu_set_env.sh
+
+
+# 声明变量
+export REMARK_LOG_FILE=hw_SSD-Resnet34.log  # 打点日志文件名称， 必须hw_后跟模型名称小写
+# 添加日志打点模块路径
+benchmark_log_path=${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils
+export PYTHONPATH=$PYTHONPATH:${benchmark_log_path}
+
+# user env
+export DDK_VERSION_FLAG=1.60.T17.B830
+export HCCL_CONNECT_TIMEOUT=600
+export RANK_TABLE_FILE=${currentDir}/config/${rank_size}p.json
+export RANK_SIZE=${rank_size}
+export SLOG_PRINT_TO_STDOUT=0
+export DEVICE_ID=${device_id}
+export DEVICE_INDEX=$RANK_ID
+export JOB_ID=990
+export FUSION_TENSOR_SIZE=1000000000
+
+startTime=`date +%Y%m%d-%H:%M:%S`
+startTime_s=`date +%s`
+
+cd ${train_job_dir}
+curd_dir=${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils/atlasboost
+export PYTHONPATH=$PYTHONPATH:${curd_dir}
+
+if [ x"$6" != x"True" ];then
+        rank_id=$6
+        export RANK_ID=$6
+else
+        device_id_mo=$(python3.7 -c "import src.tensorflow.mpi_ops as atlasboost;atlasboost.init(); \
+                device_id = atlasboost.local_rank();cluster_device_id = str(device_id); \
+                atlasboost.set_device_id(device_id);print(atlasboost.rank())")
+        device_id_mo=`echo $device_id_mo`
+        rank_id=${device_id_mo##* }
+        export RANK_ID=${rank_id}
+        device=${device_id_mo##*deviceid = }
+        device_id=${device%% phyid=*}
+        export DEVICE_ID=${device_id}
+        hccljson=${train_job_dir}/*.json
+        cp ${hccljson} ${currentDir}/config/${rank_size}p.json
+fi
+
+# Each device runs in its own sub-directory of the job directory.
+mkdir -p "${train_job_dir}/${device_id}"
+cd "${train_job_dir}/${device_id}"
+
+# $6 == "True" marks a multi-host, multi-device run; CLUSTER is exported so
+# the Python process can see it.
+if [ "$6" == "True" ]; then
+    export CLUSTER=True
+fi
+# Option values are quoted: the file patterns contain "*" and must reach
+# ssd_main.py verbatim instead of being subject to pathname expansion or word
+# splitting. stdout and stderr go to the per-device training log.
+# Keep this the last command of the section: the statement that follows reads
+# its exit status through $?.
+python3.7 "${currentDir}/code/ssd_main.py" \
+    --mode="${runmode}" \
+    --train_batch_size="${train_batch_size}" \
+    --training_file_pattern="${training_file_pattern}" \
+    --resnet_checkpoint="${resnet_checkpoint}" \
+    --validation_file_pattern="${validation_file_pattern}" \
+    --val_json_file="${val_json_file}" \
+    --eval_batch_size="${eval_batch_size}" \
+    --num_epochs="${num_epochs}" \
+    --model_dir="${model_dir}" > "${train_job_dir}/train_${device_id}.log" 2>&1
+
+# Exit status of the training command that ran immediately before this point;
+# this assignment must stay the first command of the section.
+train_status=$?
+if [ "${train_status}" -eq 0 ]; then
+    train_result="success"
+else
+    train_result="failed"
+fi
+# Emit the result marker on stdout and append it to both the per-device
+# training log and the hw marker log.
+result_msg=":::ABK 1.0.0 SSD-Resnet34 train ${train_result}"
+echo "${result_msg}"
+echo "${result_msg}" >> "${train_job_dir}/train_${device_id}.log"
+echo "${result_msg}" >> "${train_job_dir}/${device_id}/hw_SSD-Resnet34.log"
+
+# Elapsed wall-clock time since startTime_s, reported as h:m:s (not
+# zero-padded, as before).
+endTime=$(date +%Y%m%d-%H:%M:%S)
+endTime_s=$(date +%s)
+sumTime=$(( endTime_s - startTime_s ))
+hour=$(( sumTime / 3600 ))
+min=$(( (sumTime - hour * 3600) / 60 ))
+sec=$(( sumTime - hour * 3600 - min * 60 ))
+echo "${hour}:${min}:${sec}"
+echo ":::ABK 1.0.0 SSD-Resnet34 train total time ${hour}:${min}:${sec}" >> "${train_job_dir}/${device_id}/hw_SSD-Resnet34.log"
+
+