[add]上传训练benchmark by z00560161

This commit is contained in:
liang_chaoming@huawei.com
2020-10-19 20:22:23 +08:00
parent 22b83024f5
commit 82522e2f61
1225 changed files with 345421 additions and 0 deletions
@@ -0,0 +1 @@
#!/bin/bash
@@ -0,0 +1,25 @@
# MobileNet_pytorch训练说明
### 1. 模型训练参数配置
在train/yaml/MobileNet.yaml中修改相应配置, 配置项含义:
```
pytorch_config:
data_url: 数据集路径
epoches: 跑多少个epoch
batch_size: 单p默认768 2p 1534 4p 3072 8p默认6144
lr: 默认参数1p 0.03 2p 0.06 4p 0.12 8p 0.24
seed: 123456
docker_image: docker 镜像名称:版本号
```
------
@@ -0,0 +1 @@
# MobileNetV2 NPU训练
@@ -0,0 +1,29 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
g_feat_in = []
g_feat_out = []
g_grad_in = []
g_grad_out = []
def forward_hook_fn(module, input, output):
g_feat_in.append(input)
g_feat_out.append(output)
print(module)
print(input)
print(output)
def backward_hook_fn(module, grad_input, grad_output):
g_grad_in.append(grad_input)
g_grad_out.append(grad_output)
print(module)
print(grad_input)
print(grad_input)
@@ -0,0 +1,498 @@
import argparse
import os
import random
import shutil
import time
import warnings
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.multiprocessing as mp
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from mobilenet import mobilenet_v2
import torch.npu
# from torch.utils.tensorboard import SummaryWriter
from apex import amp
import numpy as np
from hook import *
from benchmark_log import hwlog
from benchmark_log.basic_utils import get_environment_info
from benchmark_log.basic_utils import get_model_parameter
# model_names = sorted(name for name in models.__dict__
# if name.islower() and not name.startswith("__")
# and callable(models.__dict__[name]))
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--data', metavar='DIR', default='/dataset/imagenet',
help='path to dataset')
# parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
# choices=model_names,
# help='model architecture: ' +
# ' | '.join(model_names) +
# ' (default: resnet18)')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
help='number of data loading workers (default: 4)')
parser.add_argument('--epochs', default=90, type=int, metavar='N',
help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=256, type=int,
metavar='N',
help='mini-batch size (default: 256), this is the total '
'batch size of all GPUs on the current node when '
'using Data Parallel or Distributed Data Parallel')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
metavar='W', help='weight decay (default: 1e-4)',
dest='weight_decay')
parser.add_argument('-p', '--print-freq', default=10, type=int,
metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
help='evaluate model on validation set')
parser.add_argument('--pretrained', dest='pretrained', action='store_true',
help='use pre-trained model')
parser.add_argument('--world-size', default=-1, type=int,
help='number of nodes for distributed training')
parser.add_argument('--rank', default=-1, type=int,
help='node rank for distributed training')
parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str,
help='distributed backend')
parser.add_argument('--seed', default=None, type=int,
help='seed for initializing training. ')
parser.add_argument('--gpu', default=None, type=int,
help='GPU id to use.')
parser.add_argument('--multiprocessing-distributed', action='store_true',
help='Use multi-processing distributed training to launch '
'N processes per node, which has N GPUs. This is the '
'fastest way to use PyTorch for either single node or '
'multi node data parallel training')
parser.add_argument('--amp', default=False, action='store_true',
help='use amp to train the model')
parser.add_argument('--opt-level', default=None, type=str, help='apex optimize level')
parser.add_argument('--loss-scale-value', default='1024', type=int, help='static loss scale value')
parser.add_argument('--summary-path', default=None, type=str, help='event file path')
parser.add_argument('--stop-step-num', default=None, type=int, help='after the stop-step, killing the training task')
parser.add_argument('--device', default='npu:0', type=str, help='device type, cpu or npu:x or cuda:x')
parser.add_argument('--eval-freq', default=10, type=int, help='test interval')
parser.add_argument('--hook', default=False, action='store_true', help='pytorch hook')
best_acc1 = 0
cur_step = 0
def seed_everything(seed, device):
random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if 'cuda' in device:
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
cudnn.deterministic = True
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
def main():
args = parser.parse_args()
if args.seed is not None:
seed_everything(args.seed, args.device)
warnings.warn('You have chosen to seed training. '
'This will turn on the CUDNN deterministic setting, '
'which can slow down your training considerably! '
'You may see unexpected behavior when restarting '
'from checkpoints.')
main_worker(args)
def main_worker(args):
global best_acc1
global cur_step
# sum_writer = SummaryWriter(args.summary_path)
global_step = -1
if 'npu' in args.device:
torch.npu.set_device(args.device)
if 'cuda' in args.device:
torch.cuda.set_device(args.device)
model = mobilenet_v2()
# set hook
if args.hook:
modules = model.named_modules()
for name, module in modules:
module.register_forward_hook(forward_hook_fn)
module.register_backward_hook(backward_hook_fn)
optimizer = torch.optim.SGD(model.parameters(), args.lr,
momentum=args.momentum,
weight_decay=args.weight_decay)
criterion = nn.CrossEntropyLoss()
if 'npu' in args.device or 'cuda' in args.device:
model = model.to(args.device)
criterion = criterion.to(args.device)
if args.amp:
model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale_value)
# optionally resume from a checkpoint
if args.resume:
if os.path.isfile(args.resume):
print("=> loading checkpoint '{}'".format(args.resume))
checkpoint = torch.load(args.resume, map_location=args.device)
args.start_epoch = checkpoint['epoch']
best_acc1 = checkpoint['best_acc1']
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
if args.amp:
amp.load_state_dict(checkpoint['amp'])
print("=> loaded checkpoint '{}' (epoch {})"
.format(args.resume, checkpoint['epoch']))
else:
print("=> no checkpoint found at '{}'".format(args.resume))
# Data loading code
traindir = os.path.join(args.data, 'train')
valdir = os.path.join(args.data, 'val')
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
train_dataset = datasets.ImageFolder(
traindir,
transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
normalize,
]))
train_sampler = None
train_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True)
val_loader = torch.utils.data.DataLoader(
datasets.ImageFolder(valdir, transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
normalize,
])),
batch_size=args.batch_size, shuffle=False,
num_workers=args.workers, pin_memory=True, drop_last=True)
if args.evaluate:
validate(val_loader, model, criterion, args, global_step)
return
for epoch in range(args.start_epoch, args.epochs):
# train for one epoch
global_step = train(train_loader, model, criterion, optimizer, epoch, args, global_step)
if (epoch + 1) % (args.eval_freq) == 0 or epoch == args.epochs - 1:
# evaluate on validation set
acc1 = validate(val_loader, model, criterion, args, global_step)
# remember best acc@1 and save checkpoint
is_best = acc1 > best_acc1
best_acc1 = max(acc1, best_acc1)
# save checkpoint
if args.amp:
save_checkpoint({
'epoch': epoch + 1,
'state_dict': model.state_dict(),
'best_acc1': best_acc1,
'optimizer': optimizer.state_dict(),
'amp': amp.state_dict(),
}, is_best)
else:
save_checkpoint({
'epoch': epoch + 1,
'state_dict': model.state_dict(),
'best_acc1': best_acc1,
'optimizer': optimizer.state_dict(),
}, is_best)
if args.stop_step_num is not None and cur_step >= args.stop_step_num:
break
# sum_writer.close()
def train(train_loader, model, criterion, optimizer, epoch, args, global_step, sum_writer=None):
global cur_step
if args.seed is not None:
seed_everything(args.seed + epoch, args.device)
batch_time = AverageMeter('Time', ':6.3f')
data_time = AverageMeter('Data', ':6.3f')
learning_rate = AverageMeter('LR', ':2.8f')
losses = AverageMeter('Loss', ':6.8f')
top1 = AverageMeter('Acc@1', ':6.2f')
top5 = AverageMeter('Acc@5', ':6.2f')
progress = ProgressMeter(
len(train_loader),
[batch_time, data_time, learning_rate, losses, top1, top5],
prefix="Epoch: [{}]".format(epoch))
# switch to train mode
model.train()
end = time.time()
steps_per_epoch = len(train_loader)
for i, (images, target) in enumerate(train_loader):
global_step = epoch * steps_per_epoch + i
cur_step = global_step
lr = adjust_learning_rate(optimizer, global_step, steps_per_epoch, args)
learning_rate.update(lr)
# sum_writer.add_scalar('learning rate', lr, global_step)
# measure data loading time
data_time.update(time.time() - end)
if 'npu' in args.device:
target = target.to(torch.int32)
if 'npu' in args.device or 'cuda' in args.device:
images = images.to(args.device, non_blocking=True)
target = target.to(args.device, non_blocking=True)
# output = None
# loss = None
# with torch.autograd.profiler.profile(record_shapes=True, use_npu=True) as prof:
# compute output
output = model(images)
loss = criterion(output, target)
# measure accuracy and record loss
acc1, acc5 = accuracy(output, target, topk=(1, 5))
losses.update(loss.item(), images.size(0))
top1.update(acc1[0], images.size(0))
top5.update(acc5[0], images.size(0))
# compute gradient and do SGD step
optimizer.zero_grad()
if args.amp:
with amp.scale_loss(loss, optimizer) as scaled_loss:
scaled_loss.backward()
else:
loss.backward()
# sum_writer.add_scalar('Accuary/train/top1', acc1, global_step)
# sum_writer.add_scalar('Accuary/train/top5', acc5, global_step)
# sum_writer.add_scalar('Loss/train/loss', loss, global_step)
optimizer.step()
# for name, parms in model.named_parameters():
# print('-->name:', name, ' -->grad_value_max:', torch.max(parms.grad), ' -->grad_value_min:', torch.min(parms.grad))
# print(prof.key_averages().table())
# prof.export_chrome_trace("mobilenetv2_{}_npu.prof".format(i))
# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()
if i % args.print_freq == 0:
progress.display(i)
if args.stop_step_num is not None and cur_step >= args.stop_step_num:
break
print(' * FPS@all {:.3f}'.format(args.batch_size / batch_time.avg))
hwlog.remark_print(key=hwlog.FPS, value=' * FPS@all {:.3f}'.format(args.batch_size / batch_time.avg))
return global_step
def validate(val_loader, model, criterion, args, global_step, sum_writer=None):
batch_time = AverageMeter('Time', ':6.3f')
losses = AverageMeter('Loss', ':.4e')
top1 = AverageMeter('Acc@1', ':6.2f')
top5 = AverageMeter('Acc@5', ':6.2f')
progress = ProgressMeter(
len(val_loader),
[batch_time, losses, top1, top5],
prefix='Test: ')
# switch to evaluate mode
model.eval()
with torch.no_grad():
end = time.time()
for i, (images, target) in enumerate(val_loader):
if 'npu' in args.device:
target = target.to(torch.int32)
if 'npu' in args.device or 'cuda' in args.device:
images = images.to(args.device, non_blocking=True)
target = target.to(args.device, non_blocking=True)
# compute output
output = model(images)
loss = criterion(output, target)
# measure accuracy and record loss
acc1, acc5 = accuracy(output, target, topk=(1, 5))
losses.update(loss.item(), images.size(0))
top1.update(acc1[0], images.size(0))
top5.update(acc5[0], images.size(0))
# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()
if i % args.print_freq == 0:
progress.display(i)
# TODO: this should also be done with the ProgressMeter
print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
.format(top1=top1, top5=top5))
hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP1, value="{top1.avg:.3f}".format(top1=top1))
hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP5, value="{top5.avg:.3f}".format(top5=top5))
#if not args.evaluate:
# # sum_writer.add_scalar('Loss/validation/loss', losses, global_step)
# sum_writer.add_scalar('Accuary/validation/top1', top1.avg, global_step)
# sum_writer.add_scalar('Accuary/validation/top5', top5.avg, global_step)
return top1.avg
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
torch.save(state, filename)
if is_best:
shutil.copyfile(filename, 'model_best.pth.tar')
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self, name, fmt=':f'):
self.name = name
self.fmt = fmt
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
def __str__(self):
fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
return fmtstr.format(**self.__dict__)
class ProgressMeter(object):
def __init__(self, num_batches, meters, prefix=""):
self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
self.meters = meters
self.prefix = prefix
def display(self, batch):
entries = [self.prefix + self.batch_fmtstr.format(batch)]
entries += [str(meter) for meter in self.meters]
print('\t'.join(entries))
# 日志打点
train_acc1 = str(entries).split("Acc@1")[1].strip().split(" ")[0]
train_acc5 = str(entries).split("Acc@5")[1].strip().split(" ")[0]
hwlog.remark_print(key=hwlog.TRAIN_ACCURACY_TOP1, value=train_acc1)
hwlog.remark_print(key=hwlog.TRAIN_ACCURACY_TOP5, value=train_acc5)
def _get_batch_fmtstr(self, num_batches):
num_digits = len(str(num_batches // 1))
fmt = '{:' + str(num_digits) + 'd}'
return '[' + fmt + '/' + fmt.format(num_batches) + ']'
def adjust_learning_rate(optimizer, global_step, steps_per_epoch, args):
"""Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
# lr = args.lr * (0.98 ** (epoch / 2.5))
lr = args.lr * (0.98 ** (global_step // int(steps_per_epoch * 2.5)))
for param_group in optimizer.param_groups:
param_group['lr'] = lr
return lr
def accuracy(output, target, topk=(1,)):
"""Computes the accuracy over the k top predictions for the specified values of k"""
with torch.no_grad():
maxk = max(topk)
batch_size = target.size(0)
_, pred = output.topk(maxk, 1, True, True)
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
res = []
for k in topk:
correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
res.append(correct_k.mul_(100.0 / batch_size))
return res
if __name__ == '__main__':
hwlog.ROOT_DIR = os.path.split(os.path.abspath(__file__))[0]
cpu_info, npu_info, framework_info, os_info, benchmark_version = get_environment_info("pytorch")
config_info = get_model_parameter("pytorch_config")
initinal_data = {"base_lr": 0.1, "dataset": "imagenet", "optimizer": "SGD", "loss_scale": 1024}
hwlog.remark_print(key=hwlog.CPU_INFO, value=cpu_info)
hwlog.remark_print(key=hwlog.NPU_INFO, value=npu_info)
hwlog.remark_print(key=hwlog.OS_INFO, value=os_info)
hwlog.remark_print(key=hwlog.FRAMEWORK_INFO, value=framework_info)
hwlog.remark_print(key=hwlog.BENCHMARK_VERSION, value=benchmark_version)
hwlog.remark_print(key=hwlog.CONFIG_INFO, value=config_info)
hwlog.remark_print(key=hwlog.BASE_LR, value=initinal_data.get("base_lr"))
hwlog.remark_print(key=hwlog.DATASET, value=initinal_data.get("dataset"))
hwlog.remark_print(key=hwlog.OPT_NAME, value=initinal_data.get("optimizer"))
hwlog.remark_print(key=hwlog.LOSS_SCALE, value=initinal_data.get("loss_scale"))
main()
@@ -0,0 +1,179 @@
from torch import nn
# from .utils import load_state_dict_from_url
__all__ = ['MobileNetV2', 'mobilenet_v2']
model_urls = {
'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
}
def _make_divisible(v, divisor, min_value=None):
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
:param v:
:param divisor:
:param min_value:
:return:
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
class ConvBNReLU(nn.Sequential):
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
padding = (kernel_size - 1) // 2
super(ConvBNReLU, self).__init__(
nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
nn.BatchNorm2d(out_planes),
nn.ReLU6(inplace=True)
# nn.ReLU(inplace=True)
)
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, stride, expand_ratio):
super(InvertedResidual, self).__init__()
self.stride = stride
assert stride in [1, 2]
hidden_dim = int(round(inp * expand_ratio))
self.use_res_connect = self.stride == 1 and inp == oup
layers = []
if expand_ratio != 1:
# pw
layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
layers.extend([
# dw
ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
])
self.conv = nn.Sequential(*layers)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class MobileNetV2(nn.Module):
def __init__(self,
num_classes=1000,
width_mult=1.0,
inverted_residual_setting=None,
round_nearest=8,
block=None):
"""
MobileNet V2 main class
Args:
num_classes (int): Number of classes
width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
inverted_residual_setting: Network structure
round_nearest (int): Round the number of channels in each layer to be a multiple of this number
Set to 1 to turn off rounding
block: Module specifying inverted residual building block for mobilenet
"""
super(MobileNetV2, self).__init__()
if block is None:
block = InvertedResidual
input_channel = 32
last_channel = 1280
if inverted_residual_setting is None:
inverted_residual_setting = [
# t, c, n, s
[1, 16, 1, 1],
[6, 24, 2, 2],
[6, 32, 3, 2],
[6, 64, 4, 2],
[6, 96, 3, 1],
[6, 160, 3, 2],
[6, 320, 1, 1],
]
# only check the first element, assuming user knows t,c,n,s are required
if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
raise ValueError("inverted_residual_setting should be non-empty "
"or a 4-element list, got {}".format(inverted_residual_setting))
# building first layer
input_channel = _make_divisible(input_channel * width_mult, round_nearest)
self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
features = [ConvBNReLU(3, input_channel, stride=2)]
# building inverted residual blocks
for t, c, n, s in inverted_residual_setting:
output_channel = _make_divisible(c * width_mult, round_nearest)
for i in range(n):
stride = s if i == 0 else 1
features.append(block(input_channel, output_channel, stride, expand_ratio=t))
input_channel = output_channel
# building last several layers
features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
# make it nn.Sequential
self.features = nn.Sequential(*features)
# building classifier
self.classifier = nn.Sequential(
# p=0.2
nn.Dropout(0.2),
nn.Linear(self.last_channel, num_classes),
)
# weight initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.BatchNorm2d):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.zeros_(m.bias)
def _forward_impl(self, x):
# This exists since TorchScript doesn't support inheritance, so the superclass method
# (this one) needs to have a name other than `forward` that can be accessed in a subclass
x = self.features(x)
# Cannot use "squeeze" as batch-size can be 1 => must use reshape with x.shape[0]
x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1)
x = self.classifier(x)
return x
def forward(self, x):
return self._forward_impl(x)
def mobilenet_v2(pretrained=False, progress=True, **kwargs):
"""
Constructs a MobileNetV2 architecture from
`"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
model = MobileNetV2(**kwargs)
# if pretrained:
# state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'],
# progress=progress)
# model.load_state_dict(state_dict)
return model
@@ -0,0 +1,18 @@
{
"startCfg":
[
{
"jobID": "123456789",
"deviceID": ["0"],
"features":
[
{
"name": "task_trace"
},
{
"name": "training_trace"
}
]
}
]
}
@@ -0,0 +1 @@
# MobileNetV2 NPU训练
@@ -0,0 +1,22 @@
export ASCEND_HOME=/usr/local/Ascend
export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/
export PYTHONPATH=$PYTHONPATH:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/te:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/topi:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/hccl:/usr/local/Ascend/ascend-toolkit/latest/tfplugin/python/site-packages:$currentDir
export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin
export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
export SLOG_PRINT_TO_STDOUT=0
su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 7"
export TASK_QUEUE_ENABLE=0
taskset -c 111-150 python3 densenet121_1p_main.py \
--workers 40 \
--arch densenet121 \
--npu 7 \
--lr 0.1 \
--momentum 0.9 \
--amp \
--batch-size 256 \
--epoch 90 \
--evaluate \
--resume checkpoint.pth.tar \
--data /opt/npu/dataset/imagenet
@@ -0,0 +1,29 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
g_feat_in = []
g_feat_out = []
g_grad_in = []
g_grad_out = []
def forward_hook_fn(module, input, output):
g_feat_in.append(input)
g_feat_out.append(output)
print(module)
print(input)
print(output)
def backward_hook_fn(module, grad_input, grad_output):
g_grad_in.append(grad_input)
g_grad_out.append(grad_output)
print(module)
print(grad_input)
print(grad_input)
@@ -0,0 +1,556 @@
import argparse
import os
import random
import shutil
import time
import warnings
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.multiprocessing as mp
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from mobilenet import mobilenet_v2
import torch.npu
import torch.cuda
from torch.utils.tensorboard import SummaryWriter
from apex import amp
import numpy as np
from hook import *
# model_names = sorted(name for name in models.__dict__
# if name.islower() and not name.startswith("__")
# and callable(models.__dict__[name]))
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--data', metavar='DIR', default='/dataset/imagenet',
help='path to dataset')
# parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
# choices=model_names,
# help='model architecture: ' +
# ' | '.join(model_names) +
# ' (default: resnet18)')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
help='number of data loading workers (default: 4)')
parser.add_argument('--epochs', default=90, type=int, metavar='N',
help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=256, type=int,
metavar='N',
help='mini-batch size (default: 256), this is the total '
'batch size of all GPUs on the current node when '
'using Data Parallel or Distributed Data Parallel')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
metavar='W', help='weight decay (default: 1e-4)',
dest='weight_decay')
parser.add_argument('-p', '--print-freq', default=10, type=int,
metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
help='evaluate model on validation set')
parser.add_argument('--pretrained', dest='pretrained', action='store_true',
help='use pre-trained model')
# parser.add_argument('--world-size', default=-1, type=int,
# help='number of nodes for distributed training')
parser.add_argument('--node-nums', default=1, type=int,
help='number of nodes for distributed training')
parser.add_argument('--rank', default=0, type=int,
help='node rank for distributed training')
parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str,
help='distributed backend')
parser.add_argument('--seed', default=None, type=int,
help='seed for initializing training. ')
parser.add_argument('--gpu', default=None, type=int,
help='GPU id to use.')
parser.add_argument('--multiprocessing-distributed', action='store_true',
help='Use multi-processing distributed training to launch '
'N processes per node, which has N GPUs. This is the '
'fastest way to use PyTorch for either single node or '
'multi node data parallel training')
parser.add_argument('--addr', default='10.136.181.115', type=str,
help='master addr')
parser.add_argument('--device-id', default=None, type=int,
help='GPU id to use.')
parser.add_argument('--amp', default=False, action='store_true',
help='use amp to train the model')
parser.add_argument('--opt-level', default=None, type=str, help='apex optimize level')
parser.add_argument('--loss-scale-value', default='1024', type=int, help='static loss scale value')
parser.add_argument('--summary-path', default=None, type=str, help='event file path')
parser.add_argument('--stop-step-num', default=None, type=int, help='after the stop-step, killing the training task')
parser.add_argument('--device', default='npu', type=str, help='device type, cpu or npu:x or cuda')
parser.add_argument('--eval-freq', default=10, type=int, help='test interval')
parser.add_argument('--hook', default=False, action='store_true', help='pytorch hook')
best_acc1 = 0
cur_step = 0
def seed_everything(seed, device):
random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if 'cuda' in device:
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
cudnn.deterministic = True
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
def main():
args = parser.parse_args()
if args.seed is not None:
seed_everything(args.seed, args.device)
warnings.warn('You have chosen to seed training. '
'This will turn on the CUDNN deterministic setting, '
'which can slow down your training considerably! '
'You may see unexpected behavior when restarting '
'from checkpoints.')
os.environ['MASTER_ADDR'] = args.addr
os.environ['MASTER_PORT'] = '90000'
args.distributed = args.node_nums > 1 or args.multiprocessing_distributed
if not args.distributed:
print('dist param is not correct!')
return
if args.device == 'npu':
# device_nums_per_node = torch.npu.device_count()
device_nums_per_node = 2
elif args.device == 'cuda':
device_nums_per_node = torch.cuda.device_count()
else:
print('unknown device type[npu/cuda]!')
return
if args.multiprocessing_distributed:
args.world_size = device_nums_per_node * args.node_nums # world_size means nums of all devices or nums of processes
if args.device == 'npu':
# main_worker(args.device_id, ngpus_per_node, args) # 需要外层脚本启多个进程
mp.spawn(main_worker, nprocs=device_nums_per_node, args=(device_nums_per_node, args)) # 这里起子进程,就不需要外层脚本启多个进程了
else:
mp.spawn(main_worker, nprocs=device_nums_per_node, args=(device_nums_per_node, args))
else:
print('dist param is not correct!')
return
# main_worker(args.device_id, device_nums_per_node, args)
# first param must be the index of PID
def main_worker(pid_idx, device_nums_per_node, args):
global best_acc1
global cur_step
# dist set
sum_writer = SummaryWriter(args.summary_path)
global_step = -1
if args.distributed:
if args.multiprocessing_distributed:
# For multiprocessing distributed training, rank needs to be the
# global rank among all the processes
args.rank = pid_idx # args.rank * device_nums_per_node + pid_idx
args.pid_idx = pid_idx
if args.device == 'npu':
dist.init_process_group(backend=args.dist_backend, world_size=args.world_size, rank=args.rank)
else:
dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
world_size=args.world_size, rank=args.rank)
if args.distributed:
# For multiprocessing distributed, DistributedDataParallel constructor
# should always set the single device scope, otherwise,
# DistributedDataParallel will use all available devices.
if args.device == 'npu':
loc = 'npu:{}'.format(pid_idx)
torch.npu.set_device(loc)
else:
torch.cuda.set_device(pid_idx)
args.batch_size = int(args.batch_size / device_nums_per_node)
args.workers = int((args.workers + device_nums_per_node - 1) / device_nums_per_node)
# Data loading code
traindir = os.path.join(args.data, 'train')
valdir = os.path.join(args.data, 'val')
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
train_dataset = datasets.ImageFolder(
traindir,
transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
normalize,
]))
if args.distributed:
train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
else:
train_sampler = None
train_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True)
val_loader = torch.utils.data.DataLoader(
datasets.ImageFolder(valdir, transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
normalize,
])),
batch_size=args.batch_size, shuffle=False,
num_workers=args.workers, pin_memory=True, drop_last=True)
# define model and train
model = mobilenet_v2()
criterion = nn.CrossEntropyLoss()
loc = None
if 'npu' == args.device:
loc = 'npu:{}'.format(pid_idx)
elif 'cuda' == args.device:
loc = 'cuda:{}'.format(pid_idx)
model = model.to(loc)
criterion = criterion.to(loc)
optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
if args.amp:
model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale_value)
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[pid_idx], broadcast_buffers=False)
# model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
# set hook
if args.hook:
modules = model.named_modules()
for name, module in modules:
module.register_forward_hook(forward_hook_fn)
module.register_backward_hook(backward_hook_fn)
# optionally resume from a checkpoint
if args.resume:
if os.path.isfile(args.resume):
print("=> loading checkpoint '{}'".format(args.resume))
checkpoint = torch.load(args.resume, map_location=args.device)
args.start_epoch = checkpoint['epoch']
best_acc1 = checkpoint['best_acc1']
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
if args.amp:
amp.load_state_dict(checkpoint['amp'])
print("=> loaded checkpoint '{}' (epoch {})"
.format(args.resume, checkpoint['epoch']))
else:
print("=> no checkpoint found at '{}'".format(args.resume))
if args.evaluate:
validate(val_loader, model, criterion, args, global_step, sum_writer)
return
for epoch in range(args.start_epoch, args.epochs):
# train for one epoch
global_step = train(train_loader, model, criterion, optimizer, epoch, args, global_step, sum_writer, device_nums_per_node)
if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
# evaluate on validation set
acc1 = validate(val_loader, model, criterion, args, global_step, sum_writer, device_nums_per_node)
# remember best acc@1 and save checkpoint
is_best = acc1 > best_acc1
best_acc1 = max(acc1, best_acc1)
# save checkpoint
if args.amp:
save_checkpoint({
'epoch': epoch + 1,
'state_dict': model.state_dict(),
'best_acc1': best_acc1,
'optimizer': optimizer.state_dict(),
'amp': amp.state_dict(),
}, is_best)
else:
save_checkpoint({
'epoch': epoch + 1,
'state_dict': model.state_dict(),
'best_acc1': best_acc1,
'optimizer': optimizer.state_dict(),
}, is_best)
if args.stop_step_num is not None and cur_step >= args.stop_step_num:
break
sum_writer.close()
def train(train_loader, model, criterion, optimizer, epoch, args, global_step, sum_writer, device_nums_per_node):
global cur_step
if args.seed is not None:
seed_everything(args.seed + epoch, args.device)
batch_time = AverageMeter('Time', ':6.3f')
data_time = AverageMeter('Data', ':6.3f')
learning_rate = AverageMeter('LR', ':2.8f')
losses = AverageMeter('Loss', ':6.8f')
top1 = AverageMeter('Acc@1', ':6.2f')
top5 = AverageMeter('Acc@5', ':6.2f')
progress = ProgressMeter(
len(train_loader),
[batch_time, data_time, learning_rate, losses, top1, top5],
prefix="Epoch: [{}]".format(epoch))
# switch to train mode
model.train()
end = time.time()
steps_per_epoch = len(train_loader)
for i, (images, target) in enumerate(train_loader):
global_step = epoch * steps_per_epoch + i
cur_step = global_step
lr = adjust_learning_rate(optimizer, global_step, steps_per_epoch, args)
learning_rate.update(lr)
sum_writer.add_scalar('learning rate', lr, global_step)
# measure data loading time
data_time.update(time.time() - end)
if 'npu' in args.device:
target = target.to(torch.int32)
loc = None
if 'npu' in args.device:
loc = 'npu:{}'.format(args.pid_idx)
elif 'cuda' in args.device:
loc = 'cuda:{}'.format(args.pid_idx)
images = images.to(loc, non_blocking=True)
target = target.to(loc, non_blocking=True)
# output = None
# loss = None
# with torch.autograd.profiler.profile(record_shapes=True, use_npu=True) as prof:
# compute output
output = model(images)
loss = criterion(output, target)
# measure accuracy and record loss
acc1, acc5 = accuracy(output, target, topk=(1, 5))
losses.update(loss.item(), images.size(0))
top1.update(acc1[0], images.size(0))
top5.update(acc5[0], images.size(0))
# compute gradient and do SGD step
optimizer.zero_grad()
if args.amp:
with amp.scale_loss(loss, optimizer) as scaled_loss:
scaled_loss.backward()
else:
loss.backward()
sum_writer.add_scalar('Accuary/train/top1', acc1, global_step)
sum_writer.add_scalar('Accuary/train/top5', acc5, global_step)
sum_writer.add_scalar('Loss/train/loss', loss, global_step)
optimizer.step()
# for name, parms in model.named_parameters():
# print('-->name:', name, ' -->grad_value_max:', torch.max(parms.grad), ' -->grad_value_min:', torch.min(parms.grad))
# print(prof.key_averages().table())
# prof.export_chrome_trace("mobilenetv2_{}_npu.prof".format(i))
# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()
if i % args.print_freq == 0:
if not args.multiprocessing_distributed or \
(args.multiprocessing_distributed and args.rank % device_nums_per_node == 0):
progress.display(i)
if not args.multiprocessing_distributed or \
(args.multiprocessing_distributed and args.rank % device_nums_per_node == 0):
print('FPS@all: {:.3f}'.format(8 * args.batch_size / batch_time.avg))
if args.stop_step_num is not None and cur_step >= args.stop_step_num:
break
return global_step
def validate(val_loader, model, criterion, args, global_step, sum_writer, device_nums_per_node):
batch_time = AverageMeter('Time', ':6.3f')
losses = AverageMeter('Loss', ':.4e')
top1 = AverageMeter('Acc@1', ':6.2f')
top5 = AverageMeter('Acc@5', ':6.2f')
progress = ProgressMeter(
len(val_loader),
[batch_time, losses, top1, top5],
prefix='Test: ')
# switch to evaluate mode
model.eval()
with torch.no_grad():
end = time.time()
for i, (images, target) in enumerate(val_loader):
if 'npu' in args.device:
target = target.to(torch.int32)
loc = None
if 'npu' in args.device:
loc = 'npu:{}'.format(args.pid_idx)
elif 'cuda' in args.device:
loc = 'cuda:{}'.format(args.pid_idx)
images = images.to(loc, non_blocking=True)
target = target.to(loc, non_blocking=True)
# compute output
output = model(images)
loss = criterion(output, target)
# measure accuracy and record loss
acc1, acc5 = accuracy(output, target, topk=(1, 5))
losses.update(loss.item(), images.size(0))
top1.update(acc1[0], images.size(0))
top5.update(acc5[0], images.size(0))
# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()
if i % args.print_freq == 0:
if not args.multiprocessing_distributed or \
(args.multiprocessing_distributed and args.rank % device_nums_per_node == 0):
progress.display(i)
# TODO: this should also be done with the ProgressMeter
print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
.format(top1=top1, top5=top5))
if not args.multiprocessing_distributed or \
(args.multiprocessing_distributed and args.rank % device_nums_per_node == 0):
print("[device id:", args.gpu, "]", '[AVG-ACC] * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1, top5=top5))
if not args.evaluate:
# sum_writer.add_scalar('Loss/validation/loss', losses, global_step)
sum_writer.add_scalar('Accuary/validation/top1', top1.avg, global_step)
sum_writer.add_scalar('Accuary/validation/top5', top5.avg, global_step)
return top1.avg
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
torch.save(state, filename)
if is_best:
shutil.copyfile(filename, 'model_best.pth.tar')
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self, name, fmt=':f'):
self.name = name
self.fmt = fmt
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
def __str__(self):
fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
return fmtstr.format(**self.__dict__)
class ProgressMeter(object):
def __init__(self, num_batches, meters, prefix=""):
self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
self.meters = meters
self.prefix = prefix
def display(self, batch):
entries = [self.prefix + self.batch_fmtstr.format(batch)]
entries += [str(meter) for meter in self.meters]
print('\t'.join(entries))
def _get_batch_fmtstr(self, num_batches):
num_digits = len(str(num_batches // 1))
fmt = '{:' + str(num_digits) + 'd}'
return '[' + fmt + '/' + fmt.format(num_batches) + ']'
def adjust_learning_rate(optimizer, global_step, steps_per_epoch, args):
"""Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
# lr = args.lr * (0.98 ** (epoch / 2.5))
lr = args.lr * (0.98 ** (global_step // int(steps_per_epoch * 2.5)))
for param_group in optimizer.param_groups:
param_group['lr'] = lr
return lr
def accuracy(output, target, topk=(1,)):
"""Computes the accuracy over the k top predictions for the specified values of k"""
with torch.no_grad():
maxk = max(topk)
batch_size = target.size(0)
_, pred = output.topk(maxk, 1, True, True)
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
res = []
for k in topk:
correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
res.append(correct_k.mul_(100.0 / batch_size))
return res
if __name__ == '__main__':
main()
@@ -0,0 +1,179 @@
from torch import nn
# from .utils import load_state_dict_from_url
__all__ = ['MobileNetV2', 'mobilenet_v2']
model_urls = {
'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
}
def _make_divisible(v, divisor, min_value=None):
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
:param v:
:param divisor:
:param min_value:
:return:
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
class ConvBNReLU(nn.Sequential):
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
padding = (kernel_size - 1) // 2
super(ConvBNReLU, self).__init__(
nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
nn.BatchNorm2d(out_planes),
nn.ReLU6(inplace=True)
# nn.ReLU(inplace=True)
)
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, stride, expand_ratio):
super(InvertedResidual, self).__init__()
self.stride = stride
assert stride in [1, 2]
hidden_dim = int(round(inp * expand_ratio))
self.use_res_connect = self.stride == 1 and inp == oup
layers = []
if expand_ratio != 1:
# pw
layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
layers.extend([
# dw
ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
])
self.conv = nn.Sequential(*layers)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class MobileNetV2(nn.Module):
def __init__(self,
num_classes=1000,
width_mult=1.0,
inverted_residual_setting=None,
round_nearest=8,
block=None):
"""
MobileNet V2 main class
Args:
num_classes (int): Number of classes
width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
inverted_residual_setting: Network structure
round_nearest (int): Round the number of channels in each layer to be a multiple of this number
Set to 1 to turn off rounding
block: Module specifying inverted residual building block for mobilenet
"""
super(MobileNetV2, self).__init__()
if block is None:
block = InvertedResidual
input_channel = 32
last_channel = 1280
if inverted_residual_setting is None:
inverted_residual_setting = [
# t, c, n, s
[1, 16, 1, 1],
[6, 24, 2, 2],
[6, 32, 3, 2],
[6, 64, 4, 2],
[6, 96, 3, 1],
[6, 160, 3, 2],
[6, 320, 1, 1],
]
# only check the first element, assuming user knows t,c,n,s are required
if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
raise ValueError("inverted_residual_setting should be non-empty "
"or a 4-element list, got {}".format(inverted_residual_setting))
# building first layer
input_channel = _make_divisible(input_channel * width_mult, round_nearest)
self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
features = [ConvBNReLU(3, input_channel, stride=2)]
# building inverted residual blocks
for t, c, n, s in inverted_residual_setting:
output_channel = _make_divisible(c * width_mult, round_nearest)
for i in range(n):
stride = s if i == 0 else 1
features.append(block(input_channel, output_channel, stride, expand_ratio=t))
input_channel = output_channel
# building last several layers
features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
# make it nn.Sequential
self.features = nn.Sequential(*features)
# building classifier
self.classifier = nn.Sequential(
# p=0.2
nn.Dropout(0.2),
nn.Linear(self.last_channel, num_classes),
)
# weight initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.BatchNorm2d):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.zeros_(m.bias)
def _forward_impl(self, x):
# This exists since TorchScript doesn't support inheritance, so the superclass method
# (this one) needs to have a name other than `forward` that can be accessed in a subclass
x = self.features(x)
# Cannot use "squeeze" as batch-size can be 1 => must use reshape with x.shape[0]
x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1)
x = self.classifier(x)
return x
def forward(self, x):
return self._forward_impl(x)
def mobilenet_v2(pretrained=False, progress=True, **kwargs):
"""
Constructs a MobileNetV2 architecture from
`"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
model = MobileNetV2(**kwargs)
# if pretrained:
# state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'],
# progress=progress)
# model.load_state_dict(state_dict)
return model
@@ -0,0 +1,638 @@
# -*- coding: utf-8 -*-
import argparse
import os
import random
import shutil
import time
import warnings
import numpy as np
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.multiprocessing as mp
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from mobilenet import mobilenet_v2
from apex import amp
from multi_epochs_dataloader import MultiEpochsDataLoader
from benchmark_log import hwlog
from benchmark_log.basic_utils import get_environment_info
from benchmark_log.basic_utils import get_model_parameter
BATCH_SIZE = 4096
OPTIMIZER_BATCH_SIZE = 4096
# model_names = sorted(name for name in models.__dict__
# if name.islower() and not name.startswith("__")
# and callable(models.__dict__[name]))
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--data', metavar='DIR', default='/opt/npu/dataset/imagenet',
help='path to dataset')
# parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet50',
# choices=model_names,
# help='model architecture: ' +
# ' | '.join(model_names) +
# ' (default: resnet18)')
parser.add_argument('-j', '--workers', default=32, type=int, metavar='N',
help='number of data loading workers (default: 4)')
parser.add_argument('--epochs', default=90, type=int, metavar='N',
help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=BATCH_SIZE, type=int,
metavar='N',
help='mini-batch size (default: 256), this is the total '
'batch size of all GPUs on the current node when '
'using Data Parallel or Distributed Data Parallel')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
metavar='W', help='weight decay (default: 1e-4)',
dest='weight_decay')
parser.add_argument('--workspace', type=str, default='./', metavar='DIR',
help='path to directory where checkpoints will be stored')
parser.add_argument('-p', '--print-freq', default=10, type=int,
metavar='N', help='print frequency (default: 10)')
parser.add_argument('-ef', '--eval-freq', default=5, type=int,
metavar='N', help='evaluate frequency (default: 5)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
help='evaluate model on validation set')
parser.add_argument('--pretrained', dest='pretrained', action='store_true',
help='use pre-trained model')
parser.add_argument('--world-size', default=-1, type=int,
help='number of nodes for distributed training')
parser.add_argument('--rank', default=-1, type=int,
help='node rank for distributed training')
parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str,
help='distributed backend')
parser.add_argument('--seed', default=None, type=int,
help='seed for initializing training. ')
parser.add_argument('--gpu', default=None, type=int,
help='GPU id to use.')
parser.add_argument('--multiprocessing-distributed', action='store_true',
help='Use multi-processing distributed training to launch '
'N processes per node, which has N GPUs. This is the '
'fastest way to use PyTorch for either single node or '
'multi node data parallel training')
parser.add_argument('-bm', '--benchmark', default=0, type=int,
metavar='N', help='set benchmark status (default: 1,run benchmark)')
parser.add_argument('--device', default='npu', type=str, help='npu or gpu')
parser.add_argument('--addr', default='10.136.181.115', type=str, help='master addr')
parser.add_argument('--checkpoint-nameprefix', default='checkpoint', type=str, help='checkpoint-nameprefix')
parser.add_argument('--checkpoint-freq', default=0, type=int,
metavar='N', help='checkpoint frequency (default: 0)'
'0: save only one file whitch per epoch;'
'n: save diff file per n epoch'
'-1:no checkpoint,not support')
# apex
parser.add_argument('--amp', default=False, action='store_true',
help='use amp to train the model')
parser.add_argument('--loss-scale', default=64., type=float,
help='loss scale using in amp, default -1 means dynamic')
parser.add_argument('--opt-level', default='O2', type=str,
help='loss scale using in amp, default -1 means dynamic')
warnings.filterwarnings('ignore')
best_acc1 = 0
def main():
args = parser.parse_args()
print("===============main()=================")
print(args)
print("===============main()=================")
os.environ['KERNEL_NAME_ID'] = str(0)
print("++++++++++++++++++ KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID'])
if args.seed is not None:
random.seed(args.seed)
torch.manual_seed(args.seed)
cudnn.deterministic = True
warnings.warn('You have chosen to seed training. '
'This will turn on the CUDNN deterministic setting, '
'which can slow down your training considerably! '
'You may see unexpected behavior when restarting '
'from checkpoints.')
os.environ['MASTER_ADDR'] = args.addr # '10.136.181.51'
os.environ['MASTER_PORT'] = '59629'
if args.gpu is not None:
warnings.warn('You have chosen a specific GPU. This will completely '
'disable data parallelism.')
if args.dist_url == "env://" and args.world_size == -1:
args.world_size = int(os.environ["WORLD_SIZE"])
args.distributed = args.world_size > 1 or args.multiprocessing_distributed
if args.device == 'npu':
ngpus_per_node = torch.npu.device_count()
else:
ngpus_per_node = torch.cuda.device_count()
if args.multiprocessing_distributed:
# Since we have ngpus_per_node processes per node, the total world_size
# needs to be adjusted accordingly
args.world_size = ngpus_per_node * args.world_size
# Use torch.multiprocessing.spawn to launch distributed processes: the
# main_worker process function
# The child process uses the environment variables of the parent process,
# we have to set KERNEL_NAME_ID for every proc
if args.device == 'npu':
# main_worker(args.gpu, ngpus_per_node, args)
mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
else:
mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
else:
# Simply call main_worker function
main_worker(args.gpu, ngpus_per_node, args)
def main_worker(gpu, ngpus_per_node, args):
global best_acc1
args.gpu = gpu
print("[npu id:", args.gpu, "]", "++++++++++++++++ before set KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID'])
os.environ['KERNEL_NAME_ID'] = str(gpu)
print("[npu id:", args.gpu, "]", "++++++++++++++++ KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID'])
if args.gpu is not None:
print("[npu id:", args.gpu, "]", "Use GPU: {} for training".format(args.gpu))
if args.distributed:
if args.dist_url == "env://" and args.rank == -1:
args.rank = int(os.environ["RANK"])
if args.multiprocessing_distributed:
# For multiprocessing distributed training, rank needs to be the
# global rank among all the processes
args.rank = args.rank * ngpus_per_node + gpu
if args.device == 'npu':
dist.init_process_group(backend=args.dist_backend, # init_method=args.dist_url,
world_size=args.world_size, rank=args.rank)
else:
dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
world_size=args.world_size, rank=args.rank)
loc = 'npu:{}'.format(args.gpu)
torch.npu.set_device(loc)
args.batch_size = int(args.batch_size / ngpus_per_node)
args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
print("[npu id:", args.gpu, "]", "===============main_worker()=================")
print("[npu id:", args.gpu, "]", args)
print("[npu id:", args.gpu, "]", "===============main_worker()=================")
# Data loading code
# traindir = os.path.join(args.data, 'train')
# valdir = os.path.join(args.data, 'val')
# normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
# std=[0.229, 0.224, 0.225])
# train_dataset = datasets.ImageFolder(
# traindir,
# transforms.Compose([
# transforms.RandomResizedCrop(224),
# transforms.RandomHorizontalFlip(),
# transforms.ToTensor(),
# normalize,
# ]))
#
# if args.distributed:
# train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
# else:
# train_sampler = None
#
# train_loader = torch.utils.data.DataLoader(
# train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
# num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True)
train_loader, train_loader_len, train_sampler = get_pytorch_train_loader(args.data,
args.batch_size,
workers=args.workers,
distributed=args.distributed)
# val_loader = torch.utils.data.DataLoader(
# datasets.ImageFolder(valdir, transforms.Compose([
# transforms.Resize(256),
# transforms.CenterCrop(224),
# transforms.ToTensor(),
# normalize,
# ])),
# batch_size=args.batch_size, shuffle=True,
# num_workers=args.workers, pin_memory=True, drop_last=True)
val_loader = get_pytorch_val_loader(args.data, args.batch_size, args.workers, distributed=False)
# create model
print("[npu id:", args.gpu, "]", "=> creating model '{}'".format('mobilenetv2'))
# model = models.__dict__[args.arch]()
model = mobilenet_v2()
model = model.to(loc)
# define loss function (criterion) and optimizer
criterion = nn.CrossEntropyLoss().to(loc)
optimizer = torch.optim.SGD(model.parameters(), args.lr,
momentum=args.momentum,
weight_decay=args.weight_decay)
if args.amp:
model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale)
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False)
# optionally resume from a checkpoint
if args.resume:
if os.path.isfile(args.resume):
print("=> loading checkpoint '{}'".format(args.resume))
checkpoint = torch.load(args.resume, map_location=loc)
args.start_epoch = checkpoint['epoch']
best_acc1 = checkpoint['best_acc1']
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
if args.amp:
amp.load_state_dict(checkpoint['amp'])
print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
else:
print("=> no checkpoint found at '{}'".format(args.resume))
cudnn.benchmark = True
if args.evaluate:
validate(val_loader, model, criterion, args, ngpus_per_node)
return
for epoch in range(args.start_epoch, args.epochs):
if args.distributed:
train_sampler.set_epoch(epoch)
# adjust_learning_rate(optimizer, epoch, args)
# train for one epoch
train(train_loader, train_loader_len, model, criterion, optimizer, epoch, args, ngpus_per_node)
if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
# evaluate on validation set
acc1 = validate(val_loader, model, criterion, args, ngpus_per_node)
# remember best acc@1 and save checkpoint
is_best = acc1 > best_acc1
best_acc1 = max(acc1, best_acc1)
if not args.multiprocessing_distributed or \
(args.multiprocessing_distributed and args.rank % ngpus_per_node == 0 and epoch == args.epochs - 1):
if args.amp:
save_checkpoint({
'epoch': epoch + 1,
'state_dict': model.state_dict(),
'best_acc1': best_acc1,
'optimizer': optimizer.state_dict(),
'amp': amp.state_dict(),
}, is_best)
else:
save_checkpoint({
'epoch': epoch + 1,
'state_dict': model.state_dict(),
'best_acc1': best_acc1,
'optimizer': optimizer.state_dict(),
}, is_best)
def train(train_loader, train_loader_len, model, criterion, optimizer, epoch, args, ngpus_per_node):
batch_time = AverageMeter('Time', ':6.3f')
data_time = AverageMeter('Data', ':6.3f')
losses = AverageMeter('Loss', ':.4e')
top1 = AverageMeter('Acc@1', ':6.2f')
top5 = AverageMeter('Acc@5', ':6.2f')
progress = ProgressMeter(
train_loader_len,
[batch_time, data_time, losses, top1, top5],
prefix="Epoch: [{}]".format(epoch))
loc = 'npu:{}'.format(args.gpu)
mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).view(1, 3, 1, 1)
std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).view(1, 3, 1, 1)
mean = mean.to(loc, non_blocking=True)
std = std.to(loc, non_blocking=True)
# switch to train mode
model.train()
end = time.time()
if args.benchmark == 1:
optimizer.zero_grad()
# steps_per_epoch = len(train_loader)
steps_per_epoch = train_loader_len
print('==========step per epoch======================', steps_per_epoch)
for i, (images, target) in enumerate(train_loader):
# measure data loading time
data_time.update(time.time() - end)
global_step = epoch * steps_per_epoch + i
lr = adjust_learning_rate(optimizer, global_step, steps_per_epoch, args)
target = target.to(torch.int32)
images = images.to(loc, non_blocking=True).to(torch.float).sub(mean).div(std)
target = target.to(loc, non_blocking=True)
# compute output
output = model(images)
# stream = torch.npu.current_stream()
# stream.synchronize()
loss = criterion(output, target)
# stream = torch.npu.current_stream()
# stream.synchronize()
# measure accuracy and record loss
acc1, acc5 = accuracy(output, target, topk=(1, 5))
losses.update(loss.item(), images.size(0))
top1.update(acc1[0], images.size(0))
top5.update(acc5[0], images.size(0))
# compute gradient and do SGD step
if args.benchmark == 0:
optimizer.zero_grad()
if args.amp:
with amp.scale_loss(loss, optimizer) as scaled_loss:
scaled_loss.backward()
else:
loss.backward()
# stream = torch.npu.current_stream()
# stream.synchronize()
if args.benchmark == 0:
optimizer.step()
elif args.benchmark == 1:
BATCH_SIZE_multiplier = int(OPTIMIZER_BATCH_SIZE / args.batch_size)
BM_optimizer_step = ((i + 1) % BATCH_SIZE_multiplier) == 0
if BM_optimizer_step:
for param_group in optimizer.param_groups:
for param in param_group['params']:
param.grad /= BATCH_SIZE_multiplier
optimizer.step()
optimizer.zero_grad()
# stream = torch.npu.current_stream()
# stream.synchronize()
# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()
if i % args.print_freq == 0:
if not args.multiprocessing_distributed or (args.multiprocessing_distributed
and args.rank % ngpus_per_node == 0):
progress.display(i)
if not args.multiprocessing_distributed or (args.multiprocessing_distributed
and args.rank % ngpus_per_node == 0):
print("[npu id:", args.gpu, "]", '* FPS@all {:.3f}'.format(ngpus_per_node * args.batch_size / batch_time.avg))
hwlog.remark_print(key=hwlog.FPS,
value=' * FPS@all {:.3f}'.format(ngpus_per_node * args.batch_size / batch_time.avg))
def validate(val_loader, model, criterion, args, ngpus_per_node):
batch_time = AverageMeter('Time', ':6.3f')
losses = AverageMeter('Loss', ':.4e')
top1 = AverageMeter('Acc@1', ':6.2f')
top5 = AverageMeter('Acc@5', ':6.2f')
progress = ProgressMeter(
len(val_loader),
[batch_time, losses, top1, top5],
prefix='Test: ')
# switch to evaluate mode
model.eval()
with torch.no_grad():
loc = 'npu:{}'.format(args.gpu)
mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).view(1, 3, 1, 1)
std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).view(1, 3, 1, 1)
mean = mean.to(loc, non_blocking=True)
std = std.to(loc, non_blocking=True)
end = time.time()
for i, (images, target) in enumerate(val_loader):
target = target.to(torch.int32)
images = images.to(loc, non_blocking=True).to(torch.float).sub(mean).div(std)
target = target.to(loc, non_blocking=True)
# compute output
output = model(images)
loss = criterion(output, target)
# measure accuracy and record loss
acc1, acc5 = accuracy(output, target, topk=(1, 5))
losses.update(loss.item(), images.size(0))
top1.update(acc1[0], images.size(0))
top5.update(acc5[0], images.size(0))
# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()
if i % args.print_freq == 0:
if not args.multiprocessing_distributed or \
(args.multiprocessing_distributed and args.rank % ngpus_per_node == 0):
progress.display(i)
# TODO: this should also be done with the ProgressMeter
if not args.multiprocessing_distributed or \
(args.multiprocessing_distributed and args.rank % ngpus_per_node == 0):
print("[npu id:", args.gpu, "]", '[AVG-ACC] * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
.format(top1=top1, top5=top5))
hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP1, value="{top1.avg:.3f}".format(top1=top1))
hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP5, value="{top5.avg:.3f}".format(top5=top5))
return top1.avg
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
torch.save(state, filename)
if is_best:
shutil.copyfile(filename, 'model_best_acc%.4f_epoch%d.pth.tar' % (state['best_acc1'], state['epoch']))
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self, name, fmt=':f'):
self.name = name
self.fmt = fmt
self.reset()
self.start_count_index = 10
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
if self.count == 0:
self.batchsize = n
self.val = val
self.count += n
if self.count > (self.start_count_index * self.batchsize):
self.sum += val * n
self.avg = self.sum / (self.count - self.start_count_index * self.batchsize)
def __str__(self):
fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
return fmtstr.format(**self.__dict__)
class ProgressMeter(object):
def __init__(self, num_batches, meters, prefix=""):
self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
self.meters = meters
self.prefix = prefix
def display(self, batch):
entries = [self.prefix + self.batch_fmtstr.format(batch)]
entries += [str(meter) for meter in self.meters]
print("[npu id:", os.environ['KERNEL_NAME_ID'], "]", '\t'.join(entries))
train_acc1 = str(entries).split("Acc@1")[1].strip().split(" ")[0]
train_acc5 = str(entries).split("Acc@5")[1].strip().split(" ")[0]
hwlog.remark_print(key=hwlog.TRAIN_ACCURACY_TOP1, value=train_acc1)
hwlog.remark_print(key=hwlog.TRAIN_ACCURACY_TOP5, value=train_acc5)
def _get_batch_fmtstr(self, num_batches):
num_digits = len(str(num_batches // 1))
fmt = '{:' + str(num_digits) + 'd}'
return '[' + fmt + '/' + fmt.format(num_batches) + ']'
# def adjust_learning_rate(optimizer, epoch, args):
# """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
# lr = args.lr * (0.1 ** (epoch // 30))
# for param_group in optimizer.param_groups:
# param_group['lr'] = lr
def adjust_learning_rate(optimizer, global_step, steps_per_epoch, args):
"""Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
# lr = args.lr * (0.98 ** (epoch / 2.5))
lr = args.lr * (0.98 ** (global_step // int(steps_per_epoch * 2.5)))
for param_group in optimizer.param_groups:
param_group['lr'] = lr
return lr
def accuracy(output, target, topk=(1,)):
"""Computes the accuracy over the k top predictions for the specified values of k"""
with torch.no_grad():
maxk = max(topk)
batch_size = target.size(0)
_, pred = output.topk(maxk, 1, True, True)
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
res = []
for k in topk:
correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
res.append(correct_k.mul_(100.0 / batch_size))
return res
def fast_collate(batch):
imgs = [img[0] for img in batch]
targets = torch.tensor([target[1] for target in batch], dtype=torch.int64)
w = imgs[0].size[0]
h = imgs[0].size[1]
tensor = torch.zeros((len(imgs), 3, h, w), dtype=torch.uint8)
for i, img in enumerate(imgs):
nump_array = np.asarray(img, dtype=np.uint8)
if nump_array.ndim < 3:
nump_array = np.expand_dims(nump_array, axis=-1)
nump_array = np.rollaxis(nump_array, 2)
tensor[i] += torch.from_numpy(nump_array)
return tensor, targets
def get_pytorch_train_loader(data_path, batch_size, workers=5, _worker_init_fn=None, distributed=False):
traindir = os.path.join(data_path, 'train')
train_dataset = datasets.ImageFolder(
traindir,
transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
]))
if distributed:
train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
else:
train_sampler = None
dataloader_fn = MultiEpochsDataLoader # torch.utils.data.DataLoader
train_loader = dataloader_fn(
train_dataset, batch_size=batch_size, shuffle=(train_sampler is None),
num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True, sampler=train_sampler, collate_fn=fast_collate, drop_last=True)
return train_loader, len(train_loader), train_sampler
def get_pytorch_val_loader(data_path, batch_size, workers=5, _worker_init_fn=None, distributed=False):
valdir = os.path.join(data_path, 'val')
val_dataset = datasets.ImageFolder(
valdir, transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
]))
if distributed:
val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
else:
val_sampler = None
dataloader_fn = MultiEpochsDataLoader # torch.utils.data.DataLoader
val_loader = dataloader_fn(
val_dataset,
sampler=val_sampler,
batch_size=batch_size, shuffle=(val_sampler is None),
num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True, collate_fn=fast_collate)
return val_loader
if __name__ == '__main__':
hwlog.ROOT_DIR = os.path.split(os.path.abspath(__file__))[0]
cpu_info, npu_info, framework_info, os_info, benchmark_version = get_environment_info("pytorch")
config_info = get_model_parameter("pytorch_config")
initinal_data = {"base_lr": 0.1, "dataset": "imagenet", "optimizer": "SGD", "loss_scale": 1024}
hwlog.remark_print(key=hwlog.CPU_INFO, value=cpu_info)
hwlog.remark_print(key=hwlog.NPU_INFO, value=npu_info)
hwlog.remark_print(key=hwlog.OS_INFO, value=os_info)
hwlog.remark_print(key=hwlog.FRAMEWORK_INFO, value=framework_info)
hwlog.remark_print(key=hwlog.BENCHMARK_VERSION, value=benchmark_version)
hwlog.remark_print(key=hwlog.CONFIG_INFO, value=config_info)
hwlog.remark_print(key=hwlog.BASE_LR, value=initinal_data.get("base_lr"))
hwlog.remark_print(key=hwlog.DATASET, value=initinal_data.get("dataset"))
hwlog.remark_print(key=hwlog.OPT_NAME, value=initinal_data.get("optimizer"))
hwlog.remark_print(key=hwlog.LOSS_SCALE, value=initinal_data.get("loss_scale"))
main()
@@ -0,0 +1,663 @@
# -*- coding: utf-8 -*-
import argparse
import os
import random
import shutil
import time
import warnings
import numpy as np
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.multiprocessing as mp
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from mobilenet import mobilenet_v2
from apex import amp
from multi_epochs_dataloader import MultiEpochsDataLoader
from benchmark_log import hwlog
from benchmark_log.basic_utils import get_environment_info
from benchmark_log.basic_utils import get_model_parameter
BATCH_SIZE = 6144
OPTIMIZER_BATCH_SIZE = 6144
# model_names = sorted(name for name in models.__dict__
# if name.islower() and not name.startswith("__")
# and callable(models.__dict__[name]))
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--data', metavar='DIR', default='/opt/npu/dataset/imagenet',
help='path to dataset')
# parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet50',
# choices=model_names,
# help='model architecture: ' +
# ' | '.join(model_names) +
# ' (default: resnet18)')
parser.add_argument('-j', '--workers', default=32, type=int, metavar='N',
help='number of data loading workers (default: 4)')
parser.add_argument('--epochs', default=90, type=int, metavar='N',
help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=BATCH_SIZE, type=int,
metavar='N',
help='mini-batch size (default: 256), this is the total '
'batch size of all GPUs on the current node when '
'using Data Parallel or Distributed Data Parallel')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
metavar='W', help='weight decay (default: 1e-4)',
dest='weight_decay')
parser.add_argument('--workspace', type=str, default='./', metavar='DIR',
help='path to directory where checkpoints will be stored')
parser.add_argument('-p', '--print-freq', default=10, type=int,
metavar='N', help='print frequency (default: 10)')
parser.add_argument('-ef', '--eval-freq', default=5, type=int,
metavar='N', help='evaluate frequency (default: 5)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
help='evaluate model on validation set')
parser.add_argument('--pretrained', dest='pretrained', action='store_true',
help='use pre-trained model')
parser.add_argument('--world-size', default=-1, type=int,
help='number of nodes for distributed training')
parser.add_argument('--rank', default=-1, type=int,
help='node rank for distributed training')
parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str,
help='distributed backend')
parser.add_argument('--seed', default=None, type=int,
help='seed for initializing training. ')
parser.add_argument('--gpu', default=None, type=int,
help='GPU id to use.')
parser.add_argument('--multiprocessing-distributed', action='store_true',
help='Use multi-processing distributed training to launch '
'N processes per node, which has N GPUs. This is the '
'fastest way to use PyTorch for either single node or '
'multi node data parallel training')
parser.add_argument('-bm', '--benchmark', default=0, type=int,
metavar='N', help='set benchmark status (default: 1,run benchmark)')
parser.add_argument('--device', default='npu', type=str, help='npu or gpu')
parser.add_argument('--addr', default='10.136.181.115', type=str, help='master addr')
parser.add_argument('--checkpoint-nameprefix', default='checkpoint', type=str, help='checkpoint-nameprefix')
parser.add_argument('--checkpoint-freq', default=0, type=int,
metavar='N', help='checkpoint frequency (default: 0)'
'0: save only one file whitch per epoch;'
'n: save diff file per n epoch'
'-1:no checkpoint,not support')
parser.add_argument('--device-list', default='0,1,2,3,4,5,6,7', type=str, help='device id list')
# apex
parser.add_argument('--amp', default=False, action='store_true',
help='use amp to train the model')
parser.add_argument('--loss-scale', default=64., type=float,
help='loss scale using in amp, default -1 means dynamic')
parser.add_argument('--opt-level', default='O2', type=str,
help='loss scale using in amp, default -1 means dynamic')
warnings.filterwarnings('ignore')
best_acc1 = 0
def device_id_to_process_device_map(device_list):
devices = device_list.split(",")
devices = [int(x) for x in devices]
devices.sort()
process_device_map = dict()
for process_id, device_id in enumerate(devices):
process_device_map[process_id] = device_id
return process_device_map
def main():
args = parser.parse_args()
print("===============main()=================")
print(args)
print("===============main()=================")
os.environ['KERNEL_NAME_ID'] = str(0)
print("++++++++++++++++++ KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID'])
if args.seed is not None:
random.seed(args.seed)
torch.manual_seed(args.seed)
cudnn.deterministic = True
warnings.warn('You have chosen to seed training. '
'This will turn on the CUDNN deterministic setting, '
'which can slow down your training considerably! '
'You may see unexpected behavior when restarting '
'from checkpoints.')
os.environ['MASTER_ADDR'] = args.addr # '10.136.181.51'
os.environ['MASTER_PORT'] = '59629'
if args.gpu is not None:
warnings.warn('You have chosen a specific GPU. This will completely '
'disable data parallelism.')
if args.dist_url == "env://" and args.world_size == -1:
args.world_size = int(os.environ["WORLD_SIZE"])
args.distributed = args.world_size > 1 or args.multiprocessing_distributed
args.process_device_map = device_id_to_process_device_map(args.device_list)
if args.device == 'npu':
# ngpus_per_node = torch.npu.device_count()
ngpus_per_node = len(args.process_device_map)
else:
ngpus_per_node = torch.cuda.device_count()
if args.multiprocessing_distributed:
# Since we have ngpus_per_node processes per node, the total world_size
# needs to be adjusted accordingly
args.world_size = ngpus_per_node * args.world_size
# Use torch.multiprocessing.spawn to launch distributed processes: the
# main_worker process function
# The child process uses the environment variables of the parent process,
# we have to set KERNEL_NAME_ID for every proc
if args.device == 'npu':
# main_worker(args.gpu, ngpus_per_node, args)
mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
else:
mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
else:
# Simply call main_worker function
main_worker(args.gpu, ngpus_per_node, args)
def main_worker(gpu, ngpus_per_node, args):
global best_acc1
# args.gpu = gpu
args.gpu = args.process_device_map[gpu]
print("[npu id:", args.gpu, "]", "++++++++++++++++ before set KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID'])
os.environ['KERNEL_NAME_ID'] = str(gpu)
print("[npu id:", args.gpu, "]", "++++++++++++++++ KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID'])
if args.gpu is not None:
print("[npu id:", args.gpu, "]", "Use GPU: {} for training".format(args.gpu))
if args.distributed:
if args.dist_url == "env://" and args.rank == -1:
args.rank = int(os.environ["RANK"])
if args.multiprocessing_distributed:
# For multiprocessing distributed training, rank needs to be the
# global rank among all the processes
args.rank = args.rank * ngpus_per_node + gpu
if args.device == 'npu':
dist.init_process_group(backend=args.dist_backend, # init_method=args.dist_url,
world_size=args.world_size, rank=args.rank)
else:
dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
world_size=args.world_size, rank=args.rank)
loc = 'npu:{}'.format(args.gpu)
torch.npu.set_device(loc)
args.batch_size = int(args.batch_size / ngpus_per_node)
args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
print("[npu id:", args.gpu, "]", "===============main_worker()=================")
print("[npu id:", args.gpu, "]", args)
print("[npu id:", args.gpu, "]", "===============main_worker()=================")
# Data loading code
# traindir = os.path.join(args.data, 'train')
# valdir = os.path.join(args.data, 'val')
# normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
# std=[0.229, 0.224, 0.225])
# train_dataset = datasets.ImageFolder(
# traindir,
# transforms.Compose([
# transforms.RandomResizedCrop(224),
# transforms.RandomHorizontalFlip(),
# transforms.ToTensor(),
# normalize,
# ]))
#
# if args.distributed:
# train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
# else:
# train_sampler = None
#
# train_loader = torch.utils.data.DataLoader(
# train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
# num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True)
train_loader, train_loader_len, train_sampler = get_pytorch_train_loader(args.data,
args.batch_size,
workers=args.workers,
distributed=args.distributed)
# val_loader = torch.utils.data.DataLoader(
# datasets.ImageFolder(valdir, transforms.Compose([
# transforms.Resize(256),
# transforms.CenterCrop(224),
# transforms.ToTensor(),
# normalize,
# ])),
# batch_size=args.batch_size, shuffle=True,
# num_workers=args.workers, pin_memory=True, drop_last=True)
val_loader = get_pytorch_val_loader(args.data, args.batch_size, args.workers, distributed=False)
# create model
print("[npu id:", args.gpu, "]", "=> creating model '{}'".format('mobilenetv2'))
# model = models.__dict__[args.arch]()
model = mobilenet_v2()
model = model.to(loc)
# define loss function (criterion) and optimizer
criterion = nn.CrossEntropyLoss().to(loc)
optimizer = torch.optim.SGD(model.parameters(), args.lr,
momentum=args.momentum,
weight_decay=args.weight_decay)
if args.amp:
model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale)
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False)
# optionally resume from a checkpoint
if args.resume:
if os.path.isfile(args.resume):
print("=> loading checkpoint '{}'".format(args.resume))
checkpoint = torch.load(args.resume, map_location=loc)
args.start_epoch = checkpoint['epoch']
best_acc1 = checkpoint['best_acc1']
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
if args.amp:
amp.load_state_dict(checkpoint['amp'])
print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
else:
print("=> no checkpoint found at '{}'".format(args.resume))
cudnn.benchmark = True
if args.evaluate:
validate(val_loader, model, criterion, args, ngpus_per_node)
return
for epoch in range(args.start_epoch, args.epochs):
if args.distributed:
train_sampler.set_epoch(epoch)
# adjust_learning_rate(optimizer, epoch, args)
# train for one epoch
train(train_loader, train_loader_len, model, criterion, optimizer, epoch, args, ngpus_per_node)
if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
# evaluate on validation set
acc1 = validate(val_loader, model, criterion, args, ngpus_per_node)
# remember best acc@1 and save checkpoint
is_best = acc1 > best_acc1
best_acc1 = max(acc1, best_acc1)
if not args.multiprocessing_distributed or \
(args.multiprocessing_distributed and args.rank % ngpus_per_node == 0 and epoch == args.epochs - 1):
if args.amp:
save_checkpoint({
'epoch': epoch + 1,
'state_dict': model.state_dict(),
'best_acc1': best_acc1,
'optimizer': optimizer.state_dict(),
'amp': amp.state_dict(),
}, is_best)
else:
save_checkpoint({
'epoch': epoch + 1,
'state_dict': model.state_dict(),
'best_acc1': best_acc1,
'optimizer': optimizer.state_dict(),
}, is_best)
def train(train_loader, train_loader_len, model, criterion, optimizer, epoch, args, ngpus_per_node):
batch_time = AverageMeter('Time', ':6.3f')
data_time = AverageMeter('Data', ':6.3f')
losses = AverageMeter('Loss', ':.4e')
top1 = AverageMeter('Acc@1', ':6.2f')
top5 = AverageMeter('Acc@5', ':6.2f')
progress = ProgressMeter(
train_loader_len,
[batch_time, data_time, losses, top1, top5],
prefix="Epoch: [{}]".format(epoch))
loc = 'npu:{}'.format(args.gpu)
mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).view(1, 3, 1, 1)
std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).view(1, 3, 1, 1)
mean = mean.to(loc, non_blocking=True)
std = std.to(loc, non_blocking=True)
# switch to train mode
model.train()
end = time.time()
if args.benchmark == 1:
optimizer.zero_grad()
# steps_per_epoch = len(train_loader)
steps_per_epoch = train_loader_len
print('==========step per epoch======================', steps_per_epoch)
for i, (images, target) in enumerate(train_loader):
# measure data loading time
data_time.update(time.time() - end)
global_step = epoch * steps_per_epoch + i
lr = adjust_learning_rate(optimizer, global_step, steps_per_epoch, args)
target = target.to(torch.int32)
images = images.to(loc, non_blocking=True).to(torch.float).sub(mean).div(std)
target = target.to(loc, non_blocking=True)
# compute output
output = model(images)
stream = torch.npu.current_stream()
stream.synchronize()
loss = criterion(output, target)
stream = torch.npu.current_stream()
stream.synchronize()
# measure accuracy and record loss
acc1, acc5 = accuracy(output, target, topk=(1, 5))
losses.update(loss.item(), images.size(0))
top1.update(acc1[0], images.size(0))
top5.update(acc5[0], images.size(0))
# compute gradient and do SGD step
if args.benchmark == 0:
optimizer.zero_grad()
if args.amp:
with amp.scale_loss(loss, optimizer) as scaled_loss:
scaled_loss.backward()
else:
loss.backward()
stream = torch.npu.current_stream()
stream.synchronize()
if args.benchmark == 0:
optimizer.step()
elif args.benchmark == 1:
BATCH_SIZE_multiplier = int(OPTIMIZER_BATCH_SIZE / args.batch_size)
BM_optimizer_step = ((i + 1) % BATCH_SIZE_multiplier) == 0
if BM_optimizer_step:
for param_group in optimizer.param_groups:
for param in param_group['params']:
param.grad /= BATCH_SIZE_multiplier
optimizer.step()
optimizer.zero_grad()
stream = torch.npu.current_stream()
stream.synchronize()
# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()
if i % args.print_freq == 0:
if not args.multiprocessing_distributed or (args.multiprocessing_distributed
and args.rank % ngpus_per_node == 0):
progress.display(i)
if not args.multiprocessing_distributed or (args.multiprocessing_distributed
and args.rank % ngpus_per_node == 0):
print("[npu id:", args.gpu, "]", '* FPS@all {:.3f}'.format(ngpus_per_node * args.batch_size / batch_time.avg))
hwlog.remark_print(key=hwlog.FPS, value=' * FPS@all {:.3f}'.format(ngpus_per_node * args.batch_size / batch_time.avg))
def validate(val_loader, model, criterion, args, ngpus_per_node):
batch_time = AverageMeter('Time', ':6.3f')
losses = AverageMeter('Loss', ':.4e')
top1 = AverageMeter('Acc@1', ':6.2f')
top5 = AverageMeter('Acc@5', ':6.2f')
progress = ProgressMeter(
len(val_loader),
[batch_time, losses, top1, top5],
prefix='Test: ')
# switch to evaluate mode
model.eval()
with torch.no_grad():
loc = 'npu:{}'.format(args.gpu)
mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).view(1, 3, 1, 1)
std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).view(1, 3, 1, 1)
mean = mean.to(loc, non_blocking=True)
std = std.to(loc, non_blocking=True)
end = time.time()
for i, (images, target) in enumerate(val_loader):
target = target.to(torch.int32)
images = images.to(loc, non_blocking=True).to(torch.float).sub(mean).div(std)
target = target.to(loc, non_blocking=True)
# compute output
output = model(images)
loss = criterion(output, target)
# measure accuracy and record loss
acc1, acc5 = accuracy(output, target, topk=(1, 5))
losses.update(loss.item(), images.size(0))
top1.update(acc1[0], images.size(0))
top5.update(acc5[0], images.size(0))
# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()
if i % args.print_freq == 0:
if not args.multiprocessing_distributed or \
(args.multiprocessing_distributed and args.rank % ngpus_per_node == 0):
progress.display(i)
# TODO: this should also be done with the ProgressMeter
if not args.multiprocessing_distributed or \
(args.multiprocessing_distributed and args.rank % ngpus_per_node == 0):
print("[npu id:", args.gpu, "]", '[AVG-ACC] * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
.format(top1=top1, top5=top5))
hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP1, value="{top1.avg:.3f}".format(top1=top1))
hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP5, value="{top5.avg:.3f}".format(top5=top5))
return top1.avg
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
torch.save(state, filename)
if is_best:
shutil.copyfile(filename, 'model_best_acc%.4f_epoch%d.pth.tar' % (state['best_acc1'], state['epoch']))
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self, name, fmt=':f'):
self.name = name
self.fmt = fmt
self.reset()
self.start_count_index = 10
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
if self.count == 0:
self.batchsize = n
self.val = val
self.count += n
if self.count > (self.start_count_index * self.batchsize):
self.sum += val * n
self.avg = self.sum / (self.count - self.start_count_index * self.batchsize)
def __str__(self):
fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
return fmtstr.format(**self.__dict__)
class ProgressMeter(object):
def __init__(self, num_batches, meters, prefix=""):
self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
self.meters = meters
self.prefix = prefix
def display(self, batch):
entries = [self.prefix + self.batch_fmtstr.format(batch)]
entries += [str(meter) for meter in self.meters]
print("[npu id:", os.environ['KERNEL_NAME_ID'], "]", '\t'.join(entries))
train_acc1 = str(entries).split("Acc@1")[1].strip().split(" ")[0]
train_acc5 = str(entries).split("Acc@5")[1].strip().split(" ")[0]
hwlog.remark_print(key=hwlog.TRAIN_ACCURACY_TOP1, value=train_acc1)
hwlog.remark_print(key=hwlog.TRAIN_ACCURACY_TOP5, value=train_acc5)
def _get_batch_fmtstr(self, num_batches):
num_digits = len(str(num_batches // 1))
fmt = '{:' + str(num_digits) + 'd}'
return '[' + fmt + '/' + fmt.format(num_batches) + ']'
# def adjust_learning_rate(optimizer, epoch, args):
# """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
# lr = args.lr * (0.1 ** (epoch // 30))
# for param_group in optimizer.param_groups:
# param_group['lr'] = lr
# def adjust_learning_rate(optimizer, global_step, steps_per_epoch, args):
# """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
# # lr = args.lr * (0.98 ** (epoch / 2.5))
# lr = args.lr * (0.98 ** (global_step // int(steps_per_epoch * 2.5)))
# for param_group in optimizer.param_groups:
# param_group['lr'] = lr
# return lr
def adjust_learning_rate(optimizer, global_step, steps_per_epoch, args):
"""Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
# lr = args.lr * (0.98 ** (epoch / 2.5))
lr = args.lr * (0.98 ** (global_step // int(steps_per_epoch * 2.5)))
for param_group in optimizer.param_groups:
param_group['lr'] = lr
return lr
def accuracy(output, target, topk=(1,)):
"""Computes the accuracy over the k top predictions for the specified values of k"""
with torch.no_grad():
maxk = max(topk)
batch_size = target.size(0)
_, pred = output.topk(maxk, 1, True, True)
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
res = []
for k in topk:
correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
res.append(correct_k.mul_(100.0 / batch_size))
return res
def fast_collate(batch):
imgs = [img[0] for img in batch]
targets = torch.tensor([target[1] for target in batch], dtype=torch.int64)
w = imgs[0].size[0]
h = imgs[0].size[1]
tensor = torch.zeros((len(imgs), 3, h, w), dtype=torch.uint8)
for i, img in enumerate(imgs):
nump_array = np.asarray(img, dtype=np.uint8)
if nump_array.ndim < 3:
nump_array = np.expand_dims(nump_array, axis=-1)
nump_array = np.rollaxis(nump_array, 2)
tensor[i] += torch.from_numpy(nump_array)
return tensor, targets
def get_pytorch_train_loader(data_path, batch_size, workers=5, _worker_init_fn=None, distributed=False):
traindir = os.path.join(data_path, 'train')
train_dataset = datasets.ImageFolder(
traindir,
transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
]))
if distributed:
train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
else:
train_sampler = None
dataloader_fn = MultiEpochsDataLoader # torch.utils.data.DataLoader
train_loader = dataloader_fn(
train_dataset, batch_size=batch_size, shuffle=(train_sampler is None),
num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True, sampler=train_sampler, collate_fn=fast_collate, drop_last=True)
return train_loader, len(train_loader), train_sampler
def get_pytorch_val_loader(data_path, batch_size, workers=5, _worker_init_fn=None, distributed=False):
valdir = os.path.join(data_path, 'val')
val_dataset = datasets.ImageFolder(
valdir, transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
]))
if distributed:
val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
else:
val_sampler = None
dataloader_fn = MultiEpochsDataLoader # torch.utils.data.DataLoader
val_loader = dataloader_fn(
val_dataset,
sampler=val_sampler,
batch_size=batch_size, shuffle=(val_sampler is None),
num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True, collate_fn=fast_collate)
return val_loader
if __name__ == '__main__':
hwlog.ROOT_DIR = os.path.split(os.path.abspath(__file__))[0]
cpu_info, npu_info, framework_info, os_info, benchmark_version = get_environment_info("pytorch")
config_info = get_model_parameter("pytorch_config")
initinal_data = {"base_lr": 0.1, "dataset": "imagenet", "optimizer": "SGD", "loss_scale": 1024}
hwlog.remark_print(key=hwlog.CPU_INFO, value=cpu_info)
hwlog.remark_print(key=hwlog.NPU_INFO, value=npu_info)
hwlog.remark_print(key=hwlog.OS_INFO, value=os_info)
hwlog.remark_print(key=hwlog.FRAMEWORK_INFO, value=framework_info)
hwlog.remark_print(key=hwlog.BENCHMARK_VERSION, value=benchmark_version)
hwlog.remark_print(key=hwlog.CONFIG_INFO, value=config_info)
hwlog.remark_print(key=hwlog.BASE_LR, value=initinal_data.get("base_lr"))
hwlog.remark_print(key=hwlog.DATASET, value=initinal_data.get("dataset"))
hwlog.remark_print(key=hwlog.OPT_NAME, value=initinal_data.get("optimizer"))
hwlog.remark_print(key=hwlog.LOSS_SCALE, value=initinal_data.get("loss_scale"))
main()
@@ -0,0 +1,31 @@
import torch
class MultiEpochsDataLoader(torch.utils.data.DataLoader):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._DataLoader__initialized = False
self.batch_sampler = _RepeatSampler(self.batch_sampler)
self._DataLoader__initialized = True
self.iterator = super().__iter__()
def __len__(self):
return len(self.batch_sampler.sampler)
def __iter__(self):
for _ in range(len(self)):
yield next(self.iterator)
class _RepeatSampler(object):
""" Sampler that repeats forever.
Args:
sampler (Sampler)
"""
def __init__(self, sampler):
self.sampler = sampler
def __iter__(self):
while True:
yield from iter(self.sampler)
@@ -0,0 +1,18 @@
{
"startCfg":
[
{
"jobID": "123456789",
"deviceID": ["0"],
"features":
[
{
"name": "task_trace"
},
{
"name": "training_trace"
}
]
}
]
}
@@ -0,0 +1,19 @@
source set_env_b023.sh
su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 0"
export SLOG_PRINT_TO_STDOUT=0
export TASK_QUEUE_ENABLE=0
nohup taskset -c 1-40 python3.7 densenet121_1p_main.py \
--workers 40 \
--arch densenet121 \
--npu 0 \
--lr 0.1 \
--momentum 0.9 \
--amp \
--print-freq 1 \
--eval-freq 5\
--batch-size 256 \
--epoch 45 \
--resume checkpoint.pth.tar \
--data /home/dataset/imagenet > output_1p.log &
@@ -0,0 +1,27 @@
source set_env_b023.sh
su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 0"
su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 4"
export SLOG_PRINT_TO_STDOUT=0
export TASK_QUEUE_ENABLE=0
nohup python3.7 ./densenet121_8p_main.py \
--addr='10.246.246.57' \
--seed 49 \
--workers 80 \
--lr 0.8 \
--print-freq 1 \
--eval-freq 5\
--arch densenet121 \
--dist-url 'tcp://127.0.0.1:50000' \
--dist-backend 'hccl' \
--multiprocessing-distributed \
--world-size 1 \
--batch-size 2048 \
--epochs 45 \
--rank 0 \
--amp \
--benchmark 0 \
--resume checkpoint.pth.tar \
--data /train/imagenet > resume_8p.log &
@@ -0,0 +1,18 @@
source set_env_b023.sh
su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 0"
export SLOG_PRINT_TO_STDOUT=0
export TASK_QUEUE_ENABLE=0
nohup taskset -c 1-40 python3.7 densenet121_1p_main.py \
--workers 40 \
--arch densenet121 \
--npu 0 \
--lr 0.1 \
--momentum 0.9 \
--amp \
--print-freq 1 \
--eval-freq 5\
--batch-size 256 \
--epoch 90 \
--data /opt/npu/dataset/imagenet > output_1p.log &
@@ -0,0 +1,26 @@
#!/usr/bin/env bash
source set_env_b023.sh
su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 0"
su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 4"
export SLOG_PRINT_TO_STDOUT=0
export TASK_QUEUE_ENABLE=0
nohup python3.7 ./mobilenetv2_8p_main.py \
--addr='10.246.246.76' \
--seed 49 \
--workers 80 \
--lr 0.24 \
--print-freq 1 \
--eval-freq 5\
--dist-url 'tcp://127.0.0.1:50002' \
--dist-backend 'hccl' \
--multiprocessing-distributed \
--world-size 1 \
--batch-size 6144 \
--epochs 600 \
--rank 0 \
--amp \
--benchmark 0 \
--data /opt/npu/dataset/imagenet > output_8p.log &
@@ -0,0 +1,17 @@
############## toolkit situation ################
#export ASCEND_HOME=/usr/local/Ascend
#export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/
#export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/hccl
#export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin
#export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
############## nnae situation ################
export ASCEND_HOME=/usr/local/Ascend
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/:/usr/local/python3.7.5/lib/:/usr/lib/:/usr/local/Ascend/nnae/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/
export PYTHONPATH=/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/hccl
export PATH=$PATH:/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin
export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp/
# pip3.7 install --upgrade /usr/local/Ascend/nnae/latest/fwkacllib/lib64/topi-0.4.0-py3-none-any.whl
# pip3.7 install --upgrade /usr/local/Ascend/nnae/latest/fwkacllib/lib64/te-0.4.0-py3-none-any.whl
@@ -0,0 +1,18 @@
############## toolkit situation ################
export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/
export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
############## nnae situation ################
# export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
# export PATH=$PATH:/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/nnae/latest/toolkit/tools/ide_daemon/bin/
# export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp/
# export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
# export PYTHONPATH=/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
# ln -s /usr/local/Ascend/ascend-toolkit/latest/toolkit/bin/adc /usr/local/bin/
@@ -0,0 +1,12 @@
# main env
export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
export PATH=$PATH:/usr/local/Ascend/fwkacllib/ccec_compiler/bin
export ASCEND_OPP_PATH=/usr/local/Ascend/opp
export NEW_GE_FE_ID=1
export GE_AICPU_FLAG=1
export PYTHONPATH=/usr/local/Ascend/atc/python/site-packages/te.egg:/usr/local/Ascend/atc/python/site-packages/topi.egg:/usr/local/Ascend/atc/python/site-packages/auto_tune.egg:/usr/local/Ascend/atc/python/site-packages/schedule_search.egg:/usr/local
export CUSTOM_OP_LIB_PATH=/usr/local/Ascend/ops/framework/built-in/tensorflow
export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libaicpu_plugin.so:/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
export PLUGIN_LOAD_PATH=/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libaicpu_plugin.so:/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/librts_engine.so
export SLOG_PRINT_TO_STDOUT=1
@@ -0,0 +1,31 @@
############## toolkit situation ################
#export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
#export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/
#export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
#export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
#export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
############## nnae situation ################
if [ -d /usr/local/Ascend/nnae/latest ];then
export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH
export PATH=$PATH:/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/nnae/latest/toolkit/tools/ide_daemon/bin/
export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp/
export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
export PYTHONPATH=/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
else
export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH
export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/
export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
fi
# ln -s /usr/local/Ascend/ascend-toolkit/latest/toolkit/bin/adc /usr/local/bin/
export SLOG_PRINT_TO_STDOUT=0
#su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 0"
export TASK_QUEUE_ENABLE=1
@@ -0,0 +1,60 @@
#!/usr/bin/env bash
rank_size=$1
yamlPath=$2
toolsPath=$3
if [ -f /.dockerenv ];then
CLUSTER=$4
MPIRUN_ALL_IP="$5"
export CLUSTER=${CLUSTER}
fi
currentDir=$(cd "$(dirname "$0")/.."; pwd)
source ${currentDir}/config/npu_set_env.sh
eval $(${toolsPath}/get_params_for_yaml.sh ${yamlPath} "pytorch_config")
rm -rf /var/log/npu/slog/host-0/*
currtime=`date +%Y%m%d%H%M%S`
mkdir -p ${currentDir%train*}/train/result/pt_mobilenet/training_job_${currtime}/
train_job_dir=${currentDir%train*}/train/result/pt_mobilenet/training_job_${currtime}/
echo "[`date +%Y%m%d-%H:%M:%S`] [INFO] ${train_job_dir}"
# device 列表, 若无指定 device 或大于等于 8p 时根据 rank_size 顺序选择
eval device_group=\$device_group_${rank_size}p
if [ x"${device_group}" == x"" ] || [ ${rank_size} -ge 8 ];then
device_group="$(seq 0 "$(expr $rank_size - 1)")"
fi
device_group_str=`echo ${device_group} | sed 's/ //g'`
first_device_id=`echo ${device_group_str: 0:1}`
rank_id=0
if [ x"${CLUSTER}" == x"True" ];then
ln -snf ${currentDir%train*}/train/result/pt_mobilenet/training_job_${currtime}/0/hw_mobilenet.log ${currentDir%train*}/train/result/pt_mobilenet/training_job_${currtime}/
this_ip=$(hostname -I |awk '{print $1}')
for ip in $MPIRUN_ALL_IP;do
if [ x"$this_ip" != x"$ip" ];then
scp $yamlPath root@$ip:$yamlPath
scp $jsonFilePath root@$ip:$jsonFilePath
fi
done
export PATH=$PATH:/usr/local/mpirun4.0.2/bin
mpirun -H ${mpirun_ip} \
--bind-to none -map-by slot\
--allow-run-as-root \
--mca btl_tcp_if_exclude lo,docker0,endvnic,virbr0,vethf40501b,docker_gwbridge,br-f42ac38052b4\
--prefix /usr/local/mpirun4.0.2/ \
${currentDir}/scripts/train.sh 0 $rank_size $yamlPath $currtime ${toolsPath} ${CLUSTER}
else
ln -snf ${currentDir%train*}/train/result/pt_mobilenet/training_job_${currtime}/${first_device_id}/hw_mobilenet.log ${currentDir%train*}/train/result/pt_mobilenet/training_job_${currtime}/
#for device_id in $device_group;do
#echo "[`date +%Y%m%d-%H:%M:%S`] [INFO] start: train ${device_id} & " >> ${currentDir}/result/main.log
${currentDir}/scripts/train.sh 0 $rank_size $yamlPath $currtime ${toolsPath} $rank_id&
#let rank_id++
#done
fi
wait
@@ -0,0 +1,146 @@
#!/usr/bin/env bash
device_id=$1
rank_size=$2
yamlPath=$3
currentDir=$(cd "$(dirname "$0")/.."; pwd)
currtime=$4
toolsPath=$5
export YAML_PATH=$3
mkdir -p ${currentDir%train*}/train/result/pt_mobilenet/training_job_${currtime}/
export train_job_dir=${currentDir%train*}/train/result/pt_mobilenet/training_job_${currtime}/
# 从 yaml 获取配置
eval $(${toolsPath}/get_params_for_yaml.sh ${yamlPath} "pytorch_config")
export REMARK_LOG_FILE=hw_mobilenet.log
benchmark_log_path=${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils
export PYTHONPATH=$PYTHONPATH:${benchmark_log_path}
source ${currentDir}/config/set_env_b023.sh
# user env
export HCCL_CONNECT_TIMEOUT=600
export JOB_ID=9999001
export RANK_TABLE_FILE=${currentDir}/config/${rank_size}p.json
export RANK_SIZE=${rank_size}
export SLOG_PRINT_TO_STDOUT=0
export DEVICE_ID=${device_id}
DEVICE_INDEX=$(( DEVICE_ID + RANK_INDEX * 8 ))
export DEVICE_INDEX=${DEVICE_INDEX}
cd ${train_job_dir}
curd_dir=${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils/atlasboost
export PYTHONPATH=$PYTHONPATH:${curd_dir}
if [ x"$6" != x"True" ];then
rank_id=$6
export RANK_ID=$6
else
device_id_mo=$(python3.7 -c "import src.tensorflow.mpi_ops as atlasboost;atlasboost.init(); \
device_id = atlasboost.local_rank();cluster_device_id = str(device_id); \
atlasboost.set_device_id(device_id);print(atlasboost.rank())")
device_id_mo=`echo $device_id_mo`
rank_id=${device_id_mo##* }
echo rank_id is $rank_id
export RANK_ID=${rank_id}
device=${device_id_mo##*deviceid = }
device_id=${device%% phyid=*}
export DEVICE_ID=${device_id}
echo device_id is $device_id
hccljson=${train_job_dir}/*.json
cp ${hccljson} ${currentDir}/config/${rank_size}p.json
fi
#mkdir exec path
mkdir -p ${train_job_dir}/${device_id}
cd ${train_job_dir}/${device_id}
startTime=`date +%Y%m%d-%H:%M:%S`
startTime_s=`date +%s`
if [ x"$6" == x"True" ];then
python3.7 ${currentDir}/code/8p/mobilenetv2_8p_main.py \
--addr=$(hostname -I |awk '{print $1}') \
--seed 49 \
--workers 128 \
--lr 0.24 \
--print-freq 1 \
--eval-freq 5\
--dist-url 'tcp://127.0.0.1:50002' \
--dist-backend 'hccl' \
--multiprocessing-distributed \
--world-size 1 \
--batch-size ${batch_size} \
--epochs ${epoches} \
--rank 0 \
--amp \
--benchmark 0 \
--data ${data_url} > ${train_job_dir}/train_${rank_size}p.log 2>&1
elif [ x"${rank_size}" == x"1" ];then
# 单卡
python3.7 ${currentDir}/code/1p/main_apex.py \
--workers 128 \
--seed 123456 \
--lr 0.03 \
--amp \
--opt-level 'O2' \
--loss-scale-value 64 \
--momentum 0.9 \
--batch-size ${batch_size} \
--weight-decay 1e-5 \
--epoch ${epoches} \
--print-freq 1 \
--device ${device_single}\
--eval-freq 1 \
--summary-path './runs/mobilenetv2/npu_O2_ls64_c75b150_0909' \
--data ${data_url} > ${train_job_dir}/train_${rank_size}p.log 2>&1
elif [ ${rank_size} -le 8 ];then
# 多卡单机
python3.7 ${currentDir}/code/8p/mobilenetv2_8p_main_anycard.py \
--addr=$(hostname -I |awk '{print $1}') \
--seed 49 \
--workers 128 \
--lr ${lr} \
--print-freq 1 \
--loss-scale 64 \
--eval-freq 1\
--dist-url 'tcp://127.0.0.1:50002' \
--dist-backend 'hccl' \
--multiprocessing-distributed \
--world-size 1 \
--batch-size ${batch_size} \
--epochs ${epoches} \
--rank 0 \
--amp \
--device-list ${device_group_mutli} \
--benchmark 0 \
--data ${data_url} > ${train_job_dir}/train_${rank_size}p.log 2>&1
fi
if [ $? -eq 0 ];then
echo ":::ABK 1.0.0 hw_mobilenet train success"
echo ":::ABK 1.0.0 hw_mobilenet train success" >> ${train_job_dir}/train_${rank_size}p.log
echo ":::ABK 1.0.0 hw_mobilenet train success" >> ./hw_mobilenet.log
else
echo ":::ABK 1.0.0 hw_mobilenet train failed"
echo ":::ABK 1.0.0 hw_mobilenet train failed" >> ${train_job_dir}/train_${rank_size}p.log
echo ":::ABK 1.0.0 hw_mobilenet train failed" >> ./hw_mobilenet.log
fi
endTime=`date +%Y%m%d-%H:%M:%S`
endTime_s=`date +%s`
sumTime=$[ $endTime_s - $startTime_s ]
hour=$(( $sumTime/3600 ))
min=$(( ($sumTime-${hour}*3600)/60 ))
sec=$(( $sumTime-${hour}*3600-${min}*60 ))
echo ":::ABK 1.0.0 mobilenet train total time${hour}:${min}:${sec}"
echo ":::ABK 1.0.0 mobilenet train total time ${hour}:${min}:${sec}" >> ./hw_mobilenet.log
@@ -0,0 +1,47 @@
# MobileNet_tensorflow训练说明
### 1. 模型训练参数配置
在train/yaml/MobileNet.yaml中修改相应配置, 配置项含义:
```
tensorflow_config:
# 基本参数
max_steps: 1000
data_url: 数据集路径
epoches: 跑多少个epoch
# 训练(train) 或 评测(evaluate)
mode: train
batch_size: 256
#仅在 mode 为 evaluate 时用到
ckpt_path: /opt/0908/benchmark-benchmark_Alpha/train/result/tf_mobilenet/trainingJob_20200905171017/0/results/model.ckpt-123125
# 仅多机执行需要配置: ip1:卡数量1,ip2:卡数量2
mpirun_ip: 90.90.176.152:8,90.90.176.154:8
# docker 镜像名称:版本号
docker_image: c73:b021
# 指定 device id, 多个 id 使用空格分隔, 数量需与 rank_size 相同
device_group_1p: 0
device_group_2p: 0 1
device_group_4p: 0 1 2 3
profiling_mode: false
profiling_options: training_trace
fp_point: L2Loss
bp_point: gradients/AddN_30
aicpu_profiling_mode: false
```
------
@@ -0,0 +1,211 @@
# MobileNetv2 for Tensorflow
This repository provides a script and recipe to train the MobileNetv2 model to achieve state-of-the-art accuracy.
## Table Of Contents
* [Model overview](#model-overview)
* [Model Architecture](#model-architecture)
* [Default configuration](#default-configuration)
* [Data augmentation](#data-augmentation)
* [Setup](#setup)
* [Requirements](#requirements)
* [Quick start guide](#quick-start-guide)
* [Advanced](#advanced)
* [Command line arguments](#command-line-arguments)
* [Training process](#training-process)
* [Performance](#performance)
* [Results](#results)
* [Training accuracy results](#training-accuracy-results)
* [Training performance results](#training-performance-results)
## Model overview
In this repository, we implement MobileNetv2 from paper [Sandler, Mark, et al. "Mobilenetv2: Inverted residuals and linear bottlenecks." CVPR 2018.](https://arxiv.org/abs/1801.04381)
MobileNetv2 is a mobile architecture. It is mainly constructed based on depthwise separable convolutions, linear bottlenecks and inverted residuals.
### Model architecture
The model architecture can be found from the reference paper.
### Default configuration
The following sections introduce the default configurations and hyperparameters for MobileNetv2 model.
#### Optimizer
This model uses Momentum optimizer from Tensorflow with the following hyperparameters:
- Momentum : 0.9
- Learning rate (LR) : 0.8
- LR schedule: cosine_annealing
- Warmup epoch: 5
- Batch size : 256*8
- Weight decay : 0.00004
- Moving average decay: 0.9999
- Label smoothing = 0.1
- We train for:
- 300 epochs for a standard training process using ImageNet2012
#### Data augmentation
This model uses the data augmentation from InceptionV2:
- For training:
- Convert DataType and RandomResizeCrop
- RandomHorizontalFlip, prob=0.5
- Subtract with 0.5 and multiply with 2.0
- For inference:
- Convert DataType
- CenterCrop 87.5% of the original image and resize to (224, 224)
- Subtract with 0.5 and multiply with 2.0
For more details, we refer readers to read the corresponding source code in Slim.
## Setup
The following section lists the requirements to start training the MobileNetv2 model.
### Requirements
Tensorflow 1.15.0
## Quick Start Guide
### 1. Clone the respository
```shell
git clone xxx
cd ModelZoo_MobileNetv2_TF
```
### 2. Download and preprocess the dataset
1. Download the ImageNet2012 dataset
2. Generate tfrecord files following [Tensorflow-Slim](https://github.com/tensorflow/models/tree/master/research/slim).
3. The train and validation tfrecord files are under the path/data directories.
### 3. Train
- train on a single NPU
- **edit** *train_1p.sh* (see example below)
- bash run_1p.sh
- train on 8 NPUs
- **edit** *train_8p.sh* (see example below)
- bash run_8p.sh
Examples:
- Case for single NPU
- In *train_1p.sh*, python scripts part should look like as follows. For more detailed command lines arguments, please refer to [Command line arguments](#command-line-arguments)
```shell
python3.7 ${currentDir}/train.py \
--dataset_dir=/opt/npu/slimImagenet \
--max_train_steps=500 \
--iterations_per_loop=50 \
--model_name="mobilenet_v2" \
--moving_average_decay=0.9999 \
--label_smoothing=0.1 \
--preprocessing_name="inception_v2" \
--weight_decay='0.00004' \
--batch_size=256 \
--learning_rate_decay_type='cosine_annealing' \
--learning_rate=0.4 \
--optimizer='momentum' \
--momentum='0.9' \
--warmup_epochs=5
```
- Run the program
```
bash run_1p.sh
```
- Case for 8 NPUs
- In *train_8p.sh*, python scripts part should look like as follows.
```shell
python3.7 ${currentDir}/train.py \
--dataset_dir=/opt/npu/slimImagenet \
--max_epoch=300 \
--model_name="mobilenet_v2" \
--moving_average_decay=0.9999 \
--label_smoothing=0.1 \
--preprocessing_name="inception_v2" \
--weight_decay='0.00004' \
--batch_size=256 \
--learning_rate_decay_type='cosine_annealing' \
--learning_rate=0.8 \
--optimizer='momentum' \
--momentum='0.9' \
--warmup_epochs=5
```
- Run the program
```
bash run_8p.sh
```
### 4. Test
- We evaluate results by using following commands:
```shell
python3.7 eval_image_classifier_mobilenet.py --dataset_dir=/opt/npu/slimImagenet \
--checkpoint_path=result/8p/0/results/model.ckpt-187500
```
Remember to modify the dataset path and checkpoint path, then run the command.
## Advanced
### Commmand-line options
We list those important parameters to train this network here. For more details of all the parameters, please read *train.py* and other related files.
```
--dataset_dir directory of dataset (default: /opt/npu/models/slimImagenet)
--max_epoch number of epochs to train the model (default: 200)
--max_train_steps max number of training steps (default: 500)
--iterations_per_loop number of steps to run in devices each iteration (default: None)
--model_name name of the model to train (default: mobilenet_v2_140)
--moving_average_decay the decay to use for the moving average (default: None)
--label_smoothing use label smooth in cross entropy (default: 0.1)
--preprocessing_name preprocessing method for training (default: inception_v2)
--weight_decay weight decay for regularization loss (default: 0)
--batch_size batch size per npu (default: 96)
--learning_rate_decay_type learning rate decay type (default: fixed)
--learning_rate initial learning rate (default: 0.1)
--optimizer the name of optimizer (default: sgd)
--momentum momentum value used in optimizer (default: 0.9)
--warmup_epochs warmup epochs for learning rate (default: 5)
```
### Training process
All the results of the training will be stored in the directory `result`.
## Performance
### Result
Our result were obtained by running the applicable training script. To achieve the same results, follow the steps in the Quick Start Guide.
#### Training accuracy results
| **epochs** | Top1 |
| :--------: | :------------: |
| 300 | 72.47% |
#### Training performance results
| **NPUs** | train performance |
| :------: | :---------------: |
| 1 | 1400 img/s |
| **NPUs** | train performance |
| :------: | :---------------: |
| 8 | 11000 img/s |
@@ -0,0 +1,240 @@
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to read, decode and pre-process input data for the Model.
"""
import collections
import sys
import tensorflow as tf
from tensorflow.python.data.experimental.ops import threadpool
# from tensorflow.contrib import slim
InputEndpoints = collections.namedtuple(
'InputEndpoints', ['images', 'images_orig', 'labels', 'labels_one_hot'])
ShuffleBatchConfig = collections.namedtuple('ShuffleBatchConfig', [
'num_batching_threads', 'queue_capacity', 'min_after_dequeue'
])
DEFAULT_SHUFFLE_CONFIG = ShuffleBatchConfig(
num_batching_threads=8, queue_capacity=3000, min_after_dequeue=1000)
def get_data_files(data_sources):
from tensorflow.python.platform import gfile
if isinstance(data_sources, (list, tuple)):
data_files = []
for source in data_sources:
data_files += get_data_files(source)
else:
if '*' in data_sources or '?' in data_sources or '[' in data_sources:
data_files = gfile.Glob(data_sources)
else:
data_files = [data_sources]
if not data_files:
raise ValueError('No data files found in %s' % (data_sources,))
return data_files
def preprocess_image(image, location, label_one_hot, height=224, width=224):
"""Prepare one image for evaluation.
If height and width are specified it would output an image with that size by
applying resize_bilinear.
If central_fraction is specified it would cropt the central fraction of the
input image.
Args:
image: 3-D Tensor of image. If dtype is tf.float32 then the range should be
[0, 1], otherwise it would converted to tf.float32 assuming that the range
is [0, MAX], where MAX is largest positive representable number for
int(8/16/32) data type (see `tf.image.convert_image_dtype` for details)
height: integer
width: integer
central_fraction: Optional Float, fraction of the image to crop.
scope: Optional scope for name_scope.
Returns:
3-D float Tensor of prepared image.
"""
# if image.dtype != tf.float32:
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
# Crop the central region of the image with an area containing 87.5% of
# the original image.
# if central_fraction:
# image = tf.image.central_crop(image, central_fraction=central_fraction)
# if height and width:
# Resize the image to the specified height and width.
image = tf.expand_dims(image, 0)
image = tf.image.resize_bilinear(image, [height, width], align_corners=False)
image = tf.squeeze(image, [0])
# image = tf.cast(image, tf.float32)
# image = tf.multiply(image, 1/255.)
image = tf.subtract(image, 0.5)
image = tf.multiply(image, 2.0)
return image, location, label_one_hot
def _int64_feature(value):
"""Wrapper for inserting int64 features into Example proto."""
if not isinstance(value, list):
value = [value]
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def parse_example_proto(example_serialized, num_classes, labels_offset, image_preprocessing_fn):
feature_map = {
'image/encoded': tf.FixedLenFeature([], tf.string, ''),
'image/class/label': tf.FixedLenFeature([1], tf.int64, -1),
'image/class/text': tf.FixedLenFeature([], tf.string, ''),
'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32)
}
with tf.compat.v1.name_scope('deserialize_image_record'):
obj = tf.io.parse_single_example(serialized=example_serialized, features=feature_map)
image = tf.image.decode_jpeg(obj['image/encoded'], channels=3, fancy_upscaling=False,
dct_method='INTEGER_FAST')
if image_preprocessing_fn:
image = image_preprocessing_fn(image, 224, 224)
else:
image = tf.image.resize(image, [224, 224])
label = tf.cast(obj['image/class/label'], tf.int32)
label = tf.squeeze(label)
label -= labels_offset
label = tf.one_hot(label, num_classes - labels_offset)
return image, label
def parse_example_decode(example_serialized):
feature_map = {
'image/encoded': tf.FixedLenFeature([], tf.string, ''),
'image/class/label': tf.FixedLenFeature([1], tf.int64, -1),
'image/class/text': tf.FixedLenFeature([], tf.string, ''),
'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32)
}
with tf.compat.v1.name_scope('deserialize_image_record'):
obj = tf.io.parse_single_example(serialized=example_serialized, features=feature_map)
image = tf.image.decode_jpeg(obj['image/encoded'], channels=3, fancy_upscaling=False,
dct_method='INTEGER_FAST')
return image, obj['image/class/label']
def parse_example(image, label, num_classes, labels_offset, image_preprocessing_fn):
with tf.compat.v1.name_scope('deserialize_image_record'):
if image_preprocessing_fn:
image = image_preprocessing_fn(image, 224, 224)
else:
image = tf.image.resize(image, [224, 224])
label = tf.cast(label, tf.int32)
label = tf.squeeze(label)
label -= labels_offset
label = tf.one_hot(label, num_classes - labels_offset)
return image, label
def parse_example1(example_serialized, image_preprocessing_fn1):
feature_map = {
'image/encoded': tf.FixedLenFeature([], tf.string, ''),
'image/class/label': tf.FixedLenFeature([1], tf.int64, -1),
'image/class/text': tf.FixedLenFeature([], tf.string, ''),
'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32)
}
with tf.compat.v1.name_scope('deserialize_image_record'):
obj = tf.io.parse_single_example(serialized=example_serialized, features=feature_map)
image = tf.image.decode_jpeg(obj['image/encoded'], channels=3, fancy_upscaling=False,
dct_method='INTEGER_FAST')
image = image_preprocessing_fn1(image, 224, 224)
return image, obj['image/class/label']
def parse_example2(image, label, num_classes, labels_offset, image_preprocessing_fn2):
with tf.compat.v1.name_scope('deserialize_image_record'):
image = image_preprocessing_fn2(image, 224, 224)
label = tf.cast(label, tf.int32)
label = tf.squeeze(label)
label -= labels_offset
label = tf.one_hot(label, num_classes - labels_offset)
return image, label
def get_data(dataset, batch_size, num_classes, labels_offset, is_training,
preprocessing_name=None, use_grayscale=None, add_image_summaries=False):
return get_data_united(dataset, batch_size, num_classes, labels_offset, is_training,
preprocessing_name, use_grayscale, add_image_summaries)
def create_ds(data_sources, is_training):
data_files = get_data_files(data_sources)
ds = tf.data.Dataset.from_tensor_slices(data_files)
if is_training:
ds = ds.shuffle(1000)
# add for eval
else:
ds = ds.take(50000)
##### change #####
num_readers = 10
ds = ds.interleave(
tf.data.TFRecordDataset, cycle_length=num_readers, block_length=1,
num_parallel_calls=tf.data.experimental.AUTOTUNE)
counter = tf.data.Dataset.range(sys.maxsize)
ds = tf.data.Dataset.zip((ds, counter))
##### change #####
if is_training:
ds = ds.repeat()
return ds
def get_data_united(dataset, batch_size, num_classes, labels_offset, is_training,
preprocessing_name=None, use_grayscale=None, add_image_summaries=False):
from preprocessing import preprocessing_factory
image_preprocessing_fn = preprocessing_factory.get_preprocessing(
name='inception_v2',
is_training=is_training,
use_grayscale=use_grayscale,
add_image_summaries=add_image_summaries
)
ds = create_ds(dataset.data_sources, is_training)
ds = ds.map(lambda example, counter: parse_example_proto(example, num_classes, labels_offset, image_preprocessing_fn), num_parallel_calls=24)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.prefetch(buffer_size=tf.contrib.data.AUTOTUNE)
iterator = ds.make_initializable_iterator()
ds = threadpool.override_threadpool(ds,threadpool.PrivateThreadPool(128, display_name='input_pipeline_thread_pool'))
return iterator, ds
@@ -0,0 +1,705 @@
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Converts ImageNet data to TFRecords file format with Example protos.
The raw ImageNet data set is expected to reside in JPEG files located in the
following directory structure.
data_dir/n01440764/ILSVRC2012_val_00000293.JPEG
data_dir/n01440764/ILSVRC2012_val_00000543.JPEG
...
where 'n01440764' is the unique synset label associated with
these images.
The training data set consists of 1000 sub-directories (i.e. labels)
each containing 1200 JPEG images for a total of 1.2M JPEG images.
The evaluation data set consists of 1000 sub-directories (i.e. labels)
each containing 50 JPEG images for a total of 50K JPEG images.
This TensorFlow script converts the training and evaluation data into
a sharded data set consisting of 1024 and 128 TFRecord files, respectively.
train_directory/train-00000-of-01024
train_directory/train-00001-of-01024
...
train_directory/train-00127-of-01024
and
validation_directory/validation-00000-of-00128
validation_directory/validation-00001-of-00128
...
validation_directory/validation-00127-of-00128
Each validation TFRecord file contains ~390 records. Each training TFREcord
file contains ~1250 records. Each record within the TFRecord file is a
serialized Example proto. The Example proto contains the following fields:
image/encoded: string containing JPEG encoded image in RGB colorspace
image/height: integer, image height in pixels
image/width: integer, image width in pixels
image/colorspace: string, specifying the colorspace, always 'RGB'
image/channels: integer, specifying the number of channels, always 3
image/format: string, specifying the format, always'JPEG'
image/filename: string containing the basename of the image file
e.g. 'n01440764_10026.JPEG' or 'ILSVRC2012_val_00000293.JPEG'
image/class/label: integer specifying the index in a classification layer.
The label ranges from [1, 1000] where 0 is not used.
image/class/synset: string specifying the unique ID of the label,
e.g. 'n01440764'
image/class/text: string specifying the human-readable version of the label
e.g. 'red fox, Vulpes vulpes'
image/object/bbox/xmin: list of integers specifying the 0+ human annotated
bounding boxes
image/object/bbox/xmax: list of integers specifying the 0+ human annotated
bounding boxes
image/object/bbox/ymin: list of integers specifying the 0+ human annotated
bounding boxes
image/object/bbox/ymax: list of integers specifying the 0+ human annotated
bounding boxes
image/object/bbox/label: integer specifying the index in a classification
layer. The label ranges from [1, 1000] where 0 is not used. Note this is
always identical to the image label.
Note that the length of xmin is identical to the length of xmax, ymin and ymax
for each example.
Running this script using 16 threads may take around ~2.5 hours on a HP Z420.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from datetime import datetime
import os
import random
import sys
import threading
import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
tf.app.flags.DEFINE_string('train_directory', '/tmp/',
'Training data directory')
tf.app.flags.DEFINE_string('validation_directory', '/tmp/',
'Validation data directory')
tf.app.flags.DEFINE_string('output_directory', '/tmp/',
'Output data directory')
tf.app.flags.DEFINE_integer('train_shards', 1024,
'Number of shards in training TFRecord files.')
tf.app.flags.DEFINE_integer('validation_shards', 128,
'Number of shards in validation TFRecord files.')
tf.app.flags.DEFINE_integer('num_threads', 8,
'Number of threads to preprocess the images.')
# The labels file contains a list of valid labels are held in this file.
# Assumes that the file contains entries as such:
# n01440764
# n01443537
# n01484850
# where each line corresponds to a label expressed as a synset. We map
# each synset contained in the file to an integer (based on the alphabetical
# ordering). See below for details.
tf.app.flags.DEFINE_string('labels_file',
'imagenet_lsvrc_2015_synsets.txt',
'Labels file')
# This file containing mapping from synset to human-readable label.
# Assumes each line of the file looks like:
#
# n02119247 black fox
# n02119359 silver fox
# n02119477 red fox, Vulpes fulva
#
# where each line corresponds to a unique mapping. Note that each line is
# formatted as <synset>\t<human readable label>.
tf.app.flags.DEFINE_string('imagenet_metadata_file',
'imagenet_metadata.txt',
'ImageNet metadata file')
# This file is the output of process_bounding_box.py
# Assumes each line of the file looks like:
#
# n00007846_64193.JPEG,0.0060,0.2620,0.7545,0.9940
#
# where each line corresponds to one bounding box annotation associated
# with an image. Each line can be parsed as:
#
# <JPEG file name>, <xmin>, <ymin>, <xmax>, <ymax>
#
# Note that there might exist mulitple bounding box annotations associated
# with an image file.
tf.app.flags.DEFINE_string('bounding_box_file',
'./imagenet_2012_bounding_boxes.csv',
'Bounding box file')
FLAGS = tf.app.flags.FLAGS
def _int64_feature(value):
"""Wrapper for inserting int64 features into Example proto."""
if not isinstance(value, list):
value = [value]
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def _float_feature(value):
"""Wrapper for inserting float features into Example proto."""
if not isinstance(value, list):
value = [value]
return tf.train.Feature(float_list=tf.train.FloatList(value=value))
def _bytes_feature(value):
"""Wrapper for inserting bytes features into Example proto."""
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _convert_to_example(filename, image_buffer, label, synset, human, bbox,
height, width):
"""Build an Example proto for an example.
Args:
filename: string, path to an image file, e.g., '/path/to/example.JPG'
image_buffer: string, JPEG encoding of RGB image
label: integer, identifier for the ground truth for the network
synset: string, unique WordNet ID specifying the label, e.g., 'n02323233'
human: string, human-readable label, e.g., 'red fox, Vulpes vulpes'
bbox: list of bounding boxes; each box is a list of integers
specifying [xmin, ymin, xmax, ymax]. All boxes are assumed to belong to
the same label as the image label.
height: integer, image height in pixels
width: integer, image width in pixels
Returns:
Example proto
"""
xmin = []
ymin = []
xmax = []
ymax = []
for b in bbox:
assert len(b) == 4
# pylint: disable=expression-not-assigned
[l.append(point) for l, point in zip([xmin, ymin, xmax, ymax], b)]
# pylint: enable=expression-not-assigned
colorspace = 'RGB'
channels = 3
image_format = 'JPEG'
example = tf.train.Example(features=tf.train.Features(feature={
'image/height': _int64_feature(height),
'image/width': _int64_feature(width),
'image/colorspace': _bytes_feature(colorspace),
'image/channels': _int64_feature(channels),
'image/class/label': _int64_feature(label),
'image/class/synset': _bytes_feature(synset),
'image/class/text': _bytes_feature(human),
'image/object/bbox/xmin': _float_feature(xmin),
'image/object/bbox/xmax': _float_feature(xmax),
'image/object/bbox/ymin': _float_feature(ymin),
'image/object/bbox/ymax': _float_feature(ymax),
'image/object/bbox/label': _int64_feature([label] * len(xmin)),
'image/format': _bytes_feature(image_format),
'image/filename': _bytes_feature(os.path.basename(filename)),
'image/encoded': _bytes_feature(image_buffer)}))
return example
class ImageCoder(object):
"""Helper class that provides TensorFlow image coding utilities."""
def __init__(self):
# Create a single Session to run all image coding calls.
self._sess = tf.Session()
# Initializes function that converts PNG to JPEG data.
self._png_data = tf.placeholder(dtype=tf.string)
image = tf.image.decode_png(self._png_data, channels=3)
self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100)
# Initializes function that converts CMYK JPEG data to RGB JPEG data.
self._cmyk_data = tf.placeholder(dtype=tf.string)
image = tf.image.decode_jpeg(self._cmyk_data, channels=0)
self._cmyk_to_rgb = tf.image.encode_jpeg(image, format='rgb', quality=100)
# Initializes function that decodes RGB JPEG data.
self._decode_jpeg_data = tf.placeholder(dtype=tf.string)
self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3)
def png_to_jpeg(self, image_data):
return self._sess.run(self._png_to_jpeg,
feed_dict={self._png_data: image_data})
def cmyk_to_rgb(self, image_data):
return self._sess.run(self._cmyk_to_rgb,
feed_dict={self._cmyk_data: image_data})
def decode_jpeg(self, image_data):
image = self._sess.run(self._decode_jpeg,
feed_dict={self._decode_jpeg_data: image_data})
assert len(image.shape) == 3
assert image.shape[2] == 3
return image
def _is_png(filename):
"""Determine if a file contains a PNG format image.
Args:
filename: string, path of the image file.
Returns:
boolean indicating if the image is a PNG.
"""
# File list from:
# https://groups.google.com/forum/embed/?place=forum/torch7#!topic/torch7/fOSTXHIESSU
return 'n02105855_2933.JPEG' in filename
def _is_cmyk(filename):
"""Determine if file contains a CMYK JPEG format image.
Args:
filename: string, path of the image file.
Returns:
boolean indicating if the image is a JPEG encoded with CMYK color space.
"""
# File list from:
# https://github.com/cytsai/ilsvrc-cmyk-image-list
blacklist = ['n01739381_1309.JPEG', 'n02077923_14822.JPEG',
'n02447366_23489.JPEG', 'n02492035_15739.JPEG',
'n02747177_10752.JPEG', 'n03018349_4028.JPEG',
'n03062245_4620.JPEG', 'n03347037_9675.JPEG',
'n03467068_12171.JPEG', 'n03529860_11437.JPEG',
'n03544143_17228.JPEG', 'n03633091_5218.JPEG',
'n03710637_5125.JPEG', 'n03961711_5286.JPEG',
'n04033995_2932.JPEG', 'n04258138_17003.JPEG',
'n04264628_27969.JPEG', 'n04336792_7448.JPEG',
'n04371774_5854.JPEG', 'n04596742_4225.JPEG',
'n07583066_647.JPEG', 'n13037406_4650.JPEG']
return filename.split('/')[-1] in blacklist
def _process_image(filename, coder):
"""Process a single image file.
Args:
filename: string, path to an image file e.g., '/path/to/example.JPG'.
coder: instance of ImageCoder to provide TensorFlow image coding utils.
Returns:
image_buffer: string, JPEG encoding of RGB image.
height: integer, image height in pixels.
width: integer, image width in pixels.
"""
# Read the image file.
image_data = tf.gfile.GFile(filename, 'r').read()
# Clean the dirty data.
if _is_png(filename):
# 1 image is a PNG.
print('Converting PNG to JPEG for %s' % filename)
image_data = coder.png_to_jpeg(image_data)
elif _is_cmyk(filename):
# 22 JPEG images are in CMYK colorspace.
print('Converting CMYK to RGB for %s' % filename)
image_data = coder.cmyk_to_rgb(image_data)
# Decode the RGB JPEG.
image = coder.decode_jpeg(image_data)
# Check that image converted to RGB
assert len(image.shape) == 3
height = image.shape[0]
width = image.shape[1]
assert image.shape[2] == 3
return image_data, height, width
def _process_image_files_batch(coder, thread_index, ranges, name, filenames,
synsets, labels, humans, bboxes, num_shards):
"""Processes and saves list of images as TFRecord in 1 thread.
Args:
coder: instance of ImageCoder to provide TensorFlow image coding utils.
thread_index: integer, unique batch to run index is within [0, len(ranges)).
ranges: list of pairs of integers specifying ranges of each batches to
analyze in parallel.
name: string, unique identifier specifying the data set
filenames: list of strings; each string is a path to an image file
synsets: list of strings; each string is a unique WordNet ID
labels: list of integer; each integer identifies the ground truth
humans: list of strings; each string is a human-readable label
bboxes: list of bounding boxes for each image. Note that each entry in this
list might contain from 0+ entries corresponding to the number of bounding
box annotations for the image.
num_shards: integer number of shards for this data set.
"""
# Each thread produces N shards where N = int(num_shards / num_threads).
# For instance, if num_shards = 128, and the num_threads = 2, then the first
# thread would produce shards [0, 64).
num_threads = len(ranges)
assert not num_shards % num_threads
num_shards_per_batch = int(num_shards / num_threads)
shard_ranges = np.linspace(ranges[thread_index][0],
ranges[thread_index][1],
num_shards_per_batch + 1).astype(int)
num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0]
counter = 0
for s in xrange(num_shards_per_batch):
# Generate a sharded version of the file name, e.g. 'train-00002-of-00010'
shard = thread_index * num_shards_per_batch + s
output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards)
output_file = os.path.join(FLAGS.output_directory, output_filename)
writer = tf.python_io.TFRecordWriter(output_file)
shard_counter = 0
files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int)
for i in files_in_shard:
filename = filenames[i]
label = labels[i]
synset = synsets[i]
human = humans[i]
bbox = bboxes[i]
image_buffer, height, width = _process_image(filename, coder)
example = _convert_to_example(filename, image_buffer, label,
synset, human, bbox,
height, width)
writer.write(example.SerializeToString())
shard_counter += 1
counter += 1
if not counter % 1000:
print('%s [thread %d]: Processed %d of %d images in thread batch.' %
(datetime.now(), thread_index, counter, num_files_in_thread))
sys.stdout.flush()
writer.close()
print('%s [thread %d]: Wrote %d images to %s' %
(datetime.now(), thread_index, shard_counter, output_file))
sys.stdout.flush()
shard_counter = 0
print('%s [thread %d]: Wrote %d images to %d shards.' %
(datetime.now(), thread_index, counter, num_files_in_thread))
sys.stdout.flush()
def _process_image_files(name, filenames, synsets, labels, humans,
bboxes, num_shards):
"""Process and save list of images as TFRecord of Example protos.
Args:
name: string, unique identifier specifying the data set
filenames: list of strings; each string is a path to an image file
synsets: list of strings; each string is a unique WordNet ID
labels: list of integer; each integer identifies the ground truth
humans: list of strings; each string is a human-readable label
bboxes: list of bounding boxes for each image. Note that each entry in this
list might contain from 0+ entries corresponding to the number of bounding
box annotations for the image.
num_shards: integer number of shards for this data set.
"""
assert len(filenames) == len(synsets)
assert len(filenames) == len(labels)
assert len(filenames) == len(humans)
assert len(filenames) == len(bboxes)
# Break all images into batches with a [ranges[i][0], ranges[i][1]].
spacing = np.linspace(0, len(filenames), FLAGS.num_threads + 1).astype(np.int)
ranges = []
threads = []
for i in xrange(len(spacing) - 1):
ranges.append([spacing[i], spacing[i+1]])
# Launch a thread for each batch.
print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges))
sys.stdout.flush()
# Create a mechanism for monitoring when all threads are finished.
coord = tf.train.Coordinator()
# Create a generic TensorFlow-based utility for converting all image codings.
coder = ImageCoder()
threads = []
for thread_index in xrange(len(ranges)):
args = (coder, thread_index, ranges, name, filenames,
synsets, labels, humans, bboxes, num_shards)
t = threading.Thread(target=_process_image_files_batch, args=args)
t.start()
threads.append(t)
# Wait for all the threads to terminate.
coord.join(threads)
print('%s: Finished writing all %d images in data set.' %
(datetime.now(), len(filenames)))
sys.stdout.flush()
def _find_image_files(data_dir, labels_file):
"""Build a list of all images files and labels in the data set.
Args:
data_dir: string, path to the root directory of images.
Assumes that the ImageNet data set resides in JPEG files located in
the following directory structure.
data_dir/n01440764/ILSVRC2012_val_00000293.JPEG
data_dir/n01440764/ILSVRC2012_val_00000543.JPEG
where 'n01440764' is the unique synset label associated with these images.
labels_file: string, path to the labels file.
The list of valid labels are held in this file. Assumes that the file
contains entries as such:
n01440764
n01443537
n01484850
where each line corresponds to a label expressed as a synset. We map
each synset contained in the file to an integer (based on the alphabetical
ordering) starting with the integer 1 corresponding to the synset
contained in the first line.
The reason we start the integer labels at 1 is to reserve label 0 as an
unused background class.
Returns:
filenames: list of strings; each string is a path to an image file.
synsets: list of strings; each string is a unique WordNet ID.
labels: list of integer; each integer identifies the ground truth.
"""
print('Determining list of input files and labels from %s.' % data_dir)
challenge_synsets = [
l.strip() for l in tf.gfile.GFile(labels_file, 'r').readlines()
]
labels = []
filenames = []
synsets = []
# Leave label index 0 empty as a background class.
label_index = 1
# Construct the list of JPEG files and labels.
for synset in challenge_synsets:
jpeg_file_path = '%s/%s/*.JPEG' % (data_dir, synset)
matching_files = tf.gfile.Glob(jpeg_file_path)
labels.extend([label_index] * len(matching_files))
synsets.extend([synset] * len(matching_files))
filenames.extend(matching_files)
if not label_index % 100:
print('Finished finding files in %d of %d classes.' % (
label_index, len(challenge_synsets)))
label_index += 1
# Shuffle the ordering of all image files in order to guarantee
# random ordering of the images with respect to label in the
# saved TFRecord files. Make the randomization repeatable.
shuffled_index = range(len(filenames))
random.seed(12345)
random.shuffle(shuffled_index)
filenames = [filenames[i] for i in shuffled_index]
synsets = [synsets[i] for i in shuffled_index]
labels = [labels[i] for i in shuffled_index]
print('Found %d JPEG files across %d labels inside %s.' %
(len(filenames), len(challenge_synsets), data_dir))
return filenames, synsets, labels
def _find_human_readable_labels(synsets, synset_to_human):
"""Build a list of human-readable labels.
Args:
synsets: list of strings; each string is a unique WordNet ID.
synset_to_human: dict of synset to human labels, e.g.,
'n02119022' --> 'red fox, Vulpes vulpes'
Returns:
List of human-readable strings corresponding to each synset.
"""
humans = []
for s in synsets:
assert s in synset_to_human, ('Failed to find: %s' % s)
humans.append(synset_to_human[s])
return humans
def _find_image_bounding_boxes(filenames, image_to_bboxes):
"""Find the bounding boxes for a given image file.
Args:
filenames: list of strings; each string is a path to an image file.
image_to_bboxes: dictionary mapping image file names to a list of
bounding boxes. This list contains 0+ bounding boxes.
Returns:
List of bounding boxes for each image. Note that each entry in this
list might contain from 0+ entries corresponding to the number of bounding
box annotations for the image.
"""
num_image_bbox = 0
bboxes = []
for f in filenames:
basename = os.path.basename(f)
if basename in image_to_bboxes:
bboxes.append(image_to_bboxes[basename])
num_image_bbox += 1
else:
bboxes.append([])
print('Found %d images with bboxes out of %d images' % (
num_image_bbox, len(filenames)))
return bboxes
def _process_dataset(name, directory, num_shards, synset_to_human,
image_to_bboxes):
"""Process a complete data set and save it as a TFRecord.
Args:
name: string, unique identifier specifying the data set.
directory: string, root path to the data set.
num_shards: integer number of shards for this data set.
synset_to_human: dict of synset to human labels, e.g.,
'n02119022' --> 'red fox, Vulpes vulpes'
image_to_bboxes: dictionary mapping image file names to a list of
bounding boxes. This list contains 0+ bounding boxes.
"""
filenames, synsets, labels = _find_image_files(directory, FLAGS.labels_file)
humans = _find_human_readable_labels(synsets, synset_to_human)
bboxes = _find_image_bounding_boxes(filenames, image_to_bboxes)
_process_image_files(name, filenames, synsets, labels,
humans, bboxes, num_shards)
def _build_synset_lookup(imagenet_metadata_file):
"""Build lookup for synset to human-readable label.
Args:
imagenet_metadata_file: string, path to file containing mapping from
synset to human-readable label.
Assumes each line of the file looks like:
n02119247 black fox
n02119359 silver fox
n02119477 red fox, Vulpes fulva
where each line corresponds to a unique mapping. Note that each line is
formatted as <synset>\t<human readable label>.
Returns:
Dictionary of synset to human labels, such as:
'n02119022' --> 'red fox, Vulpes vulpes'
"""
lines = tf.gfile.GFile(imagenet_metadata_file, 'r').readlines()
synset_to_human = {}
for l in lines:
if l:
parts = l.strip().split('\t')
assert len(parts) == 2
synset = parts[0]
human = parts[1]
synset_to_human[synset] = human
return synset_to_human
def _build_bounding_box_lookup(bounding_box_file):
"""Build a lookup from image file to bounding boxes.
Args:
bounding_box_file: string, path to file with bounding boxes annotations.
Assumes each line of the file looks like:
n00007846_64193.JPEG,0.0060,0.2620,0.7545,0.9940
where each line corresponds to one bounding box annotation associated
with an image. Each line can be parsed as:
<JPEG file name>, <xmin>, <ymin>, <xmax>, <ymax>
Note that there might exist mulitple bounding box annotations associated
with an image file. This file is the output of process_bounding_boxes.py.
Returns:
Dictionary mapping image file names to a list of bounding boxes. This list
contains 0+ bounding boxes.
"""
lines = tf.gfile.GFile(bounding_box_file, 'r').readlines()
images_to_bboxes = {}
num_bbox = 0
num_image = 0
for l in lines:
if l:
parts = l.split(',')
assert len(parts) == 5, ('Failed to parse: %s' % l)
filename = parts[0]
xmin = float(parts[1])
ymin = float(parts[2])
xmax = float(parts[3])
ymax = float(parts[4])
box = [xmin, ymin, xmax, ymax]
if filename not in images_to_bboxes:
images_to_bboxes[filename] = []
num_image += 1
images_to_bboxes[filename].append(box)
num_bbox += 1
print('Successfully read %d bounding boxes '
'across %d images.' % (num_bbox, num_image))
return images_to_bboxes
def main(unused_argv):
assert not FLAGS.train_shards % FLAGS.num_threads, (
'Please make the FLAGS.num_threads commensurate with FLAGS.train_shards')
assert not FLAGS.validation_shards % FLAGS.num_threads, (
'Please make the FLAGS.num_threads commensurate with '
'FLAGS.validation_shards')
print('Saving results to %s' % FLAGS.output_directory)
# Build a map from synset to human-readable label.
synset_to_human = _build_synset_lookup(FLAGS.imagenet_metadata_file)
image_to_bboxes = _build_bounding_box_lookup(FLAGS.bounding_box_file)
# Run it!
_process_dataset('validation', FLAGS.validation_directory,
FLAGS.validation_shards, synset_to_human, image_to_bboxes)
_process_dataset('train', FLAGS.train_directory, FLAGS.train_shards,
synset_to_human, image_to_bboxes)
if __name__ == '__main__':
tf.app.run()
@@ -0,0 +1,100 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data for the Cifar10 dataset.
The dataset scripts used to create the dataset can be found at:
tensorflow/models/research/slim/datasets/download_and_convert_cifar10.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from datasets import dataset_utils
slim = contrib_slim
_FILE_PATTERN = 'cifar10_%s.tfrecord'
SPLITS_TO_SIZES = {'train': 50000, 'test': 10000}
_NUM_CLASSES = 10
_ITEMS_TO_DESCRIPTIONS = {
'image': 'A [32 x 32 x 3] color image.',
'label': 'A single integer between 0 and 9',
}
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
"""Gets a dataset tuple with instructions for reading cifar10.
Args:
split_name: A train/test split name.
dataset_dir: The base directory of the dataset sources.
file_pattern: The file pattern to use when matching the dataset sources.
It is assumed that the pattern contains a '%s' string so that the split
name can be inserted.
reader: The TensorFlow reader type.
Returns:
A `Dataset` namedtuple.
Raises:
ValueError: if `split_name` is not a valid train/test split.
"""
if split_name not in SPLITS_TO_SIZES:
raise ValueError('split name %s was not recognized.' % split_name)
if not file_pattern:
file_pattern = _FILE_PATTERN
file_pattern = os.path.join(dataset_dir, file_pattern % split_name)
# Allowing None in the signature so that dataset_factory can use the default.
if not reader:
reader = tf.TFRecordReader
keys_to_features = {
'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),
'image/class/label': tf.FixedLenFeature(
[], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
}
items_to_handlers = {
'image': slim.tfexample_decoder.Image(shape=[32, 32, 3]),
'label': slim.tfexample_decoder.Tensor('image/class/label'),
}
decoder = slim.tfexample_decoder.TFExampleDecoder(
keys_to_features, items_to_handlers)
labels_to_names = None
if dataset_utils.has_labels(dataset_dir):
labels_to_names = dataset_utils.read_label_file(dataset_dir)
return slim.dataset.Dataset(
data_sources=file_pattern,
reader=reader,
decoder=decoder,
num_samples=SPLITS_TO_SIZES[split_name],
items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
num_classes=_NUM_CLASSES,
labels_to_names=labels_to_names,
)
@@ -0,0 +1,59 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A factory-pattern class which returns classification image/label pairs."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from datasets import cifar10
from datasets import flowers
from datasets import imagenet
from datasets import mnist
from datasets import visualwakewords
datasets_map = {
'cifar10': cifar10,
'flowers': flowers,
'imagenet': imagenet,
'mnist': mnist,
'visualwakewords': visualwakewords,
}
def get_dataset(name, split_name, dataset_dir, file_pattern=None, reader=None):
"""Given a dataset name and a split_name returns a Dataset.
Args:
name: String, the name of the dataset.
split_name: A train/test split name.
dataset_dir: The directory where the dataset files are stored.
file_pattern: The file pattern to use for matching the dataset source files.
reader: The subclass of tf.ReaderBase. If left as `None`, then the default
reader defined by each dataset is used.
Returns:
A `Dataset` class.
Raises:
ValueError: If the dataset `name` is unknown.
"""
if name not in datasets_map:
raise ValueError('Name of dataset unknown %s' % name)
return datasets_map[name].get_split(
split_name,
dataset_dir,
file_pattern,
reader)
@@ -0,0 +1,240 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains utilities for downloading and converting datasets."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import tarfile
import zipfile
from six.moves import urllib
import tensorflow as tf
LABELS_FILENAME = 'labels.txt'
def int64_feature(values):
"""Returns a TF-Feature of int64s.
Args:
values: A scalar or list of values.
Returns:
A TF-Feature.
"""
if not isinstance(values, (tuple, list)):
values = [values]
return tf.train.Feature(int64_list=tf.train.Int64List(value=values))
def bytes_list_feature(values):
"""Returns a TF-Feature of list of bytes.
Args:
values: A string or list of strings.
Returns:
A TF-Feature.
"""
return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))
def float_list_feature(values):
"""Returns a TF-Feature of list of floats.
Args:
values: A float or list of floats.
Returns:
A TF-Feature.
"""
return tf.train.Feature(float_list=tf.train.FloatList(value=values))
def bytes_feature(values):
"""Returns a TF-Feature of bytes.
Args:
values: A string.
Returns:
A TF-Feature.
"""
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))
def float_feature(values):
"""Returns a TF-Feature of floats.
Args:
values: A scalar of list of values.
Returns:
A TF-Feature.
"""
if not isinstance(values, (tuple, list)):
values = [values]
return tf.train.Feature(float_list=tf.train.FloatList(value=values))
def image_to_tfexample(image_data, image_format, height, width, class_id):
return tf.train.Example(features=tf.train.Features(feature={
'image/encoded': bytes_feature(image_data),
'image/format': bytes_feature(image_format),
'image/class/label': int64_feature(class_id),
'image/height': int64_feature(height),
'image/width': int64_feature(width),
}))
def download_url(url, dataset_dir):
"""Downloads the tarball or zip file from url into filepath.
Args:
url: The URL of a tarball or zip file.
dataset_dir: The directory where the temporary files are stored.
Returns:
filepath: path where the file is downloaded.
"""
filename = url.split('/')[-1]
filepath = os.path.join(dataset_dir, filename)
def _progress(count, block_size, total_size):
sys.stdout.write('\r>> Downloading %s %.1f%%' % (
filename, float(count * block_size) / float(total_size) * 100.0))
sys.stdout.flush()
filepath, _ = urllib.request.urlretrieve(url, filepath, _progress)
print()
statinfo = os.stat(filepath)
print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
return filepath
def download_and_uncompress_tarball(tarball_url, dataset_dir):
"""Downloads the `tarball_url` and uncompresses it locally.
Args:
tarball_url: The URL of a tarball file.
dataset_dir: The directory where the temporary files are stored.
"""
filepath = download_url(tarball_url, dataset_dir)
tarfile.open(filepath, 'r:gz').extractall(dataset_dir)
def download_and_uncompress_zipfile(zip_url, dataset_dir):
"""Downloads the `zip_url` and uncompresses it locally.
Args:
zip_url: The URL of a zip file.
dataset_dir: The directory where the temporary files are stored.
"""
filename = zip_url.split('/')[-1]
filepath = os.path.join(dataset_dir, filename)
if tf.gfile.Exists(filepath):
print('File {filename} has been already downloaded at {filepath}. '
'Unzipping it....'.format(filename=filename, filepath=filepath))
else:
filepath = download_url(zip_url, dataset_dir)
with zipfile.ZipFile(filepath, 'r') as zip_file:
for member in zip_file.namelist():
memberpath = os.path.join(dataset_dir, member)
# extract only if file doesn't exist
if not (os.path.exists(memberpath) or os.path.isfile(memberpath)):
zip_file.extract(member, dataset_dir)
def write_label_file(labels_to_class_names,
dataset_dir,
filename=LABELS_FILENAME):
"""Writes a file with the list of class names.
Args:
labels_to_class_names: A map of (integer) labels to class names.
dataset_dir: The directory in which the labels file should be written.
filename: The filename where the class names are written.
"""
labels_filename = os.path.join(dataset_dir, filename)
with tf.gfile.Open(labels_filename, 'w') as f:
for label in labels_to_class_names:
class_name = labels_to_class_names[label]
f.write('%d:%s\n' % (label, class_name))
def has_labels(dataset_dir, filename=LABELS_FILENAME):
"""Specifies whether or not the dataset directory contains a label map file.
Args:
dataset_dir: The directory in which the labels file is found.
filename: The filename where the class names are written.
Returns:
`True` if the labels file exists and `False` otherwise.
"""
return tf.gfile.Exists(os.path.join(dataset_dir, filename))
def read_label_file(dataset_dir, filename=LABELS_FILENAME):
"""Reads the labels file and returns a mapping from ID to class name.
Args:
dataset_dir: The directory in which the labels file is found.
filename: The filename where the class names are written.
Returns:
A map from a label (integer) to class name.
"""
labels_filename = os.path.join(dataset_dir, filename)
with tf.gfile.Open(labels_filename, 'rb') as f:
lines = f.read().decode()
lines = lines.split('\n')
lines = filter(None, lines)
labels_to_class_names = {}
for line in lines:
index = line.index(':')
labels_to_class_names[int(line[:index])] = line[index+1:]
return labels_to_class_names
def open_sharded_output_tfrecords(exit_stack, base_path, num_shards):
"""Opens all TFRecord shards for writing and adds them to an exit stack.
Args:
exit_stack: A context2.ExitStack used to automatically closed the TFRecords
opened in this function.
base_path: The base path for all shards
num_shards: The number of shards
Returns:
The list of opened TFRecords. Position k in the list corresponds to shard k.
"""
tf_record_output_filenames = [
'{}-{:05d}-of-{:05d}'.format(base_path, idx, num_shards)
for idx in range(num_shards)
]
tfrecords = [
exit_stack.enter_context(tf.python_io.TFRecordWriter(file_name))
for file_name in tf_record_output_filenames
]
return tfrecords
@@ -0,0 +1,198 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Downloads and converts cifar10 data to TFRecords of TF-Example protos.
This module downloads the cifar10 data, uncompresses it, reads the files
that make up the cifar10 data and creates two TFRecord datasets: one for train
and one for test. Each TFRecord dataset is comprised of a set of TF-Example
protocol buffers, each of which contain a single image and label.
The script should take several minutes to run.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import tarfile
import numpy as np
from six.moves import cPickle
from six.moves import urllib
import tensorflow as tf
from datasets import dataset_utils
# The URL where the CIFAR data can be downloaded.
_DATA_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
# The number of training files.
_NUM_TRAIN_FILES = 5
# The height and width of each image.
_IMAGE_SIZE = 32
# The names of the classes.
_CLASS_NAMES = [
'airplane',
'automobile',
'bird',
'cat',
'deer',
'dog',
'frog',
'horse',
'ship',
'truck',
]
def _add_to_tfrecord(filename, tfrecord_writer, offset=0):
"""Loads data from the cifar10 pickle files and writes files to a TFRecord.
Args:
filename: The filename of the cifar10 pickle file.
tfrecord_writer: The TFRecord writer to use for writing.
offset: An offset into the absolute number of images previously written.
Returns:
The new offset.
"""
with tf.gfile.Open(filename, 'rb') as f:
if sys.version_info < (3,):
data = cPickle.load(f)
else:
data = cPickle.load(f, encoding='bytes')
images = data[b'data']
num_images = images.shape[0]
images = images.reshape((num_images, 3, 32, 32))
labels = data[b'labels']
with tf.Graph().as_default():
image_placeholder = tf.placeholder(dtype=tf.uint8)
encoded_image = tf.image.encode_png(image_placeholder)
with tf.Session('') as sess:
for j in range(num_images):
sys.stdout.write('\r>> Reading file [%s] image %d/%d' % (
filename, offset + j + 1, offset + num_images))
sys.stdout.flush()
image = np.squeeze(images[j]).transpose((1, 2, 0))
label = labels[j]
png_string = sess.run(encoded_image,
feed_dict={image_placeholder: image})
example = dataset_utils.image_to_tfexample(
png_string, b'png', _IMAGE_SIZE, _IMAGE_SIZE, label)
tfrecord_writer.write(example.SerializeToString())
return offset + num_images
def _get_output_filename(dataset_dir, split_name):
"""Creates the output filename.
Args:
dataset_dir: The dataset directory where the dataset is stored.
split_name: The name of the train/test split.
Returns:
An absolute file path.
"""
return '%s/cifar10_%s.tfrecord' % (dataset_dir, split_name)
def _download_and_uncompress_dataset(dataset_dir):
"""Downloads cifar10 and uncompresses it locally.
Args:
dataset_dir: The directory where the temporary files are stored.
"""
filename = _DATA_URL.split('/')[-1]
filepath = os.path.join(dataset_dir, filename)
if not os.path.exists(filepath):
def _progress(count, block_size, total_size):
sys.stdout.write('\r>> Downloading %s %.1f%%' % (
filename, float(count * block_size) / float(total_size) * 100.0))
sys.stdout.flush()
filepath, _ = urllib.request.urlretrieve(_DATA_URL, filepath, _progress)
print()
statinfo = os.stat(filepath)
print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
tarfile.open(filepath, 'r:gz').extractall(dataset_dir)
def _clean_up_temporary_files(dataset_dir):
"""Removes temporary files used to create the dataset.
Args:
dataset_dir: The directory where the temporary files are stored.
"""
filename = _DATA_URL.split('/')[-1]
filepath = os.path.join(dataset_dir, filename)
tf.gfile.Remove(filepath)
tmp_dir = os.path.join(dataset_dir, 'cifar-10-batches-py')
tf.gfile.DeleteRecursively(tmp_dir)
def run(dataset_dir):
"""Runs the download and conversion operation.
Args:
dataset_dir: The dataset directory where the dataset is stored.
"""
if not tf.gfile.Exists(dataset_dir):
tf.gfile.MakeDirs(dataset_dir)
training_filename = _get_output_filename(dataset_dir, 'train')
testing_filename = _get_output_filename(dataset_dir, 'test')
if tf.gfile.Exists(training_filename) and tf.gfile.Exists(testing_filename):
print('Dataset files already exist. Exiting without re-creating them.')
return
dataset_utils.download_and_uncompress_tarball(_DATA_URL, dataset_dir)
# First, process the training data:
with tf.python_io.TFRecordWriter(training_filename) as tfrecord_writer:
offset = 0
for i in range(_NUM_TRAIN_FILES):
filename = os.path.join(dataset_dir,
'cifar-10-batches-py',
'data_batch_%d' % (i + 1)) # 1-indexed.
offset = _add_to_tfrecord(filename, tfrecord_writer, offset)
# Next, process the testing data:
with tf.python_io.TFRecordWriter(testing_filename) as tfrecord_writer:
filename = os.path.join(dataset_dir,
'cifar-10-batches-py',
'test_batch')
_add_to_tfrecord(filename, tfrecord_writer)
# Finally, write the labels file:
labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES))
dataset_utils.write_label_file(labels_to_class_names, dataset_dir)
_clean_up_temporary_files(dataset_dir)
print('\nFinished converting the Cifar10 dataset!')
@@ -0,0 +1,211 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Downloads and converts Flowers data to TFRecords of TF-Example protos.
This module downloads the Flowers data, uncompresses it, reads the files
that make up the Flowers data and creates two TFRecord datasets: one for train
and one for test. Each TFRecord dataset is comprised of a set of TF-Example
protocol buffers, each of which contain a single image and label.
The script should take about a minute to run.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import os
import random
import sys
import tensorflow as tf
from datasets import dataset_utils
# The URL where the Flowers data can be downloaded.
_DATA_URL = 'http://download.tensorflow.org/example_images/flower_photos.tgz'
# The number of images in the validation set.
_NUM_VALIDATION = 350
# Seed for repeatability.
_RANDOM_SEED = 0
# The number of shards per dataset split.
_NUM_SHARDS = 5
class ImageReader(object):
"""Helper class that provides TensorFlow image coding utilities."""
def __init__(self):
# Initializes function that decodes RGB JPEG data.
self._decode_jpeg_data = tf.placeholder(dtype=tf.string)
self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3)
def read_image_dims(self, sess, image_data):
image = self.decode_jpeg(sess, image_data)
return image.shape[0], image.shape[1]
def decode_jpeg(self, sess, image_data):
image = sess.run(self._decode_jpeg,
feed_dict={self._decode_jpeg_data: image_data})
assert len(image.shape) == 3
assert image.shape[2] == 3
return image
def _get_filenames_and_classes(dataset_dir):
"""Returns a list of filenames and inferred class names.
Args:
dataset_dir: A directory containing a set of subdirectories representing
class names. Each subdirectory should contain PNG or JPG encoded images.
Returns:
A list of image file paths, relative to `dataset_dir` and the list of
subdirectories, representing class names.
"""
flower_root = os.path.join(dataset_dir, 'flower_photos')
directories = []
class_names = []
for filename in os.listdir(flower_root):
path = os.path.join(flower_root, filename)
if os.path.isdir(path):
directories.append(path)
class_names.append(filename)
photo_filenames = []
for directory in directories:
for filename in os.listdir(directory):
path = os.path.join(directory, filename)
photo_filenames.append(path)
return photo_filenames, sorted(class_names)
def _get_dataset_filename(dataset_dir, split_name, shard_id):
output_filename = 'flowers_%s_%05d-of-%05d.tfrecord' % (
split_name, shard_id, _NUM_SHARDS)
return os.path.join(dataset_dir, output_filename)
def _convert_dataset(split_name, filenames, class_names_to_ids, dataset_dir):
"""Converts the given filenames to a TFRecord dataset.
Args:
split_name: The name of the dataset, either 'train' or 'validation'.
filenames: A list of absolute paths to png or jpg images.
class_names_to_ids: A dictionary from class names (strings) to ids
(integers).
dataset_dir: The directory where the converted datasets are stored.
"""
assert split_name in ['train', 'validation']
num_per_shard = int(math.ceil(len(filenames) / float(_NUM_SHARDS)))
with tf.Graph().as_default():
image_reader = ImageReader()
with tf.Session('') as sess:
for shard_id in range(_NUM_SHARDS):
output_filename = _get_dataset_filename(
dataset_dir, split_name, shard_id)
with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
start_ndx = shard_id * num_per_shard
end_ndx = min((shard_id+1) * num_per_shard, len(filenames))
for i in range(start_ndx, end_ndx):
sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
i+1, len(filenames), shard_id))
sys.stdout.flush()
# Read the filename:
image_data = tf.gfile.GFile(filenames[i], 'rb').read()
height, width = image_reader.read_image_dims(sess, image_data)
class_name = os.path.basename(os.path.dirname(filenames[i]))
class_id = class_names_to_ids[class_name]
example = dataset_utils.image_to_tfexample(
image_data, b'jpg', height, width, class_id)
tfrecord_writer.write(example.SerializeToString())
sys.stdout.write('\n')
sys.stdout.flush()
def _clean_up_temporary_files(dataset_dir):
"""Removes temporary files used to create the dataset.
Args:
dataset_dir: The directory where the temporary files are stored.
"""
filename = _DATA_URL.split('/')[-1]
filepath = os.path.join(dataset_dir, filename)
tf.gfile.Remove(filepath)
tmp_dir = os.path.join(dataset_dir, 'flower_photos')
tf.gfile.DeleteRecursively(tmp_dir)
def _dataset_exists(dataset_dir):
for split_name in ['train', 'validation']:
for shard_id in range(_NUM_SHARDS):
output_filename = _get_dataset_filename(
dataset_dir, split_name, shard_id)
if not tf.gfile.Exists(output_filename):
return False
return True
def run(dataset_dir):
"""Runs the download and conversion operation.
Args:
dataset_dir: The dataset directory where the dataset is stored.
"""
if not tf.gfile.Exists(dataset_dir):
tf.gfile.MakeDirs(dataset_dir)
if _dataset_exists(dataset_dir):
print('Dataset files already exist. Exiting without re-creating them.')
return
dataset_utils.download_and_uncompress_tarball(_DATA_URL, dataset_dir)
photo_filenames, class_names = _get_filenames_and_classes(dataset_dir)
class_names_to_ids = dict(zip(class_names, range(len(class_names))))
# Divide into train and test:
random.seed(_RANDOM_SEED)
random.shuffle(photo_filenames)
training_filenames = photo_filenames[_NUM_VALIDATION:]
validation_filenames = photo_filenames[:_NUM_VALIDATION]
# First, convert the training and validation sets.
_convert_dataset('train', training_filenames, class_names_to_ids,
dataset_dir)
_convert_dataset('validation', validation_filenames, class_names_to_ids,
dataset_dir)
# Finally, write the labels file:
labels_to_class_names = dict(zip(range(len(class_names)), class_names))
dataset_utils.write_label_file(labels_to_class_names, dataset_dir)
_clean_up_temporary_files(dataset_dir)
print('\nFinished converting the Flowers dataset!')
@@ -0,0 +1,103 @@
#!/bin/bash
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Script to download and preprocess ImageNet Challenge 2012
# training and validation data set.
#
# The final output of this script are sharded TFRecord files containing
# serialized Example protocol buffers. See build_imagenet_data.py for
# details of how the Example protocol buffers contain the ImageNet data.
#
# The final output of this script appears as such:
#
# data_dir/train-00000-of-01024
# data_dir/train-00001-of-01024
# ...
# data_dir/train-00127-of-01024
#
# and
#
# data_dir/validation-00000-of-00128
# data_dir/validation-00001-of-00128
# ...
# data_dir/validation-00127-of-00128
#
# Note that this script may take several hours to run to completion. The
# conversion of the ImageNet data to TFRecords alone takes 2-3 hours depending
# on the speed of your machine. Please be patient.
#
# **IMPORTANT**
# To download the raw images, the user must create an account with image-net.org
# and generate a username and access_key. The latter two are required for
# downloading the raw images.
#
# usage:
# cd research/slim
# bazel build :download_and_convert_imagenet
# ./bazel-bin/download_and_convert_imagenet.sh [data-dir]
set -e
if [ -z "$1" ]; then
echo "usage download_and_convert_imagenet.sh [data dir]"
exit
fi
# Create the output and temporary directories.
DATA_DIR="${1%/}"
SCRATCH_DIR="${DATA_DIR}/raw-data/"
mkdir -p "${DATA_DIR}"
mkdir -p "${SCRATCH_DIR}"
WORK_DIR="$0.runfiles/__main__"
# Download the ImageNet data.
LABELS_FILE="${WORK_DIR}/datasets/imagenet_lsvrc_2015_synsets.txt"
DOWNLOAD_SCRIPT="${WORK_DIR}/datasets/download_imagenet.sh"
"${DOWNLOAD_SCRIPT}" "${SCRATCH_DIR}" "${LABELS_FILE}"
# Note the locations of the train and validation data.
TRAIN_DIRECTORY="${SCRATCH_DIR}train/"
VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/"
# Preprocess the validation data by moving the images into the appropriate
# sub-directory based on the label (synset) of the image.
echo "Organizing the validation data into sub-directories."
PREPROCESS_VAL_SCRIPT="${WORK_DIR}/datasets/preprocess_imagenet_validation_data.py"
VAL_LABELS_FILE="${WORK_DIR}/datasets/imagenet_2012_validation_synset_labels.txt"
"${PREPROCESS_VAL_SCRIPT}" "${VALIDATION_DIRECTORY}" "${VAL_LABELS_FILE}"
# Convert the XML files for bounding box annotations into a single CSV.
echo "Extracting bounding box information from XML."
BOUNDING_BOX_SCRIPT="${WORK_DIR}/datasets/process_bounding_boxes.py"
BOUNDING_BOX_FILE="${SCRATCH_DIR}/imagenet_2012_bounding_boxes.csv"
BOUNDING_BOX_DIR="${SCRATCH_DIR}bounding_boxes/"
"${BOUNDING_BOX_SCRIPT}" "${BOUNDING_BOX_DIR}" "${LABELS_FILE}" \
| sort >"${BOUNDING_BOX_FILE}"
echo "Finished downloading and preprocessing the ImageNet data."
# Build the TFRecords version of the ImageNet data.
BUILD_SCRIPT="${WORK_DIR}/build_imagenet_data"
OUTPUT_DIRECTORY="${DATA_DIR}"
IMAGENET_METADATA_FILE="${WORK_DIR}/datasets/imagenet_metadata.txt"
"${BUILD_SCRIPT}" \
--train_directory="${TRAIN_DIRECTORY}" \
--validation_directory="${VALIDATION_DIRECTORY}" \
--output_directory="${OUTPUT_DIRECTORY}" \
--imagenet_metadata_file="${IMAGENET_METADATA_FILE}" \
--labels_file="${LABELS_FILE}" \
--bounding_box_file="${BOUNDING_BOX_FILE}"
@@ -0,0 +1,221 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Downloads and converts MNIST data to TFRecords of TF-Example protos.
This module downloads the MNIST data, uncompresses it, reads the files
that make up the MNIST data and creates two TFRecord datasets: one for train
and one for test. Each TFRecord dataset is comprised of a set of TF-Example
protocol buffers, each of which contain a single image and label.
The script should take about a minute to run.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import gzip
import os
import sys
import numpy as np
from six.moves import urllib
import tensorflow as tf
from datasets import dataset_utils
# The URLs where the MNIST data can be downloaded.
_DATA_URL = 'http://yann.lecun.com/exdb/mnist/'
_TRAIN_DATA_FILENAME = 'train-images-idx3-ubyte.gz'
_TRAIN_LABELS_FILENAME = 'train-labels-idx1-ubyte.gz'
_TEST_DATA_FILENAME = 't10k-images-idx3-ubyte.gz'
_TEST_LABELS_FILENAME = 't10k-labels-idx1-ubyte.gz'
_IMAGE_SIZE = 28
_NUM_CHANNELS = 1
# The names of the classes.
_CLASS_NAMES = [
'zero',
'one',
'two',
'three',
'four',
'five',
'size',
'seven',
'eight',
'nine',
]
def _extract_images(filename, num_images):
"""Extract the images into a numpy array.
Args:
filename: The path to an MNIST images file.
num_images: The number of images in the file.
Returns:
A numpy array of shape [number_of_images, height, width, channels].
"""
print('Extracting images from: ', filename)
with gzip.open(filename) as bytestream:
bytestream.read(16)
buf = bytestream.read(
_IMAGE_SIZE * _IMAGE_SIZE * num_images * _NUM_CHANNELS)
data = np.frombuffer(buf, dtype=np.uint8)
data = data.reshape(num_images, _IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS)
return data
def _extract_labels(filename, num_labels):
"""Extract the labels into a vector of int64 label IDs.
Args:
filename: The path to an MNIST labels file.
num_labels: The number of labels in the file.
Returns:
A numpy array of shape [number_of_labels]
"""
print('Extracting labels from: ', filename)
with gzip.open(filename) as bytestream:
bytestream.read(8)
buf = bytestream.read(1 * num_labels)
labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64)
return labels
def _add_to_tfrecord(data_filename, labels_filename, num_images,
tfrecord_writer):
"""Loads data from the binary MNIST files and writes files to a TFRecord.
Args:
data_filename: The filename of the MNIST images.
labels_filename: The filename of the MNIST labels.
num_images: The number of images in the dataset.
tfrecord_writer: The TFRecord writer to use for writing.
"""
images = _extract_images(data_filename, num_images)
labels = _extract_labels(labels_filename, num_images)
shape = (_IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS)
with tf.Graph().as_default():
image = tf.placeholder(dtype=tf.uint8, shape=shape)
encoded_png = tf.image.encode_png(image)
with tf.Session('') as sess:
for j in range(num_images):
sys.stdout.write('\r>> Converting image %d/%d' % (j + 1, num_images))
sys.stdout.flush()
png_string = sess.run(encoded_png, feed_dict={image: images[j]})
example = dataset_utils.image_to_tfexample(
png_string, 'png'.encode(), _IMAGE_SIZE, _IMAGE_SIZE, labels[j])
tfrecord_writer.write(example.SerializeToString())
def _get_output_filename(dataset_dir, split_name):
"""Creates the output filename.
Args:
dataset_dir: The directory where the temporary files are stored.
split_name: The name of the train/test split.
Returns:
An absolute file path.
"""
return '%s/mnist_%s.tfrecord' % (dataset_dir, split_name)
def _download_dataset(dataset_dir):
"""Downloads MNIST locally.
Args:
dataset_dir: The directory where the temporary files are stored.
"""
for filename in [_TRAIN_DATA_FILENAME,
_TRAIN_LABELS_FILENAME,
_TEST_DATA_FILENAME,
_TEST_LABELS_FILENAME]:
filepath = os.path.join(dataset_dir, filename)
if not os.path.exists(filepath):
print('Downloading file %s...' % filename)
def _progress(count, block_size, total_size):
sys.stdout.write('\r>> Downloading %.1f%%' % (
float(count * block_size) / float(total_size) * 100.0))
sys.stdout.flush()
filepath, _ = urllib.request.urlretrieve(_DATA_URL + filename,
filepath,
_progress)
print()
with tf.gfile.GFile(filepath) as f:
size = f.size()
print('Successfully downloaded', filename, size, 'bytes.')
def _clean_up_temporary_files(dataset_dir):
"""Removes temporary files used to create the dataset.
Args:
dataset_dir: The directory where the temporary files are stored.
"""
for filename in [_TRAIN_DATA_FILENAME,
_TRAIN_LABELS_FILENAME,
_TEST_DATA_FILENAME,
_TEST_LABELS_FILENAME]:
filepath = os.path.join(dataset_dir, filename)
tf.gfile.Remove(filepath)
def run(dataset_dir):
"""Runs the download and conversion operation.
Args:
dataset_dir: The dataset directory where the dataset is stored.
"""
if not tf.gfile.Exists(dataset_dir):
tf.gfile.MakeDirs(dataset_dir)
training_filename = _get_output_filename(dataset_dir, 'train')
testing_filename = _get_output_filename(dataset_dir, 'test')
if tf.gfile.Exists(training_filename) and tf.gfile.Exists(testing_filename):
print('Dataset files already exist. Exiting without re-creating them.')
return
_download_dataset(dataset_dir)
# First, process the training data:
with tf.python_io.TFRecordWriter(training_filename) as tfrecord_writer:
data_filename = os.path.join(dataset_dir, _TRAIN_DATA_FILENAME)
labels_filename = os.path.join(dataset_dir, _TRAIN_LABELS_FILENAME)
_add_to_tfrecord(data_filename, labels_filename, 60000, tfrecord_writer)
# Next, process the testing data:
with tf.python_io.TFRecordWriter(testing_filename) as tfrecord_writer:
data_filename = os.path.join(dataset_dir, _TEST_DATA_FILENAME)
labels_filename = os.path.join(dataset_dir, _TEST_LABELS_FILENAME)
_add_to_tfrecord(data_filename, labels_filename, 10000, tfrecord_writer)
# Finally, write the labels file:
labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES))
dataset_utils.write_label_file(labels_to_class_names, dataset_dir)
_clean_up_temporary_files(dataset_dir)
print('\nFinished converting the MNIST dataset!')
@@ -0,0 +1,158 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Downloads and converts VisualWakewords data to TFRecords of TF-Example protos.
This module downloads the COCO dataset, uncompresses it, derives the
VisualWakeWords dataset to create two TFRecord datasets: one for
train and one for test. Each TFRecord dataset is comprised of a set of
TF-Example protocol buffers, each of which contain a single image and label.
The script should take several minutes to run.
Please note that this tool creates sharded output files.
VisualWakeWords dataset is used to design tiny models classifying two classes,
such as person/not-person. The two steps to generate the VisualWakeWords
dataset from the COCO dataset are given below:
1. Use COCO annotations to create VisualWakeWords annotations:
Note: A bounding box is 'valid' if it has the foreground_class_of_interest
(e.g. person) and it's area is greater than 0.5% of the image area.
The resulting annotations file has the following fields, where 'images' are
the same as COCO dataset. 'categories' only contains information about the
foreground_class_of_interest (e.g. person) and 'annotations' maps an image to
objects (a list of valid bounding boxes) and label (value is 1 if it has
atleast one valid bounding box, otherwise 0)
images[{
"id", "width", "height", "file_name", "flickr_url", "coco_url",
"license", "date_captured",
}]
categories{
"id": {"id", "name", "supercategory"}
}
annotations{
"image_id": {"objects":[{"area", "bbox" : [x,y,width,height]}], "label"}
}
2. Use VisualWakeWords annotations to create TFRecords:
The resulting TFRecord file contains the following features:
{ image/height, image/width, image/source_id, image/encoded,
image/class/label_text, image/class/label,
image/object/class/text,
image/object/bbox/ymin, image/object/bbox/xmin, image/object/bbox/ymax,
image/object/bbox/xmax, image/object/area
image/filename, image/format, image/key/sha256}
For classification models, you need the image/encoded and image/class/label.
Example usage:
Run download_and_convert_data.py in the parent directory as follows:
python download_and_convert_visualwakewords.py --logtostderr \
--dataset_name=visualwakewords \
--dataset_dir="${DATASET_DIR}" \
--small_object_area_threshold=0.005 \
--foreground_class_of_interest='person'
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import tensorflow as tf
from datasets import download_and_convert_visualwakewords_lib
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
tf.compat.v1.app.flags.DEFINE_string(
'coco_dirname', 'coco_dataset',
'A subdirectory in visualwakewords dataset directory'
'containing the coco dataset')
FLAGS = tf.compat.v1.app.flags.FLAGS
def run(dataset_dir, small_object_area_threshold, foreground_class_of_interest):
"""Runs the download and conversion operation.
Args:
dataset_dir: The dataset directory where the dataset is stored.
small_object_area_threshold: Threshold of fraction of image area below which
small objects are filtered
foreground_class_of_interest: Build a binary classifier based on the
presence or absence of this object in the image.
"""
# 1. Download the coco dataset into a subdirectory under the visualwakewords
# dataset directory
coco_dir = os.path.join(dataset_dir, FLAGS.coco_dirname)
if not tf.gfile.IsDirectory(coco_dir):
tf.gfile.MakeDirs(coco_dir)
download_and_convert_visualwakewords_lib.download_coco_dataset(coco_dir)
# Path to COCO annotations
train_annotations_file = os.path.join(coco_dir, 'annotations',
'instances_train2014.json')
val_annotations_file = os.path.join(coco_dir, 'annotations',
'instances_val2014.json')
train_image_dir = os.path.join(coco_dir, 'train2014')
val_image_dir = os.path.join(coco_dir, 'val2014')
# Path to VisualWakeWords annotations
visualwakewords_annotations_train = os.path.join(
dataset_dir, 'instances_visualwakewords_train2014.json')
visualwakewords_annotations_val = os.path.join(
dataset_dir, 'instances_visualwakewords_val2014.json')
visualwakewords_labels_filename = os.path.join(dataset_dir, 'labels.txt')
train_output_path = os.path.join(dataset_dir, 'train.record')
val_output_path = os.path.join(dataset_dir, 'val.record')
# 2. Create a labels file
tf.logging.info('Creating a labels file...')
download_and_convert_visualwakewords_lib.create_labels_file(
foreground_class_of_interest, visualwakewords_labels_filename)
# 3. Use COCO annotations to create VisualWakeWords annotations
tf.logging.info('Creating train VisualWakeWords annotations...')
download_and_convert_visualwakewords_lib.create_visual_wakeword_annotations(
train_annotations_file, visualwakewords_annotations_train,
small_object_area_threshold, foreground_class_of_interest)
tf.logging.info('Creating validation VisualWakeWords annotations...')
download_and_convert_visualwakewords_lib.create_visual_wakeword_annotations(
val_annotations_file, visualwakewords_annotations_val,
small_object_area_threshold, foreground_class_of_interest)
# 4. Use VisualWakeWords annotations to create the TFRecords
tf.logging.info('Creating train TFRecords for VisualWakeWords dataset...')
download_and_convert_visualwakewords_lib.create_tf_record_for_visualwakewords_dataset(
visualwakewords_annotations_train,
train_image_dir,
train_output_path,
num_shards=100)
tf.logging.info(
'Creating validation TFRecords for VisualWakeWords dataset...')
download_and_convert_visualwakewords_lib.create_tf_record_for_visualwakewords_dataset(
visualwakewords_annotations_val,
val_image_dir,
val_output_path,
num_shards=10)
@@ -0,0 +1,286 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Helper functions to generate the Visual WakeWords dataset.
It filters raw COCO annotations file to Visual WakeWords Dataset
annotations. The resulting annotations and COCO images are then converted
to TF records.
See download_and_convert_visualwakewords.py for the sample usage.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import hashlib
import io
import json
import os
import contextlib2
import PIL.Image
import tensorflow as tf
from datasets import dataset_utils
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
tf.compat.v1.app.flags.DEFINE_string(
'coco_train_url',
'http://images.cocodataset.org/zips/train2014.zip',
'Link to zip file containing coco training data')
tf.compat.v1.app.flags.DEFINE_string(
'coco_validation_url',
'http://images.cocodataset.org/zips/val2014.zip',
'Link to zip file containing coco validation data')
tf.compat.v1.app.flags.DEFINE_string(
'coco_annotations_url',
'http://images.cocodataset.org/annotations/annotations_trainval2014.zip',
'Link to zip file containing coco annotation data')
FLAGS = tf.compat.v1.app.flags.FLAGS
def download_coco_dataset(dataset_dir):
"""Download the coco dataset.
Args:
dataset_dir: Path where coco dataset should be downloaded.
"""
dataset_utils.download_and_uncompress_zipfile(FLAGS.coco_train_url,
dataset_dir)
dataset_utils.download_and_uncompress_zipfile(FLAGS.coco_validation_url,
dataset_dir)
dataset_utils.download_and_uncompress_zipfile(FLAGS.coco_annotations_url,
dataset_dir)
def create_labels_file(foreground_class_of_interest,
visualwakewords_labels_file):
"""Generate visualwakewords labels file.
Args:
foreground_class_of_interest: category from COCO dataset that is filtered by
the visualwakewords dataset
visualwakewords_labels_file: output visualwakewords label file
"""
labels_to_class_names = {0: 'background', 1: foreground_class_of_interest}
with open(visualwakewords_labels_file, 'w') as fp:
for label in labels_to_class_names:
fp.write(str(label) + ':' + str(labels_to_class_names[label]) + '\n')
def create_visual_wakeword_annotations(annotations_file,
visualwakewords_annotations_file,
small_object_area_threshold,
foreground_class_of_interest):
"""Generate visual wakewords annotations file.
Loads COCO annotation json files to generate visualwakewords annotations file.
Args:
annotations_file: JSON file containing COCO bounding box annotations
visualwakewords_annotations_file: path to output annotations file
small_object_area_threshold: threshold on fraction of image area below which
small object bounding boxes are filtered
foreground_class_of_interest: category from COCO dataset that is filtered by
the visual wakewords dataset
"""
# default object of interest is person
foreground_class_of_interest_id = 1
with tf.gfile.GFile(annotations_file, 'r') as fid:
groundtruth_data = json.load(fid)
images = groundtruth_data['images']
# Create category index
category_index = {}
for category in groundtruth_data['categories']:
if category['name'] == foreground_class_of_interest:
foreground_class_of_interest_id = category['id']
category_index[category['id']] = category
# Create annotations index, a map of image_id to it's annotations
tf.logging.info('Building annotations index...')
annotations_index = collections.defaultdict(
lambda: collections.defaultdict(list))
# structure is { "image_id": {"objects" : [list of the image annotations]}}
for annotation in groundtruth_data['annotations']:
annotations_index[annotation['image_id']]['objects'].append(annotation)
missing_annotation_count = len(images) - len(annotations_index)
tf.logging.info('%d images are missing annotations.',
missing_annotation_count)
# Create filtered annotations index
annotations_index_filtered = {}
for idx, image in enumerate(images):
if idx % 100 == 0:
tf.logging.info('On image %d of %d', idx, len(images))
annotations = annotations_index[image['id']]
annotations_filtered = _filter_annotations(
annotations, image, small_object_area_threshold,
foreground_class_of_interest_id)
annotations_index_filtered[image['id']] = annotations_filtered
with open(visualwakewords_annotations_file, 'w') as fp:
json.dump(
{
'images': images,
'annotations': annotations_index_filtered,
'categories': category_index
}, fp)
def _filter_annotations(annotations, image, small_object_area_threshold,
foreground_class_of_interest_id):
"""Filters COCO annotations to visual wakewords annotations.
Args:
annotations: dicts with keys: {
u'objects': [{u'id', u'image_id', u'category_id', u'segmentation',
u'area', u'bbox' : [x,y,width,height], u'iscrowd'}] } Notice
that bounding box coordinates in the official COCO dataset
are given as [x, y, width, height] tuples using absolute
coordinates where x, y represent the top-left (0-indexed)
corner.
image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
u'width', u'date_captured', u'flickr_url', u'id']
small_object_area_threshold: threshold on fraction of image area below which
small objects are filtered
foreground_class_of_interest_id: category of COCO dataset which visual
wakewords filters
Returns:
annotations_filtered: dict with keys: {
u'objects': [{"area", "bbox" : [x,y,width,height]}],
u'label',
}
"""
objects = []
image_area = image['height'] * image['width']
for annotation in annotations['objects']:
normalized_object_area = annotation['area'] / image_area
category_id = int(annotation['category_id'])
# Filter valid bounding boxes
if category_id == foreground_class_of_interest_id and \
normalized_object_area > small_object_area_threshold:
objects.append({
u'area': annotation['area'],
u'bbox': annotation['bbox'],
})
label = 1 if objects else 0
return {
'objects': objects,
'label': label,
}
def create_tf_record_for_visualwakewords_dataset(annotations_file, image_dir,
output_path, num_shards):
"""Loads Visual WakeWords annotations/images and converts to tf.Record format.
Args:
annotations_file: JSON file containing bounding box annotations.
image_dir: Directory containing the image files.
output_path: Path to output tf.Record file.
num_shards: number of output file shards.
"""
with contextlib2.ExitStack() as tf_record_close_stack, \
tf.gfile.GFile(annotations_file, 'r') as fid:
output_tfrecords = dataset_utils.open_sharded_output_tfrecords(
tf_record_close_stack, output_path, num_shards)
groundtruth_data = json.load(fid)
images = groundtruth_data['images']
annotations_index = groundtruth_data['annotations']
annotations_index = {int(k): v for k, v in annotations_index.iteritems()}
# convert 'unicode' key to 'int' key after we parse the json file
for idx, image in enumerate(images):
if idx % 100 == 0:
tf.logging.info('On image %d of %d', idx, len(images))
annotations = annotations_index[image['id']]
tf_example = _create_tf_example(image, annotations, image_dir)
shard_idx = idx % num_shards
output_tfrecords[shard_idx].write(tf_example.SerializeToString())
def _create_tf_example(image, annotations, image_dir):
"""Converts image and annotations to a tf.Example proto.
Args:
image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
u'width', u'date_captured', u'flickr_url', u'id']
annotations: dict with objects (a list of image annotations) and a label.
{u'objects':[{"area", "bbox" : [x,y,width,height}], u'label'}. Notice
that bounding box coordinates in the COCO dataset are given as[x, y,
width, height] tuples using absolute coordinates where x, y represent
the top-left (0-indexed) corner. This function also converts to the format
that can be used by the Tensorflow Object Detection API (which is [ymin,
xmin, ymax, xmax] with coordinates normalized relative to image size).
image_dir: directory containing the image files.
Returns:
tf_example: The converted tf.Example
Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG
"""
image_height = image['height']
image_width = image['width']
filename = image['file_name']
image_id = image['id']
full_path = os.path.join(image_dir, filename)
with tf.gfile.GFile(full_path, 'rb') as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
image = PIL.Image.open(encoded_jpg_io)
key = hashlib.sha256(encoded_jpg).hexdigest()
xmin, xmax, ymin, ymax, area = [], [], [], [], []
for obj in annotations['objects']:
(x, y, width, height) = tuple(obj['bbox'])
xmin.append(float(x) / image_width)
xmax.append(float(x + width) / image_width)
ymin.append(float(y) / image_height)
ymax.append(float(y + height) / image_height)
area.append(obj['area'])
feature_dict = {
'image/height':
dataset_utils.int64_feature(image_height),
'image/width':
dataset_utils.int64_feature(image_width),
'image/filename':
dataset_utils.bytes_feature(filename.encode('utf8')),
'image/source_id':
dataset_utils.bytes_feature(str(image_id).encode('utf8')),
'image/key/sha256':
dataset_utils.bytes_feature(key.encode('utf8')),
'image/encoded':
dataset_utils.bytes_feature(encoded_jpg),
'image/format':
dataset_utils.bytes_feature('jpeg'.encode('utf8')),
'image/class/label':
dataset_utils.int64_feature(annotations['label']),
'image/object/bbox/xmin':
dataset_utils.float_list_feature(xmin),
'image/object/bbox/xmax':
dataset_utils.float_list_feature(xmax),
'image/object/bbox/ymin':
dataset_utils.float_list_feature(ymin),
'image/object/bbox/ymax':
dataset_utils.float_list_feature(ymax),
'image/object/area':
dataset_utils.float_list_feature(area),
}
example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
return example
@@ -0,0 +1,99 @@
#!/bin/bash
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Script to download ImageNet Challenge 2012 training and validation data set.
#
# Downloads and decompresses raw images and bounding boxes.
#
# **IMPORTANT**
# To download the raw images, the user must create an account with image-net.org
# and generate a username and access_key. The latter two are required for
# downloading the raw images.
#
# usage:
# ./download_imagenet.sh [dirname]
set -e
if [ "x$IMAGENET_ACCESS_KEY" == x -o "x$IMAGENET_USERNAME" == x ]; then
cat <<END
In order to download the imagenet data, you have to create an account with
image-net.org. This will get you a username and an access key. You can set the
IMAGENET_USERNAME and IMAGENET_ACCESS_KEY environment variables, or you can
enter the credentials here.
END
read -p "Username: " IMAGENET_USERNAME
read -p "Access key: " IMAGENET_ACCESS_KEY
fi
OUTDIR="${1:-./imagenet-data}"
SYNSETS_FILE="${2:-./synsets.txt}"
echo "Saving downloaded files to $OUTDIR"
mkdir -p "${OUTDIR}"
CURRENT_DIR=$(pwd)
BBOX_DIR="${OUTDIR}bounding_boxes"
mkdir -p "${BBOX_DIR}"
cd "${OUTDIR}"
# Download and process all of the ImageNet bounding boxes.
BASE_URL="http://www.image-net.org/challenges/LSVRC/2012/nnoupb"
# See here for details: http://www.image-net.org/download-bboxes
BOUNDING_BOX_ANNOTATIONS="${BASE_URL}/ILSVRC2012_bbox_train_v2.tar.gz"
BBOX_TAR_BALL="${BBOX_DIR}/annotations.tar.gz"
echo "Downloading bounding box annotations."
wget "${BOUNDING_BOX_ANNOTATIONS}" -O "${BBOX_TAR_BALL}"
echo "Uncompressing bounding box annotations ..."
tar xzf "${BBOX_TAR_BALL}" -C "${BBOX_DIR}"
LABELS_ANNOTATED="${BBOX_DIR}/*"
NUM_XML=$(ls -1 ${LABELS_ANNOTATED} | wc -l)
echo "Identified ${NUM_XML} bounding box annotations."
# Download and uncompress all images from the ImageNet 2012 validation dataset.
VALIDATION_TARBALL="ILSVRC2012_img_val.tar"
OUTPUT_PATH="${OUTDIR}validation/"
mkdir -p "${OUTPUT_PATH}"
cd "${OUTDIR}/.."
echo "Downloading ${VALIDATION_TARBALL} to ${OUTPUT_PATH}."
wget -nd -c "${BASE_URL}/${VALIDATION_TARBALL}"
tar xf "${VALIDATION_TARBALL}" -C "${OUTPUT_PATH}"
# Download all images from the ImageNet 2012 train dataset.
TRAIN_TARBALL="ILSVRC2012_img_train.tar"
OUTPUT_PATH="${OUTDIR}train/"
mkdir -p "${OUTPUT_PATH}"
cd "${OUTDIR}/.."
echo "Downloading ${TRAIN_TARBALL} to ${OUTPUT_PATH}."
wget -nd -c "${BASE_URL}/${TRAIN_TARBALL}"
# Un-compress the individual tar-files within the train tar-file.
echo "Uncompressing individual train tar-balls in the training data."
while read SYNSET; do
echo "Processing: ${SYNSET}"
# Create a directory and delete anything there.
mkdir -p "${OUTPUT_PATH}/${SYNSET}"
rm -rf "${OUTPUT_PATH}/${SYNSET}/*"
# Uncompress into the directory.
tar xf "${TRAIN_TARBALL}" "${SYNSET}.tar"
tar xf "${SYNSET}.tar" -C "${OUTPUT_PATH}/${SYNSET}/"
rm -f "${SYNSET}.tar"
echo "Finished processing: ${SYNSET}"
done < "${SYNSETS_FILE}"
@@ -0,0 +1,99 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data for the flowers dataset.
The dataset scripts used to create the dataset can be found at:
tensorflow/models/research/slim/datasets/download_and_convert_flowers.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from datasets import dataset_utils
slim = contrib_slim
_FILE_PATTERN = 'flowers_%s_*.tfrecord'
SPLITS_TO_SIZES = {'train': 3320, 'validation': 350}
_NUM_CLASSES = 5
_ITEMS_TO_DESCRIPTIONS = {
'image': 'A color image of varying size.',
'label': 'A single integer between 0 and 4',
}
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
"""Gets a dataset tuple with instructions for reading flowers.
Args:
split_name: A train/validation split name.
dataset_dir: The base directory of the dataset sources.
file_pattern: The file pattern to use when matching the dataset sources.
It is assumed that the pattern contains a '%s' string so that the split
name can be inserted.
reader: The TensorFlow reader type.
Returns:
A `Dataset` namedtuple.
Raises:
ValueError: if `split_name` is not a valid train/validation split.
"""
if split_name not in SPLITS_TO_SIZES:
raise ValueError('split name %s was not recognized.' % split_name)
if not file_pattern:
file_pattern = _FILE_PATTERN
file_pattern = os.path.join(dataset_dir, file_pattern % split_name)
# Allowing None in the signature so that dataset_factory can use the default.
if reader is None:
reader = tf.TFRecordReader
keys_to_features = {
'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),
'image/class/label': tf.FixedLenFeature(
[], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
}
items_to_handlers = {
'image': slim.tfexample_decoder.Image(),
'label': slim.tfexample_decoder.Tensor('image/class/label'),
}
decoder = slim.tfexample_decoder.TFExampleDecoder(
keys_to_features, items_to_handlers)
labels_to_names = None
if dataset_utils.has_labels(dataset_dir):
labels_to_names = dataset_utils.read_label_file(dataset_dir)
return slim.dataset.Dataset(
data_sources=file_pattern,
reader=reader,
decoder=decoder,
num_samples=SPLITS_TO_SIZES[split_name],
items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
num_classes=_NUM_CLASSES,
labels_to_names=labels_to_names)
@@ -0,0 +1,199 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data for the ImageNet ILSVRC 2012 Dataset plus some bounding boxes.
Some images have one or more bounding boxes associated with the label of the
image. See details here: http://image-net.org/download-bboxes
ImageNet is based upon WordNet 3.0. To uniquely identify a synset, we use
"WordNet ID" (wnid), which is a concatenation of POS ( i.e. part of speech )
and SYNSET OFFSET of WordNet. For more information, please refer to the
WordNet documentation[http://wordnet.princeton.edu/wordnet/documentation/].
"There are bounding boxes for over 3000 popular synsets available.
For each synset, there are on average 150 images with bounding boxes."
WARNING: Don't use for object detection, in this case all the bounding boxes
of the image belong to just one class.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from six.moves import urllib
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from datasets import dataset_utils
slim = contrib_slim
# TODO(nsilberman): Add tfrecord file type once the script is updated.
_FILE_PATTERN = '%s-*'
_SPLITS_TO_SIZES = {
'train': 1281167,
'validation': 50000,
}
_ITEMS_TO_DESCRIPTIONS = {
'image': 'A color image of varying height and width.',
'label': 'The label id of the image, integer between 0 and 999',
'label_text': 'The text of the label.',
'object/bbox': 'A list of bounding boxes.',
'object/label': 'A list of labels, one per each object.',
}
_NUM_CLASSES = 1001
# If set to false, will not try to set label_to_names in dataset
# by reading them from labels.txt or github.
LOAD_READABLE_NAMES = True
def create_readable_names_for_imagenet_labels():
"""Create a dict mapping label id to human readable string.
Returns:
labels_to_names: dictionary where keys are integers from to 1000
and values are human-readable names.
We retrieve a synset file, which contains a list of valid synset labels used
by ILSVRC competition. There is one synset one per line, eg.
# n01440764
# n01443537
We also retrieve a synset_to_human_file, which contains a mapping from synsets
to human-readable names for every synset in Imagenet. These are stored in a
tsv format, as follows:
# n02119247 black fox
# n02119359 silver fox
We assign each synset (in alphabetical order) an integer, starting from 1
(since 0 is reserved for the background class).
Code is based on
https://github.com/tensorflow/models/blob/master/research/inception/inception/data/build_imagenet_data.py#L463
"""
# pylint: disable=g-line-too-long
base_url = 'https://raw.githubusercontent.com/tensorflow/models/master/research/inception/inception/data/'
synset_url = '{}/imagenet_lsvrc_2015_synsets.txt'.format(base_url)
synset_to_human_url = '{}/imagenet_metadata.txt'.format(base_url)
filename, _ = urllib.request.urlretrieve(synset_url)
synset_list = [s.strip() for s in open(filename).readlines()]
num_synsets_in_ilsvrc = len(synset_list)
assert num_synsets_in_ilsvrc == 1000
filename, _ = urllib.request.urlretrieve(synset_to_human_url)
synset_to_human_list = open(filename).readlines()
num_synsets_in_all_imagenet = len(synset_to_human_list)
assert num_synsets_in_all_imagenet == 21842
synset_to_human = {}
for s in synset_to_human_list:
parts = s.strip().split('\t')
assert len(parts) == 2
synset = parts[0]
human = parts[1]
synset_to_human[synset] = human
label_index = 1
labels_to_names = {0: 'background'}
for synset in synset_list:
name = synset_to_human[synset]
labels_to_names[label_index] = name
label_index += 1
return labels_to_names
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
"""Gets a dataset tuple with instructions for reading ImageNet.
Args:
split_name: A train/test split name.
dataset_dir: The base directory of the dataset sources.
file_pattern: The file pattern to use when matching the dataset sources.
It is assumed that the pattern contains a '%s' string so that the split
name can be inserted.
reader: The TensorFlow reader type.
Returns:
A `Dataset` namedtuple.
Raises:
ValueError: if `split_name` is not a valid train/test split.
"""
if split_name not in _SPLITS_TO_SIZES:
raise ValueError('split name %s was not recognized.' % split_name)
if not file_pattern:
file_pattern = _FILE_PATTERN
file_pattern = os.path.join(dataset_dir, file_pattern % split_name)
# Allowing None in the signature so that dataset_factory can use the default.
if reader is None:
reader = tf.TFRecordReader
keys_to_features = {
'image/encoded': tf.FixedLenFeature(
(), tf.string, default_value=''),
'image/format': tf.FixedLenFeature(
(), tf.string, default_value='jpeg'),
'image/class/label': tf.FixedLenFeature(
[], dtype=tf.int64, default_value=-1),
'image/class/text': tf.FixedLenFeature(
[], dtype=tf.string, default_value=''),
'image/object/bbox/xmin': tf.VarLenFeature(
dtype=tf.float32),
'image/object/bbox/ymin': tf.VarLenFeature(
dtype=tf.float32),
'image/object/bbox/xmax': tf.VarLenFeature(
dtype=tf.float32),
'image/object/bbox/ymax': tf.VarLenFeature(
dtype=tf.float32),
'image/object/class/label': tf.VarLenFeature(
dtype=tf.int64),
}
items_to_handlers = {
'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
'label': slim.tfexample_decoder.Tensor('image/class/label'),
'label_text': slim.tfexample_decoder.Tensor('image/class/text'),
'object/bbox': slim.tfexample_decoder.BoundingBox(
['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
'object/label': slim.tfexample_decoder.Tensor('image/object/class/label'),
}
decoder = slim.tfexample_decoder.TFExampleDecoder(
keys_to_features, items_to_handlers)
labels_to_names = None
if LOAD_READABLE_NAMES:
if dataset_utils.has_labels(dataset_dir):
labels_to_names = dataset_utils.read_label_file(dataset_dir)
else:
labels_to_names = create_readable_names_for_imagenet_labels()
dataset_utils.write_label_file(labels_to_names, dataset_dir)
return slim.dataset.Dataset(
data_sources=file_pattern,
reader=reader,
decoder=decoder,
num_samples=_SPLITS_TO_SIZES[split_name],
items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
num_classes=_NUM_CLASSES,
labels_to_names=labels_to_names)
@@ -0,0 +1,99 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data for the MNIST dataset.
The dataset scripts used to create the dataset can be found at:
tensorflow/models/research/slim/datasets/download_and_convert_mnist.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from datasets import dataset_utils
slim = contrib_slim
_FILE_PATTERN = 'mnist_%s.tfrecord'
_SPLITS_TO_SIZES = {'train': 60000, 'test': 10000}
_NUM_CLASSES = 10
_ITEMS_TO_DESCRIPTIONS = {
'image': 'A [28 x 28 x 1] grayscale image.',
'label': 'A single integer between 0 and 9',
}
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
"""Gets a dataset tuple with instructions for reading MNIST.
Args:
split_name: A train/test split name.
dataset_dir: The base directory of the dataset sources.
file_pattern: The file pattern to use when matching the dataset sources.
It is assumed that the pattern contains a '%s' string so that the split
name can be inserted.
reader: The TensorFlow reader type.
Returns:
A `Dataset` namedtuple.
Raises:
ValueError: if `split_name` is not a valid train/test split.
"""
if split_name not in _SPLITS_TO_SIZES:
raise ValueError('split name %s was not recognized.' % split_name)
if not file_pattern:
file_pattern = _FILE_PATTERN
file_pattern = os.path.join(dataset_dir, file_pattern % split_name)
# Allowing None in the signature so that dataset_factory can use the default.
if reader is None:
reader = tf.TFRecordReader
keys_to_features = {
'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
'image/format': tf.FixedLenFeature((), tf.string, default_value='raw'),
'image/class/label': tf.FixedLenFeature(
[1], tf.int64, default_value=tf.zeros([1], dtype=tf.int64)),
}
items_to_handlers = {
'image': slim.tfexample_decoder.Image(shape=[28, 28, 1], channels=1),
'label': slim.tfexample_decoder.Tensor('image/class/label', shape=[]),
}
decoder = slim.tfexample_decoder.TFExampleDecoder(
keys_to_features, items_to_handlers)
labels_to_names = None
if dataset_utils.has_labels(dataset_dir):
labels_to_names = dataset_utils.read_label_file(dataset_dir)
return slim.dataset.Dataset(
data_sources=file_pattern,
reader=reader,
decoder=decoder,
num_samples=_SPLITS_TO_SIZES[split_name],
num_classes=_NUM_CLASSES,
items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
labels_to_names=labels_to_names)
@@ -0,0 +1,83 @@
#!/usr/bin/python
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Process the ImageNet Challenge bounding boxes for TensorFlow model training.
Associate the ImageNet 2012 Challenge validation data set with labels.
The raw ImageNet validation data set is expected to reside in JPEG files
located in the following directory structure.
data_dir/ILSVRC2012_val_00000001.JPEG
data_dir/ILSVRC2012_val_00000002.JPEG
...
data_dir/ILSVRC2012_val_00050000.JPEG
This script moves the files into a directory structure like such:
data_dir/n01440764/ILSVRC2012_val_00000293.JPEG
data_dir/n01440764/ILSVRC2012_val_00000543.JPEG
...
where 'n01440764' is the unique synset label associated with
these images.
This directory reorganization requires a mapping from validation image
number (i.e. suffix of the original file) to the associated label. This
is provided in the ImageNet development kit via a Matlab file.
In order to make life easier and divorce ourselves from Matlab, we instead
supply a custom text file that provides this mapping for us.
Sample usage:
./preprocess_imagenet_validation_data.py ILSVRC2012_img_val \
imagenet_2012_validation_synset_labels.txt
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
from six.moves import xrange # pylint: disable=redefined-builtin
if __name__ == '__main__':
if len(sys.argv) < 3:
print('Invalid usage\n'
'usage: preprocess_imagenet_validation_data.py '
'<validation data dir> <validation labels file>')
sys.exit(-1)
data_dir = sys.argv[1]
validation_labels_file = sys.argv[2]
# Read in the 50000 synsets associated with the validation data set.
labels = [l.strip() for l in open(validation_labels_file).readlines()]
unique_labels = set(labels)
# Make all sub-directories in the validation data dir.
for label in unique_labels:
labeled_data_dir = os.path.join(data_dir, label)
os.makedirs(labeled_data_dir)
# Move all of the image to the appropriate sub-directory.
for i in xrange(len(labels)):
basename = 'ILSVRC2012_val_000%.5d.JPEG' % (i + 1)
original_filename = os.path.join(data_dir, basename)
if not os.path.exists(original_filename):
print('Failed to find: ', original_filename)
sys.exit(-1)
new_filename = os.path.join(data_dir, labels[i], basename)
os.rename(original_filename, new_filename)
@@ -0,0 +1,253 @@
#!/usr/bin/python
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Process the ImageNet Challenge bounding boxes for TensorFlow model training.
This script is called as
process_bounding_boxes.py <dir> [synsets-file]
Where <dir> is a directory containing the downloaded and unpacked bounding box
data. If [synsets-file] is supplied, then only the bounding boxes whose
synstes are contained within this file are returned. Note that the
[synsets-file] file contains synset ids, one per line.
The script dumps out a CSV text file in which each line contains an entry.
n00007846_64193.JPEG,0.0060,0.2620,0.7545,0.9940
The entry can be read as:
<JPEG file name>, <xmin>, <ymin>, <xmax>, <ymax>
The bounding box for <JPEG file name> contains two points (xmin, ymin) and
(xmax, ymax) specifying the lower-left corner and upper-right corner of a
bounding box in *relative* coordinates.
The user supplies a directory where the XML files reside. The directory
structure in the directory <dir> is assumed to look like this:
<dir>/nXXXXXXXX/nXXXXXXXX_YYYY.xml
Each XML file contains a bounding box annotation. The script:
(1) Parses the XML file and extracts the filename, label and bounding box info.
(2) The bounding box is specified in the XML files as integer (xmin, ymin) and
(xmax, ymax) *relative* to image size displayed to the human annotator. The
size of the image displayed to the human annotator is stored in the XML file
as integer (height, width).
Note that the displayed size will differ from the actual size of the image
downloaded from image-net.org. To make the bounding box annotation useable,
we convert bounding box to floating point numbers relative to displayed
height and width of the image.
Note that each XML file might contain N bounding box annotations.
Note that the points are all clamped at a range of [0.0, 1.0] because some
human annotations extend outside the range of the supplied image.
See details here: http://image-net.org/download-bboxes
(3) By default, the script outputs all valid bounding boxes. If a
[synsets-file] is supplied, only the subset of bounding boxes associated
with those synsets are outputted. Importantly, one can supply a list of
synsets in the ImageNet Challenge and output the list of bounding boxes
associated with the training images of the ILSVRC.
We use these bounding boxes to inform the random distortion of images
supplied to the network.
If you run this script successfully, you will see the following output
to stderr:
> Finished processing 544546 XML files.
> Skipped 0 XML files not in ImageNet Challenge.
> Skipped 0 bounding boxes not in ImageNet Challenge.
> Wrote 615299 bounding boxes from 544546 annotated images.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import glob
import os.path
import sys
import xml.etree.ElementTree as ET
from six.moves import xrange # pylint: disable=redefined-builtin
class BoundingBox(object):
pass
def GetItem(name, root, index=0):
count = 0
for item in root.iter(name):
if count == index:
return item.text
count += 1
# Failed to find "index" occurrence of item.
return -1
def GetInt(name, root, index=0):
return int(GetItem(name, root, index))
def FindNumberBoundingBoxes(root):
index = 0
while True:
if GetInt('xmin', root, index) == -1:
break
index += 1
return index
def ProcessXMLAnnotation(xml_file):
"""Process a single XML file containing a bounding box."""
# pylint: disable=broad-except
try:
tree = ET.parse(xml_file)
except Exception:
print('Failed to parse: ' + xml_file, file=sys.stderr)
return None
# pylint: enable=broad-except
root = tree.getroot()
num_boxes = FindNumberBoundingBoxes(root)
boxes = []
for index in xrange(num_boxes):
box = BoundingBox()
# Grab the 'index' annotation.
box.xmin = GetInt('xmin', root, index)
box.ymin = GetInt('ymin', root, index)
box.xmax = GetInt('xmax', root, index)
box.ymax = GetInt('ymax', root, index)
box.width = GetInt('width', root)
box.height = GetInt('height', root)
box.filename = GetItem('filename', root) + '.JPEG'
box.label = GetItem('name', root)
xmin = float(box.xmin) / float(box.width)
xmax = float(box.xmax) / float(box.width)
ymin = float(box.ymin) / float(box.height)
ymax = float(box.ymax) / float(box.height)
# Some images contain bounding box annotations that
# extend outside of the supplied image. See, e.g.
# n03127925/n03127925_147.xml
# Additionally, for some bounding boxes, the min > max
# or the box is entirely outside of the image.
min_x = min(xmin, xmax)
max_x = max(xmin, xmax)
box.xmin_scaled = min(max(min_x, 0.0), 1.0)
box.xmax_scaled = min(max(max_x, 0.0), 1.0)
min_y = min(ymin, ymax)
max_y = max(ymin, ymax)
box.ymin_scaled = min(max(min_y, 0.0), 1.0)
box.ymax_scaled = min(max(max_y, 0.0), 1.0)
boxes.append(box)
return boxes
if __name__ == '__main__':
if len(sys.argv) < 2 or len(sys.argv) > 3:
print('Invalid usage\n'
'usage: process_bounding_boxes.py <dir> [synsets-file]',
file=sys.stderr)
sys.exit(-1)
xml_files = glob.glob(sys.argv[1] + '/*/*.xml')
print('Identified %d XML files in %s' % (len(xml_files), sys.argv[1]),
file=sys.stderr)
if len(sys.argv) == 3:
labels = set([l.strip() for l in open(sys.argv[2]).readlines()])
print('Identified %d synset IDs in %s' % (len(labels), sys.argv[2]),
file=sys.stderr)
else:
labels = None
skipped_boxes = 0
skipped_files = 0
saved_boxes = 0
saved_files = 0
for file_index, one_file in enumerate(xml_files):
# Example: <...>/n06470073/n00141669_6790.xml
label = os.path.basename(os.path.dirname(one_file))
# Determine if the annotation is from an ImageNet Challenge label.
if labels is not None and label not in labels:
skipped_files += 1
continue
bboxes = ProcessXMLAnnotation(one_file)
assert bboxes is not None, 'No bounding boxes found in ' + one_file
found_box = False
for bbox in bboxes:
if labels is not None:
if bbox.label != label:
# Note: There is a slight bug in the bounding box annotation data.
# Many of the dog labels have the human label 'Scottish_deerhound'
# instead of the synset ID 'n02092002' in the bbox.label field. As a
# simple hack to overcome this issue, we only exclude bbox labels
# *which are synset ID's* that do not match original synset label for
# the XML file.
if bbox.label in labels:
skipped_boxes += 1
continue
# Guard against improperly specified boxes.
if (bbox.xmin_scaled >= bbox.xmax_scaled or
bbox.ymin_scaled >= bbox.ymax_scaled):
skipped_boxes += 1
continue
# Note bbox.filename occasionally contains '%s' in the name. This is
# data set noise that is fixed by just using the basename of the XML file.
image_filename = os.path.splitext(os.path.basename(one_file))[0]
print('%s.JPEG,%.4f,%.4f,%.4f,%.4f' %
(image_filename,
bbox.xmin_scaled, bbox.ymin_scaled,
bbox.xmax_scaled, bbox.ymax_scaled))
saved_boxes += 1
found_box = True
if found_box:
saved_files += 1
else:
skipped_files += 1
if not file_index % 5000:
print('--> processed %d of %d XML files.' %
(file_index + 1, len(xml_files)),
file=sys.stderr)
print('--> skipped %d boxes and %d XML files.' %
(skipped_boxes, skipped_files), file=sys.stderr)
print('Finished processing %d XML files.' % len(xml_files), file=sys.stderr)
print('Skipped %d XML files not in ImageNet Challenge.' % skipped_files,
file=sys.stderr)
print('Skipped %d bounding boxes not in ImageNet Challenge.' % skipped_boxes,
file=sys.stderr)
print('Wrote %d bounding boxes from %d annotated images.' %
(saved_boxes, saved_files),
file=sys.stderr)
print('Finished.', file=sys.stderr)
@@ -0,0 +1,129 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data for Visual WakeWords Dataset with images+labels.
Visual WakeWords Dataset derives from the COCO dataset to design tiny models
classifying two classes, such as person/not-person. The COCO annotations
are filtered to two classes: person and not-person (or another user-defined
category). Bounding boxes for small objects with area less than 5% of the image
area are filtered out.
See build_visualwakewords_data.py which generates the Visual WakeWords dataset
annotations from the raw COCO dataset and converts them to TFRecord.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from datasets import dataset_utils
slim = contrib_slim
_FILE_PATTERN = '%s.record-*'
_SPLITS_TO_SIZES = {
'train': 82783,
'val': 40504,
}
_ITEMS_TO_DESCRIPTIONS = {
'image': 'A color image of varying height and width.',
'label': 'The label id of the image, an integer in {0, 1}',
'object/bbox': 'A list of bounding boxes.',
}
_NUM_CLASSES = 2
# labels file
LABELS_FILENAME = 'labels.txt'
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
"""Gets a dataset tuple with instructions for reading ImageNet.
Args:
split_name: A train/test split name.
dataset_dir: The base directory of the dataset sources.
file_pattern: The file pattern to use when matching the dataset sources. It
is assumed that the pattern contains a '%s' string so that the split name
can be inserted.
reader: The TensorFlow reader type.
Returns:
A `Dataset` namedtuple.
Raises:
ValueError: if `split_name` is not a valid train/test split.
"""
if split_name not in _SPLITS_TO_SIZES:
raise ValueError('split name %s was not recognized.' % split_name)
if not file_pattern:
file_pattern = _FILE_PATTERN
file_pattern = os.path.join(dataset_dir, file_pattern % split_name)
# Allowing None in the signature so that dataset_factory can use the default.
if reader is None:
reader = tf.TFRecordReader
keys_to_features = {
'image/encoded':
tf.FixedLenFeature((), tf.string, default_value=''),
'image/format':
tf.FixedLenFeature((), tf.string, default_value='jpeg'),
'image/class/label':
tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
'image/object/bbox/xmin':
tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/ymin':
tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/xmax':
tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/ymax':
tf.VarLenFeature(dtype=tf.float32),
}
items_to_handlers = {
'image':
slim.tfexample_decoder.Image('image/encoded', 'image/format'),
'label':
slim.tfexample_decoder.Tensor('image/class/label'),
'object/bbox':
slim.tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
'image/object/bbox/'),
}
decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
items_to_handlers)
labels_to_names = None
labels_file = os.path.join(dataset_dir, LABELS_FILENAME)
if tf.gfile.Exists(labels_file):
labels_to_names = dataset_utils.read_label_file(dataset_dir)
return slim.dataset.Dataset(
data_sources=file_pattern,
reader=reader,
decoder=decoder,
num_samples=_SPLITS_TO_SIZES[split_name],
items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
num_classes=_NUM_CLASSES,
labels_to_names=labels_to_names)
@@ -0,0 +1,677 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Deploy Slim models across multiple clones and replicas.
# TODO(sguada) docstring paragraph by (a) motivating the need for the file and
# (b) defining clones.
# TODO(sguada) describe the high-level components of model deployment.
# E.g. "each model deployment is composed of several parts: a DeploymentConfig,
# which captures A, B and C, an input_fn which loads data.. etc
To easily train a model on multiple GPUs or across multiple machines this
module provides a set of helper functions: `create_clones`,
`optimize_clones` and `deploy`.
Usage:
g = tf.Graph()
# Set up DeploymentConfig
config = model_deploy.DeploymentConfig(num_clones=2, clone_on_cpu=True)
# Create the global step on the device storing the variables.
with tf.device(config.variables_device()):
global_step = slim.create_global_step()
# Define the inputs
with tf.device(config.inputs_device()):
images, labels = LoadData(...)
inputs_queue = slim.data.prefetch_queue((images, labels))
# Define the optimizer.
with tf.device(config.optimizer_device()):
optimizer = tf.train.MomentumOptimizer(FLAGS.learning_rate, FLAGS.momentum)
# Define the model including the loss.
def model_fn(inputs_queue):
images, labels = inputs_queue.dequeue()
predictions = CreateNetwork(images)
slim.losses.log_loss(predictions, labels)
model_dp = model_deploy.deploy(config, model_fn, [inputs_queue],
optimizer=optimizer)
# Run training.
slim.learning.train(model_dp.train_op, my_log_dir,
summary_op=model_dp.summary_op)
The Clone namedtuple holds together the values associated with each call to
model_fn:
* outputs: The return values of the calls to `model_fn()`.
* scope: The scope used to create the clone.
* device: The device used to create the clone.
DeployedModel namedtuple, holds together the values needed to train multiple
clones:
* train_op: An operation that run the optimizer training op and include
all the update ops created by `model_fn`. Present only if an optimizer
was specified.
* summary_op: An operation that run the summaries created by `model_fn`
and process_gradients.
* total_loss: A `Tensor` that contains the sum of all losses created by
`model_fn` plus the regularization losses.
* clones: List of `Clone` tuples returned by `create_clones()`.
DeploymentConfig parameters:
* num_clones: Number of model clones to deploy in each replica.
* clone_on_cpu: True if clones should be placed on CPU.
* replica_id: Integer. Index of the replica for which the model is
deployed. Usually 0 for the chief replica.
* num_replicas: Number of replicas to use.
* num_ps_tasks: Number of tasks for the `ps` job. 0 to not use replicas.
* worker_job_name: A name for the worker job.
* ps_job_name: A name for the parameter server job.
TODO(sguada):
- describe side effect to the graph.
- what happens to summaries and update_ops.
- which graph collections are altered.
- write a tutorial on how to use this.
- analyze the possibility of calling deploy more than once.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
slim = contrib_slim
__all__ = ['create_clones',
'deploy',
'optimize_clones',
'DeployedModel',
'DeploymentConfig',
'Clone',
]
# Namedtuple used to represent a clone during deployment.
Clone = collections.namedtuple('Clone',
['outputs', # Whatever model_fn() returned.
'scope', # The scope used to create it.
'device', # The device used to create.
])
# Namedtuple used to represent a DeployedModel, returned by deploy().
DeployedModel = collections.namedtuple('DeployedModel',
['train_op', # The `train_op`
'summary_op', # The `summary_op`
'total_loss', # The loss `Tensor`
'clones', # A list of `Clones` tuples.
])
# Default parameters for DeploymentConfig
_deployment_params = {'num_clones': 1,
'clone_on_cpu': False,
'replica_id': 0,
'num_replicas': 1,
'num_ps_tasks': 0,
'worker_job_name': 'worker',
'ps_job_name': 'ps'}
def create_clones(config, model_fn, args=None, kwargs=None):
"""Creates multiple clones according to config using a `model_fn`.
The returned values of `model_fn(*args, **kwargs)` are collected along with
the scope and device used to created it in a namedtuple
`Clone(outputs, scope, device)`
Note: it is assumed that any loss created by `model_fn` is collected at
the tf.GraphKeys.LOSSES collection.
To recover the losses, summaries or update_ops created by the clone use:
```python
losses = tf.get_collection(tf.GraphKeys.LOSSES, clone.scope)
summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, clone.scope)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, clone.scope)
```
The deployment options are specified by the config object and support
deploying one or several clones on different GPUs and one or several replicas
of such clones.
The argument `model_fn` is called `config.num_clones` times to create the
model clones as `model_fn(*args, **kwargs)`.
If `config` specifies deployment on multiple replicas then the default
tensorflow device is set appropriatly for each call to `model_fn` and for the
slim variable creation functions: model and global variables will be created
on the `ps` device, the clone operations will be on the `worker` device.
Args:
config: A DeploymentConfig object.
model_fn: A callable. Called as `model_fn(*args, **kwargs)`
args: Optional list of arguments to pass to `model_fn`.
kwargs: Optional list of keyword arguments to pass to `model_fn`.
Returns:
A list of namedtuples `Clone`.
"""
clones = []
args = args or []
kwargs = kwargs or {}
with slim.arg_scope([slim.model_variable, slim.variable],
device=config.variables_device()):
# Create clones.
for i in range(0, config.num_clones):
with tf.name_scope(config.clone_scope(i)) as clone_scope:
clone_device = config.clone_device(i)
with tf.device(clone_device):
with tf.variable_scope(tf.get_variable_scope(),
reuse=True if i > 0 else None):
outputs = model_fn(*args, **kwargs)
clones.append(Clone(outputs, clone_scope, clone_device))
return clones
def _gather_clone_loss(clone, num_clones, regularization_losses):
"""Gather the loss for a single clone.
Args:
clone: A Clone namedtuple.
num_clones: The number of clones being deployed.
regularization_losses: Possibly empty list of regularization_losses
to add to the clone losses.
Returns:
A tensor for the total loss for the clone. Can be None.
"""
# The return value.
sum_loss = None
# Individual components of the loss that will need summaries.
clone_loss = None
regularization_loss = None
# Compute and aggregate losses on the clone device.
with tf.device(clone.device):
all_losses = []
clone_losses = tf.get_collection(tf.GraphKeys.LOSSES, clone.scope)
if clone_losses:
clone_loss = tf.add_n(clone_losses, name='clone_loss')
if num_clones > 1:
clone_loss = tf.div(clone_loss, 1.0 * num_clones,
name='scaled_clone_loss')
all_losses.append(clone_loss)
if regularization_losses:
regularization_loss = tf.add_n(regularization_losses,
name='regularization_loss')
all_losses.append(regularization_loss)
if all_losses:
sum_loss = tf.add_n(all_losses)
# Add the summaries out of the clone device block.
if clone_loss is not None:
tf.summary.scalar('/'.join(filter(None,
['Losses', clone.scope, 'clone_loss'])),
clone_loss)
if regularization_loss is not None:
tf.summary.scalar('Losses/regularization_loss', regularization_loss)
return sum_loss
def _optimize_clone(optimizer, clone, num_clones, regularization_losses,
**kwargs):
"""Compute losses and gradients for a single clone.
Args:
optimizer: A tf.Optimizer object.
clone: A Clone namedtuple.
num_clones: The number of clones being deployed.
regularization_losses: Possibly empty list of regularization_losses
to add to the clone losses.
**kwargs: Dict of kwarg to pass to compute_gradients().
Returns:
A tuple (clone_loss, clone_grads_and_vars).
- clone_loss: A tensor for the total loss for the clone. Can be None.
- clone_grads_and_vars: List of (gradient, variable) for the clone.
Can be empty.
"""
sum_loss = _gather_clone_loss(clone, num_clones, regularization_losses)
clone_grad = None
if sum_loss is not None:
# with tf.device(clone.device):
# clone_grad = optimizer.compute_gradients(sum_loss, **kwargs)
clone_grad = optimizer.compute_gradients(sum_loss, **kwargs)
return sum_loss, clone_grad
def optimize_clones(clones, optimizer,
regularization_losses=None,
**kwargs):
"""Compute clone losses and gradients for the given list of `Clones`.
Note: The regularization_losses are added to the first clone losses.
Args:
clones: List of `Clones` created by `create_clones()`.
optimizer: An `Optimizer` object.
regularization_losses: Optional list of regularization losses. If None it
will gather them from tf.GraphKeys.REGULARIZATION_LOSSES. Pass `[]` to
exclude them.
**kwargs: Optional list of keyword arguments to pass to `compute_gradients`.
Returns:
A tuple (total_loss, grads_and_vars).
- total_loss: A Tensor containing the average of the clone losses including
the regularization loss.
- grads_and_vars: A List of tuples (gradient, variable) containing the sum
of the gradients for each variable.
"""
grads_and_vars = []
clones_losses = []
num_clones = len(clones)
if regularization_losses is None:
regularization_losses = tf.get_collection(
tf.GraphKeys.REGULARIZATION_LOSSES)
for clone in clones:
with tf.name_scope(clone.scope):
clone_loss, clone_grad = _optimize_clone(
optimizer, clone, num_clones, regularization_losses, **kwargs)
if clone_loss is not None:
clones_losses.append(clone_loss)
grads_and_vars.append(clone_grad)
# Only use regularization_losses for the first clone
regularization_losses = None
# Compute the total_loss summing all the clones_losses.
total_loss = tf.add_n(clones_losses, name='total_loss')
# Sum the gradients across clones.
grads_and_vars = _sum_clones_gradients(grads_and_vars)
return total_loss, grads_and_vars
def deploy(config,
model_fn,
args=None,
kwargs=None,
optimizer=None,
summarize_gradients=False):
"""Deploys a Slim-constructed model across multiple clones.
The deployment options are specified by the config object and support
deploying one or several clones on different GPUs and one or several replicas
of such clones.
The argument `model_fn` is called `config.num_clones` times to create the
model clones as `model_fn(*args, **kwargs)`.
The optional argument `optimizer` is an `Optimizer` object. If not `None`,
the deployed model is configured for training with that optimizer.
If `config` specifies deployment on multiple replicas then the default
tensorflow device is set appropriatly for each call to `model_fn` and for the
slim variable creation functions: model and global variables will be created
on the `ps` device, the clone operations will be on the `worker` device.
Args:
config: A `DeploymentConfig` object.
model_fn: A callable. Called as `model_fn(*args, **kwargs)`
args: Optional list of arguments to pass to `model_fn`.
kwargs: Optional list of keyword arguments to pass to `model_fn`.
optimizer: Optional `Optimizer` object. If passed the model is deployed
for training with that optimizer.
summarize_gradients: Whether or not add summaries to the gradients.
Returns:
A `DeployedModel` namedtuple.
"""
# Gather initial summaries.
summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
# Create Clones.
clones = create_clones(config, model_fn, args, kwargs)
first_clone = clones[0]
# Gather update_ops from the first clone. These contain, for example,
# the updates for the batch_norm variables created by model_fn.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone.scope)
train_op = None
total_loss = None
with tf.device(config.optimizer_device()):
if optimizer:
# Place the global step on the device storing the variables.
with tf.device(config.variables_device()):
global_step = slim.get_or_create_global_step()
# Compute the gradients for the clones.
total_loss, clones_gradients = optimize_clones(clones, optimizer)
if clones_gradients:
if summarize_gradients:
# Add summaries to the gradients.
summaries |= set(_add_gradients_summaries(clones_gradients))
# Create gradient updates.
grad_updates = optimizer.apply_gradients(clones_gradients,
global_step=global_step)
update_ops.append(grad_updates)
update_op = tf.group(*update_ops)
with tf.control_dependencies([update_op]):
train_op = tf.identity(total_loss, name='train_op')
else:
clones_losses = []
regularization_losses = tf.get_collection(
tf.GraphKeys.REGULARIZATION_LOSSES)
for clone in clones:
with tf.name_scope(clone.scope):
clone_loss = _gather_clone_loss(clone, len(clones),
regularization_losses)
if clone_loss is not None:
clones_losses.append(clone_loss)
# Only use regularization_losses for the first clone
regularization_losses = None
if clones_losses:
total_loss = tf.add_n(clones_losses, name='total_loss')
# Add the summaries from the first clone. These contain the summaries
# created by model_fn and either optimize_clones() or _gather_clone_loss().
summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
first_clone.scope))
if total_loss is not None:
# Add total_loss to summary.
summaries.add(tf.summary.scalar('total_loss', total_loss))
if summaries:
# Merge all summaries together.
summary_op = tf.summary.merge(list(summaries), name='summary_op')
else:
summary_op = None
return DeployedModel(train_op, summary_op, total_loss, clones)
def _sum_clones_gradients(clone_grads):
"""Calculate the sum gradient for each shared variable across all clones.
This function assumes that the clone_grads has been scaled appropriately by
1 / num_clones.
Args:
clone_grads: A List of List of tuples (gradient, variable), one list per
`Clone`.
Returns:
List of tuples of (gradient, variable) where the gradient has been summed
across all clones.
"""
sum_grads = []
for grad_and_vars in zip(*clone_grads):
# Note that each grad_and_vars looks like the following:
# ((grad_var0_clone0, var0), ... (grad_varN_cloneN, varN))
grads = []
var = grad_and_vars[0][1]
for g, v in grad_and_vars:
assert v == var
if g is not None:
grads.append(g)
if grads:
if len(grads) > 1:
sum_grad = tf.add_n(grads, name=var.op.name + '/sum_grads')
else:
sum_grad = grads[0]
sum_grads.append((sum_grad, var))
return sum_grads
def _add_gradients_summaries(grads_and_vars):
"""Add histogram summaries to gradients.
Note: The summaries are also added to the SUMMARIES collection.
Args:
grads_and_vars: A list of gradient to variable pairs (tuples).
Returns:
The _list_ of the added summaries for grads_and_vars.
"""
summaries = []
for grad, var in grads_and_vars:
if grad is not None:
if isinstance(grad, tf.IndexedSlices):
grad_values = grad.values
else:
grad_values = grad
summaries.append(tf.summary.histogram(var.op.name + ':gradient',
grad_values))
summaries.append(tf.summary.histogram(var.op.name + ':gradient_norm',
tf.global_norm([grad_values])))
else:
tf.logging.info('Var %s has no gradient', var.op.name)
return summaries
class DeploymentConfig(object):
"""Configuration for deploying a model with `deploy()`.
You can pass an instance of this class to `deploy()` to specify exactly
how to deploy the model to build. If you do not pass one, an instance built
from the default deployment_hparams will be used.
"""
def __init__(self,
num_clones=1,
clone_on_cpu=False,
replica_id=0,
num_replicas=1,
num_ps_tasks=0,
worker_job_name='worker',
ps_job_name='ps'):
"""Create a DeploymentConfig.
The config describes how to deploy a model across multiple clones and
replicas. The model will be replicated `num_clones` times in each replica.
If `clone_on_cpu` is True, each clone will placed on CPU.
If `num_replicas` is 1, the model is deployed via a single process. In that
case `worker_device`, `num_ps_tasks`, and `ps_device` are ignored.
If `num_replicas` is greater than 1, then `worker_device` and `ps_device`
must specify TensorFlow devices for the `worker` and `ps` jobs and
`num_ps_tasks` must be positive.
Args:
num_clones: Number of model clones to deploy in each replica.
clone_on_cpu: If True clones would be placed on CPU.
replica_id: Integer. Index of the replica for which the model is
deployed. Usually 0 for the chief replica.
num_replicas: Number of replicas to use.
num_ps_tasks: Number of tasks for the `ps` job. 0 to not use replicas.
worker_job_name: A name for the worker job.
ps_job_name: A name for the parameter server job.
Raises:
ValueError: If the arguments are invalid.
"""
if num_replicas > 1:
if num_ps_tasks < 1:
raise ValueError('When using replicas num_ps_tasks must be positive')
if num_replicas > 1 or num_ps_tasks > 0:
if not worker_job_name:
raise ValueError('Must specify worker_job_name when using replicas')
if not ps_job_name:
raise ValueError('Must specify ps_job_name when using parameter server')
if replica_id >= num_replicas:
raise ValueError('replica_id must be less than num_replicas')
self._num_clones = num_clones
self._clone_on_cpu = clone_on_cpu
self._replica_id = replica_id
self._num_replicas = num_replicas
self._num_ps_tasks = num_ps_tasks
self._ps_device = '/job:' + ps_job_name if num_ps_tasks > 0 else ''
self._worker_device = '/job:' + worker_job_name if num_ps_tasks > 0 else ''
@property
def num_clones(self):
return self._num_clones
@property
def clone_on_cpu(self):
return self._clone_on_cpu
@property
def replica_id(self):
return self._replica_id
@property
def num_replicas(self):
return self._num_replicas
@property
def num_ps_tasks(self):
return self._num_ps_tasks
@property
def ps_device(self):
return self._ps_device
@property
def worker_device(self):
return self._worker_device
def caching_device(self):
"""Returns the device to use for caching variables.
Variables are cached on the worker CPU when using replicas.
Returns:
A device string or None if the variables do not need to be cached.
"""
if self._num_ps_tasks > 0:
return lambda op: op.device
else:
return None
def clone_device(self, clone_index):
"""Device used to create the clone and all the ops inside the clone.
Args:
clone_index: Int, representing the clone_index.
Returns:
A value suitable for `tf.device()`.
Raises:
ValueError: if `clone_index` is greater or equal to the number of clones".
"""
if clone_index >= self._num_clones:
raise ValueError('clone_index must be less than num_clones')
device = ''
if self._num_ps_tasks > 0:
device += self._worker_device
if self._clone_on_cpu:
device += '/device:CPU:0'
else:
device += '/device:GPU:%d' % clone_index
return device
def clone_scope(self, clone_index):
"""Name scope to create the clone.
Args:
clone_index: Int, representing the clone_index.
Returns:
A name_scope suitable for `tf.name_scope()`.
Raises:
ValueError: if `clone_index` is greater or equal to the number of clones".
"""
if clone_index >= self._num_clones:
raise ValueError('clone_index must be less than num_clones')
scope = ''
if self._num_clones > 1:
scope = 'clone_%d' % clone_index
return scope
def optimizer_device(self):
"""Device to use with the optimizer.
Returns:
A value suitable for `tf.device()`.
"""
if self._num_ps_tasks > 0 or self._num_clones > 0:
return self._worker_device + '/device:CPU:0'
else:
return ''
def inputs_device(self):
"""Device to use to build the inputs.
Returns:
A value suitable for `tf.device()`.
"""
device = ''
if self._num_ps_tasks > 0:
device += self._worker_device
device += '/device:CPU:0'
return device
def variables_device(self):
"""Returns the device to use for variables created inside the clone.
Returns:
A value suitable for `tf.device()`.
"""
device = ''
if self._num_ps_tasks > 0:
device += self._ps_device
device += '/device:CPU:0'
class _PSDeviceChooser(object):
"""Slim device chooser for variables when using PS."""
def __init__(self, device, tasks):
self._device = device
self._tasks = tasks
self._task = 0
def choose(self, op):
if op.device:
return op.device
node_def = op if isinstance(op, tf.NodeDef) else op.node_def
if node_def.op.startswith('Variable'):
t = self._task
self._task = (self._task + 1) % self._tasks
d = '%s/task:%d' % (self._device, t)
return d
else:
return op.device
if not self._num_ps_tasks:
return device
else:
chooser = _PSDeviceChooser(device, self._num_ps_tasks)
return chooser.choose
@@ -0,0 +1,574 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for model_deploy."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.contrib import framework as contrib_framework
from tensorflow.contrib import layers as contrib_layers
from tensorflow.contrib import slim as contrib_slim
from deployment import model_deploy
slim = contrib_slim
class DeploymentConfigTest(tf.test.TestCase):
def testDefaults(self):
deploy_config = model_deploy.DeploymentConfig()
self.assertEqual(slim.get_variables(), [])
self.assertEqual(deploy_config.caching_device(), None)
self.assertDeviceEqual(deploy_config.clone_device(0), 'GPU:0')
self.assertEqual(deploy_config.clone_scope(0), '')
self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0')
self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0')
self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0')
def testCPUonly(self):
deploy_config = model_deploy.DeploymentConfig(clone_on_cpu=True)
self.assertEqual(deploy_config.caching_device(), None)
self.assertDeviceEqual(deploy_config.clone_device(0), 'CPU:0')
self.assertEqual(deploy_config.clone_scope(0), '')
self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0')
self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0')
self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0')
def testMultiGPU(self):
deploy_config = model_deploy.DeploymentConfig(num_clones=2)
self.assertEqual(deploy_config.caching_device(), None)
self.assertDeviceEqual(deploy_config.clone_device(0), 'GPU:0')
self.assertDeviceEqual(deploy_config.clone_device(1), 'GPU:1')
self.assertEqual(deploy_config.clone_scope(0), 'clone_0')
self.assertEqual(deploy_config.clone_scope(1), 'clone_1')
self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0')
self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0')
self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0')
def testPS(self):
deploy_config = model_deploy.DeploymentConfig(num_clones=1, num_ps_tasks=1)
self.assertDeviceEqual(deploy_config.clone_device(0),
'/job:worker/device:GPU:0')
self.assertEqual(deploy_config.clone_scope(0), '')
self.assertDeviceEqual(deploy_config.optimizer_device(),
'/job:worker/device:CPU:0')
self.assertDeviceEqual(deploy_config.inputs_device(),
'/job:worker/device:CPU:0')
with tf.device(deploy_config.variables_device()):
a = tf.Variable(0)
b = tf.Variable(0)
c = tf.no_op()
d = slim.variable('a', [],
caching_device=deploy_config.caching_device())
self.assertDeviceEqual(a.device, '/job:ps/task:0/device:CPU:0')
self.assertDeviceEqual(a.device, a.value().device)
self.assertDeviceEqual(b.device, '/job:ps/task:0/device:CPU:0')
self.assertDeviceEqual(b.device, b.value().device)
self.assertDeviceEqual(c.device, '')
self.assertDeviceEqual(d.device, '/job:ps/task:0/device:CPU:0')
self.assertDeviceEqual(d.value().device, '')
def testMultiGPUPS(self):
deploy_config = model_deploy.DeploymentConfig(num_clones=2, num_ps_tasks=1)
self.assertEqual(deploy_config.caching_device()(tf.no_op()), '')
self.assertDeviceEqual(deploy_config.clone_device(0),
'/job:worker/device:GPU:0')
self.assertDeviceEqual(deploy_config.clone_device(1),
'/job:worker/device:GPU:1')
self.assertEqual(deploy_config.clone_scope(0), 'clone_0')
self.assertEqual(deploy_config.clone_scope(1), 'clone_1')
self.assertDeviceEqual(deploy_config.optimizer_device(),
'/job:worker/device:CPU:0')
self.assertDeviceEqual(deploy_config.inputs_device(),
'/job:worker/device:CPU:0')
def testReplicasPS(self):
deploy_config = model_deploy.DeploymentConfig(num_replicas=2,
num_ps_tasks=2)
self.assertDeviceEqual(deploy_config.clone_device(0),
'/job:worker/device:GPU:0')
self.assertEqual(deploy_config.clone_scope(0), '')
self.assertDeviceEqual(deploy_config.optimizer_device(),
'/job:worker/device:CPU:0')
self.assertDeviceEqual(deploy_config.inputs_device(),
'/job:worker/device:CPU:0')
def testReplicasMultiGPUPS(self):
deploy_config = model_deploy.DeploymentConfig(num_replicas=2,
num_clones=2,
num_ps_tasks=2)
self.assertDeviceEqual(deploy_config.clone_device(0),
'/job:worker/device:GPU:0')
self.assertDeviceEqual(deploy_config.clone_device(1),
'/job:worker/device:GPU:1')
self.assertEqual(deploy_config.clone_scope(0), 'clone_0')
self.assertEqual(deploy_config.clone_scope(1), 'clone_1')
self.assertDeviceEqual(deploy_config.optimizer_device(),
'/job:worker/device:CPU:0')
self.assertDeviceEqual(deploy_config.inputs_device(),
'/job:worker/device:CPU:0')
def testVariablesPS(self):
deploy_config = model_deploy.DeploymentConfig(num_ps_tasks=2)
with tf.device(deploy_config.variables_device()):
a = tf.Variable(0)
b = tf.Variable(0)
c = tf.no_op()
d = slim.variable('a', [],
caching_device=deploy_config.caching_device())
self.assertDeviceEqual(a.device, '/job:ps/task:0/device:CPU:0')
self.assertDeviceEqual(a.device, a.value().device)
self.assertDeviceEqual(b.device, '/job:ps/task:1/device:CPU:0')
self.assertDeviceEqual(b.device, b.value().device)
self.assertDeviceEqual(c.device, '')
self.assertDeviceEqual(d.device, '/job:ps/task:0/device:CPU:0')
self.assertDeviceEqual(d.value().device, '')
def LogisticClassifier(inputs, labels, scope=None, reuse=None):
with tf.variable_scope(scope, 'LogisticClassifier', [inputs, labels],
reuse=reuse):
predictions = slim.fully_connected(inputs, 1, activation_fn=tf.sigmoid,
scope='fully_connected')
slim.losses.log_loss(predictions, labels)
return predictions
def BatchNormClassifier(inputs, labels, scope=None, reuse=None):
with tf.variable_scope(scope, 'BatchNormClassifier', [inputs, labels],
reuse=reuse):
inputs = slim.batch_norm(inputs, decay=0.1, fused=True)
predictions = slim.fully_connected(inputs, 1,
activation_fn=tf.sigmoid,
scope='fully_connected')
slim.losses.log_loss(predictions, labels)
return predictions
class CreatecloneTest(tf.test.TestCase):
def setUp(self):
# Create an easy training set:
np.random.seed(0)
self._inputs = np.zeros((16, 4))
self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32)
self._logdir = self.get_temp_dir()
for i in range(16):
j = int(2 * self._labels[i] + np.random.randint(0, 2))
self._inputs[i, j] = 1
def testCreateLogisticClassifier(self):
g = tf.Graph()
with g.as_default():
tf.set_random_seed(0)
tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
tf_labels = tf.constant(self._labels, dtype=tf.float32)
model_fn = LogisticClassifier
clone_args = (tf_inputs, tf_labels)
deploy_config = model_deploy.DeploymentConfig(num_clones=1)
self.assertEqual(slim.get_variables(), [])
clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
clone = clones[0]
self.assertEqual(len(slim.get_variables()), 2)
for v in slim.get_variables():
self.assertDeviceEqual(v.device, 'CPU:0')
self.assertDeviceEqual(v.value().device, 'CPU:0')
self.assertEqual(clone.outputs.op.name,
'LogisticClassifier/fully_connected/Sigmoid')
self.assertEqual(clone.scope, '')
self.assertDeviceEqual(clone.device, 'GPU:0')
self.assertEqual(len(slim.losses.get_losses()), 1)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
self.assertEqual(update_ops, [])
def testCreateSingleclone(self):
g = tf.Graph()
with g.as_default():
tf.set_random_seed(0)
tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
tf_labels = tf.constant(self._labels, dtype=tf.float32)
model_fn = BatchNormClassifier
clone_args = (tf_inputs, tf_labels)
deploy_config = model_deploy.DeploymentConfig(num_clones=1)
self.assertEqual(slim.get_variables(), [])
clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
clone = clones[0]
self.assertEqual(len(slim.get_variables()), 5)
for v in slim.get_variables():
self.assertDeviceEqual(v.device, 'CPU:0')
self.assertDeviceEqual(v.value().device, 'CPU:0')
self.assertEqual(clone.outputs.op.name,
'BatchNormClassifier/fully_connected/Sigmoid')
self.assertEqual(clone.scope, '')
self.assertDeviceEqual(clone.device, 'GPU:0')
self.assertEqual(len(slim.losses.get_losses()), 1)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
self.assertEqual(len(update_ops), 2)
def testCreateMulticlone(self):
g = tf.Graph()
with g.as_default():
tf.set_random_seed(0)
tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
tf_labels = tf.constant(self._labels, dtype=tf.float32)
model_fn = BatchNormClassifier
clone_args = (tf_inputs, tf_labels)
num_clones = 4
deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones)
self.assertEqual(slim.get_variables(), [])
clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
self.assertEqual(len(slim.get_variables()), 5)
for v in slim.get_variables():
self.assertDeviceEqual(v.device, 'CPU:0')
self.assertDeviceEqual(v.value().device, 'CPU:0')
self.assertEqual(len(clones), num_clones)
for i, clone in enumerate(clones):
self.assertEqual(
clone.outputs.op.name,
'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, clone.scope)
self.assertEqual(len(update_ops), 2)
self.assertEqual(clone.scope, 'clone_%d/' % i)
self.assertDeviceEqual(clone.device, 'GPU:%d' % i)
def testCreateOnecloneWithPS(self):
g = tf.Graph()
with g.as_default():
tf.set_random_seed(0)
tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
tf_labels = tf.constant(self._labels, dtype=tf.float32)
model_fn = BatchNormClassifier
clone_args = (tf_inputs, tf_labels)
deploy_config = model_deploy.DeploymentConfig(num_clones=1,
num_ps_tasks=1)
self.assertEqual(slim.get_variables(), [])
clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
self.assertEqual(len(clones), 1)
clone = clones[0]
self.assertEqual(clone.outputs.op.name,
'BatchNormClassifier/fully_connected/Sigmoid')
self.assertDeviceEqual(clone.device, '/job:worker/device:GPU:0')
self.assertEqual(clone.scope, '')
self.assertEqual(len(slim.get_variables()), 5)
for v in slim.get_variables():
self.assertDeviceEqual(v.device, '/job:ps/task:0/CPU:0')
self.assertDeviceEqual(v.device, v.value().device)
def testCreateMulticloneWithPS(self):
g = tf.Graph()
with g.as_default():
tf.set_random_seed(0)
tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
tf_labels = tf.constant(self._labels, dtype=tf.float32)
model_fn = BatchNormClassifier
clone_args = (tf_inputs, tf_labels)
deploy_config = model_deploy.DeploymentConfig(num_clones=2,
num_ps_tasks=2)
self.assertEqual(slim.get_variables(), [])
clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
self.assertEqual(len(slim.get_variables()), 5)
for i, v in enumerate(slim.get_variables()):
t = i % 2
self.assertDeviceEqual(v.device, '/job:ps/task:%d/device:CPU:0' % t)
self.assertDeviceEqual(v.device, v.value().device)
self.assertEqual(len(clones), 2)
for i, clone in enumerate(clones):
self.assertEqual(
clone.outputs.op.name,
'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i)
self.assertEqual(clone.scope, 'clone_%d/' % i)
self.assertDeviceEqual(clone.device, '/job:worker/device:GPU:%d' % i)
class OptimizeclonesTest(tf.test.TestCase):
def setUp(self):
# Create an easy training set:
np.random.seed(0)
self._inputs = np.zeros((16, 4))
self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32)
self._logdir = self.get_temp_dir()
for i in range(16):
j = int(2 * self._labels[i] + np.random.randint(0, 2))
self._inputs[i, j] = 1
def testCreateLogisticClassifier(self):
g = tf.Graph()
with g.as_default():
tf.set_random_seed(0)
tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
tf_labels = tf.constant(self._labels, dtype=tf.float32)
model_fn = LogisticClassifier
clone_args = (tf_inputs, tf_labels)
deploy_config = model_deploy.DeploymentConfig(num_clones=1)
self.assertEqual(slim.get_variables(), [])
clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
self.assertEqual(len(slim.get_variables()), 2)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
self.assertEqual(update_ops, [])
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
optimizer)
self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
self.assertEqual(total_loss.op.name, 'total_loss')
for g, v in grads_and_vars:
self.assertDeviceEqual(g.device, 'GPU:0')
self.assertDeviceEqual(v.device, 'CPU:0')
def testCreateSingleclone(self):
g = tf.Graph()
with g.as_default():
tf.set_random_seed(0)
tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
tf_labels = tf.constant(self._labels, dtype=tf.float32)
model_fn = BatchNormClassifier
clone_args = (tf_inputs, tf_labels)
deploy_config = model_deploy.DeploymentConfig(num_clones=1)
self.assertEqual(slim.get_variables(), [])
clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
self.assertEqual(len(slim.get_variables()), 5)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
self.assertEqual(len(update_ops), 2)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
optimizer)
self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
self.assertEqual(total_loss.op.name, 'total_loss')
for g, v in grads_and_vars:
self.assertDeviceEqual(g.device, 'GPU:0')
self.assertDeviceEqual(v.device, 'CPU:0')
def testCreateMulticlone(self):
g = tf.Graph()
with g.as_default():
tf.set_random_seed(0)
tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
tf_labels = tf.constant(self._labels, dtype=tf.float32)
model_fn = BatchNormClassifier
clone_args = (tf_inputs, tf_labels)
num_clones = 4
deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones)
self.assertEqual(slim.get_variables(), [])
clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
self.assertEqual(len(slim.get_variables()), 5)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
self.assertEqual(len(update_ops), num_clones * 2)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
optimizer)
self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
self.assertEqual(total_loss.op.name, 'total_loss')
for g, v in grads_and_vars:
self.assertDeviceEqual(g.device, '')
self.assertDeviceEqual(v.device, 'CPU:0')
def testCreateMulticloneCPU(self):
g = tf.Graph()
with g.as_default():
tf.set_random_seed(0)
tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
tf_labels = tf.constant(self._labels, dtype=tf.float32)
model_fn = BatchNormClassifier
model_args = (tf_inputs, tf_labels)
num_clones = 4
deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones,
clone_on_cpu=True)
self.assertEqual(slim.get_variables(), [])
clones = model_deploy.create_clones(deploy_config, model_fn, model_args)
self.assertEqual(len(slim.get_variables()), 5)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
self.assertEqual(len(update_ops), num_clones * 2)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
optimizer)
self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
self.assertEqual(total_loss.op.name, 'total_loss')
for g, v in grads_and_vars:
self.assertDeviceEqual(g.device, '')
self.assertDeviceEqual(v.device, 'CPU:0')
def testCreateOnecloneWithPS(self):
g = tf.Graph()
with g.as_default():
tf.set_random_seed(0)
tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
tf_labels = tf.constant(self._labels, dtype=tf.float32)
model_fn = BatchNormClassifier
model_args = (tf_inputs, tf_labels)
deploy_config = model_deploy.DeploymentConfig(num_clones=1,
num_ps_tasks=1)
self.assertEqual(slim.get_variables(), [])
clones = model_deploy.create_clones(deploy_config, model_fn, model_args)
self.assertEqual(len(slim.get_variables()), 5)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
self.assertEqual(len(update_ops), 2)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
optimizer)
self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
self.assertEqual(total_loss.op.name, 'total_loss')
for g, v in grads_and_vars:
self.assertDeviceEqual(g.device, '/job:worker/device:GPU:0')
self.assertDeviceEqual(v.device, '/job:ps/task:0/CPU:0')
class DeployTest(tf.test.TestCase):
def setUp(self):
# Create an easy training set:
np.random.seed(0)
self._inputs = np.zeros((16, 4))
self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32)
self._logdir = self.get_temp_dir()
for i in range(16):
j = int(2 * self._labels[i] + np.random.randint(0, 2))
self._inputs[i, j] = 1
def _addBesselsCorrection(self, sample_size, expected_var):
correction_factor = sample_size / (sample_size - 1)
expected_var *= correction_factor
return expected_var
def testLocalTrainOp(self):
g = tf.Graph()
with g.as_default():
tf.set_random_seed(0)
tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
tf_labels = tf.constant(self._labels, dtype=tf.float32)
model_fn = BatchNormClassifier
model_args = (tf_inputs, tf_labels)
deploy_config = model_deploy.DeploymentConfig(num_clones=2,
clone_on_cpu=True)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
self.assertEqual(slim.get_variables(), [])
model = model_deploy.deploy(deploy_config, model_fn, model_args,
optimizer=optimizer)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
self.assertEqual(len(update_ops), 4)
self.assertEqual(len(model.clones), 2)
self.assertEqual(model.total_loss.op.name, 'total_loss')
self.assertEqual(model.summary_op.op.name, 'summary_op/summary_op')
self.assertEqual(model.train_op.op.name, 'train_op')
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
moving_mean = contrib_framework.get_variables_by_name('moving_mean')[0]
moving_variance = contrib_framework.get_variables_by_name(
'moving_variance')[0]
initial_loss = sess.run(model.total_loss)
initial_mean, initial_variance = sess.run([moving_mean,
moving_variance])
self.assertAllClose(initial_mean, [0.0, 0.0, 0.0, 0.0])
self.assertAllClose(initial_variance, [1.0, 1.0, 1.0, 1.0])
for _ in range(10):
sess.run(model.train_op)
final_loss = sess.run(model.total_loss)
self.assertLess(final_loss, initial_loss / 5.0)
final_mean, final_variance = sess.run([moving_mean,
moving_variance])
expected_mean = np.array([0.125, 0.25, 0.375, 0.25])
expected_var = np.array([0.109375, 0.1875, 0.234375, 0.1875])
expected_var = self._addBesselsCorrection(16, expected_var)
self.assertAllClose(final_mean, expected_mean)
self.assertAllClose(final_variance, expected_var)
def testNoSummariesOnGPU(self):
with tf.Graph().as_default():
deploy_config = model_deploy.DeploymentConfig(num_clones=2)
# clone function creates a fully_connected layer with a regularizer loss.
def ModelFn():
inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)
reg = contrib_layers.l2_regularizer(0.001)
contrib_layers.fully_connected(inputs, 30, weights_regularizer=reg)
model = model_deploy.deploy(
deploy_config, ModelFn,
optimizer=tf.train.GradientDescentOptimizer(1.0))
# The model summary op should have a few summary inputs and all of them
# should be on the CPU.
self.assertTrue(model.summary_op.op.inputs)
for inp in model.summary_op.op.inputs:
self.assertEqual('/device:CPU:0', inp.device)
def testNoSummariesOnGPUForEvals(self):
with tf.Graph().as_default():
deploy_config = model_deploy.DeploymentConfig(num_clones=2)
# clone function creates a fully_connected layer with a regularizer loss.
def ModelFn():
inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)
reg = contrib_layers.l2_regularizer(0.001)
contrib_layers.fully_connected(inputs, 30, weights_regularizer=reg)
# No optimizer here, it's an eval.
model = model_deploy.deploy(deploy_config, ModelFn)
# The model summary op should have a few summary inputs and all of them
# should be on the CPU.
self.assertTrue(model.summary_op.op.inputs)
for inp in model.summary_op.op.inputs:
self.assertEqual('/device:CPU:0', inp.device)
if __name__ == '__main__':
tf.test.main()
@@ -0,0 +1,94 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Downloads and converts a particular dataset.
Usage:
```shell
$ python download_and_convert_data.py \
--dataset_name=flowers \
--dataset_dir=/tmp/flowers
$ python download_and_convert_data.py \
--dataset_name=cifar10 \
--dataset_dir=/tmp/cifar10
$ python download_and_convert_data.py \
--dataset_name=mnist \
--dataset_dir=/tmp/mnist
$ python download_and_convert_data.py \
--dataset_name=visualwakewords \
--dataset_dir=/tmp/visualwakewords
```
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from datasets import download_and_convert_cifar10
from datasets import download_and_convert_flowers
from datasets import download_and_convert_mnist
from datasets import download_and_convert_visualwakewords
FLAGS = tf.compat.v1.app.flags.FLAGS
tf.compat.v1.app.flags.DEFINE_string(
'dataset_name',
None,
'The name of the dataset to convert, one of "flowers", "cifar10", "mnist", "visualwakewords"'
)
tf.compat.v1.app.flags.DEFINE_string(
'dataset_dir',
None,
'The directory where the output TFRecords and temporary files are saved.')
tf.flags.DEFINE_float(
'small_object_area_threshold', 0.005,
'For --dataset_name=visualwakewords only. Threshold of fraction of image '
'area below which small objects are filtered')
tf.flags.DEFINE_string(
'foreground_class_of_interest', 'person',
'For --dataset_name=visualwakewords only. Build a binary classifier based '
'on the presence or absence of this object in the image.')
def main(_):
if not FLAGS.dataset_name:
raise ValueError('You must supply the dataset name with --dataset_name')
if not FLAGS.dataset_dir:
raise ValueError('You must supply the dataset directory with --dataset_dir')
if FLAGS.dataset_name == 'flowers':
download_and_convert_flowers.run(FLAGS.dataset_dir)
elif FLAGS.dataset_name == 'cifar10':
download_and_convert_cifar10.run(FLAGS.dataset_dir)
elif FLAGS.dataset_name == 'mnist':
download_and_convert_mnist.run(FLAGS.dataset_dir)
elif FLAGS.dataset_name == 'visualwakewords':
download_and_convert_visualwakewords.run(
FLAGS.dataset_dir, FLAGS.small_object_area_threshold,
FLAGS.foreground_class_of_interest)
else:
raise ValueError(
'dataset_name [%s] was not recognized.' % FLAGS.dataset_name)
if __name__ == '__main__':
tf.compat.v1.app.run()
@@ -0,0 +1,182 @@
import tensorflow as tf
from time import gmtime, strftime
from tensorflow.contrib import slim as contrib_slim
from gpu_helper import get_custom_getter
import random
import numpy as np
import os
np.random.seed(0)
random.seed(0)
tf.set_random_seed(0)
class Env:
def __init__(self, FLAGS):
self.FLAGS = FLAGS
self.slim = contrib_slim
self.num_samples = 1281167
def _configure_optimizer(self, learning_rate):
"""Configures the optimizer used for training.
Args:
learning_rate: A scalar or `Tensor` learning rate.
Returns:
An instance of an optimizer.
Raises:
ValueError: if Initializer.FLAGS.optimizer is not recognized.
"""
if self.FLAGS.optimizer == 'adadelta':
optimizer = tf.train.AdadeltaOptimizer(
learning_rate,
rho=self.FLAGS.adadelta_rho,
epsilon=self.FLAGS.opt_epsilon)
elif self.FLAGS.optimizer == 'adagrad':
optimizer = tf.train.AdagradOptimizer(
learning_rate,
initial_accumulator_value=self.FLAGS.adagrad_initial_accumulator_value)
elif self.FLAGS.optimizer == 'adam':
optimizer = tf.train.AdamOptimizer(
learning_rate,
beta1=self.FLAGS.adam_beta1,
beta2=self.FLAGS.adam_beta2,
epsilon=self.FLAGS.opt_epsilon)
elif self.FLAGS.optimizer == 'ftrl':
optimizer = tf.train.FtrlOptimizer(
learning_rate,
learning_rate_power=self.FLAGS.ftrl_learning_rate_power,
initial_accumulator_value=self.FLAGS.ftrl_initial_accumulator_value,
l1_regularization_strength=self.FLAGS.ftrl_l1,
l2_regularization_strength=self.FLAGS.ftrl_l2)
elif self.FLAGS.optimizer == 'momentum':
optimizer = tf.train.MomentumOptimizer(
learning_rate,
momentum=self.FLAGS.momentum,
name='Momentum')
elif self.FLAGS.optimizer == 'rmsprop':
optimizer = tf.train.RMSPropOptimizer(
learning_rate,
decay=self.FLAGS.rmsprop_decay,
momentum=self.FLAGS.rmsprop_momentum,
epsilon=self.FLAGS.opt_epsilon)
elif self.FLAGS.optimizer == 'sgd':
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
else:
raise ValueError('Optimizer [%s] was not recognized' % self.FLAGS.optimizer)
return optimizer
def create_logdir(self):
logdir = "results"
os.makedirs(logdir, exist_ok=True)
return logdir
def calc_logits(self, network_fn, images):
logits, end_points = network_fn(images, reuse=tf.AUTO_REUSE)
return logits
def calc_loss(self, logits_train, labels_train):
base_loss = self.slim.losses.softmax_cross_entropy(
logits_train, labels_train, label_smoothing=self.FLAGS.label_smoothing, weights=1.0)
reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
total_loss = tf.add_n([base_loss] + reg_losses, name='total_loss')
loss = tf.add_n([base_loss])
loss = tf.identity(loss, name='loss')
return loss, total_loss
def calc_steps_per_epoch(self):
return self.num_samples // (self.FLAGS.batch_size * int(os.getenv('RANK_SIZE')))
def _configure_learning_rate(self, global_step):
steps_per_epoch = self.calc_steps_per_epoch()
decay_steps = int(steps_per_epoch * self.FLAGS.num_epochs_per_decay)
if self.FLAGS.learning_rate_decay_type == 'exponential':
learning_rate = tf.train.exponential_decay(
self.FLAGS.learning_rate,
global_step,
decay_steps,
self.FLAGS.learning_rate_decay_factor,
staircase=True,
name='exponential_decay_learning_rate')
elif self.FLAGS.learning_rate_decay_type == 'fixed':
learning_rate = tf.constant(self.FLAGS.learning_rate, name='fixed_learning_rate')
elif self.FLAGS.learning_rate_decay_type == 'cosine_annealing':
current_step_epoch = global_step // steps_per_epoch * steps_per_epoch
learning_rate = tf.train.cosine_decay(self.FLAGS.learning_rate, current_step_epoch,
self.FLAGS.max_number_of_steps)
elif self.FLAGS.learning_rate_decay_type == 'polynomial':
learning_rate = tf.train.polynomial_decay(
self.FLAGS.learning_rate, global_step,
decay_steps,
self.FLAGS.end_learning_rate,
power=1.0,
cycle=False,
name='polynomial_decay_learning_rate')
else:
raise ValueError('learning_rate_decay_type [%s] was not recognized' %
self.FLAGS.learning_rate_decay_type)
if self.FLAGS.warmup_epochs:
warmup_lr = (
self.FLAGS.learning_rate * tf.cast(global_step, tf.float32) /
(steps_per_epoch * self.FLAGS.warmup_epochs))
learning_rate = tf.minimum(warmup_lr, learning_rate)
learning_rate = tf.identity(learning_rate, name='learning_rate')
# tf.Print(learning_rate, [learning_rate], '*****************')
return learning_rate
def create_train_op(self, global_step, summaries, loss):
# Gather update_ops from the first clone. These contain, for example,
# the updates for the batch_norm variables created by network_fn.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) or []
#################################
# Configure the moving averages #
#################################
if self.FLAGS.moving_average_decay:
moving_average_variables = self.slim.get_model_variables()
variable_averages = tf.train.ExponentialMovingAverage(
self.FLAGS.moving_average_decay, global_step)
else:
moving_average_variables, variable_averages = None, None
#########################################
# Configure the optimization procedure. #
#########################################
learning_rate = self._configure_learning_rate(global_step)
summaries.add(tf.summary.scalar('learning_rate', learning_rate))
if self.FLAGS.moving_average_decay:
# Update ops executed locally by trainer.
update_ops.append(variable_averages.apply(moving_average_variables))
opt = self._configure_optimizer(learning_rate)
from npu_bridge.estimator.npu.npu_optimizer import NPUDistributedOptimizer
from npu_bridge.estimator.npu.npu_loss_scale_optimizer import NPULossScaleOptimizer
from npu_bridge.estimator.npu.npu_loss_scale_manager import FixedLossScaleManager
from npu_bridge.estimator.npu.npu_loss_scale_manager import ExponentialUpdateLossScaleManager
loss_scale_manager = FixedLossScaleManager(loss_scale=4096)
# loss_scale_manager = ExponentialUpdateLossScaleManager(init_loss_scale=1024, incr_every_n_steps=1000, decr_every_n_nan_or_inf=2, decr_ratio=0.5)
if int(os.getenv('RANK_SIZE')) == 1:
opt = NPULossScaleOptimizer(opt, loss_scale_manager)
else:
opt = NPULossScaleOptimizer(opt, loss_scale_manager, is_distributed=True)
opt = NPUDistributedOptimizer(opt)
update_op = tf.group(*update_ops)
with tf.control_dependencies([update_op]):
gate_gradients = (tf.train.Optimizer.GATE_NONE)
grads_and_vars = opt.compute_gradients(loss)
train_op = opt.apply_gradients(grads_and_vars, global_step=global_step)
return train_op
@@ -0,0 +1,133 @@
import tensorflow as tf
from dataloader import data_provider
from datasets import dataset_factory
from nets import nets_factory
import os
class EstimatorImpl:
def __init__(self, env):
self.env = env
def model_fn(self, features, labels, mode, params):
num_classes = 1001
summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
if mode == tf.estimator.ModeKeys.TRAIN:
network_fn = nets_factory.get_network_fn(
self.env.FLAGS.model_name,
num_classes=(num_classes - self.env.FLAGS.labels_offset),
weight_decay=self.env.FLAGS.weight_decay,
is_training=True)
logits = self.env.calc_logits(network_fn, features)
loss, total_loss = self.env.calc_loss(logits, labels)
# ### accuracy ### #
predictions = tf.argmax(logits, 1)
accuracy_ops = tf.metrics.accuracy(tf.argmax(labels, 1), predictions)
tf.identity(accuracy_ops[1], name='train_accuracy')
# ### accuracy ### #
tf.identity(total_loss, 'train_loss')
global_step = tf.train.get_or_create_global_step()
train_op = self.env.create_train_op(global_step, summaries, total_loss)
estimator_spec = tf.estimator.EstimatorSpec(
mode=tf.estimator.ModeKeys.TRAIN, loss=total_loss, train_op=train_op)
elif mode == tf.estimator.ModeKeys.EVAL:
network_fn = nets_factory.get_network_fn(
self.env.FLAGS.model_name,
num_classes=(num_classes - self.env.FLAGS.labels_offset),
weight_decay=self.env.FLAGS.weight_decay,
is_training=False)
logits = self.env.calc_logits(network_fn, features)
loss, total_loss = self.env.calc_loss(logits, labels)
predictions = tf.argmax(logits, 1)
accuracy_ops = tf.metrics.accuracy(tf.argmax(labels, 1), predictions)
tf.identity(accuracy_ops[1], name='eval_accuracy')
estimator_spec = tf.estimator.EstimatorSpec(
mode=tf.estimator.ModeKeys.EVAL,
loss=total_loss, eval_metric_ops={'accuracy': accuracy_ops})
return estimator_spec
def main(self):
logdir = self.env.create_logdir()
from logger import LogSessionRunHook
config = {
'num_training_samples': self.env.num_samples,
# for 1p, just per loop print, for 8p, print each epoch
'display_every': 1,
'log_name': 'train_log.log',
'log_dir': logdir,
'global_batch_size': self.env.FLAGS.batch_size * int(os.getenv('RANK_SIZE')),
'iterations_per_loop': self.env.FLAGS.iterations_per_loop if self.env.FLAGS.iterations_per_loop is not None else self.env.calc_steps_per_epoch()
}
hooks = [LogSessionRunHook(config, warmup_steps=self.env.FLAGS.warmup_epochs * self.env.calc_steps_per_epoch())]
#################################################################
from npu_bridge.estimator.npu.npu_config import NPURunConfig
from npu_bridge.estimator.npu.npu_estimator import NPUEstimator
self.estimator_config = tf.ConfigProto(
inter_op_parallelism_threads=10,
intra_op_parallelism_threads=10,
allow_soft_placement=True)
self.estimator_config.gpu_options.allow_growth = True
gpu_thread_count = 2
os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
os.environ['TF_GPU_THREAD_COUNT'] = str(gpu_thread_count)
os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = '1'
os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
run_config = NPURunConfig(
hcom_parallel=True,
precision_mode="allow_mix_precision",
enable_data_pre_proc=True,
save_checkpoints_steps=self.env.calc_steps_per_epoch(),
session_config=self.estimator_config,
model_dir=logdir,
iterations_per_loop=config['iterations_per_loop'],
keep_checkpoint_max=5)
classifier = NPUEstimator(
model_fn=self.model_fn,
config=run_config
)
###################################################################
classifier.train(
input_fn=self.train_data,
max_steps=self.env.FLAGS.max_number_of_steps,
hooks=hooks,
)
def train_data(self):
dataset = dataset_factory.get_dataset(self.env.FLAGS.dataset_name, 'train', self.env.FLAGS.dataset_dir)
preprocessing_name = self.env.FLAGS.preprocessing_name or self.env.FLAGS.model_name
_, ds = data_provider.get_data(dataset, self.env.FLAGS.batch_size,
dataset.num_classes, self.env.FLAGS.labels_offset, True,
preprocessing_name, self.env.FLAGS.use_grayscale)
return ds
def eval_data(self):
dataset = dataset_factory.get_dataset(self.env.FLAGS.dataset_name, 'validation', self.env.FLAGS.dataset_dir)
preprocessing_name = self.env.FLAGS.preprocessing_name or self.env.FLAGS.model_name
_, ds = data_provider.get_data(dataset, self.env.FLAGS.batch_size,
dataset.num_classes, self.env.FLAGS.labels_offset, False,
preprocessing_name, self.env.FLAGS.use_grayscale)
return ds
@@ -0,0 +1,174 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generic evaluation script that evaluates a model using a given dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import tensorflow as tf
from tensorflow.contrib import quantize as contrib_quantize
from tensorflow.contrib import slim as contrib_slim
from benchmark_log import hwlog
from datasets import dataset_factory
from nets import nets_factory
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '4'
slim = contrib_slim
tf.app.flags.DEFINE_integer(
'batch_size', 100, 'The number of samples in each batch.')
tf.app.flags.DEFINE_integer(
'max_num_batches', None,
'Max number of batches to evaluate by default use all.')
tf.app.flags.DEFINE_string(
'master', '', 'The address of the TensorFlow master to use.')
ckpt_path = './results/0526023335_train_hvdTrue_mnmobilenet_v2_augmentedTrue_mixedpFalse_lr0.4_optmomentum_me200_lrdtcosine_annealing_nepd0.3125_lrdf0.98_b256_me_param'
# ckpt_path = './results/0523130615_train_hvdTrue_mnmobilenet_v2_augmentedTrue_mixedpFalse_lr0.4_optmomentum_me200_lrdtcosine_annealing_nepd0.3125_lrdf0.98_b256_me_param'
tf.app.flags.DEFINE_string(
'checkpoint_path', ckpt_path,
'The directory where the model was written to or an absolute path to a '
'checkpoint file.')
tf.app.flags.DEFINE_string(
'eval_dir', ckpt_path, 'Directory where the results are saved to.')
tf.app.flags.DEFINE_integer(
'num_preprocessing_threads', 4,
'The number of threads used to create the batches.')
tf.app.flags.DEFINE_string(
'dataset_name', 'imagenet', 'The name of the dataset to load.')
tf.app.flags.DEFINE_string(
'dataset_split_name', 'validation', 'The name of the train/test split.')
tf.app.flags.DEFINE_string(
'dataset_dir', '/data/Datasets/imagenet_TF', 'The directory where the dataset files are stored.')
tf.app.flags.DEFINE_integer(
'labels_offset', 0,
'An offset for the labels in the dataset. This flag is primarily used to '
'evaluate the VGG and ResNet architectures which do not use a background '
'class for the ImageNet dataset.')
tf.app.flags.DEFINE_string(
'model_name', 'mobilenet_v2', 'The name of the architecture to evaluate.')
tf.app.flags.DEFINE_string(
'preprocessing_name', None, 'The name of the preprocessing to use. If left '
'as `None`, then the model_name flag is used.')
tf.app.flags.DEFINE_float(
'moving_average_decay', None,
'The decay to use for the moving average.'
'If left as None, then moving averages are not used.')
tf.app.flags.DEFINE_integer(
'eval_image_size', None, 'Eval image size')
tf.app.flags.DEFINE_bool(
'quantize', False, 'whether to use quantized graph or not.')
tf.app.flags.DEFINE_bool('use_grayscale', False,
'Whether to convert input images to grayscale.')
FLAGS = tf.app.flags.FLAGS
def main(_):
if not FLAGS.dataset_dir:
raise ValueError('You must supply the dataset directory with --dataset_dir')
tf.logging.set_verbosity(tf.logging.INFO)
with tf.Graph().as_default():
tf_global_step = slim.get_or_create_global_step()
######################
# Select the dataset #
######################
dataset = dataset_factory.get_dataset(
FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)
####################
# Select the model #
####################
network_fn = nets_factory.get_network_fn(
FLAGS.model_name,
num_classes=(dataset.num_classes - FLAGS.labels_offset),
is_training=False)
from dataloader import data_provider
preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
iterator, _ = data_provider.get_data(dataset, FLAGS.batch_size,
dataset.num_classes, FLAGS.labels_offset, is_training=False,
preprocessing_name=preprocessing_name,
use_grayscale=FLAGS.use_grayscale,
hvd=None, enable_hvd=None)
images, labels = iterator.get_next() # label: [100, 1001]
images = tf.reshape(images, [FLAGS.batch_size, 224, 224, 3]) # (100, 224, 224, 3), float32
labels = tf.argmax(labels, axis=1) # [100]
logits, _ = network_fn(images)
if FLAGS.quantize:
contrib_quantize.create_eval_graph()
predictions = tf.argmax(logits, 1)
labels = tf.squeeze(labels)
eval_accuracy, metric_update_op = tf.metrics.accuracy(labels, predictions)
# tf.summary.scalar('top1_acc', top1_accu)
# summaries_op = tf.summary.merge_all()
# TODO(sguada) use num_epochs=1
if FLAGS.max_num_batches:
num_batches = FLAGS.max_num_batches
else:
# This ensures that we make a single pass over all of the data.
num_batches = math.ceil(dataset.num_samples / float(FLAGS.batch_size))
if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
else:
checkpoint_path = FLAGS.checkpoint_path
##### evaluate #####
tf.logging.info('Evaluating %s' % checkpoint_path)
saver = tf.train.Saver()
from time import gmtime, strftime
logdir = "results/%s" % strftime("%m%d%H%M%S_evel", gmtime())
# summary_writer = tf.summary.FileWriter(logdir=logdir, graph=tf.get_default_graph())
with tf.Session() as sess:
sess.run(iterator.initializer)
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
saver.restore(sess, f'{checkpoint_path}')
tf.train.write_graph(sess.graph, logdir, 'graph.pbtxt')
for step in range(num_batches):
_metric_update_op = sess.run([metric_update_op])
print(f'{step}, _metric_update_op: {_metric_update_op}')
acc = sess.run([eval_accuracy])
print(f'acc: {acc}')
hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP1, value=f'{acc}')
if __name__ == '__main__':
tf.app.run()
@@ -0,0 +1,181 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generic evaluation script that evaluates a model using a given dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import tensorflow as tf
from tensorflow.contrib import quantize as contrib_quantize
from tensorflow.contrib import slim as contrib_slim
from datasets import dataset_factory
from nets import nets_factory
from benchmark_log import hwlog
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '4'
slim = contrib_slim
tf.app.flags.DEFINE_integer(
'batch_size', 256, 'The number of samples in each batch.')
tf.app.flags.DEFINE_integer(
'max_num_batches', None,
'Max number of batches to evaluate by default use all.')
tf.app.flags.DEFINE_string(
'master', '', 'The address of the TensorFlow master to use.')
tf.app.flags.DEFINE_string(
'checkpoint_path', 'ckpt_path',
'The directory where the model was written to or an absolute path to a '
'checkpoint file.')
tf.app.flags.DEFINE_string(
'eval_dir', 'ckpt_path', 'Directory where the results are saved to.')
tf.app.flags.DEFINE_integer(
'num_preprocessing_threads', 4,
'The number of threads used to create the batches.')
tf.app.flags.DEFINE_string(
'dataset_name', 'imagenet', 'The name of the dataset to load.')
tf.app.flags.DEFINE_string(
'dataset_split_name', 'validation', 'The name of the train/test split.')
tf.app.flags.DEFINE_string(
'dataset_dir', '/opt/npu/slimImagenet', 'The directory where the dataset files are stored.')
tf.app.flags.DEFINE_integer(
'labels_offset', 0,
'An offset for the labels in the dataset. This flag is primarily used to '
'evaluate the VGG and ResNet architectures which do not use a background '
'class for the ImageNet dataset.')
tf.app.flags.DEFINE_string(
'model_name', 'mobilenet_v2', 'The name of the architecture to evaluate.')
tf.app.flags.DEFINE_string(
'preprocessing_name', None, 'The name of the preprocessing to use. If left '
'as `None`, then the model_name flag is used.')
tf.app.flags.DEFINE_float(
'moving_average_decay', None,
'The decay to use for the moving average.'
'If left as None, then moving averages are not used.')
tf.app.flags.DEFINE_integer(
'eval_image_size', None, 'Eval image size')
tf.app.flags.DEFINE_bool(
'quantize', False, 'whether to use quantized graph or not.')
tf.app.flags.DEFINE_bool('use_grayscale', False,
'Whether to convert input images to grayscale.')
FLAGS = tf.app.flags.FLAGS
def main(_):
if not FLAGS.dataset_dir:
raise ValueError('You must supply the dataset directory with --dataset_dir')
tf.logging.set_verbosity(tf.logging.INFO)
with tf.Graph().as_default():
tf_global_step = slim.get_or_create_global_step()
######################
# Select the dataset #
######################
dataset = dataset_factory.get_dataset(
FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)
####################
# Select the model #
####################
network_fn = nets_factory.get_network_fn(
FLAGS.model_name,
num_classes=(dataset.num_classes - FLAGS.labels_offset),
is_training=False)
from dataloader import data_provider
preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
iterator, _ = data_provider.get_data(dataset, FLAGS.batch_size,
dataset.num_classes, FLAGS.labels_offset, is_training=False,
preprocessing_name=preprocessing_name,
use_grayscale=FLAGS.use_grayscale)
#tf.logging.info('iterator %s' % iterator)
images, labels = iterator.get_next() # label: [100, 1001]
images = tf.reshape(images, [FLAGS.batch_size, 224, 224, 3]) # (100, 224, 224, 3), float32
labels = tf.argmax(labels, axis=1) # [100]
logits, _ = network_fn(images)
if FLAGS.quantize:
contrib_quantize.create_eval_graph()
predictions = tf.argmax(logits, 1)
labels = tf.squeeze(labels)
eval_accuracy, metric_update_op = tf.metrics.accuracy(labels, predictions)
#hwlog.remark_print(key=hwlog.EVAL_ACCURACY, value="".format(eval_accuracy))
# tf.summary.scalar('top1_acc', top1_accu)
# summaries_op = tf.summary.merge_all()
# TODO(sguada) use num_epochs=1
if FLAGS.max_num_batches:
num_batches = FLAGS.max_num_batches
else:
# This ensures that we make a single pass over all of the data.
num_batches = math.ceil(dataset.num_samples / float(FLAGS.batch_size)) - 1
if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
else:
checkpoint_path = FLAGS.checkpoint_path
# checkpoint_path = '/opt/npu/models/mobilenetv2_v0.1/ckpt/model.ckpt'
print(dataset.num_samples)
print(FLAGS.batch_size)
hwlog.remark_print(key=hwlog.GLOBAL_BATCH_SIZE, value=FLAGS.batch_size)
##### evaluate #####
tf.logging.info('Evaluating %s' % checkpoint_path)
saver = tf.train.Saver()
from time import gmtime, strftime
logdir = "ckpt/%s" % strftime("%m%d%H%M%S_evel", gmtime())
# summary_writer = tf.summary.FileWriter(logdir=logdir, graph=tf.get_default_graph())
with tf.Session() as sess:
sess.run(iterator.initializer)
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
saver.restore(sess, f'{checkpoint_path}')
# saver.restore(sess, 'result/8p/2/results/model.ckpt-3750')
tf.train.write_graph(sess.graph, logdir, 'graph.pbtxt')
for step in range(num_batches):
_metric_update_op = sess.run([metric_update_op])
print(f'{step}, _metric_update_op: {_metric_update_op}')
hwlog.remark_print(key=hwlog.GLOBAL_STEP, value=f'{step}')
hwlog.remark_print(key=hwlog.EVAL_ACCURACY, value=f'{_metric_update_op}')
acc = sess.run([eval_accuracy])
print(f'acc: {acc}')
hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP1, value=f'{acc}')
if __name__ == '__main__':
tf.app.run()
@@ -0,0 +1,164 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Saves out a GraphDef containing the architecture of the model.
To use it, run something like this, with a model name defined by slim:
bazel build tensorflow_models/research/slim:export_inference_graph
bazel-bin/tensorflow_models/research/slim/export_inference_graph \
--model_name=inception_v3 --output_file=/tmp/inception_v3_inf_graph.pb
If you then want to use the resulting model with your own or pretrained
checkpoints as part of a mobile model, you can run freeze_graph to get a graph
def with the variables inlined as constants using:
bazel build tensorflow/python/tools:freeze_graph
bazel-bin/tensorflow/python/tools/freeze_graph \
--input_graph=/tmp/inception_v3_inf_graph.pb \
--input_checkpoint=/tmp/checkpoints/inception_v3.ckpt \
--input_binary=true --output_graph=/tmp/frozen_inception_v3.pb \
--output_node_names=InceptionV3/Predictions/Reshape_1
The output node names will vary depending on the model, but you can inspect and
estimate them using the summarize_graph tool:
bazel build tensorflow/tools/graph_transforms:summarize_graph
bazel-bin/tensorflow/tools/graph_transforms/summarize_graph \
--in_graph=/tmp/inception_v3_inf_graph.pb
To run the resulting graph in C++, you can look at the label_image sample code:
bazel build tensorflow/examples/label_image:label_image
bazel-bin/tensorflow/examples/label_image/label_image \
--image=${HOME}/Pictures/flowers.jpg \
--input_layer=input \
--output_layer=InceptionV3/Predictions/Reshape_1 \
--graph=/tmp/frozen_inception_v3.pb \
--labels=/tmp/imagenet_slim_labels.txt \
--input_mean=0 \
--input_std=255
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import tensorflow as tf
from tensorflow.contrib import quantize as contrib_quantize
from tensorflow.contrib import slim as contrib_slim
from tensorflow.python.platform import gfile
from datasets import dataset_factory
from nets import nets_factory
slim = contrib_slim
tf.app.flags.DEFINE_string(
'model_name', 'inception_v3', 'The name of the architecture to save.')
tf.app.flags.DEFINE_boolean(
'is_training', False,
'Whether to save out a training-focused version of the model.')
tf.app.flags.DEFINE_integer(
'image_size', None,
'The image size to use, otherwise use the model default_image_size.')
tf.app.flags.DEFINE_integer(
'batch_size', None,
'Batch size for the exported model. Defaulted to "None" so batch size can '
'be specified at model runtime.')
tf.app.flags.DEFINE_string('dataset_name', 'imagenet',
'The name of the dataset to use with the model.')
tf.app.flags.DEFINE_integer(
'labels_offset', 0,
'An offset for the labels in the dataset. This flag is primarily used to '
'evaluate the VGG and ResNet architectures which do not use a background '
'class for the ImageNet dataset.')
tf.app.flags.DEFINE_string(
'output_file', '', 'Where to save the resulting file to.')
tf.app.flags.DEFINE_string(
'dataset_dir', '', 'Directory to save intermediate dataset files to')
tf.app.flags.DEFINE_bool(
'quantize', False, 'whether to use quantized graph or not.')
tf.app.flags.DEFINE_bool(
'is_video_model', False, 'whether to use 5-D inputs for video model.')
tf.app.flags.DEFINE_integer(
'num_frames', None,
'The number of frames to use. Only used if is_video_model is True.')
tf.app.flags.DEFINE_bool('write_text_graphdef', False,
'Whether to write a text version of graphdef.')
tf.app.flags.DEFINE_bool('use_grayscale', False,
'Whether to convert input images to grayscale.')
FLAGS = tf.app.flags.FLAGS
def main(_):
if not FLAGS.output_file:
raise ValueError('You must supply the path to save to with --output_file')
if FLAGS.is_video_model and not FLAGS.num_frames:
raise ValueError(
'Number of frames must be specified for video models with --num_frames')
tf.logging.set_verbosity(tf.logging.INFO)
with tf.Graph().as_default() as graph:
dataset = dataset_factory.get_dataset(FLAGS.dataset_name, 'train',
FLAGS.dataset_dir)
network_fn = nets_factory.get_network_fn(
FLAGS.model_name,
num_classes=(dataset.num_classes - FLAGS.labels_offset),
is_training=FLAGS.is_training)
image_size = FLAGS.image_size or network_fn.default_image_size
num_channels = 1 if FLAGS.use_grayscale else 3
if FLAGS.is_video_model:
input_shape = [
FLAGS.batch_size, FLAGS.num_frames, image_size, image_size,
num_channels
]
else:
input_shape = [FLAGS.batch_size, image_size, image_size, num_channels]
placeholder = tf.placeholder(name='input', dtype=tf.float32,
shape=input_shape)
network_fn(placeholder)
if FLAGS.quantize:
contrib_quantize.create_eval_graph()
graph_def = graph.as_graph_def()
if FLAGS.write_text_graphdef:
tf.io.write_graph(
graph_def,
os.path.dirname(FLAGS.output_file),
os.path.basename(FLAGS.output_file),
as_text=True)
else:
with gfile.GFile(FLAGS.output_file, 'wb') as f:
f.write(graph_def.SerializeToString())
if __name__ == '__main__':
tf.app.run()
@@ -0,0 +1,44 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for export_inference_graph."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import tensorflow as tf
from tensorflow.python.platform import gfile
import export_inference_graph
class ExportInferenceGraphTest(tf.test.TestCase):
def testExportInferenceGraph(self):
tmpdir = self.get_temp_dir()
output_file = os.path.join(tmpdir, 'inception_v3.pb')
flags = tf.app.flags.FLAGS
flags.output_file = output_file
flags.model_name = 'inception_v3'
flags.dataset_dir = tmpdir
export_inference_graph.main(None)
self.assertTrue(gfile.Exists(output_file))
if __name__ == '__main__':
tf.test.main()
@@ -0,0 +1,44 @@
import tensorflow as tf
import numpy as np
def float32_variable_storage_getter(getter, name, shape=None, dtype=None,
initializer=None, regularizer=None,
trainable=True,
*args, **kwargs):
"""Custom variable getter that forces trainable variables to be stored in
float32 precision and then casts them to the training precision.
"""
storage_dtype = tf.float32 if trainable else dtype
variable = getter(name, shape, dtype=storage_dtype,
initializer=initializer, regularizer=regularizer,
trainable=trainable,
*args, **kwargs)
if trainable and dtype != tf.float32:
variable = tf.cast(variable, dtype)
return variable
def get_custom_getter(compute_type):
return float32_variable_storage_getter if compute_type == tf.float16 else None
def float32_variable_storage_getter_1(getter, name, shape=None, dtype=None,
initializer=None, regularizer=None,
trainable=True,
*args, **kwargs):
"""Custom variable getter that forces trainable variables to be stored in
float32 precision and then casts them to the training precision.
"""
dtype = tf.float16
storage_dtype = tf.float32 if trainable else dtype
variable = getter(name, shape, dtype=storage_dtype,
initializer=initializer, regularizer=regularizer,
trainable=trainable,
*args, **kwargs)
if trainable and dtype != tf.float32:
variable = tf.cast(variable, dtype)
return variable
def get_custom_getter_1(compute_type):
return float32_variable_storage_getter_1 if compute_type == tf.float16 else None
@@ -0,0 +1,86 @@
from __future__ import print_function
import datetime
import logging
import os
import sys
import time
from benchmark_log import hwlog
import numpy as np
import tensorflow as tf
class LogSessionRunHook(tf.train.SessionRunHook):
def __init__(self, config, warmup_steps=5):
self.global_batch_size = config['global_batch_size']
self.iterations_per_loop = config['iterations_per_loop']
self.warmup_steps = warmup_steps
self.iter_times = []
self.num_records = config['num_training_samples']
self.display_every = config['display_every']
self.logger = get_logger(config['log_name'], config['log_dir'])
rank0log(self.logger, 'PY' + str(sys.version) + 'TF' + str(tf.__version__))
def after_create_session(self, session, coord):
rank0log(self.logger, 'Step Epoch Speed Loss FinLoss LR')
self.elapsed_secs = 0.
self.count = 0
def before_run(self, run_context):
self.t0 = time.time()
return tf.train.SessionRunArgs(
fetches=[tf.train.get_global_step(), 'loss:0', 'total_loss:0', 'learning_rate:0',
'train_accuracy:0'])
def after_run(self, run_context, run_values):
batch_time = time.time() - self.t0
self.iter_times.append(batch_time)
self.elapsed_secs += batch_time
self.count += 1
global_step, loss, total_loss, lr, train_accuracy = run_values.results
if global_step == 1 or global_step % self.display_every == 0:
dt = self.elapsed_secs / self.count
img_per_sec = self.global_batch_size * self.iterations_per_loop / dt
epoch = global_step * self.global_batch_size / self.num_records
self.logger.info(f'step:{global_step} epoch:{epoch} ips:{img_per_sec} '
f'loss:{loss} total_loss:{total_loss} lr:{lr}, '
f'train_accuracy:{train_accuracy}')
hwlog.remark_print(key=hwlog.GLOBAL_STEP, value=f"{global_step}")
hwlog.remark_print(key=hwlog.CURRENT_EPOCH, value=f"{epoch}")
hwlog.remark_print(key=hwlog.TRAIN_ACCURACY, value=f"{train_accuracy}")
hwlog.remark_print(key=hwlog.FPS, value=f"{img_per_sec}")
self.elapsed_secs = 0.
self.count = 0
def get_average_speed(self):
avg_time = np.mean(self.iter_times[self.warmup_steps:])
speed = self.global_batch_size / avg_time
return speed
def rank0log(logger, *args, **kwargs):
if logger:
logger.info(''.join([str(x) for x in list(args)]))
else:
print(*args, **kwargs)
def get_logger(log_name, log_dir):
logger = logging.getLogger(log_name)
logger.setLevel(logging.INFO) # INFO, ERROR
if not os.path.isdir(log_dir):
try:
os.makedirs(log_dir)
except FileExistsError:
pass
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
formatter = logging.Formatter('%(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)
fh = logging.FileHandler(os.path.join(log_dir, log_name))
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)
logger.addHandler(fh)
return logger
@@ -0,0 +1,148 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains a model definition for AlexNet.
This work was first described in:
ImageNet Classification with Deep Convolutional Neural Networks
Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton
and later refined in:
One weird trick for parallelizing convolutional neural networks
Alex Krizhevsky, 2014
Here we provide the implementation proposed in "One weird trick" and not
"ImageNet Classification", as per the paper, the LRN layers have been removed.
Usage:
with slim.arg_scope(alexnet.alexnet_v2_arg_scope()):
outputs, end_points = alexnet.alexnet_v2(inputs)
@@alexnet_v2
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
slim = contrib_slim
# pylint: disable=g-long-lambda
trunc_normal = lambda stddev: tf.compat.v1.truncated_normal_initializer(
0.0, stddev)
def alexnet_v2_arg_scope(weight_decay=0.0005):
with slim.arg_scope([slim.conv2d, slim.fully_connected],
activation_fn=tf.nn.relu,
biases_initializer=tf.compat.v1.constant_initializer(0.1),
weights_regularizer=slim.l2_regularizer(weight_decay)):
with slim.arg_scope([slim.conv2d], padding='SAME'):
with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:
return arg_sc
def alexnet_v2(inputs,
num_classes=1000,
is_training=True,
dropout_keep_prob=0.5,
spatial_squeeze=True,
scope='alexnet_v2',
global_pool=False):
"""AlexNet version 2.
Described in: http://arxiv.org/pdf/1404.5997v2.pdf
Parameters from:
github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
layers-imagenet-1gpu.cfg
Note: All the fully_connected layers have been transformed to conv2d layers.
To use in classification mode, resize input to 224x224 or set
global_pool=True. To use in fully convolutional mode, set
spatial_squeeze to false.
The LRN layers have been removed and change the initializers from
random_normal_initializer to xavier_initializer.
Args:
inputs: a tensor of size [batch_size, height, width, channels].
num_classes: the number of predicted classes. If 0 or None, the logits layer
is omitted and the input features to the logits layer are returned instead.
is_training: whether or not the model is being trained.
dropout_keep_prob: the probability that activations are kept in the dropout
layers during training.
spatial_squeeze: whether or not should squeeze the spatial dimensions of the
logits. Useful to remove unnecessary dimensions for classification.
scope: Optional scope for the variables.
global_pool: Optional boolean flag. If True, the input to the classification
layer is avgpooled to size 1x1, for any input size. (This is not part
of the original AlexNet.)
Returns:
net: the output of the logits layer (if num_classes is a non-zero integer),
or the non-dropped-out input to the logits layer (if num_classes is 0
or None).
end_points: a dict of tensors with intermediate activations.
"""
with tf.compat.v1.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
end_points_collection = sc.original_name_scope + '_end_points'
# Collect outputs for conv2d, fully_connected and max_pool2d.
with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
outputs_collections=[end_points_collection]):
net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
scope='conv1')
net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')
net = slim.conv2d(net, 192, [5, 5], scope='conv2')
net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')
net = slim.conv2d(net, 384, [3, 3], scope='conv3')
net = slim.conv2d(net, 384, [3, 3], scope='conv4')
net = slim.conv2d(net, 256, [3, 3], scope='conv5')
net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')
# Use conv2d instead of fully_connected layers.
with slim.arg_scope(
[slim.conv2d],
weights_initializer=trunc_normal(0.005),
biases_initializer=tf.compat.v1.constant_initializer(0.1)):
net = slim.conv2d(net, 4096, [5, 5], padding='VALID',
scope='fc6')
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='dropout6')
net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
# Convert end_points_collection into a end_point dict.
end_points = slim.utils.convert_collection_to_dict(
end_points_collection)
if global_pool:
net = tf.reduce_mean(
input_tensor=net, axis=[1, 2], keepdims=True, name='global_pool')
end_points['global_pool'] = net
if num_classes:
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='dropout7')
net = slim.conv2d(
net,
num_classes, [1, 1],
activation_fn=None,
normalizer_fn=None,
biases_initializer=tf.compat.v1.zeros_initializer(),
scope='fc8')
if spatial_squeeze:
net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
end_points[sc.name + '/fc8'] = net
return net, end_points
alexnet_v2.default_image_size = 224
@@ -0,0 +1,181 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for slim.nets.alexnet."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import alexnet
slim = contrib_slim
class AlexnetV2Test(tf.test.TestCase):
def testBuild(self):
batch_size = 5
height, width = 224, 224
num_classes = 1000
with self.test_session():
inputs = tf.random.uniform((batch_size, height, width, 3))
logits, _ = alexnet.alexnet_v2(inputs, num_classes)
self.assertEquals(logits.op.name, 'alexnet_v2/fc8/squeezed')
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
def testFullyConvolutional(self):
batch_size = 1
height, width = 300, 400
num_classes = 1000
with self.test_session():
inputs = tf.random.uniform((batch_size, height, width, 3))
logits, _ = alexnet.alexnet_v2(inputs, num_classes, spatial_squeeze=False)
self.assertEquals(logits.op.name, 'alexnet_v2/fc8/BiasAdd')
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, 4, 7, num_classes])
def testGlobalPool(self):
batch_size = 1
height, width = 256, 256
num_classes = 1000
with self.test_session():
inputs = tf.random.uniform((batch_size, height, width, 3))
logits, _ = alexnet.alexnet_v2(inputs, num_classes, spatial_squeeze=False,
global_pool=True)
self.assertEquals(logits.op.name, 'alexnet_v2/fc8/BiasAdd')
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, 1, 1, num_classes])
def testEndPoints(self):
batch_size = 5
height, width = 224, 224
num_classes = 1000
with self.test_session():
inputs = tf.random.uniform((batch_size, height, width, 3))
_, end_points = alexnet.alexnet_v2(inputs, num_classes)
expected_names = ['alexnet_v2/conv1',
'alexnet_v2/pool1',
'alexnet_v2/conv2',
'alexnet_v2/pool2',
'alexnet_v2/conv3',
'alexnet_v2/conv4',
'alexnet_v2/conv5',
'alexnet_v2/pool5',
'alexnet_v2/fc6',
'alexnet_v2/fc7',
'alexnet_v2/fc8'
]
self.assertSetEqual(set(end_points.keys()), set(expected_names))
def testNoClasses(self):
batch_size = 5
height, width = 224, 224
num_classes = None
with self.test_session():
inputs = tf.random.uniform((batch_size, height, width, 3))
net, end_points = alexnet.alexnet_v2(inputs, num_classes)
expected_names = ['alexnet_v2/conv1',
'alexnet_v2/pool1',
'alexnet_v2/conv2',
'alexnet_v2/pool2',
'alexnet_v2/conv3',
'alexnet_v2/conv4',
'alexnet_v2/conv5',
'alexnet_v2/pool5',
'alexnet_v2/fc6',
'alexnet_v2/fc7'
]
self.assertSetEqual(set(end_points.keys()), set(expected_names))
self.assertTrue(net.op.name.startswith('alexnet_v2/fc7'))
self.assertListEqual(net.get_shape().as_list(),
[batch_size, 1, 1, 4096])
def testModelVariables(self):
batch_size = 5
height, width = 224, 224
num_classes = 1000
with self.test_session():
inputs = tf.random.uniform((batch_size, height, width, 3))
alexnet.alexnet_v2(inputs, num_classes)
expected_names = ['alexnet_v2/conv1/weights',
'alexnet_v2/conv1/biases',
'alexnet_v2/conv2/weights',
'alexnet_v2/conv2/biases',
'alexnet_v2/conv3/weights',
'alexnet_v2/conv3/biases',
'alexnet_v2/conv4/weights',
'alexnet_v2/conv4/biases',
'alexnet_v2/conv5/weights',
'alexnet_v2/conv5/biases',
'alexnet_v2/fc6/weights',
'alexnet_v2/fc6/biases',
'alexnet_v2/fc7/weights',
'alexnet_v2/fc7/biases',
'alexnet_v2/fc8/weights',
'alexnet_v2/fc8/biases',
]
model_variables = [v.op.name for v in slim.get_model_variables()]
self.assertSetEqual(set(model_variables), set(expected_names))
def testEvaluation(self):
batch_size = 2
height, width = 224, 224
num_classes = 1000
with self.test_session():
eval_inputs = tf.random.uniform((batch_size, height, width, 3))
logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False)
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
predictions = tf.argmax(input=logits, axis=1)
self.assertListEqual(predictions.get_shape().as_list(), [batch_size])
def testTrainEvalWithReuse(self):
train_batch_size = 2
eval_batch_size = 1
train_height, train_width = 224, 224
eval_height, eval_width = 300, 400
num_classes = 1000
with self.test_session():
train_inputs = tf.random.uniform(
(train_batch_size, train_height, train_width, 3))
logits, _ = alexnet.alexnet_v2(train_inputs)
self.assertListEqual(logits.get_shape().as_list(),
[train_batch_size, num_classes])
tf.compat.v1.get_variable_scope().reuse_variables()
eval_inputs = tf.random.uniform(
(eval_batch_size, eval_height, eval_width, 3))
logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False,
spatial_squeeze=False)
self.assertListEqual(logits.get_shape().as_list(),
[eval_batch_size, 4, 7, num_classes])
logits = tf.reduce_mean(input_tensor=logits, axis=[1, 2])
predictions = tf.argmax(input=logits, axis=1)
self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])
def testForward(self):
batch_size = 1
height, width = 224, 224
with self.test_session() as sess:
inputs = tf.random.uniform((batch_size, height, width, 3))
logits, _ = alexnet.alexnet_v2(inputs)
sess.run(tf.compat.v1.global_variables_initializer())
output = sess.run(logits)
self.assertTrue(output.any())
if __name__ == '__main__':
tf.test.main()
@@ -0,0 +1,123 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains a variant of the CIFAR-10 model definition."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
slim = contrib_slim
# pylint: disable=g-long-lambda
trunc_normal = lambda stddev: tf.compat.v1.truncated_normal_initializer(
stddev=stddev)
def cifarnet(images, num_classes=10, is_training=False,
dropout_keep_prob=0.5,
prediction_fn=slim.softmax,
scope='CifarNet'):
"""Creates a variant of the CifarNet model.
Note that since the output is a set of 'logits', the values fall in the
interval of (-infinity, infinity). Consequently, to convert the outputs to a
probability distribution over the characters, one will need to convert them
using the softmax function:
logits = cifarnet.cifarnet(images, is_training=False)
probabilities = tf.nn.softmax(logits)
predictions = tf.argmax(logits, 1)
Args:
images: A batch of `Tensors` of size [batch_size, height, width, channels].
num_classes: the number of classes in the dataset. If 0 or None, the logits
layer is omitted and the input features to the logits layer are returned
instead.
is_training: specifies whether or not we're currently training the model.
This variable will determine the behaviour of the dropout layer.
dropout_keep_prob: the percentage of activation values that are retained.
prediction_fn: a function to get predictions out of logits.
scope: Optional variable_scope.
Returns:
net: a 2D Tensor with the logits (pre-softmax activations) if num_classes
is a non-zero integer, or the input to the logits layer if num_classes
is 0 or None.
end_points: a dictionary from components of the network to the corresponding
activation.
"""
end_points = {}
with tf.compat.v1.variable_scope(scope, 'CifarNet', [images]):
net = slim.conv2d(images, 64, [5, 5], scope='conv1')
end_points['conv1'] = net
net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
end_points['pool1'] = net
net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1')
net = slim.conv2d(net, 64, [5, 5], scope='conv2')
end_points['conv2'] = net
net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm2')
net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
end_points['pool2'] = net
net = slim.flatten(net)
end_points['Flatten'] = net
net = slim.fully_connected(net, 384, scope='fc3')
end_points['fc3'] = net
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='dropout3')
net = slim.fully_connected(net, 192, scope='fc4')
end_points['fc4'] = net
if not num_classes:
return net, end_points
logits = slim.fully_connected(
net,
num_classes,
biases_initializer=tf.compat.v1.zeros_initializer(),
weights_initializer=trunc_normal(1 / 192.0),
weights_regularizer=None,
activation_fn=None,
scope='logits')
end_points['Logits'] = logits
end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
return logits, end_points
cifarnet.default_image_size = 32
def cifarnet_arg_scope(weight_decay=0.004):
"""Defines the default cifarnet argument scope.
Args:
weight_decay: The weight decay to use for regularizing the model.
Returns:
An `arg_scope` to use for the inception v3 model.
"""
with slim.arg_scope(
[slim.conv2d],
weights_initializer=tf.compat.v1.truncated_normal_initializer(
stddev=5e-2),
activation_fn=tf.nn.relu):
with slim.arg_scope(
[slim.fully_connected],
biases_initializer=tf.compat.v1.constant_initializer(0.1),
weights_initializer=trunc_normal(0.04),
weights_regularizer=slim.l2_regularizer(weight_decay),
activation_fn=tf.nn.relu) as sc:
return sc
@@ -0,0 +1,280 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Defines the CycleGAN generator and discriminator networks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
from tensorflow.contrib import framework as contrib_framework
from tensorflow.contrib import layers as contrib_layers
from tensorflow.contrib import util as contrib_util
layers = contrib_layers
def cyclegan_arg_scope(instance_norm_center=True,
instance_norm_scale=True,
instance_norm_epsilon=0.001,
weights_init_stddev=0.02,
weight_decay=0.0):
"""Returns a default argument scope for all generators and discriminators.
Args:
instance_norm_center: Whether instance normalization applies centering.
instance_norm_scale: Whether instance normalization applies scaling.
instance_norm_epsilon: Small float added to the variance in the instance
normalization to avoid dividing by zero.
weights_init_stddev: Standard deviation of the random values to initialize
the convolution kernels with.
weight_decay: Magnitude of weight decay applied to all convolution kernel
variables of the generator.
Returns:
An arg-scope.
"""
instance_norm_params = {
'center': instance_norm_center,
'scale': instance_norm_scale,
'epsilon': instance_norm_epsilon,
}
weights_regularizer = None
if weight_decay and weight_decay > 0.0:
weights_regularizer = layers.l2_regularizer(weight_decay)
with contrib_framework.arg_scope(
[layers.conv2d],
normalizer_fn=layers.instance_norm,
normalizer_params=instance_norm_params,
weights_initializer=tf.compat.v1.random_normal_initializer(
0, weights_init_stddev),
weights_regularizer=weights_regularizer) as sc:
return sc
def cyclegan_upsample(net, num_outputs, stride, method='conv2d_transpose',
pad_mode='REFLECT', align_corners=False):
"""Upsamples the given inputs.
Args:
net: A Tensor of size [batch_size, height, width, filters].
num_outputs: The number of output filters.
stride: A list of 2 scalars or a 1x2 Tensor indicating the scale,
relative to the inputs, of the output dimensions. For example, if kernel
size is [2, 3], then the output height and width will be twice and three
times the input size.
method: The upsampling method: 'nn_upsample_conv', 'bilinear_upsample_conv',
or 'conv2d_transpose'.
pad_mode: mode for tf.pad, one of "CONSTANT", "REFLECT", or "SYMMETRIC".
align_corners: option for method, 'bilinear_upsample_conv'. If true, the
centers of the 4 corner pixels of the input and output tensors are
aligned, preserving the values at the corner pixels.
Returns:
A Tensor which was upsampled using the specified method.
Raises:
ValueError: if `method` is not recognized.
"""
with tf.compat.v1.variable_scope('upconv'):
net_shape = tf.shape(input=net)
height = net_shape[1]
width = net_shape[2]
# Reflection pad by 1 in spatial dimensions (axes 1, 2 = h, w) to make a 3x3
# 'valid' convolution produce an output with the same dimension as the
# input.
spatial_pad_1 = np.array([[0, 0], [1, 1], [1, 1], [0, 0]])
if method == 'nn_upsample_conv':
net = tf.image.resize(
net, [stride[0] * height, stride[1] * width],
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
net = tf.pad(tensor=net, paddings=spatial_pad_1, mode=pad_mode)
net = layers.conv2d(net, num_outputs, kernel_size=[3, 3], padding='valid')
elif method == 'bilinear_upsample_conv':
net = tf.compat.v1.image.resize_bilinear(
net, [stride[0] * height, stride[1] * width],
align_corners=align_corners)
net = tf.pad(tensor=net, paddings=spatial_pad_1, mode=pad_mode)
net = layers.conv2d(net, num_outputs, kernel_size=[3, 3], padding='valid')
elif method == 'conv2d_transpose':
# This corrects 1 pixel offset for images with even width and height.
# conv2d is left aligned and conv2d_transpose is right aligned for even
# sized images (while doing 'SAME' padding).
# Note: This doesn't reflect actual model in paper.
net = layers.conv2d_transpose(
net, num_outputs, kernel_size=[3, 3], stride=stride, padding='valid')
net = net[:, 1:, 1:, :]
else:
raise ValueError('Unknown method: [%s]' % method)
return net
def _dynamic_or_static_shape(tensor):
shape = tf.shape(input=tensor)
static_shape = contrib_util.constant_value(shape)
return static_shape if static_shape is not None else shape
def cyclegan_generator_resnet(images,
arg_scope_fn=cyclegan_arg_scope,
num_resnet_blocks=6,
num_filters=64,
upsample_fn=cyclegan_upsample,
kernel_size=3,
tanh_linear_slope=0.0,
is_training=False):
"""Defines the cyclegan resnet network architecture.
As closely as possible following
https://github.com/junyanz/CycleGAN/blob/master/models/architectures.lua#L232
FYI: This network requires input height and width to be divisible by 4 in
order to generate an output with shape equal to input shape. Assertions will
catch this if input dimensions are known at graph construction time, but
there's no protection if unknown at graph construction time (you'll see an
error).
Args:
images: Input image tensor of shape [batch_size, h, w, 3].
arg_scope_fn: Function to create the global arg_scope for the network.
num_resnet_blocks: Number of ResNet blocks in the middle of the generator.
num_filters: Number of filters of the first hidden layer.
upsample_fn: Upsampling function for the decoder part of the generator.
kernel_size: Size w or list/tuple [h, w] of the filter kernels for all inner
layers.
tanh_linear_slope: Slope of the linear function to add to the tanh over the
logits.
is_training: Whether the network is created in training mode or inference
only mode. Not actually needed, just for compliance with other generator
network functions.
Returns:
A `Tensor` representing the model output and a dictionary of model end
points.
Raises:
ValueError: If the input height or width is known at graph construction time
and not a multiple of 4.
"""
# Neither dropout nor batch norm -> dont need is_training
del is_training
end_points = {}
input_size = images.shape.as_list()
height, width = input_size[1], input_size[2]
if height and height % 4 != 0:
raise ValueError('The input height must be a multiple of 4.')
if width and width % 4 != 0:
raise ValueError('The input width must be a multiple of 4.')
num_outputs = input_size[3]
if not isinstance(kernel_size, (list, tuple)):
kernel_size = [kernel_size, kernel_size]
kernel_height = kernel_size[0]
kernel_width = kernel_size[1]
pad_top = (kernel_height - 1) // 2
pad_bottom = kernel_height // 2
pad_left = (kernel_width - 1) // 2
pad_right = kernel_width // 2
paddings = np.array(
[[0, 0], [pad_top, pad_bottom], [pad_left, pad_right], [0, 0]],
dtype=np.int32)
spatial_pad_3 = np.array([[0, 0], [3, 3], [3, 3], [0, 0]])
with contrib_framework.arg_scope(arg_scope_fn()):
###########
# Encoder #
###########
with tf.compat.v1.variable_scope('input'):
# 7x7 input stage
net = tf.pad(tensor=images, paddings=spatial_pad_3, mode='REFLECT')
net = layers.conv2d(net, num_filters, kernel_size=[7, 7], padding='VALID')
end_points['encoder_0'] = net
with tf.compat.v1.variable_scope('encoder'):
with contrib_framework.arg_scope([layers.conv2d],
kernel_size=kernel_size,
stride=2,
activation_fn=tf.nn.relu,
padding='VALID'):
net = tf.pad(tensor=net, paddings=paddings, mode='REFLECT')
net = layers.conv2d(net, num_filters * 2)
end_points['encoder_1'] = net
net = tf.pad(tensor=net, paddings=paddings, mode='REFLECT')
net = layers.conv2d(net, num_filters * 4)
end_points['encoder_2'] = net
###################
# Residual Blocks #
###################
with tf.compat.v1.variable_scope('residual_blocks'):
with contrib_framework.arg_scope([layers.conv2d],
kernel_size=kernel_size,
stride=1,
activation_fn=tf.nn.relu,
padding='VALID'):
for block_id in xrange(num_resnet_blocks):
with tf.compat.v1.variable_scope('block_{}'.format(block_id)):
res_net = tf.pad(tensor=net, paddings=paddings, mode='REFLECT')
res_net = layers.conv2d(res_net, num_filters * 4)
res_net = tf.pad(tensor=res_net, paddings=paddings, mode='REFLECT')
res_net = layers.conv2d(res_net, num_filters * 4,
activation_fn=None)
net += res_net
end_points['resnet_block_%d' % block_id] = net
###########
# Decoder #
###########
with tf.compat.v1.variable_scope('decoder'):
with contrib_framework.arg_scope([layers.conv2d],
kernel_size=kernel_size,
stride=1,
activation_fn=tf.nn.relu):
with tf.compat.v1.variable_scope('decoder1'):
net = upsample_fn(net, num_outputs=num_filters * 2, stride=[2, 2])
end_points['decoder1'] = net
with tf.compat.v1.variable_scope('decoder2'):
net = upsample_fn(net, num_outputs=num_filters, stride=[2, 2])
end_points['decoder2'] = net
with tf.compat.v1.variable_scope('output'):
net = tf.pad(tensor=net, paddings=spatial_pad_3, mode='REFLECT')
logits = layers.conv2d(
net,
num_outputs, [7, 7],
activation_fn=None,
normalizer_fn=None,
padding='valid')
logits = tf.reshape(logits, _dynamic_or_static_shape(images))
end_points['logits'] = logits
end_points['predictions'] = tf.tanh(logits) + logits * tanh_linear_slope
return end_points['predictions'], end_points
@@ -0,0 +1,110 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow.contrib.slim.nets.cyclegan."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from nets import cyclegan
# TODO(joelshor): Add a test to check generator endpoints.
class CycleganTest(tf.test.TestCase):
def test_generator_inference(self):
"""Check one inference step."""
img_batch = tf.zeros([2, 32, 32, 3])
model_output, _ = cyclegan.cyclegan_generator_resnet(img_batch)
with self.test_session() as sess:
sess.run(tf.compat.v1.global_variables_initializer())
sess.run(model_output)
def _test_generator_graph_helper(self, shape):
"""Check that generator can take small and non-square inputs."""
output_imgs, _ = cyclegan.cyclegan_generator_resnet(tf.ones(shape))
self.assertAllEqual(shape, output_imgs.shape.as_list())
def test_generator_graph_small(self):
self._test_generator_graph_helper([4, 32, 32, 3])
def test_generator_graph_medium(self):
self._test_generator_graph_helper([3, 128, 128, 3])
def test_generator_graph_nonsquare(self):
self._test_generator_graph_helper([2, 80, 400, 3])
def test_generator_unknown_batch_dim(self):
"""Check that generator can take unknown batch dimension inputs."""
img = tf.compat.v1.placeholder(tf.float32, shape=[None, 32, None, 3])
output_imgs, _ = cyclegan.cyclegan_generator_resnet(img)
self.assertAllEqual([None, 32, None, 3], output_imgs.shape.as_list())
def _input_and_output_same_shape_helper(self, kernel_size):
img_batch = tf.compat.v1.placeholder(tf.float32, shape=[None, 32, 32, 3])
output_img_batch, _ = cyclegan.cyclegan_generator_resnet(
img_batch, kernel_size=kernel_size)
self.assertAllEqual(img_batch.shape.as_list(),
output_img_batch.shape.as_list())
def input_and_output_same_shape_kernel3(self):
self._input_and_output_same_shape_helper(3)
def input_and_output_same_shape_kernel4(self):
self._input_and_output_same_shape_helper(4)
def input_and_output_same_shape_kernel5(self):
self._input_and_output_same_shape_helper(5)
def input_and_output_same_shape_kernel6(self):
self._input_and_output_same_shape_helper(6)
def _error_if_height_not_multiple_of_four_helper(self, height):
self.assertRaisesRegexp(
ValueError, 'The input height must be a multiple of 4.',
cyclegan.cyclegan_generator_resnet,
tf.compat.v1.placeholder(tf.float32, shape=[None, height, 32, 3]))
def test_error_if_height_not_multiple_of_four_height29(self):
self._error_if_height_not_multiple_of_four_helper(29)
def test_error_if_height_not_multiple_of_four_height30(self):
self._error_if_height_not_multiple_of_four_helper(30)
def test_error_if_height_not_multiple_of_four_height31(self):
self._error_if_height_not_multiple_of_four_helper(31)
def _error_if_width_not_multiple_of_four_helper(self, width):
self.assertRaisesRegexp(
ValueError, 'The input width must be a multiple of 4.',
cyclegan.cyclegan_generator_resnet,
tf.compat.v1.placeholder(tf.float32, shape=[None, 32, width, 3]))
def test_error_if_width_not_multiple_of_four_width29(self):
self._error_if_width_not_multiple_of_four_helper(29)
def test_error_if_width_not_multiple_of_four_width30(self):
self._error_if_width_not_multiple_of_four_helper(30)
def test_error_if_width_not_multiple_of_four_width31(self):
self._error_if_width_not_multiple_of_four_helper(31)
if __name__ == '__main__':
tf.test.main()
@@ -0,0 +1,205 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DCGAN generator and discriminator from https://arxiv.org/abs/1511.06434."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from math import log
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
slim = contrib_slim
def _validate_image_inputs(inputs):
inputs.get_shape().assert_has_rank(4)
inputs.get_shape()[1:3].assert_is_fully_defined()
if inputs.get_shape()[1] != inputs.get_shape()[2]:
raise ValueError('Input tensor does not have equal width and height: ',
inputs.get_shape()[1:3])
width = inputs.get_shape().as_list()[1]
if log(width, 2) != int(log(width, 2)):
raise ValueError('Input tensor `width` is not a power of 2: ', width)
# TODO(joelshor): Use fused batch norm by default. Investigate why some GAN
# setups need the gradient of gradient FusedBatchNormGrad.
def discriminator(inputs,
depth=64,
is_training=True,
reuse=None,
scope='Discriminator',
fused_batch_norm=False):
"""Discriminator network for DCGAN.
Construct discriminator network from inputs to the final endpoint.
Args:
inputs: A tensor of size [batch_size, height, width, channels]. Must be
floating point.
depth: Number of channels in first convolution layer.
is_training: Whether the network is for training or not.
reuse: Whether or not the network variables should be reused. `scope`
must be given to be reused.
scope: Optional variable_scope.
fused_batch_norm: If `True`, use a faster, fused implementation of
batch norm.
Returns:
logits: The pre-softmax activations, a tensor of size [batch_size, 1]
end_points: a dictionary from components of the network to their activation.
Raises:
ValueError: If the input image shape is not 4-dimensional, if the spatial
dimensions aren't defined at graph construction time, if the spatial
dimensions aren't square, or if the spatial dimensions aren't a power of
two.
"""
normalizer_fn = slim.batch_norm
normalizer_fn_args = {
'is_training': is_training,
'zero_debias_moving_mean': True,
'fused': fused_batch_norm,
}
_validate_image_inputs(inputs)
inp_shape = inputs.get_shape().as_list()[1]
end_points = {}
with tf.compat.v1.variable_scope(
scope, values=[inputs], reuse=reuse) as scope:
with slim.arg_scope([normalizer_fn], **normalizer_fn_args):
with slim.arg_scope([slim.conv2d],
stride=2,
kernel_size=4,
activation_fn=tf.nn.leaky_relu):
net = inputs
for i in xrange(int(log(inp_shape, 2))):
scope = 'conv%i' % (i + 1)
current_depth = depth * 2**i
normalizer_fn_ = None if i == 0 else normalizer_fn
net = slim.conv2d(
net, current_depth, normalizer_fn=normalizer_fn_, scope=scope)
end_points[scope] = net
logits = slim.conv2d(net, 1, kernel_size=1, stride=1, padding='VALID',
normalizer_fn=None, activation_fn=None)
logits = tf.reshape(logits, [-1, 1])
end_points['logits'] = logits
return logits, end_points
# TODO(joelshor): Use fused batch norm by default. Investigate why some GAN
# setups need the gradient of gradient FusedBatchNormGrad.
def generator(inputs,
depth=64,
final_size=32,
num_outputs=3,
is_training=True,
reuse=None,
scope='Generator',
fused_batch_norm=False):
"""Generator network for DCGAN.
Construct generator network from inputs to the final endpoint.
Args:
inputs: A tensor with any size N. [batch_size, N]
depth: Number of channels in last deconvolution layer.
final_size: The shape of the final output.
num_outputs: Number of output features. For images, this is the number of
channels.
is_training: whether is training or not.
reuse: Whether or not the network has its variables should be reused. scope
must be given to be reused.
scope: Optional variable_scope.
fused_batch_norm: If `True`, use a faster, fused implementation of
batch norm.
Returns:
logits: the pre-softmax activations, a tensor of size
[batch_size, 32, 32, channels]
end_points: a dictionary from components of the network to their activation.
Raises:
ValueError: If `inputs` is not 2-dimensional.
ValueError: If `final_size` isn't a power of 2 or is less than 8.
"""
normalizer_fn = slim.batch_norm
normalizer_fn_args = {
'is_training': is_training,
'zero_debias_moving_mean': True,
'fused': fused_batch_norm,
}
inputs.get_shape().assert_has_rank(2)
if log(final_size, 2) != int(log(final_size, 2)):
raise ValueError('`final_size` (%i) must be a power of 2.' % final_size)
if final_size < 8:
raise ValueError('`final_size` (%i) must be greater than 8.' % final_size)
end_points = {}
num_layers = int(log(final_size, 2)) - 1
with tf.compat.v1.variable_scope(
scope, values=[inputs], reuse=reuse) as scope:
with slim.arg_scope([normalizer_fn], **normalizer_fn_args):
with slim.arg_scope([slim.conv2d_transpose],
normalizer_fn=normalizer_fn,
stride=2,
kernel_size=4):
net = tf.expand_dims(tf.expand_dims(inputs, 1), 1)
# First upscaling is different because it takes the input vector.
current_depth = depth * 2 ** (num_layers - 1)
scope = 'deconv1'
net = slim.conv2d_transpose(
net, current_depth, stride=1, padding='VALID', scope=scope)
end_points[scope] = net
for i in xrange(2, num_layers):
scope = 'deconv%i' % (i)
current_depth = depth * 2 ** (num_layers - i)
net = slim.conv2d_transpose(net, current_depth, scope=scope)
end_points[scope] = net
# Last layer has different normalizer and activation.
scope = 'deconv%i' % (num_layers)
net = slim.conv2d_transpose(
net, depth, normalizer_fn=None, activation_fn=None, scope=scope)
end_points[scope] = net
# Convert to proper channels.
scope = 'logits'
logits = slim.conv2d(
net,
num_outputs,
normalizer_fn=None,
activation_fn=None,
kernel_size=1,
stride=1,
padding='VALID',
scope=scope)
end_points[scope] = logits
logits.get_shape().assert_has_rank(4)
logits.get_shape().assert_is_compatible_with(
[None, final_size, final_size, num_outputs])
return logits, end_points
@@ -0,0 +1,121 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for dcgan."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
from nets import dcgan
class DCGANTest(tf.test.TestCase):
def test_generator_run(self):
tf.compat.v1.set_random_seed(1234)
noise = tf.random.normal([100, 64])
image, _ = dcgan.generator(noise)
with self.test_session() as sess:
sess.run(tf.compat.v1.global_variables_initializer())
image.eval()
def test_generator_graph(self):
tf.compat.v1.set_random_seed(1234)
# Check graph construction for a number of image size/depths and batch
# sizes.
for i, batch_size in zip(xrange(3, 7), xrange(3, 8)):
tf.compat.v1.reset_default_graph()
final_size = 2 ** i
noise = tf.random.normal([batch_size, 64])
image, end_points = dcgan.generator(
noise,
depth=32,
final_size=final_size)
self.assertAllEqual([batch_size, final_size, final_size, 3],
image.shape.as_list())
expected_names = ['deconv%i' % j for j in xrange(1, i)] + ['logits']
self.assertSetEqual(set(expected_names), set(end_points.keys()))
# Check layer depths.
for j in range(1, i):
layer = end_points['deconv%i' % j]
self.assertEqual(32 * 2**(i-j-1), layer.get_shape().as_list()[-1])
def test_generator_invalid_input(self):
wrong_dim_input = tf.zeros([5, 32, 32])
with self.assertRaises(ValueError):
dcgan.generator(wrong_dim_input)
correct_input = tf.zeros([3, 2])
with self.assertRaisesRegexp(ValueError, 'must be a power of 2'):
dcgan.generator(correct_input, final_size=30)
with self.assertRaisesRegexp(ValueError, 'must be greater than 8'):
dcgan.generator(correct_input, final_size=4)
def test_discriminator_run(self):
image = tf.random.uniform([5, 32, 32, 3], -1, 1)
output, _ = dcgan.discriminator(image)
with self.test_session() as sess:
sess.run(tf.compat.v1.global_variables_initializer())
output.eval()
def test_discriminator_graph(self):
# Check graph construction for a number of image size/depths and batch
# sizes.
for i, batch_size in zip(xrange(1, 6), xrange(3, 8)):
tf.compat.v1.reset_default_graph()
img_w = 2 ** i
image = tf.random.uniform([batch_size, img_w, img_w, 3], -1, 1)
output, end_points = dcgan.discriminator(
image,
depth=32)
self.assertAllEqual([batch_size, 1], output.get_shape().as_list())
expected_names = ['conv%i' % j for j in xrange(1, i+1)] + ['logits']
self.assertSetEqual(set(expected_names), set(end_points.keys()))
# Check layer depths.
for j in range(1, i+1):
layer = end_points['conv%i' % j]
self.assertEqual(32 * 2**(j-1), layer.get_shape().as_list()[-1])
def test_discriminator_invalid_input(self):
wrong_dim_img = tf.zeros([5, 32, 32])
with self.assertRaises(ValueError):
dcgan.discriminator(wrong_dim_img)
spatially_undefined_shape = tf.compat.v1.placeholder(
tf.float32, [5, 32, None, 3])
with self.assertRaises(ValueError):
dcgan.discriminator(spatially_undefined_shape)
not_square = tf.zeros([5, 32, 16, 3])
with self.assertRaisesRegexp(ValueError, 'not have equal width and height'):
dcgan.discriminator(not_square)
not_power_2 = tf.zeros([5, 30, 30, 3])
with self.assertRaisesRegexp(ValueError, 'not a power of 2'):
dcgan.discriminator(not_power_2)
if __name__ == '__main__':
tf.test.main()
@@ -0,0 +1,181 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definition for Inflated 3D Inception V1 (I3D).
The network architecture is proposed by:
Joao Carreira and Andrew Zisserman,
Quo Vadis, Action Recognition? A New Model and the Kinetics Dataset.
https://arxiv.org/abs/1705.07750
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import i3d_utils
from nets import s3dg
slim = contrib_slim
# pylint: disable=g-long-lambda
trunc_normal = lambda stddev: tf.compat.v1.truncated_normal_initializer(
0.0, stddev)
conv3d_spatiotemporal = i3d_utils.conv3d_spatiotemporal
def i3d_arg_scope(weight_decay=1e-7,
batch_norm_decay=0.999,
batch_norm_epsilon=0.001,
use_renorm=False,
separable_conv3d=False):
"""Defines default arg_scope for I3D.
Args:
weight_decay: The weight decay to use for regularizing the model.
batch_norm_decay: Decay for batch norm moving average.
batch_norm_epsilon: Small float added to variance to avoid dividing by zero
in batch norm.
use_renorm: Whether to use batch renormalization or not.
separable_conv3d: Whether to use separable 3d Convs.
Returns:
sc: An arg_scope to use for the models.
"""
batch_norm_params = {
# Decay for the moving averages.
'decay': batch_norm_decay,
# epsilon to prevent 0s in variance.
'epsilon': batch_norm_epsilon,
# Turns off fused batch norm.
'fused': False,
'renorm': use_renorm,
# collection containing the moving mean and moving variance.
'variables_collections': {
'beta': None,
'gamma': None,
'moving_mean': ['moving_vars'],
'moving_variance': ['moving_vars'],
}
}
with slim.arg_scope(
[slim.conv3d, conv3d_spatiotemporal],
weights_regularizer=slim.l2_regularizer(weight_decay),
activation_fn=tf.nn.relu,
normalizer_fn=slim.batch_norm,
normalizer_params=batch_norm_params):
with slim.arg_scope(
[conv3d_spatiotemporal], separable=separable_conv3d) as sc:
return sc
def i3d_base(inputs, final_endpoint='Mixed_5c',
scope='InceptionV1'):
"""Defines the I3D base architecture.
Note that we use the names as defined in Inception V1 to facilitate checkpoint
conversion from an image-trained Inception V1 checkpoint to I3D checkpoint.
Args:
inputs: A 5-D float tensor of size [batch_size, num_frames, height, width,
channels].
final_endpoint: Specifies the endpoint to construct the network up to. It
can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c']
scope: Optional variable_scope.
Returns:
A dictionary from components of the network to the corresponding activation.
Raises:
ValueError: if final_endpoint is not set to one of the predefined values.
"""
return s3dg.s3dg_base(
inputs,
first_temporal_kernel_size=7,
temporal_conv_startat='Conv2d_2c_3x3',
gating_startat=None,
final_endpoint=final_endpoint,
min_depth=16,
depth_multiplier=1.0,
data_format='NDHWC',
scope=scope)
def i3d(inputs,
num_classes=1000,
dropout_keep_prob=0.8,
is_training=True,
prediction_fn=slim.softmax,
spatial_squeeze=True,
reuse=None,
scope='InceptionV1'):
"""Defines the I3D architecture.
The default image size used to train this network is 224x224.
Args:
inputs: A 5-D float tensor of size [batch_size, num_frames, height, width,
channels].
num_classes: number of predicted classes.
dropout_keep_prob: the percentage of activation values that are retained.
is_training: whether is training or not.
prediction_fn: a function to get predictions out of logits.
spatial_squeeze: if True, logits is of shape is [B, C], if false logits is
of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
reuse: whether or not the network and its variables should be reused. To be
able to reuse 'scope' must be given.
scope: Optional variable_scope.
Returns:
logits: the pre-softmax activations, a tensor of size
[batch_size, num_classes]
end_points: a dictionary from components of the network to the corresponding
activation.
"""
# Final pooling and prediction
with tf.compat.v1.variable_scope(
scope, 'InceptionV1', [inputs, num_classes], reuse=reuse) as scope:
with slim.arg_scope(
[slim.batch_norm, slim.dropout], is_training=is_training):
net, end_points = i3d_base(inputs, scope=scope)
with tf.compat.v1.variable_scope('Logits'):
kernel_size = i3d_utils.reduced_kernel_size_3d(net, [2, 7, 7])
net = slim.avg_pool3d(
net, kernel_size, stride=1, scope='AvgPool_0a_7x7')
net = slim.dropout(net, dropout_keep_prob, scope='Dropout_0b')
logits = slim.conv3d(
net,
num_classes, [1, 1, 1],
activation_fn=None,
normalizer_fn=None,
scope='Conv2d_0c_1x1')
# Temporal average pooling.
logits = tf.reduce_mean(input_tensor=logits, axis=1)
if spatial_squeeze:
logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
end_points['Logits'] = logits
end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
return logits, end_points
i3d.default_image_size = 224
@@ -0,0 +1,149 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for networks.i3d."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from nets import i3d
class I3DTest(tf.test.TestCase):
def testBuildClassificationNetwork(self):
batch_size = 5
num_frames = 64
height, width = 224, 224
num_classes = 1000
inputs = tf.random.uniform((batch_size, num_frames, height, width, 3))
logits, end_points = i3d.i3d(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('InceptionV1/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
self.assertTrue('Predictions' in end_points)
self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
[batch_size, num_classes])
def testBuildBaseNetwork(self):
batch_size = 5
num_frames = 64
height, width = 224, 224
inputs = tf.random.uniform((batch_size, num_frames, height, width, 3))
mixed_6c, end_points = i3d.i3d_base(inputs)
self.assertTrue(mixed_6c.op.name.startswith('InceptionV1/Mixed_5c'))
self.assertListEqual(mixed_6c.get_shape().as_list(),
[batch_size, 8, 7, 7, 1024])
expected_endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b',
'Mixed_3c', 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c',
'Mixed_4d', 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2',
'Mixed_5b', 'Mixed_5c']
self.assertItemsEqual(end_points.keys(), expected_endpoints)
def testBuildOnlyUptoFinalEndpoint(self):
batch_size = 5
num_frames = 64
height, width = 224, 224
endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d',
'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b',
'Mixed_5c']
for index, endpoint in enumerate(endpoints):
with tf.Graph().as_default():
inputs = tf.random.uniform((batch_size, num_frames, height, width, 3))
out_tensor, end_points = i3d.i3d_base(
inputs, final_endpoint=endpoint)
self.assertTrue(out_tensor.op.name.startswith(
'InceptionV1/' + endpoint))
self.assertItemsEqual(endpoints[:index+1], end_points)
def testBuildAndCheckAllEndPointsUptoMixed5c(self):
batch_size = 5
num_frames = 64
height, width = 224, 224
inputs = tf.random.uniform((batch_size, num_frames, height, width, 3))
_, end_points = i3d.i3d_base(inputs,
final_endpoint='Mixed_5c')
endpoints_shapes = {'Conv2d_1a_7x7': [5, 32, 112, 112, 64],
'MaxPool_2a_3x3': [5, 32, 56, 56, 64],
'Conv2d_2b_1x1': [5, 32, 56, 56, 64],
'Conv2d_2c_3x3': [5, 32, 56, 56, 192],
'MaxPool_3a_3x3': [5, 32, 28, 28, 192],
'Mixed_3b': [5, 32, 28, 28, 256],
'Mixed_3c': [5, 32, 28, 28, 480],
'MaxPool_4a_3x3': [5, 16, 14, 14, 480],
'Mixed_4b': [5, 16, 14, 14, 512],
'Mixed_4c': [5, 16, 14, 14, 512],
'Mixed_4d': [5, 16, 14, 14, 512],
'Mixed_4e': [5, 16, 14, 14, 528],
'Mixed_4f': [5, 16, 14, 14, 832],
'MaxPool_5a_2x2': [5, 8, 7, 7, 832],
'Mixed_5b': [5, 8, 7, 7, 832],
'Mixed_5c': [5, 8, 7, 7, 1024]}
self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
for endpoint_name, expected_shape in endpoints_shapes.iteritems():
self.assertTrue(endpoint_name in end_points)
self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
expected_shape)
def testHalfSizeImages(self):
batch_size = 5
num_frames = 64
height, width = 112, 112
inputs = tf.random.uniform((batch_size, num_frames, height, width, 3))
mixed_5c, _ = i3d.i3d_base(inputs)
self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c'))
self.assertListEqual(mixed_5c.get_shape().as_list(),
[batch_size, 8, 4, 4, 1024])
def testTenFrames(self):
batch_size = 5
num_frames = 10
height, width = 224, 224
inputs = tf.random.uniform((batch_size, num_frames, height, width, 3))
mixed_5c, _ = i3d.i3d_base(inputs)
self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c'))
self.assertListEqual(mixed_5c.get_shape().as_list(),
[batch_size, 2, 7, 7, 1024])
def testEvaluation(self):
batch_size = 2
num_frames = 64
height, width = 224, 224
num_classes = 1000
eval_inputs = tf.random.uniform((batch_size, num_frames, height, width, 3))
logits, _ = i3d.i3d(eval_inputs, num_classes,
is_training=False)
predictions = tf.argmax(input=logits, axis=1)
with self.test_session() as sess:
sess.run(tf.compat.v1.global_variables_initializer())
output = sess.run(predictions)
self.assertEquals(output.shape, (batch_size,))
if __name__ == '__main__':
tf.test.main()
@@ -0,0 +1,289 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for building I3D network models."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.contrib import framework as contrib_framework
from tensorflow.contrib import layers as contrib_layers
# Orignaly, add_arg_scope = slim.add_arg_scope and layers = slim, now switch to
# more update-to-date tf.contrib.* API.
add_arg_scope = contrib_framework.add_arg_scope
layers = contrib_layers
def center_initializer():
"""Centering Initializer for I3D.
This initializer allows identity mapping for temporal convolution at the
initialization, which is critical for a desired convergence behavior
for training a seprable I3D model.
The centering behavior of this initializer requires an odd-sized kernel,
typically set to 3.
Returns:
A weight initializer op used in temporal convolutional layers.
Raises:
ValueError: Input tensor data type has to be tf.float32.
ValueError: If input tensor is not a 5-D tensor.
ValueError: If input and output channel dimensions are different.
ValueError: If spatial kernel sizes are not 1.
ValueError: If temporal kernel size is even.
"""
def _initializer(shape, dtype=tf.float32, partition_info=None): # pylint: disable=unused-argument
"""Initializer op."""
if dtype != tf.float32 and dtype != tf.bfloat16:
raise ValueError(
'Input tensor data type has to be tf.float32 or tf.bfloat16.')
if len(shape) != 5:
raise ValueError('Input tensor has to be 5-D.')
if shape[3] != shape[4]:
raise ValueError('Input and output channel dimensions must be the same.')
if shape[1] != 1 or shape[2] != 1:
raise ValueError('Spatial kernel sizes must be 1 (pointwise conv).')
if shape[0] % 2 == 0:
raise ValueError('Temporal kernel size has to be odd.')
center_pos = int(shape[0] / 2)
init_mat = np.zeros(
[shape[0], shape[1], shape[2], shape[3], shape[4]], dtype=np.float32)
for i in range(0, shape[3]):
init_mat[center_pos, 0, 0, i, i] = 1.0
init_op = tf.constant(init_mat, dtype=dtype)
return init_op
return _initializer
@add_arg_scope
def conv3d_spatiotemporal(inputs,
num_outputs,
kernel_size,
stride=1,
padding='SAME',
activation_fn=None,
normalizer_fn=None,
normalizer_params=None,
weights_regularizer=None,
separable=False,
data_format='NDHWC',
scope=''):
"""A wrapper for conv3d to model spatiotemporal representations.
This allows switching between original 3D convolution and separable 3D
convolutions for spatial and temporal features respectively. On Kinetics,
seprable 3D convolutions yields better classification performance.
Args:
inputs: a 5-D tensor `[batch_size, depth, height, width, channels]`.
num_outputs: integer, the number of output filters.
kernel_size: a list of length 3
`[kernel_depth, kernel_height, kernel_width]` of the filters. Can be an
int if all values are the same.
stride: a list of length 3 `[stride_depth, stride_height, stride_width]`.
Can be an int if all strides are the same.
padding: one of `VALID` or `SAME`.
activation_fn: activation function.
normalizer_fn: normalization function to use instead of `biases`.
normalizer_params: dictionary of normalization function parameters.
weights_regularizer: Optional regularizer for the weights.
separable: If `True`, use separable spatiotemporal convolutions.
data_format: An optional string from: "NDHWC", "NCDHW". Defaults to "NDHWC".
The data format of the input and output data. With the default format
"NDHWC", the data is stored in the order of: [batch, in_depth, in_height,
in_width, in_channels]. Alternatively, the format could be "NCDHW", the
data storage order is:
[batch, in_channels, in_depth, in_height, in_width].
scope: scope for `variable_scope`.
Returns:
A tensor representing the output of the (separable) conv3d operation.
"""
assert len(kernel_size) == 3
if separable and kernel_size[0] != 1:
spatial_kernel_size = [1, kernel_size[1], kernel_size[2]]
temporal_kernel_size = [kernel_size[0], 1, 1]
if isinstance(stride, list) and len(stride) == 3:
spatial_stride = [1, stride[1], stride[2]]
temporal_stride = [stride[0], 1, 1]
else:
spatial_stride = [1, stride, stride]
temporal_stride = [stride, 1, 1]
net = layers.conv3d(
inputs,
num_outputs,
spatial_kernel_size,
stride=spatial_stride,
padding=padding,
activation_fn=activation_fn,
normalizer_fn=normalizer_fn,
normalizer_params=normalizer_params,
weights_regularizer=weights_regularizer,
data_format=data_format,
scope=scope)
net = layers.conv3d(
net,
num_outputs,
temporal_kernel_size,
stride=temporal_stride,
padding=padding,
scope=scope + '/temporal',
activation_fn=activation_fn,
normalizer_fn=None,
data_format=data_format,
weights_initializer=center_initializer())
return net
else:
return layers.conv3d(
inputs,
num_outputs,
kernel_size,
stride=stride,
padding=padding,
activation_fn=activation_fn,
normalizer_fn=normalizer_fn,
normalizer_params=normalizer_params,
weights_regularizer=weights_regularizer,
data_format=data_format,
scope=scope)
@add_arg_scope
def inception_block_v1_3d(inputs,
num_outputs_0_0a,
num_outputs_1_0a,
num_outputs_1_0b,
num_outputs_2_0a,
num_outputs_2_0b,
num_outputs_3_0b,
temporal_kernel_size=3,
self_gating_fn=None,
data_format='NDHWC',
scope=''):
"""A 3D Inception v1 block.
This allows use of separable 3D convolutions and self-gating, as
described in:
Saining Xie, Chen Sun, Jonathan Huang, Zhuowen Tu and Kevin Murphy,
Rethinking Spatiotemporal Feature Learning For Video Understanding.
https://arxiv.org/abs/1712.04851.
Args:
inputs: a 5-D tensor `[batch_size, depth, height, width, channels]`.
num_outputs_0_0a: integer, the number of output filters for Branch 0,
operation Conv2d_0a_1x1.
num_outputs_1_0a: integer, the number of output filters for Branch 1,
operation Conv2d_0a_1x1.
num_outputs_1_0b: integer, the number of output filters for Branch 1,
operation Conv2d_0b_3x3.
num_outputs_2_0a: integer, the number of output filters for Branch 2,
operation Conv2d_0a_1x1.
num_outputs_2_0b: integer, the number of output filters for Branch 2,
operation Conv2d_0b_3x3.
num_outputs_3_0b: integer, the number of output filters for Branch 3,
operation Conv2d_0b_1x1.
temporal_kernel_size: integer, the size of the temporal convolutional
filters in the conv3d_spatiotemporal blocks.
self_gating_fn: function which optionally performs self-gating.
Must have two arguments, `inputs` and `scope`, and return one output
tensor the same size as `inputs`. If `None`, no self-gating is
applied.
data_format: An optional string from: "NDHWC", "NCDHW". Defaults to "NDHWC".
The data format of the input and output data. With the default format
"NDHWC", the data is stored in the order of: [batch, in_depth, in_height,
in_width, in_channels]. Alternatively, the format could be "NCDHW", the
data storage order is:
[batch, in_channels, in_depth, in_height, in_width].
scope: scope for `variable_scope`.
Returns:
A 5-D tensor `[batch_size, depth, height, width, out_channels]`, where
`out_channels = num_outputs_0_0a + num_outputs_1_0b + num_outputs_2_0b
+ num_outputs_3_0b`.
"""
use_gating = self_gating_fn is not None
with tf.compat.v1.variable_scope(scope):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = layers.conv3d(
inputs, num_outputs_0_0a, [1, 1, 1], scope='Conv2d_0a_1x1')
if use_gating:
branch_0 = self_gating_fn(branch_0, scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = layers.conv3d(
inputs, num_outputs_1_0a, [1, 1, 1], scope='Conv2d_0a_1x1')
branch_1 = conv3d_spatiotemporal(
branch_1, num_outputs_1_0b, [temporal_kernel_size, 3, 3],
scope='Conv2d_0b_3x3')
if use_gating:
branch_1 = self_gating_fn(branch_1, scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = layers.conv3d(
inputs, num_outputs_2_0a, [1, 1, 1], scope='Conv2d_0a_1x1')
branch_2 = conv3d_spatiotemporal(
branch_2, num_outputs_2_0b, [temporal_kernel_size, 3, 3],
scope='Conv2d_0b_3x3')
if use_gating:
branch_2 = self_gating_fn(branch_2, scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = layers.max_pool3d(inputs, [3, 3, 3], scope='MaxPool_0a_3x3')
branch_3 = layers.conv3d(
branch_3, num_outputs_3_0b, [1, 1, 1], scope='Conv2d_0b_1x1')
if use_gating:
branch_3 = self_gating_fn(branch_3, scope='Conv2d_0b_1x1')
index_c = data_format.index('C')
assert 1 <= index_c <= 4, 'Cannot identify channel dimension.'
output = tf.concat([branch_0, branch_1, branch_2, branch_3], index_c)
return output
def reduced_kernel_size_3d(input_tensor, kernel_size):
"""Define kernel size which is automatically reduced for small input.
If the shape of the input images is unknown at graph construction time this
function assumes that the input images are large enough.
Args:
input_tensor: input tensor of size
[batch_size, time, height, width, channels].
kernel_size: desired kernel size of length 3, corresponding to time,
height and width.
Returns:
a tensor with the kernel size.
"""
assert len(kernel_size) == 3
shape = input_tensor.get_shape().as_list()
assert len(shape) == 5
if None in shape[1:4]:
kernel_size_out = kernel_size
else:
kernel_size_out = [min(shape[1], kernel_size[0]),
min(shape[2], kernel_size[1]),
min(shape[3], kernel_size[2])]
return kernel_size_out
@@ -0,0 +1,37 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Brings all inception models under one namespace."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# pylint: disable=unused-import
from nets.inception_resnet_v2 import inception_resnet_v2
from nets.inception_resnet_v2 import inception_resnet_v2_arg_scope
from nets.inception_resnet_v2 import inception_resnet_v2_base
from nets.inception_v1 import inception_v1
from nets.inception_v1 import inception_v1_arg_scope
from nets.inception_v1 import inception_v1_base
from nets.inception_v2 import inception_v2
from nets.inception_v2 import inception_v2_arg_scope
from nets.inception_v2 import inception_v2_base
from nets.inception_v3 import inception_v3
from nets.inception_v3 import inception_v3_arg_scope
from nets.inception_v3 import inception_v3_base
from nets.inception_v4 import inception_v4
from nets.inception_v4 import inception_v4_arg_scope
from nets.inception_v4 import inception_v4_base
# pylint: enable=unused-import
@@ -0,0 +1,408 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definition of the Inception Resnet V2 architecture.
As described in http://arxiv.org/abs/1602.07261.
Inception-v4, Inception-ResNet and the Impact of Residual Connections
on Learning
Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
slim = contrib_slim
def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
"""Builds the 35x35 resnet block."""
with tf.compat.v1.variable_scope(scope, 'Block35', [net], reuse=reuse):
with tf.compat.v1.variable_scope('Branch_0'):
tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
tower_conv2_1 = slim.conv2d(tower_conv2_0, 48, 3, scope='Conv2d_0b_3x3')
tower_conv2_2 = slim.conv2d(tower_conv2_1, 64, 3, scope='Conv2d_0c_3x3')
mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_1, tower_conv2_2])
up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
activation_fn=None, scope='Conv2d_1x1')
scaled_up = up * scale
if activation_fn == tf.nn.relu6:
# Use clip_by_value to simulate bandpass activation.
scaled_up = tf.clip_by_value(scaled_up, -6.0, 6.0)
net += scaled_up
if activation_fn:
net = activation_fn(net)
return net
def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
"""Builds the 17x17 resnet block."""
with tf.compat.v1.variable_scope(scope, 'Block17', [net], reuse=reuse):
with tf.compat.v1.variable_scope('Branch_0'):
tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1')
tower_conv1_1 = slim.conv2d(tower_conv1_0, 160, [1, 7],
scope='Conv2d_0b_1x7')
tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [7, 1],
scope='Conv2d_0c_7x1')
mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2])
up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
activation_fn=None, scope='Conv2d_1x1')
scaled_up = up * scale
if activation_fn == tf.nn.relu6:
# Use clip_by_value to simulate bandpass activation.
scaled_up = tf.clip_by_value(scaled_up, -6.0, 6.0)
net += scaled_up
if activation_fn:
net = activation_fn(net)
return net
def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
"""Builds the 8x8 resnet block."""
with tf.compat.v1.variable_scope(scope, 'Block8', [net], reuse=reuse):
with tf.compat.v1.variable_scope('Branch_0'):
tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1')
tower_conv1_1 = slim.conv2d(tower_conv1_0, 224, [1, 3],
scope='Conv2d_0b_1x3')
tower_conv1_2 = slim.conv2d(tower_conv1_1, 256, [3, 1],
scope='Conv2d_0c_3x1')
mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2])
up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
activation_fn=None, scope='Conv2d_1x1')
scaled_up = up * scale
if activation_fn == tf.nn.relu6:
# Use clip_by_value to simulate bandpass activation.
scaled_up = tf.clip_by_value(scaled_up, -6.0, 6.0)
net += scaled_up
if activation_fn:
net = activation_fn(net)
return net
def inception_resnet_v2_base(inputs,
final_endpoint='Conv2d_7b_1x1',
output_stride=16,
align_feature_maps=False,
scope=None,
activation_fn=tf.nn.relu):
"""Inception model from http://arxiv.org/abs/1602.07261.
Constructs an Inception Resnet v2 network from inputs to the given final
endpoint. This method can construct the network up to the final inception
block Conv2d_7b_1x1.
Args:
inputs: a tensor of size [batch_size, height, width, channels].
final_endpoint: specifies the endpoint to construct the network up to. It
can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',
'Mixed_5b', 'Mixed_6a', 'PreAuxLogits', 'Mixed_7a', 'Conv2d_7b_1x1']
output_stride: A scalar that specifies the requested ratio of input to
output spatial resolution. Only supports 8 and 16.
align_feature_maps: When true, changes all the VALID paddings in the network
to SAME padding so that the feature maps are aligned.
scope: Optional variable_scope.
activation_fn: Activation function for block scopes.
Returns:
tensor_out: output tensor corresponding to the final_endpoint.
end_points: a set of activations for external use, for example summaries or
losses.
Raises:
ValueError: if final_endpoint is not set to one of the predefined values,
or if the output_stride is not 8 or 16, or if the output_stride is 8 and
we request an end point after 'PreAuxLogits'.
"""
if output_stride != 8 and output_stride != 16:
raise ValueError('output_stride must be 8 or 16.')
padding = 'SAME' if align_feature_maps else 'VALID'
end_points = {}
def add_and_check_final(name, net):
end_points[name] = net
return name == final_endpoint
with tf.compat.v1.variable_scope(scope, 'InceptionResnetV2', [inputs]):
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
stride=1, padding='SAME'):
# 149 x 149 x 32
net = slim.conv2d(inputs, 32, 3, stride=2, padding=padding,
scope='Conv2d_1a_3x3')
if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points
# 147 x 147 x 32
net = slim.conv2d(net, 32, 3, padding=padding,
scope='Conv2d_2a_3x3')
if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points
# 147 x 147 x 64
net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points
# 73 x 73 x 64
net = slim.max_pool2d(net, 3, stride=2, padding=padding,
scope='MaxPool_3a_3x3')
if add_and_check_final('MaxPool_3a_3x3', net): return net, end_points
# 73 x 73 x 80
net = slim.conv2d(net, 80, 1, padding=padding,
scope='Conv2d_3b_1x1')
if add_and_check_final('Conv2d_3b_1x1', net): return net, end_points
# 71 x 71 x 192
net = slim.conv2d(net, 192, 3, padding=padding,
scope='Conv2d_4a_3x3')
if add_and_check_final('Conv2d_4a_3x3', net): return net, end_points
# 35 x 35 x 192
net = slim.max_pool2d(net, 3, stride=2, padding=padding,
scope='MaxPool_5a_3x3')
if add_and_check_final('MaxPool_5a_3x3', net): return net, end_points
# 35 x 35 x 320
with tf.compat.v1.variable_scope('Mixed_5b'):
with tf.compat.v1.variable_scope('Branch_0'):
tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1')
tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5,
scope='Conv2d_0b_5x5')
with tf.compat.v1.variable_scope('Branch_2'):
tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1')
tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3,
scope='Conv2d_0b_3x3')
tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3,
scope='Conv2d_0c_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME',
scope='AvgPool_0a_3x3')
tower_pool_1 = slim.conv2d(tower_pool, 64, 1,
scope='Conv2d_0b_1x1')
net = tf.concat(
[tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1], 3)
if add_and_check_final('Mixed_5b', net): return net, end_points
# TODO(alemi): Register intermediate endpoints
net = slim.repeat(net, 10, block35, scale=0.17,
activation_fn=activation_fn)
# 17 x 17 x 1088 if output_stride == 8,
# 33 x 33 x 1088 if output_stride == 16
use_atrous = output_stride == 8
with tf.compat.v1.variable_scope('Mixed_6a'):
with tf.compat.v1.variable_scope('Branch_0'):
tower_conv = slim.conv2d(net, 384, 3, stride=1 if use_atrous else 2,
padding=padding,
scope='Conv2d_1a_3x3')
with tf.compat.v1.variable_scope('Branch_1'):
tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3,
scope='Conv2d_0b_3x3')
tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3,
stride=1 if use_atrous else 2,
padding=padding,
scope='Conv2d_1a_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
tower_pool = slim.max_pool2d(net, 3, stride=1 if use_atrous else 2,
padding=padding,
scope='MaxPool_1a_3x3')
net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)
if add_and_check_final('Mixed_6a', net): return net, end_points
# TODO(alemi): register intermediate endpoints
with slim.arg_scope([slim.conv2d], rate=2 if use_atrous else 1):
net = slim.repeat(net, 20, block17, scale=0.10,
activation_fn=activation_fn)
if add_and_check_final('PreAuxLogits', net): return net, end_points
if output_stride == 8:
# TODO(gpapan): Properly support output_stride for the rest of the net.
raise ValueError('output_stride==8 is only supported up to the '
'PreAuxlogits end_point for now.')
# 8 x 8 x 2080
with tf.compat.v1.variable_scope('Mixed_7a'):
with tf.compat.v1.variable_scope('Branch_0'):
tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
padding=padding,
scope='Conv2d_1a_3x3')
with tf.compat.v1.variable_scope('Branch_1'):
tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2,
padding=padding,
scope='Conv2d_1a_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3,
scope='Conv2d_0b_3x3')
tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2,
padding=padding,
scope='Conv2d_1a_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
tower_pool = slim.max_pool2d(net, 3, stride=2,
padding=padding,
scope='MaxPool_1a_3x3')
net = tf.concat(
[tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3)
if add_and_check_final('Mixed_7a', net): return net, end_points
# TODO(alemi): register intermediate endpoints
net = slim.repeat(net, 9, block8, scale=0.20, activation_fn=activation_fn)
net = block8(net, activation_fn=None)
# 8 x 8 x 1536
net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
if add_and_check_final('Conv2d_7b_1x1', net): return net, end_points
raise ValueError('final_endpoint (%s) not recognized', final_endpoint)
def inception_resnet_v2(inputs, num_classes=1001, is_training=True,
dropout_keep_prob=0.8,
reuse=None,
scope='InceptionResnetV2',
create_aux_logits=True,
activation_fn=tf.nn.relu):
"""Creates the Inception Resnet V2 model.
Args:
inputs: a 4-D tensor of size [batch_size, height, width, 3].
Dimension batch_size may be undefined. If create_aux_logits is false,
also height and width may be undefined.
num_classes: number of predicted classes. If 0 or None, the logits layer
is omitted and the input features to the logits layer (before dropout)
are returned instead.
is_training: whether is training or not.
dropout_keep_prob: float, the fraction to keep before final layer.
reuse: whether or not the network and its variables should be reused. To be
able to reuse 'scope' must be given.
scope: Optional variable_scope.
create_aux_logits: Whether to include the auxilliary logits.
activation_fn: Activation function for conv2d.
Returns:
net: the output of the logits layer (if num_classes is a non-zero integer),
or the non-dropped-out input to the logits layer (if num_classes is 0 or
None).
end_points: the set of end_points from the inception model.
"""
end_points = {}
with tf.compat.v1.variable_scope(
scope, 'InceptionResnetV2', [inputs], reuse=reuse) as scope:
with slim.arg_scope([slim.batch_norm, slim.dropout],
is_training=is_training):
net, end_points = inception_resnet_v2_base(inputs, scope=scope,
activation_fn=activation_fn)
if create_aux_logits and num_classes:
with tf.compat.v1.variable_scope('AuxLogits'):
aux = end_points['PreAuxLogits']
aux = slim.avg_pool2d(aux, 5, stride=3, padding='VALID',
scope='Conv2d_1a_3x3')
aux = slim.conv2d(aux, 128, 1, scope='Conv2d_1b_1x1')
aux = slim.conv2d(aux, 768, aux.get_shape()[1:3],
padding='VALID', scope='Conv2d_2a_5x5')
aux = slim.flatten(aux)
aux = slim.fully_connected(aux, num_classes, activation_fn=None,
scope='Logits')
end_points['AuxLogits'] = aux
with tf.compat.v1.variable_scope('Logits'):
# TODO(sguada,arnoegw): Consider adding a parameter global_pool which
# can be set to False to disable pooling here (as in resnet_*()).
kernel_size = net.get_shape()[1:3]
if kernel_size.is_fully_defined():
net = slim.avg_pool2d(net, kernel_size, padding='VALID',
scope='AvgPool_1a_8x8')
else:
net = tf.reduce_mean(
input_tensor=net, axis=[1, 2], keepdims=True, name='global_pool')
end_points['global_pool'] = net
if not num_classes:
return net, end_points
net = slim.flatten(net)
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='Dropout')
end_points['PreLogitsFlatten'] = net
logits = slim.fully_connected(net, num_classes, activation_fn=None,
scope='Logits')
end_points['Logits'] = logits
end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions')
return logits, end_points
inception_resnet_v2.default_image_size = 299
def inception_resnet_v2_arg_scope(
weight_decay=0.00004,
batch_norm_decay=0.9997,
batch_norm_epsilon=0.001,
activation_fn=tf.nn.relu,
batch_norm_updates_collections=tf.compat.v1.GraphKeys.UPDATE_OPS,
batch_norm_scale=False):
"""Returns the scope with the default parameters for inception_resnet_v2.
Args:
weight_decay: the weight decay for weights variables.
batch_norm_decay: decay for the moving average of batch_norm momentums.
batch_norm_epsilon: small float added to variance to avoid dividing by zero.
activation_fn: Activation function for conv2d.
batch_norm_updates_collections: Collection for the update ops for
batch norm.
batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
activations in the batch normalization layer.
Returns:
a arg_scope with the parameters needed for inception_resnet_v2.
"""
# Set weight_decay for weights in conv2d and fully_connected layers.
with slim.arg_scope([slim.conv2d, slim.fully_connected],
weights_regularizer=slim.l2_regularizer(weight_decay),
biases_regularizer=slim.l2_regularizer(weight_decay)):
batch_norm_params = {
'decay': batch_norm_decay,
'epsilon': batch_norm_epsilon,
'updates_collections': batch_norm_updates_collections,
'fused': None, # Use fused batch norm if possible.
'scale': batch_norm_scale,
}
# Set activation_fn and parameters for batch_norm.
with slim.arg_scope([slim.conv2d], activation_fn=activation_fn,
normalizer_fn=slim.batch_norm,
normalizer_params=batch_norm_params) as scope:
return scope
@@ -0,0 +1,338 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for slim.inception_resnet_v2."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import inception
class InceptionTest(tf.test.TestCase):
def testBuildLogits(self):
batch_size = 5
height, width = 299, 299
num_classes = 1000
with self.test_session():
inputs = tf.random.uniform((batch_size, height, width, 3))
logits, endpoints = inception.inception_resnet_v2(inputs, num_classes)
self.assertTrue('AuxLogits' in endpoints)
auxlogits = endpoints['AuxLogits']
self.assertTrue(
auxlogits.op.name.startswith('InceptionResnetV2/AuxLogits'))
self.assertListEqual(auxlogits.get_shape().as_list(),
[batch_size, num_classes])
self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
def testBuildWithoutAuxLogits(self):
batch_size = 5
height, width = 299, 299
num_classes = 1000
with self.test_session():
inputs = tf.random.uniform((batch_size, height, width, 3))
logits, endpoints = inception.inception_resnet_v2(inputs, num_classes,
create_aux_logits=False)
self.assertTrue('AuxLogits' not in endpoints)
self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
def testBuildNoClasses(self):
batch_size = 5
height, width = 299, 299
num_classes = None
with self.test_session():
inputs = tf.random.uniform((batch_size, height, width, 3))
net, endpoints = inception.inception_resnet_v2(inputs, num_classes)
self.assertTrue('AuxLogits' not in endpoints)
self.assertTrue('Logits' not in endpoints)
self.assertTrue(
net.op.name.startswith('InceptionResnetV2/Logits/AvgPool'))
self.assertListEqual(net.get_shape().as_list(), [batch_size, 1, 1, 1536])
def testBuildEndPoints(self):
batch_size = 5
height, width = 299, 299
num_classes = 1000
with self.test_session():
inputs = tf.random.uniform((batch_size, height, width, 3))
_, end_points = inception.inception_resnet_v2(inputs, num_classes)
self.assertTrue('Logits' in end_points)
logits = end_points['Logits']
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
self.assertTrue('AuxLogits' in end_points)
aux_logits = end_points['AuxLogits']
self.assertListEqual(aux_logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Conv2d_7b_1x1']
self.assertListEqual(pre_pool.get_shape().as_list(),
[batch_size, 8, 8, 1536])
def testBuildBaseNetwork(self):
batch_size = 5
height, width = 299, 299
inputs = tf.random.uniform((batch_size, height, width, 3))
net, end_points = inception.inception_resnet_v2_base(inputs)
self.assertTrue(net.op.name.startswith('InceptionResnetV2/Conv2d_7b_1x1'))
self.assertListEqual(net.get_shape().as_list(),
[batch_size, 8, 8, 1536])
expected_endpoints = ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3',
'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_6a',
'PreAuxLogits', 'Mixed_7a', 'Conv2d_7b_1x1']
self.assertItemsEqual(end_points.keys(), expected_endpoints)
def testBuildOnlyUptoFinalEndpoint(self):
batch_size = 5
height, width = 299, 299
endpoints = ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3',
'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_6a',
'PreAuxLogits', 'Mixed_7a', 'Conv2d_7b_1x1']
for index, endpoint in enumerate(endpoints):
with tf.Graph().as_default():
inputs = tf.random.uniform((batch_size, height, width, 3))
out_tensor, end_points = inception.inception_resnet_v2_base(
inputs, final_endpoint=endpoint)
if endpoint != 'PreAuxLogits':
self.assertTrue(out_tensor.op.name.startswith(
'InceptionResnetV2/' + endpoint))
self.assertItemsEqual(endpoints[:index+1], end_points.keys())
def testBuildAndCheckAllEndPointsUptoPreAuxLogits(self):
batch_size = 5
height, width = 299, 299
inputs = tf.random.uniform((batch_size, height, width, 3))
_, end_points = inception.inception_resnet_v2_base(
inputs, final_endpoint='PreAuxLogits')
endpoints_shapes = {'Conv2d_1a_3x3': [5, 149, 149, 32],
'Conv2d_2a_3x3': [5, 147, 147, 32],
'Conv2d_2b_3x3': [5, 147, 147, 64],
'MaxPool_3a_3x3': [5, 73, 73, 64],
'Conv2d_3b_1x1': [5, 73, 73, 80],
'Conv2d_4a_3x3': [5, 71, 71, 192],
'MaxPool_5a_3x3': [5, 35, 35, 192],
'Mixed_5b': [5, 35, 35, 320],
'Mixed_6a': [5, 17, 17, 1088],
'PreAuxLogits': [5, 17, 17, 1088]
}
self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
for endpoint_name in endpoints_shapes:
expected_shape = endpoints_shapes[endpoint_name]
self.assertTrue(endpoint_name in end_points)
self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
expected_shape)
def testBuildAndCheckAllEndPointsUptoPreAuxLogitsWithAlignedFeatureMaps(self):
batch_size = 5
height, width = 299, 299
inputs = tf.random.uniform((batch_size, height, width, 3))
_, end_points = inception.inception_resnet_v2_base(
inputs, final_endpoint='PreAuxLogits', align_feature_maps=True)
endpoints_shapes = {'Conv2d_1a_3x3': [5, 150, 150, 32],
'Conv2d_2a_3x3': [5, 150, 150, 32],
'Conv2d_2b_3x3': [5, 150, 150, 64],
'MaxPool_3a_3x3': [5, 75, 75, 64],
'Conv2d_3b_1x1': [5, 75, 75, 80],
'Conv2d_4a_3x3': [5, 75, 75, 192],
'MaxPool_5a_3x3': [5, 38, 38, 192],
'Mixed_5b': [5, 38, 38, 320],
'Mixed_6a': [5, 19, 19, 1088],
'PreAuxLogits': [5, 19, 19, 1088]
}
self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
for endpoint_name in endpoints_shapes:
expected_shape = endpoints_shapes[endpoint_name]
self.assertTrue(endpoint_name in end_points)
self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
expected_shape)
def testBuildAndCheckAllEndPointsUptoPreAuxLogitsWithOutputStrideEight(self):
batch_size = 5
height, width = 299, 299
inputs = tf.random.uniform((batch_size, height, width, 3))
_, end_points = inception.inception_resnet_v2_base(
inputs, final_endpoint='PreAuxLogits', output_stride=8)
endpoints_shapes = {'Conv2d_1a_3x3': [5, 149, 149, 32],
'Conv2d_2a_3x3': [5, 147, 147, 32],
'Conv2d_2b_3x3': [5, 147, 147, 64],
'MaxPool_3a_3x3': [5, 73, 73, 64],
'Conv2d_3b_1x1': [5, 73, 73, 80],
'Conv2d_4a_3x3': [5, 71, 71, 192],
'MaxPool_5a_3x3': [5, 35, 35, 192],
'Mixed_5b': [5, 35, 35, 320],
'Mixed_6a': [5, 33, 33, 1088],
'PreAuxLogits': [5, 33, 33, 1088]
}
self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
for endpoint_name in endpoints_shapes:
expected_shape = endpoints_shapes[endpoint_name]
self.assertTrue(endpoint_name in end_points)
self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
expected_shape)
def testVariablesSetDevice(self):
batch_size = 5
height, width = 299, 299
num_classes = 1000
with self.test_session():
inputs = tf.random.uniform((batch_size, height, width, 3))
# Force all Variables to reside on the device.
with tf.compat.v1.variable_scope('on_cpu'), tf.device('/cpu:0'):
inception.inception_resnet_v2(inputs, num_classes)
with tf.compat.v1.variable_scope('on_gpu'), tf.device('/gpu:0'):
inception.inception_resnet_v2(inputs, num_classes)
for v in tf.compat.v1.get_collection(
tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope='on_cpu'):
self.assertDeviceEqual(v.device, '/cpu:0')
for v in tf.compat.v1.get_collection(
tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope='on_gpu'):
self.assertDeviceEqual(v.device, '/gpu:0')
def testHalfSizeImages(self):
batch_size = 5
height, width = 150, 150
num_classes = 1000
with self.test_session():
inputs = tf.random.uniform((batch_size, height, width, 3))
logits, end_points = inception.inception_resnet_v2(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Conv2d_7b_1x1']
self.assertListEqual(pre_pool.get_shape().as_list(),
[batch_size, 3, 3, 1536])
def testGlobalPool(self):
batch_size = 1
height, width = 330, 400
num_classes = 1000
with self.test_session():
inputs = tf.random.uniform((batch_size, height, width, 3))
logits, end_points = inception.inception_resnet_v2(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Conv2d_7b_1x1']
self.assertListEqual(pre_pool.get_shape().as_list(),
[batch_size, 8, 11, 1536])
def testGlobalPoolUnknownImageShape(self):
batch_size = 1
height, width = 330, 400
num_classes = 1000
with self.test_session() as sess:
inputs = tf.compat.v1.placeholder(tf.float32, (batch_size, None, None, 3))
logits, end_points = inception.inception_resnet_v2(
inputs, num_classes, create_aux_logits=False)
self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Conv2d_7b_1x1']
images = tf.random.uniform((batch_size, height, width, 3))
sess.run(tf.compat.v1.global_variables_initializer())
logits_out, pre_pool_out = sess.run([logits, pre_pool],
{inputs: images.eval()})
self.assertTupleEqual(logits_out.shape, (batch_size, num_classes))
self.assertTupleEqual(pre_pool_out.shape, (batch_size, 8, 11, 1536))
def testUnknownBatchSize(self):
batch_size = 1
height, width = 299, 299
num_classes = 1000
with self.test_session() as sess:
inputs = tf.compat.v1.placeholder(tf.float32, (None, height, width, 3))
logits, _ = inception.inception_resnet_v2(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[None, num_classes])
images = tf.random.uniform((batch_size, height, width, 3))
sess.run(tf.compat.v1.global_variables_initializer())
output = sess.run(logits, {inputs: images.eval()})
self.assertEquals(output.shape, (batch_size, num_classes))
def testEvaluation(self):
batch_size = 2
height, width = 299, 299
num_classes = 1000
with self.test_session() as sess:
eval_inputs = tf.random.uniform((batch_size, height, width, 3))
logits, _ = inception.inception_resnet_v2(eval_inputs,
num_classes,
is_training=False)
predictions = tf.argmax(input=logits, axis=1)
sess.run(tf.compat.v1.global_variables_initializer())
output = sess.run(predictions)
self.assertEquals(output.shape, (batch_size,))
def testTrainEvalWithReuse(self):
train_batch_size = 5
eval_batch_size = 2
height, width = 150, 150
num_classes = 1000
with self.test_session() as sess:
train_inputs = tf.random.uniform((train_batch_size, height, width, 3))
inception.inception_resnet_v2(train_inputs, num_classes)
eval_inputs = tf.random.uniform((eval_batch_size, height, width, 3))
logits, _ = inception.inception_resnet_v2(eval_inputs,
num_classes,
is_training=False,
reuse=True)
predictions = tf.argmax(input=logits, axis=1)
sess.run(tf.compat.v1.global_variables_initializer())
output = sess.run(predictions)
self.assertEquals(output.shape, (eval_batch_size,))
def testNoBatchNormScaleByDefault(self):
height, width = 299, 299
num_classes = 1000
inputs = tf.compat.v1.placeholder(tf.float32, (1, height, width, 3))
with contrib_slim.arg_scope(inception.inception_resnet_v2_arg_scope()):
inception.inception_resnet_v2(inputs, num_classes, is_training=False)
self.assertEqual(tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$'), [])
def testBatchNormScale(self):
height, width = 299, 299
num_classes = 1000
inputs = tf.compat.v1.placeholder(tf.float32, (1, height, width, 3))
with contrib_slim.arg_scope(
inception.inception_resnet_v2_arg_scope(batch_norm_scale=True)):
inception.inception_resnet_v2(inputs, num_classes, is_training=False)
gamma_names = set(
v.op.name
for v in tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$'))
self.assertGreater(len(gamma_names), 0)
for v in tf.compat.v1.global_variables('.*/BatchNorm/moving_mean:0$'):
self.assertIn(v.op.name[:-len('moving_mean')] + 'gamma', gamma_names)
if __name__ == '__main__':
tf.test.main()
@@ -0,0 +1,84 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains common code shared by all inception models.
Usage of arg scope:
with slim.arg_scope(inception_arg_scope()):
logits, end_points = inception.inception_v3(images, num_classes,
is_training=is_training)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
slim = contrib_slim
def inception_arg_scope(
weight_decay=0.00004,
use_batch_norm=True,
batch_norm_decay=0.9997,
batch_norm_epsilon=0.001,
activation_fn=tf.nn.relu,
batch_norm_updates_collections=tf.compat.v1.GraphKeys.UPDATE_OPS,
batch_norm_scale=False):
"""Defines the default arg scope for inception models.
Args:
weight_decay: The weight decay to use for regularizing the model.
use_batch_norm: "If `True`, batch_norm is applied after each convolution.
batch_norm_decay: Decay for batch norm moving average.
batch_norm_epsilon: Small float added to variance to avoid dividing by zero
in batch norm.
activation_fn: Activation function for conv2d.
batch_norm_updates_collections: Collection for the update ops for
batch norm.
batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
activations in the batch normalization layer.
Returns:
An `arg_scope` to use for the inception models.
"""
batch_norm_params = {
# Decay for the moving averages.
'decay': batch_norm_decay,
# epsilon to prevent 0s in variance.
'epsilon': batch_norm_epsilon,
# collection containing update_ops.
'updates_collections': batch_norm_updates_collections,
# use fused batch norm if possible.
'fused': None,
'scale': batch_norm_scale,
}
if use_batch_norm:
normalizer_fn = slim.batch_norm
normalizer_params = batch_norm_params
else:
normalizer_fn = None
normalizer_params = {}
# Set weight_decay for weights in Conv and FC layers.
with slim.arg_scope([slim.conv2d, slim.fully_connected],
weights_regularizer=slim.l2_regularizer(weight_decay)):
with slim.arg_scope(
[slim.conv2d],
weights_initializer=slim.variance_scaling_initializer(),
activation_fn=activation_fn,
normalizer_fn=normalizer_fn,
normalizer_params=normalizer_params) as sc:
return sc
@@ -0,0 +1,347 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definition for inception v1 classification network."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import inception_utils
slim = contrib_slim
# pylint: disable=g-long-lambda
trunc_normal = lambda stddev: tf.compat.v1.truncated_normal_initializer(
0.0, stddev)
def inception_v1_base(inputs,
final_endpoint='Mixed_5c',
include_root_block=True,
scope='InceptionV1'):
"""Defines the Inception V1 base architecture.
This architecture is defined in:
Going deeper with convolutions
Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
http://arxiv.org/pdf/1409.4842v1.pdf.
Args:
inputs: a tensor of size [batch_size, height, width, channels].
final_endpoint: specifies the endpoint to construct the network up to. It
can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c']. If
include_root_block is False, ['Conv2d_1a_7x7', 'MaxPool_2a_3x3',
'Conv2d_2b_1x1', 'Conv2d_2c_3x3', 'MaxPool_3a_3x3'] will not be available.
include_root_block: If True, include the convolution and max-pooling layers
before the inception modules. If False, excludes those layers.
scope: Optional variable_scope.
Returns:
A dictionary from components of the network to the corresponding activation.
Raises:
ValueError: if final_endpoint is not set to one of the predefined values.
"""
end_points = {}
with tf.compat.v1.variable_scope(scope, 'InceptionV1', [inputs]):
with slim.arg_scope(
[slim.conv2d, slim.fully_connected],
weights_initializer=trunc_normal(0.01)):
with slim.arg_scope([slim.conv2d, slim.max_pool2d],
stride=1, padding='SAME'):
net = inputs
if include_root_block:
end_point = 'Conv2d_1a_7x7'
net = slim.conv2d(inputs, 64, [7, 7], stride=2, scope=end_point)
end_points[end_point] = net
if final_endpoint == end_point:
return net, end_points
end_point = 'MaxPool_2a_3x3'
net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
end_points[end_point] = net
if final_endpoint == end_point:
return net, end_points
end_point = 'Conv2d_2b_1x1'
net = slim.conv2d(net, 64, [1, 1], scope=end_point)
end_points[end_point] = net
if final_endpoint == end_point:
return net, end_points
end_point = 'Conv2d_2c_3x3'
net = slim.conv2d(net, 192, [3, 3], scope=end_point)
end_points[end_point] = net
if final_endpoint == end_point:
return net, end_points
end_point = 'MaxPool_3a_3x3'
net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
end_points[end_point] = net
if final_endpoint == end_point:
return net, end_points
end_point = 'Mixed_3b'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 96, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 128, [3, 3], scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, 16, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, 32, [3, 3], scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 32, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(
axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
end_point = 'Mixed_3c'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 192, [3, 3], scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(
axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
end_point = 'MaxPool_4a_3x3'
net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
end_point = 'Mixed_4b'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 96, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 208, [3, 3], scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, 16, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, 48, [3, 3], scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(
axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
end_point = 'Mixed_4c'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 112, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, 24, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(
axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
end_point = 'Mixed_4d'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 256, [3, 3], scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, 24, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(
axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
end_point = 'Mixed_4e'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 112, [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 144, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 288, [3, 3], scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(
axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
end_point = 'Mixed_4f'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 256, [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 320, [3, 3], scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(
axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
end_point = 'MaxPool_5a_2x2'
net = slim.max_pool2d(net, [2, 2], stride=2, scope=end_point)
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
end_point = 'Mixed_5b'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 256, [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 320, [3, 3], scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0a_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(
axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
end_point = 'Mixed_5c'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 384, [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 384, [3, 3], scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, 48, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(
axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if final_endpoint == end_point: return net, end_points
raise ValueError('Unknown final endpoint %s' % final_endpoint)
def inception_v1(inputs,
num_classes=1000,
is_training=True,
dropout_keep_prob=0.8,
prediction_fn=slim.softmax,
spatial_squeeze=True,
reuse=None,
scope='InceptionV1',
global_pool=False):
"""Defines the Inception V1 architecture.
This architecture is defined in:
Going deeper with convolutions
Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
http://arxiv.org/pdf/1409.4842v1.pdf.
The default image size used to train this network is 224x224.
Args:
inputs: a tensor of size [batch_size, height, width, channels].
num_classes: number of predicted classes. If 0 or None, the logits layer
is omitted and the input features to the logits layer (before dropout)
are returned instead.
is_training: whether is training or not.
dropout_keep_prob: the percentage of activation values that are retained.
prediction_fn: a function to get predictions out of logits.
spatial_squeeze: if True, logits is of shape [B, C], if false logits is of
shape [B, 1, 1, C], where B is batch_size and C is number of classes.
reuse: whether or not the network and its variables should be reused. To be
able to reuse 'scope' must be given.
scope: Optional variable_scope.
global_pool: Optional boolean flag to control the avgpooling before the
logits layer. If false or unset, pooling is done with a fixed window
that reduces default-sized inputs to 1x1, while larger inputs lead to
larger outputs. If true, any input size is pooled down to 1x1.
Returns:
net: a Tensor with the logits (pre-softmax activations) if num_classes
is a non-zero integer, or the non-dropped-out input to the logits layer
if num_classes is 0 or None.
end_points: a dictionary from components of the network to the corresponding
activation.
"""
# Final pooling and prediction
with tf.compat.v1.variable_scope(
scope, 'InceptionV1', [inputs], reuse=reuse) as scope:
with slim.arg_scope([slim.batch_norm, slim.dropout],
is_training=is_training):
net, end_points = inception_v1_base(inputs, scope=scope)
with tf.compat.v1.variable_scope('Logits'):
if global_pool:
# Global average pooling.
net = tf.reduce_mean(
input_tensor=net, axis=[1, 2], keepdims=True, name='global_pool')
end_points['global_pool'] = net
else:
# Pooling with a fixed kernel size.
net = slim.avg_pool2d(net, [7, 7], stride=1, scope='AvgPool_0a_7x7')
end_points['AvgPool_0a_7x7'] = net
if not num_classes:
return net, end_points
net = slim.dropout(net, dropout_keep_prob, scope='Dropout_0b')
logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
normalizer_fn=None, scope='Conv2d_0c_1x1')
if spatial_squeeze:
logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
end_points['Logits'] = logits
end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
return logits, end_points
inception_v1.default_image_size = 224
inception_v1_arg_scope = inception_utils.inception_arg_scope
@@ -0,0 +1,300 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for nets.inception_v1."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import inception
slim = contrib_slim
class InceptionV1Test(tf.test.TestCase):
def testBuildClassificationNetwork(self):
batch_size = 5
height, width = 224, 224
num_classes = 1000
inputs = tf.random.uniform((batch_size, height, width, 3))
logits, end_points = inception.inception_v1(inputs, num_classes)
self.assertTrue(logits.op.name.startswith(
'InceptionV1/Logits/SpatialSqueeze'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
self.assertTrue('Predictions' in end_points)
self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
[batch_size, num_classes])
def testBuildPreLogitsNetwork(self):
batch_size = 5
height, width = 224, 224
num_classes = None
inputs = tf.random.uniform((batch_size, height, width, 3))
net, end_points = inception.inception_v1(inputs, num_classes)
self.assertTrue(net.op.name.startswith('InceptionV1/Logits/AvgPool'))
self.assertListEqual(net.get_shape().as_list(), [batch_size, 1, 1, 1024])
self.assertFalse('Logits' in end_points)
self.assertFalse('Predictions' in end_points)
def testBuildBaseNetwork(self):
batch_size = 5
height, width = 224, 224
inputs = tf.random.uniform((batch_size, height, width, 3))
mixed_6c, end_points = inception.inception_v1_base(inputs)
self.assertTrue(mixed_6c.op.name.startswith('InceptionV1/Mixed_5c'))
self.assertListEqual(mixed_6c.get_shape().as_list(),
[batch_size, 7, 7, 1024])
expected_endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b',
'Mixed_3c', 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c',
'Mixed_4d', 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2',
'Mixed_5b', 'Mixed_5c']
self.assertItemsEqual(end_points.keys(), expected_endpoints)
def testBuildOnlyUptoFinalEndpoint(self):
batch_size = 5
height, width = 224, 224
endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d',
'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b',
'Mixed_5c']
for index, endpoint in enumerate(endpoints):
with tf.Graph().as_default():
inputs = tf.random.uniform((batch_size, height, width, 3))
out_tensor, end_points = inception.inception_v1_base(
inputs, final_endpoint=endpoint)
self.assertTrue(out_tensor.op.name.startswith(
'InceptionV1/' + endpoint))
self.assertItemsEqual(endpoints[:index+1], end_points.keys())
def testBuildAndCheckAllEndPointsUptoMixed5c(self):
batch_size = 5
height, width = 224, 224
inputs = tf.random.uniform((batch_size, height, width, 3))
_, end_points = inception.inception_v1_base(inputs,
final_endpoint='Mixed_5c')
endpoints_shapes = {
'Conv2d_1a_7x7': [5, 112, 112, 64],
'MaxPool_2a_3x3': [5, 56, 56, 64],
'Conv2d_2b_1x1': [5, 56, 56, 64],
'Conv2d_2c_3x3': [5, 56, 56, 192],
'MaxPool_3a_3x3': [5, 28, 28, 192],
'Mixed_3b': [5, 28, 28, 256],
'Mixed_3c': [5, 28, 28, 480],
'MaxPool_4a_3x3': [5, 14, 14, 480],
'Mixed_4b': [5, 14, 14, 512],
'Mixed_4c': [5, 14, 14, 512],
'Mixed_4d': [5, 14, 14, 512],
'Mixed_4e': [5, 14, 14, 528],
'Mixed_4f': [5, 14, 14, 832],
'MaxPool_5a_2x2': [5, 7, 7, 832],
'Mixed_5b': [5, 7, 7, 832],
'Mixed_5c': [5, 7, 7, 1024]
}
self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
for endpoint_name in endpoints_shapes:
expected_shape = endpoints_shapes[endpoint_name]
self.assertTrue(endpoint_name in end_points)
self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
expected_shape)
def testModelHasExpectedNumberOfParameters(self):
batch_size = 5
height, width = 224, 224
inputs = tf.random.uniform((batch_size, height, width, 3))
with slim.arg_scope(inception.inception_v1_arg_scope()):
inception.inception_v1_base(inputs)
total_params, _ = slim.model_analyzer.analyze_vars(
slim.get_model_variables())
self.assertAlmostEqual(5607184, total_params)
def testHalfSizeImages(self):
batch_size = 5
height, width = 112, 112
inputs = tf.random.uniform((batch_size, height, width, 3))
mixed_5c, _ = inception.inception_v1_base(inputs)
self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c'))
self.assertListEqual(mixed_5c.get_shape().as_list(),
[batch_size, 4, 4, 1024])
def testBuildBaseNetworkWithoutRootBlock(self):
batch_size = 5
height, width = 28, 28
channels = 192
inputs = tf.random.uniform((batch_size, height, width, channels))
_, end_points = inception.inception_v1_base(
inputs, include_root_block=False)
endpoints_shapes = {
'Mixed_3b': [5, 28, 28, 256],
'Mixed_3c': [5, 28, 28, 480],
'MaxPool_4a_3x3': [5, 14, 14, 480],
'Mixed_4b': [5, 14, 14, 512],
'Mixed_4c': [5, 14, 14, 512],
'Mixed_4d': [5, 14, 14, 512],
'Mixed_4e': [5, 14, 14, 528],
'Mixed_4f': [5, 14, 14, 832],
'MaxPool_5a_2x2': [5, 7, 7, 832],
'Mixed_5b': [5, 7, 7, 832],
'Mixed_5c': [5, 7, 7, 1024]
}
self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
for endpoint_name in endpoints_shapes:
expected_shape = endpoints_shapes[endpoint_name]
self.assertTrue(endpoint_name in end_points)
self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
expected_shape)
def testUnknownImageShape(self):
tf.compat.v1.reset_default_graph()
batch_size = 2
height, width = 224, 224
num_classes = 1000
input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
with self.test_session() as sess:
inputs = tf.compat.v1.placeholder(
tf.float32, shape=(batch_size, None, None, 3))
logits, end_points = inception.inception_v1(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('InceptionV1/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Mixed_5c']
feed_dict = {inputs: input_np}
tf.compat.v1.global_variables_initializer().run()
pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024])
def testGlobalPoolUnknownImageShape(self):
tf.compat.v1.reset_default_graph()
batch_size = 1
height, width = 250, 300
num_classes = 1000
input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
with self.test_session() as sess:
inputs = tf.compat.v1.placeholder(
tf.float32, shape=(batch_size, None, None, 3))
logits, end_points = inception.inception_v1(inputs, num_classes,
global_pool=True)
self.assertTrue(logits.op.name.startswith('InceptionV1/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Mixed_5c']
feed_dict = {inputs: input_np}
tf.compat.v1.global_variables_initializer().run()
pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
self.assertListEqual(list(pre_pool_out.shape), [batch_size, 8, 10, 1024])
def testUnknowBatchSize(self):
batch_size = 1
height, width = 224, 224
num_classes = 1000
inputs = tf.compat.v1.placeholder(tf.float32, (None, height, width, 3))
logits, _ = inception.inception_v1(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('InceptionV1/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[None, num_classes])
images = tf.random.uniform((batch_size, height, width, 3))
with self.test_session() as sess:
sess.run(tf.compat.v1.global_variables_initializer())
output = sess.run(logits, {inputs: images.eval()})
self.assertEquals(output.shape, (batch_size, num_classes))
def testEvaluation(self):
batch_size = 2
height, width = 224, 224
num_classes = 1000
eval_inputs = tf.random.uniform((batch_size, height, width, 3))
logits, _ = inception.inception_v1(eval_inputs, num_classes,
is_training=False)
predictions = tf.argmax(input=logits, axis=1)
with self.test_session() as sess:
sess.run(tf.compat.v1.global_variables_initializer())
output = sess.run(predictions)
self.assertEquals(output.shape, (batch_size,))
def testTrainEvalWithReuse(self):
train_batch_size = 5
eval_batch_size = 2
height, width = 224, 224
num_classes = 1000
train_inputs = tf.random.uniform((train_batch_size, height, width, 3))
inception.inception_v1(train_inputs, num_classes)
eval_inputs = tf.random.uniform((eval_batch_size, height, width, 3))
logits, _ = inception.inception_v1(eval_inputs, num_classes, reuse=True)
predictions = tf.argmax(input=logits, axis=1)
with self.test_session() as sess:
sess.run(tf.compat.v1.global_variables_initializer())
output = sess.run(predictions)
self.assertEquals(output.shape, (eval_batch_size,))
def testLogitsNotSqueezed(self):
num_classes = 25
images = tf.random.uniform([1, 224, 224, 3])
logits, _ = inception.inception_v1(images,
num_classes=num_classes,
spatial_squeeze=False)
with self.test_session() as sess:
tf.compat.v1.global_variables_initializer().run()
logits_out = sess.run(logits)
self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])
def testNoBatchNormScaleByDefault(self):
height, width = 224, 224
num_classes = 1000
inputs = tf.compat.v1.placeholder(tf.float32, (1, height, width, 3))
with slim.arg_scope(inception.inception_v1_arg_scope()):
inception.inception_v1(inputs, num_classes, is_training=False)
self.assertEqual(tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$'), [])
def testBatchNormScale(self):
height, width = 224, 224
num_classes = 1000
inputs = tf.compat.v1.placeholder(tf.float32, (1, height, width, 3))
with slim.arg_scope(
inception.inception_v1_arg_scope(batch_norm_scale=True)):
inception.inception_v1(inputs, num_classes, is_training=False)
gamma_names = set(
v.op.name
for v in tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$'))
self.assertGreater(len(gamma_names), 0)
for v in tf.compat.v1.global_variables('.*/BatchNorm/moving_mean:0$'):
self.assertIn(v.op.name[:-len('moving_mean')] + 'gamma', gamma_names)
if __name__ == '__main__':
tf.test.main()
@@ -0,0 +1,596 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definition for inception v2 classification network."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import inception_utils
slim = contrib_slim
# pylint: disable=g-long-lambda
trunc_normal = lambda stddev: tf.compat.v1.truncated_normal_initializer(
0.0, stddev)
def inception_v2_base(inputs,
final_endpoint='Mixed_5c',
min_depth=16,
depth_multiplier=1.0,
use_separable_conv=True,
data_format='NHWC',
include_root_block=True,
scope=None):
"""Inception v2 (6a2).
Constructs an Inception v2 network from inputs to the given final endpoint.
This method can construct the network up to the layer inception(5b) as
described in http://arxiv.org/abs/1502.03167.
Args:
inputs: a tensor of shape [batch_size, height, width, channels].
final_endpoint: specifies the endpoint to construct the network up to. It
can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'Mixed_4a',
'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a', 'Mixed_5b',
'Mixed_5c']. If include_root_block is False, ['Conv2d_1a_7x7',
'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3', 'MaxPool_3a_3x3'] will
not be available.
min_depth: Minimum depth value (number of channels) for all convolution ops.
Enforced when depth_multiplier < 1, and not an active constraint when
depth_multiplier >= 1.
depth_multiplier: Float multiplier for the depth (number of channels)
for all convolution ops. The value must be greater than zero. Typical
usage will be to set this value in (0, 1) to reduce the number of
parameters or computation cost of the model.
use_separable_conv: Use a separable convolution for the first layer
Conv2d_1a_7x7. If this is False, use a normal convolution instead.
data_format: Data format of the activations ('NHWC' or 'NCHW').
include_root_block: If True, include the convolution and max-pooling layers
before the inception modules. If False, excludes those layers.
scope: Optional variable_scope.
Returns:
tensor_out: output tensor corresponding to the final_endpoint.
end_points: a set of activations for external use, for example summaries or
losses.
Raises:
ValueError: if final_endpoint is not set to one of the predefined values,
or depth_multiplier <= 0
"""
# end_points will collect relevant activations for external use, for example
# summaries or losses.
end_points = {}
# Used to find thinned depths for each layer.
if depth_multiplier <= 0:
raise ValueError('depth_multiplier is not greater than zero.')
depth = lambda d: max(int(d * depth_multiplier), min_depth)
if data_format != 'NHWC' and data_format != 'NCHW':
raise ValueError('data_format must be either NHWC or NCHW.')
if data_format == 'NCHW' and use_separable_conv:
raise ValueError(
'separable convolution only supports NHWC layout. NCHW data format can'
' only be used when use_separable_conv is False.'
)
concat_dim = 3 if data_format == 'NHWC' else 1
with tf.compat.v1.variable_scope(scope, 'InceptionV2', [inputs]):
with slim.arg_scope(
[slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
stride=1,
padding='SAME',
data_format=data_format):
net = inputs
if include_root_block:
# Note that sizes in the comments below assume an input spatial size of
# 224x224, however, the inputs can be of any size greater 32x32.
# 224 x 224 x 3
end_point = 'Conv2d_1a_7x7'
if use_separable_conv:
# depthwise_multiplier here is different from depth_multiplier.
# depthwise_multiplier determines the output channels of the initial
# depthwise conv (see docs for tf.nn.separable_conv2d), while
# depth_multiplier controls the # channels of the subsequent 1x1
# convolution. Must have
# in_channels * depthwise_multipler <= out_channels
# so that the separable convolution is not overparameterized.
depthwise_multiplier = min(int(depth(64) / 3), 8)
net = slim.separable_conv2d(
inputs,
depth(64), [7, 7],
depth_multiplier=depthwise_multiplier,
stride=2,
padding='SAME',
weights_initializer=trunc_normal(1.0),
scope=end_point)
else:
# Use a normal convolution instead of a separable convolution.
net = slim.conv2d(
inputs,
depth(64), [7, 7],
stride=2,
weights_initializer=trunc_normal(1.0),
scope=end_point)
end_points[end_point] = net
if end_point == final_endpoint:
return net, end_points
# 112 x 112 x 64
end_point = 'MaxPool_2a_3x3'
net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2)
end_points[end_point] = net
if end_point == final_endpoint:
return net, end_points
# 56 x 56 x 64
end_point = 'Conv2d_2b_1x1'
net = slim.conv2d(
net,
depth(64), [1, 1],
scope=end_point,
weights_initializer=trunc_normal(0.1))
end_points[end_point] = net
if end_point == final_endpoint:
return net, end_points
# 56 x 56 x 64
end_point = 'Conv2d_2c_3x3'
net = slim.conv2d(net, depth(192), [3, 3], scope=end_point)
end_points[end_point] = net
if end_point == final_endpoint:
return net, end_points
# 56 x 56 x 192
end_point = 'MaxPool_3a_3x3'
net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2)
end_points[end_point] = net
if end_point == final_endpoint:
return net, end_points
# 28 x 28 x 192
# Inception module.
end_point = 'Mixed_3b'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(
net, depth(64), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, depth(64), [3, 3],
scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(
net, depth(64), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
scope='Conv2d_0b_3x3')
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
scope='Conv2d_0c_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(
branch_3, depth(32), [1, 1],
weights_initializer=trunc_normal(0.1),
scope='Conv2d_0b_1x1')
net = tf.concat(
axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 28 x 28 x 256
end_point = 'Mixed_3c'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(
net, depth(64), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, depth(96), [3, 3],
scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(
net, depth(64), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
scope='Conv2d_0b_3x3')
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
scope='Conv2d_0c_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(
branch_3, depth(64), [1, 1],
weights_initializer=trunc_normal(0.1),
scope='Conv2d_0b_1x1')
net = tf.concat(
axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 28 x 28 x 320
end_point = 'Mixed_4a'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(
net, depth(128), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_0 = slim.conv2d(branch_0, depth(160), [3, 3], stride=2,
scope='Conv2d_1a_3x3')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(
net, depth(64), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(
branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
branch_1 = slim.conv2d(
branch_1, depth(96), [3, 3], stride=2, scope='Conv2d_1a_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.max_pool2d(
net, [3, 3], stride=2, scope='MaxPool_1a_3x3')
net = tf.concat(axis=concat_dim, values=[branch_0, branch_1, branch_2])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 14 x 14 x 576
end_point = 'Mixed_4b'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(224), [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(
net, depth(64), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(
branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(
net, depth(96), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
scope='Conv2d_0b_3x3')
branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
scope='Conv2d_0c_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(
branch_3, depth(128), [1, 1],
weights_initializer=trunc_normal(0.1),
scope='Conv2d_0b_1x1')
net = tf.concat(
axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 14 x 14 x 576
end_point = 'Mixed_4c'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(
net, depth(96), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, depth(128), [3, 3],
scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(
net, depth(96), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
scope='Conv2d_0b_3x3')
branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
scope='Conv2d_0c_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(
branch_3, depth(128), [1, 1],
weights_initializer=trunc_normal(0.1),
scope='Conv2d_0b_1x1')
net = tf.concat(
axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 14 x 14 x 576
end_point = 'Mixed_4d'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(
net, depth(128), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, depth(160), [3, 3],
scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(
net, depth(128), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, depth(160), [3, 3],
scope='Conv2d_0b_3x3')
branch_2 = slim.conv2d(branch_2, depth(160), [3, 3],
scope='Conv2d_0c_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(
branch_3, depth(96), [1, 1],
weights_initializer=trunc_normal(0.1),
scope='Conv2d_0b_1x1')
net = tf.concat(
axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 14 x 14 x 576
end_point = 'Mixed_4e'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(96), [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(
net, depth(128), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, depth(192), [3, 3],
scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(
net, depth(160), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, depth(192), [3, 3],
scope='Conv2d_0b_3x3')
branch_2 = slim.conv2d(branch_2, depth(192), [3, 3],
scope='Conv2d_0c_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(
branch_3, depth(96), [1, 1],
weights_initializer=trunc_normal(0.1),
scope='Conv2d_0b_1x1')
net = tf.concat(
axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 14 x 14 x 576
end_point = 'Mixed_5a'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(
net, depth(128), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_0 = slim.conv2d(branch_0, depth(192), [3, 3], stride=2,
scope='Conv2d_1a_3x3')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(
net, depth(192), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, depth(256), [3, 3],
scope='Conv2d_0b_3x3')
branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], stride=2,
scope='Conv2d_1a_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.max_pool2d(net, [3, 3], stride=2,
scope='MaxPool_1a_3x3')
net = tf.concat(
axis=concat_dim, values=[branch_0, branch_1, branch_2])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 7 x 7 x 1024
end_point = 'Mixed_5b'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(
net, depth(192), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(
net, depth(160), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
scope='Conv2d_0b_3x3')
branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
scope='Conv2d_0c_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(
branch_3, depth(128), [1, 1],
weights_initializer=trunc_normal(0.1),
scope='Conv2d_0b_1x1')
net = tf.concat(
axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 7 x 7 x 1024
end_point = 'Mixed_5c'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(
net, depth(192), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(
net, depth(192), [1, 1],
weights_initializer=trunc_normal(0.09),
scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
scope='Conv2d_0b_3x3')
branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
scope='Conv2d_0c_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
branch_3 = slim.conv2d(
branch_3, depth(128), [1, 1],
weights_initializer=trunc_normal(0.1),
scope='Conv2d_0b_1x1')
net = tf.concat(
axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
raise ValueError('Unknown final endpoint %s' % final_endpoint)
def inception_v2(inputs,
num_classes=1000,
is_training=True,
dropout_keep_prob=0.8,
min_depth=16,
depth_multiplier=1.0,
prediction_fn=slim.softmax,
spatial_squeeze=True,
reuse=None,
scope='InceptionV2',
global_pool=False):
"""Inception v2 model for classification.
Constructs an Inception v2 network for classification as described in
http://arxiv.org/abs/1502.03167.
The default image size used to train this network is 224x224.
Args:
inputs: a tensor of shape [batch_size, height, width, channels].
num_classes: number of predicted classes. If 0 or None, the logits layer
is omitted and the input features to the logits layer (before dropout)
are returned instead.
is_training: whether is training or not.
dropout_keep_prob: the percentage of activation values that are retained.
min_depth: Minimum depth value (number of channels) for all convolution ops.
Enforced when depth_multiplier < 1, and not an active constraint when
depth_multiplier >= 1.
depth_multiplier: Float multiplier for the depth (number of channels)
for all convolution ops. The value must be greater than zero. Typical
usage will be to set this value in (0, 1) to reduce the number of
parameters or computation cost of the model.
prediction_fn: a function to get predictions out of logits.
spatial_squeeze: if True, logits is of shape [B, C], if false logits is of
shape [B, 1, 1, C], where B is batch_size and C is number of classes.
reuse: whether or not the network and its variables should be reused. To be
able to reuse 'scope' must be given.
scope: Optional variable_scope.
global_pool: Optional boolean flag to control the avgpooling before the
logits layer. If false or unset, pooling is done with a fixed window
that reduces default-sized inputs to 1x1, while larger inputs lead to
larger outputs. If true, any input size is pooled down to 1x1.
Returns:
net: a Tensor with the logits (pre-softmax activations) if num_classes
is a non-zero integer, or the non-dropped-out input to the logits layer
if num_classes is 0 or None.
end_points: a dictionary from components of the network to the corresponding
activation.
Raises:
ValueError: if final_endpoint is not set to one of the predefined values,
or depth_multiplier <= 0
"""
if depth_multiplier <= 0:
raise ValueError('depth_multiplier is not greater than zero.')
# Final pooling and prediction
with tf.compat.v1.variable_scope(
scope, 'InceptionV2', [inputs], reuse=reuse) as scope:
with slim.arg_scope([slim.batch_norm, slim.dropout],
is_training=is_training):
net, end_points = inception_v2_base(
inputs, scope=scope, min_depth=min_depth,
depth_multiplier=depth_multiplier)
with tf.compat.v1.variable_scope('Logits'):
if global_pool:
# Global average pooling.
net = tf.reduce_mean(
input_tensor=net, axis=[1, 2], keepdims=True, name='global_pool')
end_points['global_pool'] = net
else:
# Pooling with a fixed kernel size.
kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7])
net = slim.avg_pool2d(net, kernel_size, padding='VALID',
scope='AvgPool_1a_{}x{}'.format(*kernel_size))
end_points['AvgPool_1a'] = net
if not num_classes:
return net, end_points
# 1 x 1 x 1024
net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')
end_points['PreLogits'] = net
logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
normalizer_fn=None, scope='Conv2d_1c_1x1')
if spatial_squeeze:
logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
end_points['Logits'] = logits
end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
return logits, end_points
inception_v2.default_image_size = 224
def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
"""Define kernel size which is automatically reduced for small input.
If the shape of the input images is unknown at graph construction time this
function assumes that the input images are is large enough.
Args:
input_tensor: input tensor of size [batch_size, height, width, channels].
kernel_size: desired kernel size of length 2: [kernel_height, kernel_width]
Returns:
a tensor with the kernel size.
TODO(jrru): Make this function work with unknown shapes. Theoretically, this
can be done with the code below. Problems are two-fold: (1) If the shape was
known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot
handle tensors that define the kernel size.
shape = tf.shape(input_tensor)
return = tf.stack([tf.minimum(shape[1], kernel_size[0]),
tf.minimum(shape[2], kernel_size[1])])
"""
shape = input_tensor.get_shape().as_list()
if shape[1] is None or shape[2] is None:
kernel_size_out = kernel_size
else:
kernel_size_out = [min(shape[1], kernel_size[0]),
min(shape[2], kernel_size[1])]
return kernel_size_out
inception_v2_arg_scope = inception_utils.inception_arg_scope
@@ -0,0 +1,412 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for nets.inception_v2."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import inception
slim = contrib_slim
class InceptionV2Test(tf.test.TestCase):
def testBuildClassificationNetwork(self):
batch_size = 5
height, width = 224, 224
num_classes = 1000
inputs = tf.random.uniform((batch_size, height, width, 3))
logits, end_points = inception.inception_v2(inputs, num_classes)
self.assertTrue(logits.op.name.startswith(
'InceptionV2/Logits/SpatialSqueeze'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
self.assertTrue('Predictions' in end_points)
self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
[batch_size, num_classes])
def testBuildPreLogitsNetwork(self):
batch_size = 5
height, width = 224, 224
num_classes = None
inputs = tf.random.uniform((batch_size, height, width, 3))
net, end_points = inception.inception_v2(inputs, num_classes)
self.assertTrue(net.op.name.startswith('InceptionV2/Logits/AvgPool'))
self.assertListEqual(net.get_shape().as_list(), [batch_size, 1, 1, 1024])
self.assertFalse('Logits' in end_points)
self.assertFalse('Predictions' in end_points)
def testBuildBaseNetwork(self):
batch_size = 5
height, width = 224, 224
inputs = tf.random.uniform((batch_size, height, width, 3))
mixed_5c, end_points = inception.inception_v2_base(inputs)
self.assertTrue(mixed_5c.op.name.startswith('InceptionV2/Mixed_5c'))
self.assertListEqual(mixed_5c.get_shape().as_list(),
[batch_size, 7, 7, 1024])
expected_endpoints = ['Mixed_3b', 'Mixed_3c', 'Mixed_4a', 'Mixed_4b',
'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a',
'Mixed_5b', 'Mixed_5c', 'Conv2d_1a_7x7',
'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3',
'MaxPool_3a_3x3']
self.assertItemsEqual(list(end_points.keys()), expected_endpoints)
def testBuildOnlyUptoFinalEndpoint(self):
batch_size = 5
height, width = 224, 224
endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
'Mixed_4a', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
'Mixed_5a', 'Mixed_5b', 'Mixed_5c']
for index, endpoint in enumerate(endpoints):
with tf.Graph().as_default():
inputs = tf.random.uniform((batch_size, height, width, 3))
out_tensor, end_points = inception.inception_v2_base(
inputs, final_endpoint=endpoint)
self.assertTrue(out_tensor.op.name.startswith(
'InceptionV2/' + endpoint))
self.assertItemsEqual(endpoints[:index + 1], list(end_points.keys()))
def testBuildAndCheckAllEndPointsUptoMixed5c(self):
batch_size = 5
height, width = 224, 224
inputs = tf.random.uniform((batch_size, height, width, 3))
_, end_points = inception.inception_v2_base(inputs,
final_endpoint='Mixed_5c')
endpoints_shapes = {'Mixed_3b': [batch_size, 28, 28, 256],
'Mixed_3c': [batch_size, 28, 28, 320],
'Mixed_4a': [batch_size, 14, 14, 576],
'Mixed_4b': [batch_size, 14, 14, 576],
'Mixed_4c': [batch_size, 14, 14, 576],
'Mixed_4d': [batch_size, 14, 14, 576],
'Mixed_4e': [batch_size, 14, 14, 576],
'Mixed_5a': [batch_size, 7, 7, 1024],
'Mixed_5b': [batch_size, 7, 7, 1024],
'Mixed_5c': [batch_size, 7, 7, 1024],
'Conv2d_1a_7x7': [batch_size, 112, 112, 64],
'MaxPool_2a_3x3': [batch_size, 56, 56, 64],
'Conv2d_2b_1x1': [batch_size, 56, 56, 64],
'Conv2d_2c_3x3': [batch_size, 56, 56, 192],
'MaxPool_3a_3x3': [batch_size, 28, 28, 192]}
self.assertItemsEqual(
list(endpoints_shapes.keys()), list(end_points.keys()))
for endpoint_name in endpoints_shapes:
expected_shape = endpoints_shapes[endpoint_name]
self.assertTrue(endpoint_name in end_points)
self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
expected_shape)
def testModelHasExpectedNumberOfParameters(self):
batch_size = 5
height, width = 224, 224
inputs = tf.random.uniform((batch_size, height, width, 3))
with slim.arg_scope(inception.inception_v2_arg_scope()):
inception.inception_v2_base(inputs)
total_params, _ = slim.model_analyzer.analyze_vars(
slim.get_model_variables())
self.assertAlmostEqual(10173112, total_params)
def testBuildEndPointsWithDepthMultiplierLessThanOne(self):
batch_size = 5
height, width = 224, 224
num_classes = 1000
inputs = tf.random.uniform((batch_size, height, width, 3))
_, end_points = inception.inception_v2(inputs, num_classes)
endpoint_keys = [key for key in end_points.keys()
if key.startswith('Mixed') or key.startswith('Conv')]
_, end_points_with_multiplier = inception.inception_v2(
inputs, num_classes, scope='depth_multiplied_net',
depth_multiplier=0.5)
for key in endpoint_keys:
original_depth = end_points[key].get_shape().as_list()[3]
new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]
self.assertEqual(0.5 * original_depth, new_depth)
def testBuildEndPointsWithDepthMultiplierGreaterThanOne(self):
batch_size = 5
height, width = 224, 224
num_classes = 1000
inputs = tf.random.uniform((batch_size, height, width, 3))
_, end_points = inception.inception_v2(inputs, num_classes)
endpoint_keys = [key for key in end_points.keys()
if key.startswith('Mixed') or key.startswith('Conv')]
_, end_points_with_multiplier = inception.inception_v2(
inputs, num_classes, scope='depth_multiplied_net',
depth_multiplier=2.0)
for key in endpoint_keys:
original_depth = end_points[key].get_shape().as_list()[3]
new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]
self.assertEqual(2.0 * original_depth, new_depth)
def testRaiseValueErrorWithInvalidDepthMultiplier(self):
batch_size = 5
height, width = 224, 224
num_classes = 1000
inputs = tf.random.uniform((batch_size, height, width, 3))
with self.assertRaises(ValueError):
_ = inception.inception_v2(inputs, num_classes, depth_multiplier=-0.1)
with self.assertRaises(ValueError):
_ = inception.inception_v2(inputs, num_classes, depth_multiplier=0.0)
def testBuildEndPointsWithUseSeparableConvolutionFalse(self):
batch_size = 5
height, width = 224, 224
inputs = tf.random.uniform((batch_size, height, width, 3))
_, end_points = inception.inception_v2_base(inputs)
endpoint_keys = [
key for key in end_points.keys()
if key.startswith('Mixed') or key.startswith('Conv')
]
_, end_points_with_replacement = inception.inception_v2_base(
inputs, use_separable_conv=False)
# The endpoint shapes must be equal to the original shape even when the
# separable convolution is replaced with a normal convolution.
for key in endpoint_keys:
original_shape = end_points[key].get_shape().as_list()
self.assertTrue(key in end_points_with_replacement)
new_shape = end_points_with_replacement[key].get_shape().as_list()
self.assertListEqual(original_shape, new_shape)
def testBuildEndPointsNCHWDataFormat(self):
batch_size = 5
height, width = 224, 224
inputs = tf.random.uniform((batch_size, height, width, 3))
_, end_points = inception.inception_v2_base(inputs)
endpoint_keys = [
key for key in end_points.keys()
if key.startswith('Mixed') or key.startswith('Conv')
]
inputs_in_nchw = tf.random.uniform((batch_size, 3, height, width))
_, end_points_with_replacement = inception.inception_v2_base(
inputs_in_nchw, use_separable_conv=False, data_format='NCHW')
# With the 'NCHW' data format, all endpoint activations have a transposed
# shape from the original shape with the 'NHWC' layout.
for key in endpoint_keys:
transposed_original_shape = tf.transpose(
a=end_points[key], perm=[0, 3, 1, 2]).get_shape().as_list()
self.assertTrue(key in end_points_with_replacement)
new_shape = end_points_with_replacement[key].get_shape().as_list()
self.assertListEqual(transposed_original_shape, new_shape)
def testBuildErrorsForDataFormats(self):
batch_size = 5
height, width = 224, 224
inputs = tf.random.uniform((batch_size, height, width, 3))
# 'NCWH' data format is not supported.
with self.assertRaises(ValueError):
_ = inception.inception_v2_base(inputs, data_format='NCWH')
# 'NCHW' data format is not supported for separable convolution.
with self.assertRaises(ValueError):
_ = inception.inception_v2_base(inputs, data_format='NCHW')
def testHalfSizeImages(self):
batch_size = 5
height, width = 112, 112
num_classes = 1000
inputs = tf.random.uniform((batch_size, height, width, 3))
logits, end_points = inception.inception_v2(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Mixed_5c']
self.assertListEqual(pre_pool.get_shape().as_list(),
[batch_size, 4, 4, 1024])
def testBuildBaseNetworkWithoutRootBlock(self):
batch_size = 5
height, width = 28, 28
channels = 192
inputs = tf.random.uniform((batch_size, height, width, channels))
_, end_points = inception.inception_v2_base(
inputs, include_root_block=False)
endpoints_shapes = {
'Mixed_3b': [batch_size, 28, 28, 256],
'Mixed_3c': [batch_size, 28, 28, 320],
'Mixed_4a': [batch_size, 14, 14, 576],
'Mixed_4b': [batch_size, 14, 14, 576],
'Mixed_4c': [batch_size, 14, 14, 576],
'Mixed_4d': [batch_size, 14, 14, 576],
'Mixed_4e': [batch_size, 14, 14, 576],
'Mixed_5a': [batch_size, 7, 7, 1024],
'Mixed_5b': [batch_size, 7, 7, 1024],
'Mixed_5c': [batch_size, 7, 7, 1024]
}
self.assertItemsEqual(
list(endpoints_shapes.keys()), list(end_points.keys()))
for endpoint_name in endpoints_shapes:
expected_shape = endpoints_shapes[endpoint_name]
self.assertTrue(endpoint_name in end_points)
self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
expected_shape)
def testUnknownImageShape(self):
tf.compat.v1.reset_default_graph()
batch_size = 2
height, width = 224, 224
num_classes = 1000
input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
with self.test_session() as sess:
inputs = tf.compat.v1.placeholder(
tf.float32, shape=(batch_size, None, None, 3))
logits, end_points = inception.inception_v2(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Mixed_5c']
feed_dict = {inputs: input_np}
tf.compat.v1.global_variables_initializer().run()
pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024])
def testGlobalPoolUnknownImageShape(self):
tf.compat.v1.reset_default_graph()
batch_size = 1
height, width = 250, 300
num_classes = 1000
input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
with self.test_session() as sess:
inputs = tf.compat.v1.placeholder(
tf.float32, shape=(batch_size, None, None, 3))
logits, end_points = inception.inception_v2(inputs, num_classes,
global_pool=True)
self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Mixed_5c']
feed_dict = {inputs: input_np}
tf.compat.v1.global_variables_initializer().run()
pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
self.assertListEqual(list(pre_pool_out.shape), [batch_size, 8, 10, 1024])
def testUnknowBatchSize(self):
batch_size = 1
height, width = 224, 224
num_classes = 1000
inputs = tf.compat.v1.placeholder(tf.float32, (None, height, width, 3))
logits, _ = inception.inception_v2(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[None, num_classes])
images = tf.random.uniform((batch_size, height, width, 3))
with self.test_session() as sess:
sess.run(tf.compat.v1.global_variables_initializer())
output = sess.run(logits, {inputs: images.eval()})
self.assertEquals(output.shape, (batch_size, num_classes))
def testEvaluation(self):
batch_size = 2
height, width = 224, 224
num_classes = 1000
eval_inputs = tf.random.uniform((batch_size, height, width, 3))
logits, _ = inception.inception_v2(eval_inputs, num_classes,
is_training=False)
predictions = tf.argmax(input=logits, axis=1)
with self.test_session() as sess:
sess.run(tf.compat.v1.global_variables_initializer())
output = sess.run(predictions)
self.assertEquals(output.shape, (batch_size,))
def testTrainEvalWithReuse(self):
train_batch_size = 5
eval_batch_size = 2
height, width = 150, 150
num_classes = 1000
train_inputs = tf.random.uniform((train_batch_size, height, width, 3))
inception.inception_v2(train_inputs, num_classes)
eval_inputs = tf.random.uniform((eval_batch_size, height, width, 3))
logits, _ = inception.inception_v2(eval_inputs, num_classes, reuse=True)
predictions = tf.argmax(input=logits, axis=1)
with self.test_session() as sess:
sess.run(tf.compat.v1.global_variables_initializer())
output = sess.run(predictions)
self.assertEquals(output.shape, (eval_batch_size,))
def testLogitsNotSqueezed(self):
num_classes = 25
images = tf.random.uniform([1, 224, 224, 3])
logits, _ = inception.inception_v2(images,
num_classes=num_classes,
spatial_squeeze=False)
with self.test_session() as sess:
tf.compat.v1.global_variables_initializer().run()
logits_out = sess.run(logits)
self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])
def testNoBatchNormScaleByDefault(self):
height, width = 224, 224
num_classes = 1000
inputs = tf.compat.v1.placeholder(tf.float32, (1, height, width, 3))
with slim.arg_scope(inception.inception_v2_arg_scope()):
inception.inception_v2(inputs, num_classes, is_training=False)
self.assertEqual(tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$'), [])
def testBatchNormScale(self):
height, width = 224, 224
num_classes = 1000
inputs = tf.compat.v1.placeholder(tf.float32, (1, height, width, 3))
with slim.arg_scope(
inception.inception_v2_arg_scope(batch_norm_scale=True)):
inception.inception_v2(inputs, num_classes, is_training=False)
gamma_names = set(
v.op.name
for v in tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$'))
self.assertGreater(len(gamma_names), 0)
for v in tf.compat.v1.global_variables('.*/BatchNorm/moving_mean:0$'):
self.assertIn(v.op.name[:-len('moving_mean')] + 'gamma', gamma_names)
if __name__ == '__main__':
tf.test.main()
@@ -0,0 +1,585 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definition for inception v3 classification network."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import inception_utils
slim = contrib_slim
# pylint: disable=g-long-lambda
trunc_normal = lambda stddev: tf.compat.v1.truncated_normal_initializer(
0.0, stddev)
def inception_v3_base(inputs,
final_endpoint='Mixed_7c',
min_depth=16,
depth_multiplier=1.0,
scope=None):
"""Inception model from http://arxiv.org/abs/1512.00567.
Constructs an Inception v3 network from inputs to the given final endpoint.
This method can construct the network up to the final inception block
Mixed_7c.
Note that the names of the layers in the paper do not correspond to the names
of the endpoints registered by this function although they build the same
network.
Here is a mapping from the old_names to the new names:
Old name | New name
=======================================
conv0 | Conv2d_1a_3x3
conv1 | Conv2d_2a_3x3
conv2 | Conv2d_2b_3x3
pool1 | MaxPool_3a_3x3
conv3 | Conv2d_3b_1x1
conv4 | Conv2d_4a_3x3
pool2 | MaxPool_5a_3x3
mixed_35x35x256a | Mixed_5b
mixed_35x35x288a | Mixed_5c
mixed_35x35x288b | Mixed_5d
mixed_17x17x768a | Mixed_6a
mixed_17x17x768b | Mixed_6b
mixed_17x17x768c | Mixed_6c
mixed_17x17x768d | Mixed_6d
mixed_17x17x768e | Mixed_6e
mixed_8x8x1280a | Mixed_7a
mixed_8x8x2048a | Mixed_7b
mixed_8x8x2048b | Mixed_7c
Args:
inputs: a tensor of size [batch_size, height, width, channels].
final_endpoint: specifies the endpoint to construct the network up to. It
can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',
'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c',
'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c'].
min_depth: Minimum depth value (number of channels) for all convolution ops.
Enforced when depth_multiplier < 1, and not an active constraint when
depth_multiplier >= 1.
depth_multiplier: Float multiplier for the depth (number of channels)
for all convolution ops. The value must be greater than zero. Typical
usage will be to set this value in (0, 1) to reduce the number of
parameters or computation cost of the model.
scope: Optional variable_scope.
Returns:
tensor_out: output tensor corresponding to the final_endpoint.
end_points: a set of activations for external use, for example summaries or
losses.
Raises:
ValueError: if final_endpoint is not set to one of the predefined values,
or depth_multiplier <= 0
"""
# end_points will collect relevant activations for external use, for example
# summaries or losses.
end_points = {}
if depth_multiplier <= 0:
raise ValueError('depth_multiplier is not greater than zero.')
depth = lambda d: max(int(d * depth_multiplier), min_depth)
with tf.compat.v1.variable_scope(scope, 'InceptionV3', [inputs]):
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
stride=1, padding='VALID'):
# 299 x 299 x 3
end_point = 'Conv2d_1a_3x3'
net = slim.conv2d(inputs, depth(32), [3, 3], stride=2, scope=end_point)
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 149 x 149 x 32
end_point = 'Conv2d_2a_3x3'
net = slim.conv2d(net, depth(32), [3, 3], scope=end_point)
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 147 x 147 x 32
end_point = 'Conv2d_2b_3x3'
net = slim.conv2d(net, depth(64), [3, 3], padding='SAME', scope=end_point)
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 147 x 147 x 64
end_point = 'MaxPool_3a_3x3'
net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 73 x 73 x 64
end_point = 'Conv2d_3b_1x1'
net = slim.conv2d(net, depth(80), [1, 1], scope=end_point)
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 73 x 73 x 80.
end_point = 'Conv2d_4a_3x3'
net = slim.conv2d(net, depth(192), [3, 3], scope=end_point)
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 71 x 71 x 192.
end_point = 'MaxPool_5a_3x3'
net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# 35 x 35 x 192.
# Inception blocks
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
stride=1, padding='SAME'):
# mixed: 35 x 35 x 256.
end_point = 'Mixed_5b'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
scope='Conv2d_0b_5x5')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
scope='Conv2d_0b_3x3')
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
scope='Conv2d_0c_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, depth(32), [1, 1],
scope='Conv2d_0b_1x1')
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# mixed_1: 35 x 35 x 288.
end_point = 'Mixed_5c'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0b_1x1')
branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
scope='Conv_1_0c_5x5')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, depth(64), [1, 1],
scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
scope='Conv2d_0b_3x3')
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
scope='Conv2d_0c_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, depth(64), [1, 1],
scope='Conv2d_0b_1x1')
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# mixed_2: 35 x 35 x 288.
end_point = 'Mixed_5d'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
scope='Conv2d_0b_5x5')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
scope='Conv2d_0b_3x3')
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
scope='Conv2d_0c_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, depth(64), [1, 1],
scope='Conv2d_0b_1x1')
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# mixed_3: 17 x 17 x 768.
end_point = 'Mixed_6a'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(384), [3, 3], stride=2,
padding='VALID', scope='Conv2d_1a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, depth(96), [3, 3],
scope='Conv2d_0b_3x3')
branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], stride=2,
padding='VALID', scope='Conv2d_1a_1x1')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
scope='MaxPool_1a_3x3')
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# mixed4: 17 x 17 x 768.
end_point = 'Mixed_6b'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, depth(128), [1, 7],
scope='Conv2d_0b_1x7')
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
scope='Conv2d_0c_7x1')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, depth(128), [7, 1],
scope='Conv2d_0b_7x1')
branch_2 = slim.conv2d(branch_2, depth(128), [1, 7],
scope='Conv2d_0c_1x7')
branch_2 = slim.conv2d(branch_2, depth(128), [7, 1],
scope='Conv2d_0d_7x1')
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
scope='Conv2d_0e_1x7')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
scope='Conv2d_0b_1x1')
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# mixed_5: 17 x 17 x 768.
end_point = 'Mixed_6c'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, depth(160), [1, 7],
scope='Conv2d_0b_1x7')
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
scope='Conv2d_0c_7x1')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
scope='Conv2d_0b_7x1')
branch_2 = slim.conv2d(branch_2, depth(160), [1, 7],
scope='Conv2d_0c_1x7')
branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
scope='Conv2d_0d_7x1')
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
scope='Conv2d_0e_1x7')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
scope='Conv2d_0b_1x1')
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# mixed_6: 17 x 17 x 768.
end_point = 'Mixed_6d'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, depth(160), [1, 7],
scope='Conv2d_0b_1x7')
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
scope='Conv2d_0c_7x1')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
scope='Conv2d_0b_7x1')
branch_2 = slim.conv2d(branch_2, depth(160), [1, 7],
scope='Conv2d_0c_1x7')
branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
scope='Conv2d_0d_7x1')
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
scope='Conv2d_0e_1x7')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
scope='Conv2d_0b_1x1')
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# mixed_7: 17 x 17 x 768.
end_point = 'Mixed_6e'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, depth(192), [1, 7],
scope='Conv2d_0b_1x7')
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
scope='Conv2d_0c_7x1')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, depth(192), [7, 1],
scope='Conv2d_0b_7x1')
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
scope='Conv2d_0c_1x7')
branch_2 = slim.conv2d(branch_2, depth(192), [7, 1],
scope='Conv2d_0d_7x1')
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
scope='Conv2d_0e_1x7')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
scope='Conv2d_0b_1x1')
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# mixed_8: 8 x 8 x 1280.
end_point = 'Mixed_7a'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
branch_0 = slim.conv2d(branch_0, depth(320), [3, 3], stride=2,
padding='VALID', scope='Conv2d_1a_3x3')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, depth(192), [1, 7],
scope='Conv2d_0b_1x7')
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
scope='Conv2d_0c_7x1')
branch_1 = slim.conv2d(branch_1, depth(192), [3, 3], stride=2,
padding='VALID', scope='Conv2d_1a_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
scope='MaxPool_1a_3x3')
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# mixed_9: 8 x 8 x 2048.
end_point = 'Mixed_7b'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
branch_1 = tf.concat(axis=3, values=[
slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0b_3x1')])
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(
branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
branch_2 = tf.concat(axis=3, values=[
slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')])
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(
branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
# mixed_10: 8 x 8 x 2048.
end_point = 'Mixed_7c'
with tf.compat.v1.variable_scope(end_point):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
branch_1 = tf.concat(axis=3, values=[
slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0c_3x1')])
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(
branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
branch_2 = tf.concat(axis=3, values=[
slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')])
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(
branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
end_points[end_point] = net
if end_point == final_endpoint: return net, end_points
raise ValueError('Unknown final endpoint %s' % final_endpoint)
def inception_v3(inputs,
num_classes=1000,
is_training=True,
dropout_keep_prob=0.8,
min_depth=16,
depth_multiplier=1.0,
prediction_fn=slim.softmax,
spatial_squeeze=True,
reuse=None,
create_aux_logits=True,
scope='InceptionV3',
global_pool=False):
"""Inception model from http://arxiv.org/abs/1512.00567.
"Rethinking the Inception Architecture for Computer Vision"
Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
Zbigniew Wojna.
With the default arguments this method constructs the exact model defined in
the paper. However, one can experiment with variations of the inception_v3
network by changing arguments dropout_keep_prob, min_depth and
depth_multiplier.
The default image size used to train this network is 299x299.
Args:
inputs: a tensor of size [batch_size, height, width, channels].
num_classes: number of predicted classes. If 0 or None, the logits layer
is omitted and the input features to the logits layer (before dropout)
are returned instead.
is_training: whether is training or not.
dropout_keep_prob: the percentage of activation values that are retained.
min_depth: Minimum depth value (number of channels) for all convolution ops.
Enforced when depth_multiplier < 1, and not an active constraint when
depth_multiplier >= 1.
depth_multiplier: Float multiplier for the depth (number of channels)
for all convolution ops. The value must be greater than zero. Typical
usage will be to set this value in (0, 1) to reduce the number of
parameters or computation cost of the model.
prediction_fn: a function to get predictions out of logits.
spatial_squeeze: if True, logits is of shape [B, C], if false logits is of
shape [B, 1, 1, C], where B is batch_size and C is number of classes.
reuse: whether or not the network and its variables should be reused. To be
able to reuse 'scope' must be given.
create_aux_logits: Whether to create the auxiliary logits.
scope: Optional variable_scope.
global_pool: Optional boolean flag to control the avgpooling before the
logits layer. If false or unset, pooling is done with a fixed window
that reduces default-sized inputs to 1x1, while larger inputs lead to
larger outputs. If true, any input size is pooled down to 1x1.
Returns:
net: a Tensor with the logits (pre-softmax activations) if num_classes
is a non-zero integer, or the non-dropped-out input to the logits layer
if num_classes is 0 or None.
end_points: a dictionary from components of the network to the corresponding
activation.
Raises:
ValueError: if 'depth_multiplier' is less than or equal to zero.
"""
if depth_multiplier <= 0:
raise ValueError('depth_multiplier is not greater than zero.')
depth = lambda d: max(int(d * depth_multiplier), min_depth)
with tf.compat.v1.variable_scope(
scope, 'InceptionV3', [inputs], reuse=reuse) as scope:
with slim.arg_scope([slim.batch_norm, slim.dropout],
is_training=is_training):
net, end_points = inception_v3_base(
inputs, scope=scope, min_depth=min_depth,
depth_multiplier=depth_multiplier)
# Auxiliary Head logits
if create_aux_logits and num_classes:
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
stride=1, padding='SAME'):
aux_logits = end_points['Mixed_6e']
with tf.compat.v1.variable_scope('AuxLogits'):
aux_logits = slim.avg_pool2d(
aux_logits, [5, 5], stride=3, padding='VALID',
scope='AvgPool_1a_5x5')
aux_logits = slim.conv2d(aux_logits, depth(128), [1, 1],
scope='Conv2d_1b_1x1')
# Shape of feature map before the final layer.
kernel_size = _reduced_kernel_size_for_small_input(
aux_logits, [5, 5])
aux_logits = slim.conv2d(
aux_logits, depth(768), kernel_size,
weights_initializer=trunc_normal(0.01),
padding='VALID', scope='Conv2d_2a_{}x{}'.format(*kernel_size))
aux_logits = slim.conv2d(
aux_logits, num_classes, [1, 1], activation_fn=None,
normalizer_fn=None, weights_initializer=trunc_normal(0.001),
scope='Conv2d_2b_1x1')
if spatial_squeeze:
aux_logits = tf.squeeze(aux_logits, [1, 2], name='SpatialSqueeze')
end_points['AuxLogits'] = aux_logits
# Final pooling and prediction
with tf.compat.v1.variable_scope('Logits'):
if global_pool:
# Global average pooling.
net = tf.reduce_mean(
input_tensor=net, axis=[1, 2], keepdims=True, name='GlobalPool')
end_points['global_pool'] = net
else:
# Pooling with a fixed kernel size.
kernel_size = _reduced_kernel_size_for_small_input(net, [8, 8])
net = slim.avg_pool2d(net, kernel_size, padding='VALID',
scope='AvgPool_1a_{}x{}'.format(*kernel_size))
end_points['AvgPool_1a'] = net
if not num_classes:
return net, end_points
# 1 x 1 x 2048
net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')
end_points['PreLogits'] = net
# 2048
logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
normalizer_fn=None, scope='Conv2d_1c_1x1')
if spatial_squeeze:
logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
# 1000
end_points['Logits'] = logits
end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
return logits, end_points
inception_v3.default_image_size = 299
def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
"""Define kernel size which is automatically reduced for small input.
If the shape of the input images is unknown at graph construction time this
function assumes that the input images are is large enough.
Args:
input_tensor: input tensor of size [batch_size, height, width, channels].
kernel_size: desired kernel size of length 2: [kernel_height, kernel_width]
Returns:
a tensor with the kernel size.
TODO(jrru): Make this function work with unknown shapes. Theoretically, this
can be done with the code below. Problems are two-fold: (1) If the shape was
known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot
handle tensors that define the kernel size.
shape = tf.shape(input_tensor)
return = tf.stack([tf.minimum(shape[1], kernel_size[0]),
tf.minimum(shape[2], kernel_size[1])])
"""
shape = input_tensor.get_shape().as_list()
if shape[1] is None or shape[2] is None:
kernel_size_out = kernel_size
else:
kernel_size_out = [min(shape[1], kernel_size[0]),
min(shape[2], kernel_size[1])]
return kernel_size_out
inception_v3_arg_scope = inception_utils.inception_arg_scope
@@ -0,0 +1,350 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for nets.inception_v1."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import inception
slim = contrib_slim
class InceptionV3Test(tf.test.TestCase):
def testBuildClassificationNetwork(self):
batch_size = 5
height, width = 299, 299
num_classes = 1000
inputs = tf.random.uniform((batch_size, height, width, 3))
logits, end_points = inception.inception_v3(inputs, num_classes)
self.assertTrue(logits.op.name.startswith(
'InceptionV3/Logits/SpatialSqueeze'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
self.assertTrue('Predictions' in end_points)
self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
[batch_size, num_classes])
def testBuildPreLogitsNetwork(self):
batch_size = 5
height, width = 299, 299
num_classes = None
inputs = tf.random.uniform((batch_size, height, width, 3))
net, end_points = inception.inception_v3(inputs, num_classes)
self.assertTrue(net.op.name.startswith('InceptionV3/Logits/AvgPool'))
self.assertListEqual(net.get_shape().as_list(), [batch_size, 1, 1, 2048])
self.assertFalse('Logits' in end_points)
self.assertFalse('Predictions' in end_points)
def testBuildBaseNetwork(self):
batch_size = 5
height, width = 299, 299
inputs = tf.random.uniform((batch_size, height, width, 3))
final_endpoint, end_points = inception.inception_v3_base(inputs)
self.assertTrue(final_endpoint.op.name.startswith(
'InceptionV3/Mixed_7c'))
self.assertListEqual(final_endpoint.get_shape().as_list(),
[batch_size, 8, 8, 2048])
expected_endpoints = ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3',
'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',
'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c']
self.assertItemsEqual(end_points.keys(), expected_endpoints)
def testBuildOnlyUptoFinalEndpoint(self):
batch_size = 5
height, width = 299, 299
endpoints = ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3',
'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',
'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c']
for index, endpoint in enumerate(endpoints):
with tf.Graph().as_default():
inputs = tf.random.uniform((batch_size, height, width, 3))
out_tensor, end_points = inception.inception_v3_base(
inputs, final_endpoint=endpoint)
self.assertTrue(out_tensor.op.name.startswith(
'InceptionV3/' + endpoint))
self.assertItemsEqual(endpoints[:index+1], end_points.keys())
def testBuildAndCheckAllEndPointsUptoMixed7c(self):
batch_size = 5
height, width = 299, 299
inputs = tf.random.uniform((batch_size, height, width, 3))
_, end_points = inception.inception_v3_base(
inputs, final_endpoint='Mixed_7c')
endpoints_shapes = {'Conv2d_1a_3x3': [batch_size, 149, 149, 32],
'Conv2d_2a_3x3': [batch_size, 147, 147, 32],
'Conv2d_2b_3x3': [batch_size, 147, 147, 64],
'MaxPool_3a_3x3': [batch_size, 73, 73, 64],
'Conv2d_3b_1x1': [batch_size, 73, 73, 80],
'Conv2d_4a_3x3': [batch_size, 71, 71, 192],
'MaxPool_5a_3x3': [batch_size, 35, 35, 192],
'Mixed_5b': [batch_size, 35, 35, 256],
'Mixed_5c': [batch_size, 35, 35, 288],
'Mixed_5d': [batch_size, 35, 35, 288],
'Mixed_6a': [batch_size, 17, 17, 768],
'Mixed_6b': [batch_size, 17, 17, 768],
'Mixed_6c': [batch_size, 17, 17, 768],
'Mixed_6d': [batch_size, 17, 17, 768],
'Mixed_6e': [batch_size, 17, 17, 768],
'Mixed_7a': [batch_size, 8, 8, 1280],
'Mixed_7b': [batch_size, 8, 8, 2048],
'Mixed_7c': [batch_size, 8, 8, 2048]}
self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
for endpoint_name in endpoints_shapes:
expected_shape = endpoints_shapes[endpoint_name]
self.assertTrue(endpoint_name in end_points)
self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
expected_shape)
def testModelHasExpectedNumberOfParameters(self):
batch_size = 5
height, width = 299, 299
inputs = tf.random.uniform((batch_size, height, width, 3))
with slim.arg_scope(inception.inception_v3_arg_scope()):
inception.inception_v3_base(inputs)
total_params, _ = slim.model_analyzer.analyze_vars(
slim.get_model_variables())
self.assertAlmostEqual(21802784, total_params)
def testBuildEndPoints(self):
batch_size = 5
height, width = 299, 299
num_classes = 1000
inputs = tf.random.uniform((batch_size, height, width, 3))
_, end_points = inception.inception_v3(inputs, num_classes)
self.assertTrue('Logits' in end_points)
logits = end_points['Logits']
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
self.assertTrue('AuxLogits' in end_points)
aux_logits = end_points['AuxLogits']
self.assertListEqual(aux_logits.get_shape().as_list(),
[batch_size, num_classes])
self.assertTrue('Mixed_7c' in end_points)
pre_pool = end_points['Mixed_7c']
self.assertListEqual(pre_pool.get_shape().as_list(),
[batch_size, 8, 8, 2048])
self.assertTrue('PreLogits' in end_points)
pre_logits = end_points['PreLogits']
self.assertListEqual(pre_logits.get_shape().as_list(),
[batch_size, 1, 1, 2048])
def testBuildEndPointsWithDepthMultiplierLessThanOne(self):
batch_size = 5
height, width = 299, 299
num_classes = 1000
inputs = tf.random.uniform((batch_size, height, width, 3))
_, end_points = inception.inception_v3(inputs, num_classes)
endpoint_keys = [key for key in end_points.keys()
if key.startswith('Mixed') or key.startswith('Conv')]
_, end_points_with_multiplier = inception.inception_v3(
inputs, num_classes, scope='depth_multiplied_net',
depth_multiplier=0.5)
for key in endpoint_keys:
original_depth = end_points[key].get_shape().as_list()[3]
new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]
self.assertEqual(0.5 * original_depth, new_depth)
def testBuildEndPointsWithDepthMultiplierGreaterThanOne(self):
batch_size = 5
height, width = 299, 299
num_classes = 1000
inputs = tf.random.uniform((batch_size, height, width, 3))
_, end_points = inception.inception_v3(inputs, num_classes)
endpoint_keys = [key for key in end_points.keys()
if key.startswith('Mixed') or key.startswith('Conv')]
_, end_points_with_multiplier = inception.inception_v3(
inputs, num_classes, scope='depth_multiplied_net',
depth_multiplier=2.0)
for key in endpoint_keys:
original_depth = end_points[key].get_shape().as_list()[3]
new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]
self.assertEqual(2.0 * original_depth, new_depth)
def testRaiseValueErrorWithInvalidDepthMultiplier(self):
batch_size = 5
height, width = 299, 299
num_classes = 1000
inputs = tf.random.uniform((batch_size, height, width, 3))
with self.assertRaises(ValueError):
_ = inception.inception_v3(inputs, num_classes, depth_multiplier=-0.1)
with self.assertRaises(ValueError):
_ = inception.inception_v3(inputs, num_classes, depth_multiplier=0.0)
def testHalfSizeImages(self):
batch_size = 5
height, width = 150, 150
num_classes = 1000
inputs = tf.random.uniform((batch_size, height, width, 3))
logits, end_points = inception.inception_v3(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('InceptionV3/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Mixed_7c']
self.assertListEqual(pre_pool.get_shape().as_list(),
[batch_size, 3, 3, 2048])
def testUnknownImageShape(self):
tf.compat.v1.reset_default_graph()
batch_size = 2
height, width = 299, 299
num_classes = 1000
input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
with self.test_session() as sess:
inputs = tf.compat.v1.placeholder(
tf.float32, shape=(batch_size, None, None, 3))
logits, end_points = inception.inception_v3(inputs, num_classes)
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Mixed_7c']
feed_dict = {inputs: input_np}
tf.compat.v1.global_variables_initializer().run()
pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
self.assertListEqual(list(pre_pool_out.shape), [batch_size, 8, 8, 2048])
def testGlobalPoolUnknownImageShape(self):
tf.compat.v1.reset_default_graph()
batch_size = 1
height, width = 330, 400
num_classes = 1000
input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
with self.test_session() as sess:
inputs = tf.compat.v1.placeholder(
tf.float32, shape=(batch_size, None, None, 3))
logits, end_points = inception.inception_v3(inputs, num_classes,
global_pool=True)
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Mixed_7c']
feed_dict = {inputs: input_np}
tf.compat.v1.global_variables_initializer().run()
pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
self.assertListEqual(list(pre_pool_out.shape), [batch_size, 8, 11, 2048])
def testUnknowBatchSize(self):
batch_size = 1
height, width = 299, 299
num_classes = 1000
inputs = tf.compat.v1.placeholder(tf.float32, (None, height, width, 3))
logits, _ = inception.inception_v3(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('InceptionV3/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[None, num_classes])
images = tf.random.uniform((batch_size, height, width, 3))
with self.test_session() as sess:
sess.run(tf.compat.v1.global_variables_initializer())
output = sess.run(logits, {inputs: images.eval()})
self.assertEquals(output.shape, (batch_size, num_classes))
def testEvaluation(self):
batch_size = 2
height, width = 299, 299
num_classes = 1000
eval_inputs = tf.random.uniform((batch_size, height, width, 3))
logits, _ = inception.inception_v3(eval_inputs, num_classes,
is_training=False)
predictions = tf.argmax(input=logits, axis=1)
with self.test_session() as sess:
sess.run(tf.compat.v1.global_variables_initializer())
output = sess.run(predictions)
self.assertEquals(output.shape, (batch_size,))
def testTrainEvalWithReuse(self):
train_batch_size = 5
eval_batch_size = 2
height, width = 150, 150
num_classes = 1000
train_inputs = tf.random.uniform((train_batch_size, height, width, 3))
inception.inception_v3(train_inputs, num_classes)
eval_inputs = tf.random.uniform((eval_batch_size, height, width, 3))
logits, _ = inception.inception_v3(eval_inputs, num_classes,
is_training=False, reuse=True)
predictions = tf.argmax(input=logits, axis=1)
with self.test_session() as sess:
sess.run(tf.compat.v1.global_variables_initializer())
output = sess.run(predictions)
self.assertEquals(output.shape, (eval_batch_size,))
def testLogitsNotSqueezed(self):
num_classes = 25
images = tf.random.uniform([1, 299, 299, 3])
logits, _ = inception.inception_v3(images,
num_classes=num_classes,
spatial_squeeze=False)
with self.test_session() as sess:
tf.compat.v1.global_variables_initializer().run()
logits_out = sess.run(logits)
self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])
def testNoBatchNormScaleByDefault(self):
height, width = 299, 299
num_classes = 1000
inputs = tf.compat.v1.placeholder(tf.float32, (1, height, width, 3))
with slim.arg_scope(inception.inception_v3_arg_scope()):
inception.inception_v3(inputs, num_classes, is_training=False)
self.assertEqual(tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$'), [])
def testBatchNormScale(self):
height, width = 299, 299
num_classes = 1000
inputs = tf.compat.v1.placeholder(tf.float32, (1, height, width, 3))
with slim.arg_scope(
inception.inception_v3_arg_scope(batch_norm_scale=True)):
inception.inception_v3(inputs, num_classes, is_training=False)
gamma_names = set(
v.op.name
for v in tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$'))
self.assertGreater(len(gamma_names), 0)
for v in tf.compat.v1.global_variables('.*/BatchNorm/moving_mean:0$'):
self.assertIn(v.op.name[:-len('moving_mean')] + 'gamma', gamma_names)
if __name__ == '__main__':
tf.test.main()
@@ -0,0 +1,347 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definition of the Inception V4 architecture.
As described in http://arxiv.org/abs/1602.07261.
Inception-v4, Inception-ResNet and the Impact of Residual Connections
on Learning
Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import inception_utils
slim = contrib_slim
def block_inception_a(inputs, scope=None, reuse=None):
"""Builds Inception-A block for Inception v4 network."""
# By default use stride=1 and SAME padding
with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
stride=1, padding='SAME'):
with tf.compat.v1.variable_scope(
scope, 'BlockInceptionA', [inputs], reuse=reuse):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(inputs, 96, [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 96, [3, 3], scope='Conv2d_0b_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 96, [1, 1], scope='Conv2d_0b_1x1')
return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
def block_reduction_a(inputs, scope=None, reuse=None):
"""Builds Reduction-A block for Inception v4 network."""
# By default use stride=1 and SAME padding
with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
stride=1, padding='SAME'):
with tf.compat.v1.variable_scope(
scope, 'BlockReductionA', [inputs], reuse=reuse):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(inputs, 384, [3, 3], stride=2, padding='VALID',
scope='Conv2d_1a_3x3')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3')
branch_1 = slim.conv2d(branch_1, 256, [3, 3], stride=2,
padding='VALID', scope='Conv2d_1a_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID',
scope='MaxPool_1a_3x3')
return tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
def block_inception_b(inputs, scope=None, reuse=None):
"""Builds Inception-B block for Inception v4 network."""
# By default use stride=1 and SAME padding
with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
stride=1, padding='SAME'):
with tf.compat.v1.variable_scope(
scope, 'BlockInceptionB', [inputs], reuse=reuse):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 224, [1, 7], scope='Conv2d_0b_1x7')
branch_1 = slim.conv2d(branch_1, 256, [7, 1], scope='Conv2d_0c_7x1')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, 192, [7, 1], scope='Conv2d_0b_7x1')
branch_2 = slim.conv2d(branch_2, 224, [1, 7], scope='Conv2d_0c_1x7')
branch_2 = slim.conv2d(branch_2, 224, [7, 1], scope='Conv2d_0d_7x1')
branch_2 = slim.conv2d(branch_2, 256, [1, 7], scope='Conv2d_0e_1x7')
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
def block_reduction_b(inputs, scope=None, reuse=None):
"""Builds Reduction-B block for Inception v4 network."""
# By default use stride=1 and SAME padding
with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
stride=1, padding='SAME'):
with tf.compat.v1.variable_scope(
scope, 'BlockReductionB', [inputs], reuse=reuse):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
branch_0 = slim.conv2d(branch_0, 192, [3, 3], stride=2,
padding='VALID', scope='Conv2d_1a_3x3')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 256, [1, 7], scope='Conv2d_0b_1x7')
branch_1 = slim.conv2d(branch_1, 320, [7, 1], scope='Conv2d_0c_7x1')
branch_1 = slim.conv2d(branch_1, 320, [3, 3], stride=2,
padding='VALID', scope='Conv2d_1a_3x3')
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID',
scope='MaxPool_1a_3x3')
return tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
def block_inception_c(inputs, scope=None, reuse=None):
"""Builds Inception-C block for Inception v4 network."""
# By default use stride=1 and SAME padding
with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
stride=1, padding='SAME'):
with tf.compat.v1.variable_scope(
scope, 'BlockInceptionC', [inputs], reuse=reuse):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = tf.concat(axis=3, values=[
slim.conv2d(branch_1, 256, [1, 3], scope='Conv2d_0b_1x3'),
slim.conv2d(branch_1, 256, [3, 1], scope='Conv2d_0c_3x1')])
with tf.compat.v1.variable_scope('Branch_2'):
branch_2 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
branch_2 = slim.conv2d(branch_2, 448, [3, 1], scope='Conv2d_0b_3x1')
branch_2 = slim.conv2d(branch_2, 512, [1, 3], scope='Conv2d_0c_1x3')
branch_2 = tf.concat(axis=3, values=[
slim.conv2d(branch_2, 256, [1, 3], scope='Conv2d_0d_1x3'),
slim.conv2d(branch_2, 256, [3, 1], scope='Conv2d_0e_3x1')])
with tf.compat.v1.variable_scope('Branch_3'):
branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
branch_3 = slim.conv2d(branch_3, 256, [1, 1], scope='Conv2d_0b_1x1')
return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
def inception_v4_base(inputs, final_endpoint='Mixed_7d', scope=None):
"""Creates the Inception V4 network up to the given final endpoint.
Args:
inputs: a 4-D tensor of size [batch_size, height, width, 3].
final_endpoint: specifies the endpoint to construct the network up to.
It can be one of [ 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
'Mixed_3a', 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e',
'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c',
'Mixed_7d']
scope: Optional variable_scope.
Returns:
logits: the logits outputs of the model.
end_points: the set of end_points from the inception model.
Raises:
ValueError: if final_endpoint is not set to one of the predefined values,
"""
end_points = {}
def add_and_check_final(name, net):
end_points[name] = net
return name == final_endpoint
with tf.compat.v1.variable_scope(scope, 'InceptionV4', [inputs]):
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
stride=1, padding='SAME'):
# 299 x 299 x 3
net = slim.conv2d(inputs, 32, [3, 3], stride=2,
padding='VALID', scope='Conv2d_1a_3x3')
if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points
# 149 x 149 x 32
net = slim.conv2d(net, 32, [3, 3], padding='VALID',
scope='Conv2d_2a_3x3')
if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points
# 147 x 147 x 32
net = slim.conv2d(net, 64, [3, 3], scope='Conv2d_2b_3x3')
if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points
# 147 x 147 x 64
with tf.compat.v1.variable_scope('Mixed_3a'):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
scope='MaxPool_0a_3x3')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 96, [3, 3], stride=2, padding='VALID',
scope='Conv2d_0a_3x3')
net = tf.concat(axis=3, values=[branch_0, branch_1])
if add_and_check_final('Mixed_3a', net): return net, end_points
# 73 x 73 x 160
with tf.compat.v1.variable_scope('Mixed_4a'):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
branch_0 = slim.conv2d(branch_0, 96, [3, 3], padding='VALID',
scope='Conv2d_1a_3x3')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
branch_1 = slim.conv2d(branch_1, 64, [1, 7], scope='Conv2d_0b_1x7')
branch_1 = slim.conv2d(branch_1, 64, [7, 1], scope='Conv2d_0c_7x1')
branch_1 = slim.conv2d(branch_1, 96, [3, 3], padding='VALID',
scope='Conv2d_1a_3x3')
net = tf.concat(axis=3, values=[branch_0, branch_1])
if add_and_check_final('Mixed_4a', net): return net, end_points
# 71 x 71 x 192
with tf.compat.v1.variable_scope('Mixed_5a'):
with tf.compat.v1.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 192, [3, 3], stride=2, padding='VALID',
scope='Conv2d_1a_3x3')
with tf.compat.v1.variable_scope('Branch_1'):
branch_1 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
scope='MaxPool_1a_3x3')
net = tf.concat(axis=3, values=[branch_0, branch_1])
if add_and_check_final('Mixed_5a', net): return net, end_points
# 35 x 35 x 384
# 4 x Inception-A blocks
for idx in range(4):
block_scope = 'Mixed_5' + chr(ord('b') + idx)
net = block_inception_a(net, block_scope)
if add_and_check_final(block_scope, net): return net, end_points
# 35 x 35 x 384
# Reduction-A block
net = block_reduction_a(net, 'Mixed_6a')
if add_and_check_final('Mixed_6a', net): return net, end_points
# 17 x 17 x 1024
# 7 x Inception-B blocks
for idx in range(7):
block_scope = 'Mixed_6' + chr(ord('b') + idx)
net = block_inception_b(net, block_scope)
if add_and_check_final(block_scope, net): return net, end_points
# 17 x 17 x 1024
# Reduction-B block
net = block_reduction_b(net, 'Mixed_7a')
if add_and_check_final('Mixed_7a', net): return net, end_points
# 8 x 8 x 1536
# 3 x Inception-C blocks
for idx in range(3):
block_scope = 'Mixed_7' + chr(ord('b') + idx)
net = block_inception_c(net, block_scope)
if add_and_check_final(block_scope, net): return net, end_points
raise ValueError('Unknown final endpoint %s' % final_endpoint)
def inception_v4(inputs, num_classes=1001, is_training=True,
dropout_keep_prob=0.8,
reuse=None,
scope='InceptionV4',
create_aux_logits=True):
"""Creates the Inception V4 model.
Args:
inputs: a 4-D tensor of size [batch_size, height, width, 3].
num_classes: number of predicted classes. If 0 or None, the logits layer
is omitted and the input features to the logits layer (before dropout)
are returned instead.
is_training: whether is training or not.
dropout_keep_prob: float, the fraction to keep before final layer.
reuse: whether or not the network and its variables should be reused. To be
able to reuse 'scope' must be given.
scope: Optional variable_scope.
create_aux_logits: Whether to include the auxiliary logits.
Returns:
net: a Tensor with the logits (pre-softmax activations) if num_classes
is a non-zero integer, or the non-dropped input to the logits layer
if num_classes is 0 or None.
end_points: the set of end_points from the inception model.
"""
end_points = {}
with tf.compat.v1.variable_scope(
scope, 'InceptionV4', [inputs], reuse=reuse) as scope:
with slim.arg_scope([slim.batch_norm, slim.dropout],
is_training=is_training):
net, end_points = inception_v4_base(inputs, scope=scope)
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
stride=1, padding='SAME'):
# Auxiliary Head logits
if create_aux_logits and num_classes:
with tf.compat.v1.variable_scope('AuxLogits'):
# 17 x 17 x 1024
aux_logits = end_points['Mixed_6h']
aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3,
padding='VALID',
scope='AvgPool_1a_5x5')
aux_logits = slim.conv2d(aux_logits, 128, [1, 1],
scope='Conv2d_1b_1x1')
aux_logits = slim.conv2d(aux_logits, 768,
aux_logits.get_shape()[1:3],
padding='VALID', scope='Conv2d_2a')
aux_logits = slim.flatten(aux_logits)
aux_logits = slim.fully_connected(aux_logits, num_classes,
activation_fn=None,
scope='Aux_logits')
end_points['AuxLogits'] = aux_logits
# Final pooling and prediction
# TODO(sguada,arnoegw): Consider adding a parameter global_pool which
# can be set to False to disable pooling here (as in resnet_*()).
with tf.compat.v1.variable_scope('Logits'):
# 8 x 8 x 1536
kernel_size = net.get_shape()[1:3]
if kernel_size.is_fully_defined():
net = slim.avg_pool2d(net, kernel_size, padding='VALID',
scope='AvgPool_1a')
else:
net = tf.reduce_mean(
input_tensor=net,
axis=[1, 2],
keepdims=True,
name='global_pool')
end_points['global_pool'] = net
if not num_classes:
return net, end_points
# 1 x 1 x 1536
net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b')
net = slim.flatten(net, scope='PreLogitsFlatten')
end_points['PreLogitsFlatten'] = net
# 1536
logits = slim.fully_connected(net, num_classes, activation_fn=None,
scope='Logits')
end_points['Logits'] = logits
end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions')
return logits, end_points
inception_v4.default_image_size = 299
inception_v4_arg_scope = inception_utils.inception_arg_scope
@@ -0,0 +1,287 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for slim.inception_v4."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
from nets import inception
class InceptionTest(tf.test.TestCase):
def testBuildLogits(self):
batch_size = 5
height, width = 299, 299
num_classes = 1000
inputs = tf.random.uniform((batch_size, height, width, 3))
logits, end_points = inception.inception_v4(inputs, num_classes)
auxlogits = end_points['AuxLogits']
predictions = end_points['Predictions']
self.assertTrue(auxlogits.op.name.startswith('InceptionV4/AuxLogits'))
self.assertListEqual(auxlogits.get_shape().as_list(),
[batch_size, num_classes])
self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
self.assertTrue(predictions.op.name.startswith(
'InceptionV4/Logits/Predictions'))
self.assertListEqual(predictions.get_shape().as_list(),
[batch_size, num_classes])
def testBuildPreLogitsNetwork(self):
batch_size = 5
height, width = 299, 299
num_classes = None
inputs = tf.random.uniform((batch_size, height, width, 3))
net, end_points = inception.inception_v4(inputs, num_classes)
self.assertTrue(net.op.name.startswith('InceptionV4/Logits/AvgPool'))
self.assertListEqual(net.get_shape().as_list(), [batch_size, 1, 1, 1536])
self.assertFalse('Logits' in end_points)
self.assertFalse('Predictions' in end_points)
def testBuildWithoutAuxLogits(self):
batch_size = 5
height, width = 299, 299
num_classes = 1000
inputs = tf.random.uniform((batch_size, height, width, 3))
logits, endpoints = inception.inception_v4(inputs, num_classes,
create_aux_logits=False)
self.assertFalse('AuxLogits' in endpoints)
self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
def testAllEndPointsShapes(self):
batch_size = 5
height, width = 299, 299
num_classes = 1000
inputs = tf.random.uniform((batch_size, height, width, 3))
_, end_points = inception.inception_v4(inputs, num_classes)
endpoints_shapes = {'Conv2d_1a_3x3': [batch_size, 149, 149, 32],
'Conv2d_2a_3x3': [batch_size, 147, 147, 32],
'Conv2d_2b_3x3': [batch_size, 147, 147, 64],
'Mixed_3a': [batch_size, 73, 73, 160],
'Mixed_4a': [batch_size, 71, 71, 192],
'Mixed_5a': [batch_size, 35, 35, 384],
# 4 x Inception-A blocks
'Mixed_5b': [batch_size, 35, 35, 384],
'Mixed_5c': [batch_size, 35, 35, 384],
'Mixed_5d': [batch_size, 35, 35, 384],
'Mixed_5e': [batch_size, 35, 35, 384],
# Reduction-A block
'Mixed_6a': [batch_size, 17, 17, 1024],
# 7 x Inception-B blocks
'Mixed_6b': [batch_size, 17, 17, 1024],
'Mixed_6c': [batch_size, 17, 17, 1024],
'Mixed_6d': [batch_size, 17, 17, 1024],
'Mixed_6e': [batch_size, 17, 17, 1024],
'Mixed_6f': [batch_size, 17, 17, 1024],
'Mixed_6g': [batch_size, 17, 17, 1024],
'Mixed_6h': [batch_size, 17, 17, 1024],
# Reduction-A block
'Mixed_7a': [batch_size, 8, 8, 1536],
# 3 x Inception-C blocks
'Mixed_7b': [batch_size, 8, 8, 1536],
'Mixed_7c': [batch_size, 8, 8, 1536],
'Mixed_7d': [batch_size, 8, 8, 1536],
# Logits and predictions
'AuxLogits': [batch_size, num_classes],
'global_pool': [batch_size, 1, 1, 1536],
'PreLogitsFlatten': [batch_size, 1536],
'Logits': [batch_size, num_classes],
'Predictions': [batch_size, num_classes]}
self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
for endpoint_name in endpoints_shapes:
expected_shape = endpoints_shapes[endpoint_name]
self.assertTrue(endpoint_name in end_points)
self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
expected_shape)
def testBuildBaseNetwork(self):
batch_size = 5
height, width = 299, 299
inputs = tf.random.uniform((batch_size, height, width, 3))
net, end_points = inception.inception_v4_base(inputs)
self.assertTrue(net.op.name.startswith(
'InceptionV4/Mixed_7d'))
self.assertListEqual(net.get_shape().as_list(), [batch_size, 8, 8, 1536])
expected_endpoints = [
'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a',
'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',
'Mixed_6e', 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a',
'Mixed_7b', 'Mixed_7c', 'Mixed_7d']
self.assertItemsEqual(end_points.keys(), expected_endpoints)
for name, op in end_points.items():
self.assertTrue(op.name.startswith('InceptionV4/' + name))
def testBuildOnlyUpToFinalEndpoint(self):
batch_size = 5
height, width = 299, 299
all_endpoints = [
'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a',
'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',
'Mixed_6e', 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a',
'Mixed_7b', 'Mixed_7c', 'Mixed_7d']
for index, endpoint in enumerate(all_endpoints):
with tf.Graph().as_default():
inputs = tf.random.uniform((batch_size, height, width, 3))
out_tensor, end_points = inception.inception_v4_base(
inputs, final_endpoint=endpoint)
self.assertTrue(out_tensor.op.name.startswith(
'InceptionV4/' + endpoint))
self.assertItemsEqual(all_endpoints[:index+1], end_points.keys())
def testVariablesSetDevice(self):
batch_size = 5
height, width = 299, 299
num_classes = 1000
inputs = tf.random.uniform((batch_size, height, width, 3))
# Force all Variables to reside on the device.
with tf.compat.v1.variable_scope('on_cpu'), tf.device('/cpu:0'):
inception.inception_v4(inputs, num_classes)
with tf.compat.v1.variable_scope('on_gpu'), tf.device('/gpu:0'):
inception.inception_v4(inputs, num_classes)
for v in tf.compat.v1.get_collection(
tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope='on_cpu'):
self.assertDeviceEqual(v.device, '/cpu:0')
for v in tf.compat.v1.get_collection(
tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope='on_gpu'):
self.assertDeviceEqual(v.device, '/gpu:0')
def testHalfSizeImages(self):
batch_size = 5
height, width = 150, 150
num_classes = 1000
inputs = tf.random.uniform((batch_size, height, width, 3))
logits, end_points = inception.inception_v4(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Mixed_7d']
self.assertListEqual(pre_pool.get_shape().as_list(),
[batch_size, 3, 3, 1536])
def testGlobalPool(self):
batch_size = 1
height, width = 350, 400
num_classes = 1000
inputs = tf.random.uniform((batch_size, height, width, 3))
logits, end_points = inception.inception_v4(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Mixed_7d']
self.assertListEqual(pre_pool.get_shape().as_list(),
[batch_size, 9, 11, 1536])
def testGlobalPoolUnknownImageShape(self):
batch_size = 1
height, width = 350, 400
num_classes = 1000
with self.test_session() as sess:
inputs = tf.compat.v1.placeholder(tf.float32, (batch_size, None, None, 3))
logits, end_points = inception.inception_v4(
inputs, num_classes, create_aux_logits=False)
self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[batch_size, num_classes])
pre_pool = end_points['Mixed_7d']
images = tf.random.uniform((batch_size, height, width, 3))
sess.run(tf.compat.v1.global_variables_initializer())
logits_out, pre_pool_out = sess.run([logits, pre_pool],
{inputs: images.eval()})
self.assertTupleEqual(logits_out.shape, (batch_size, num_classes))
self.assertTupleEqual(pre_pool_out.shape, (batch_size, 9, 11, 1536))
def testUnknownBatchSize(self):
batch_size = 1
height, width = 299, 299
num_classes = 1000
with self.test_session() as sess:
inputs = tf.compat.v1.placeholder(tf.float32, (None, height, width, 3))
logits, _ = inception.inception_v4(inputs, num_classes)
self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
self.assertListEqual(logits.get_shape().as_list(),
[None, num_classes])
images = tf.random.uniform((batch_size, height, width, 3))
sess.run(tf.compat.v1.global_variables_initializer())
output = sess.run(logits, {inputs: images.eval()})
self.assertEquals(output.shape, (batch_size, num_classes))
def testEvaluation(self):
batch_size = 2
height, width = 299, 299
num_classes = 1000
with self.test_session() as sess:
eval_inputs = tf.random.uniform((batch_size, height, width, 3))
logits, _ = inception.inception_v4(eval_inputs,
num_classes,
is_training=False)
predictions = tf.argmax(input=logits, axis=1)
sess.run(tf.compat.v1.global_variables_initializer())
output = sess.run(predictions)
self.assertEquals(output.shape, (batch_size,))
def testTrainEvalWithReuse(self):
train_batch_size = 5
eval_batch_size = 2
height, width = 150, 150
num_classes = 1000
with self.test_session() as sess:
train_inputs = tf.random.uniform((train_batch_size, height, width, 3))
inception.inception_v4(train_inputs, num_classes)
eval_inputs = tf.random.uniform((eval_batch_size, height, width, 3))
logits, _ = inception.inception_v4(eval_inputs,
num_classes,
is_training=False,
reuse=True)
predictions = tf.argmax(input=logits, axis=1)
sess.run(tf.compat.v1.global_variables_initializer())
output = sess.run(predictions)
self.assertEquals(output.shape, (eval_batch_size,))
def testNoBatchNormScaleByDefault(self):
height, width = 299, 299
num_classes = 1000
inputs = tf.compat.v1.placeholder(tf.float32, (1, height, width, 3))
with contrib_slim.arg_scope(inception.inception_v4_arg_scope()):
inception.inception_v4(inputs, num_classes, is_training=False)
self.assertEqual(tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$'), [])
def testBatchNormScale(self):
height, width = 299, 299
num_classes = 1000
inputs = tf.compat.v1.placeholder(tf.float32, (1, height, width, 3))
with contrib_slim.arg_scope(
inception.inception_v4_arg_scope(batch_norm_scale=True)):
inception.inception_v4(inputs, num_classes, is_training=False)
gamma_names = set(
v.op.name
for v in tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$'))
self.assertGreater(len(gamma_names), 0)
for v in tf.compat.v1.global_variables('.*/BatchNorm/moving_mean:0$'):
self.assertIn(v.op.name[:-len('moving_mean')] + 'gamma', gamma_names)
if __name__ == '__main__':
tf.test.main()
@@ -0,0 +1,98 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains a variant of the LeNet model definition."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
slim = contrib_slim
def lenet(images, num_classes=10, is_training=False,
dropout_keep_prob=0.5,
prediction_fn=slim.softmax,
scope='LeNet'):
"""Creates a variant of the LeNet model.
Note that since the output is a set of 'logits', the values fall in the
interval of (-infinity, infinity). Consequently, to convert the outputs to a
probability distribution over the characters, one will need to convert them
using the softmax function:
logits = lenet.lenet(images, is_training=False)
probabilities = tf.nn.softmax(logits)
predictions = tf.argmax(logits, 1)
Args:
images: A batch of `Tensors` of size [batch_size, height, width, channels].
num_classes: the number of classes in the dataset. If 0 or None, the logits
layer is omitted and the input features to the logits layer are returned
instead.
is_training: specifies whether or not we're currently training the model.
This variable will determine the behaviour of the dropout layer.
dropout_keep_prob: the percentage of activation values that are retained.
prediction_fn: a function to get predictions out of logits.
scope: Optional variable_scope.
Returns:
net: a 2D Tensor with the logits (pre-softmax activations) if num_classes
is a non-zero integer, or the inon-dropped-out nput to the logits layer
if num_classes is 0 or None.
end_points: a dictionary from components of the network to the corresponding
activation.
"""
end_points = {}
with tf.compat.v1.variable_scope(scope, 'LeNet', [images]):
net = end_points['conv1'] = slim.conv2d(images, 32, [5, 5], scope='conv1')
net = end_points['pool1'] = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
net = end_points['conv2'] = slim.conv2d(net, 64, [5, 5], scope='conv2')
net = end_points['pool2'] = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
net = slim.flatten(net)
end_points['Flatten'] = net
net = end_points['fc3'] = slim.fully_connected(net, 1024, scope='fc3')
if not num_classes:
return net, end_points
net = end_points['dropout3'] = slim.dropout(
net, dropout_keep_prob, is_training=is_training, scope='dropout3')
logits = end_points['Logits'] = slim.fully_connected(
net, num_classes, activation_fn=None, scope='fc4')
end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
return logits, end_points
lenet.default_image_size = 28
def lenet_arg_scope(weight_decay=0.0):
"""Defines the default lenet argument scope.
Args:
weight_decay: The weight decay to use for regularizing the model.
Returns:
An `arg_scope` to use for the inception v3 model.
"""
with slim.arg_scope(
[slim.conv2d, slim.fully_connected],
weights_regularizer=slim.l2_regularizer(weight_decay),
weights_initializer=tf.compat.v1.truncated_normal_initializer(stddev=0.1),
activation_fn=tf.nn.relu) as sc:
return sc
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding" addBOMForNewFiles="with NO BOM" />
</project>
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6" project-jdk-type="Python SDK" />
</project>
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/mobilenet.iml" filepath="$PROJECT_DIR$/.idea/mobilenet.iml" />
</modules>
</component>
</project>
@@ -0,0 +1,166 @@
# MobileNet
This folder contains building code for
[MobileNetV2](https://arxiv.org/abs/1801.04381) and
[MobilenetV3](https://arxiv.org/abs/1905.02244) networks. The architectural
definition for each model is located in [mobilenet_v2.py](mobilenet_v2.py) and
[mobilenet_v3.py](mobilenet_v3.py) respectively.
For MobilenetV1 please refer to this [page](../mobilenet_v1.md)
We have also introduced a family of MobileNets customized for the Edge TPU
accelerator found in
[Google Pixel4](https://blog.google/products/pixel/pixel-4/) devices. The
architectural definition for MobileNetEdgeTPU is located in
[mobilenet_v3.py](mobilenet_v3.py)
## Performance
### Mobilenet V3 latency
This is the timing of [MobileNetV2] vs [MobileNetV3] using TF-Lite on the large
core of Pixel 1 phone.
![Mobilenet V2 and V3 Latency for Pixel 1.png](g3doc/latency_pixel1.png)
### MACs
MACs, also sometimes known as MADDs - the number of multiply-accumulates needed
to compute an inference on a single image is a common metric to measure the
efficiency of the model. Full size Mobilenet V3 on image size 224 uses ~215
Million MADDs (MMadds) while achieving accuracy 75.1%, while Mobilenet V2 uses
~300MMadds and achieving accuracy 72%. By comparison ResNet-50 uses
approximately 3500 MMAdds while achieving 76% accuracy.
Below is the graph comparing Mobilenets and a few selected networks. The size of
each blob represents the number of parameters. Note for
[ShuffleNet](https://arxiv.org/abs/1707.01083) there are no published size
numbers. We estimate it to be comparable to MobileNetV2 numbers.
![madds_top1_accuracy](g3doc/madds_top1_accuracy.png)
### Mobilenet EdgeTPU latency
The figure below shows the Pixel 4 Edge TPU latency of int8-quantized Mobilenet
EdgeTPU compared with MobilenetV2 and the minimalistic variants of MobilenetV3
(see below).
![Mobilenet Edge TPU latency for Pixel 4 Edge TPU.png](g3doc/edgetpu_latency.png)
## Pretrained models
### Mobilenet V3 Imagenet Checkpoints
All mobilenet V3 checkpoints were trained with image resolution 224x224. All
phone latencies are in milliseconds, measured on large core. In addition to
large and small models this page also contains so-called minimalistic models,
these models have the same per-layer dimensions characteristic as MobilenetV3
however, they don't utilize any of the advanced blocks (squeeze-and-excite
units, hard-swish, and 5x5 convolutions). While these models are less efficient
on CPU, we find that they are much more performant on GPU/DSP.
| Imagenet Checkpoint | MACs (M) | Params (M) | Top1 | Pixel 1 | Pixel 2 | Pixel 3 |
| ------------------ | -------- | ---------- | ---- | ------- | ------- | ------- |
| [Large dm=1 (float)] | 217 | 5.4 | 75.2 | 51.2 | 61 | 44 |
| [Large dm=1 (8-bit)] | 217 | 5.4 | 73.9 | 44 | 42.5 | 32 |
| [Large dm=0.75 (float)] | 155 | 4.0 | 73.3 | 39.8 | 48 | 34 |
| [Small dm=1 (float)] | 66 | 2.9 | 67.5 | 15.8 | 19.4 | 14.4 |
| [Small dm=1 (8-bit)] | 66 | 2.9 | 64.9 | 15.5 | 15 | 10.7 |
| [Small dm=0.75 (float)] | 44 | 2.4 | 65.4 | 12.8 | 15.9 | 11.6 |
#### Minimalistic checkpoints:
| Imagenet Checkpoint | MACs (M) | Params (M) | Top1 | Pixel 1 | Pixel 2 | Pixel 3 |
| -------------- | -------- | ---------- | ---- | ------- | ------- | ------- |
| [Large minimalistic (float)] | 209 | 3.9 | 72.3 | 44.1 | 51 | 35 |
| [Large minimalistic (8-bit)][lm8] | 209 | 3.9 | 71.3 | 37 | 35 | 27 |
| [Small minimalistic (float)] | 65 | 2.0 | 61.9 | 12.2 | 15.1 | 11 |
#### Edge TPU checkpoints:
| Imagenet Checkpoint | MACs (M) | Params (M) | Top1 | Pixel 4 Edge TPU | Pixel 4 CPU |
| ----------------- | -------- | ---------- | ---- | ------- | ----------- |
| [MobilenetEdgeTPU dm=0.75 (8-bit)]| 624 | 2.9 | 73.5 | 3.1 | 13.8 |
| [MobilenetEdgeTPU dm=1 (8-bit)] | 990 | 4.0 | 75.6 | 3.6 | 20.6 |
Note: 8-bit quantized versions of the MobilenetEdgeTPU models were obtained
using Tensorflow Lite's
[post training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization)
tool.
[Small minimalistic (float)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-small-minimalistic_224_1.0_float.tgz
[Large minimalistic (float)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large-minimalistic_224_1.0_float.tgz
[lm8]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large-minimalistic_224_1.0_uint8.tgz
[Large dm=1 (float)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_1.0_float.tgz
[Small dm=1 (float)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-small_224_1.0_float.tgz
[Large dm=1 (8-bit)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_1.0_uint8.tgz
[Small dm=1 (8-bit)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-small_224_1.0_uint8.tgz
[Large dm=0.75 (float)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_0.75_float.tgz
[Small dm=0.75 (float)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-small_224_0.75_float.tgz
[MobilenetEdgeTPU dm=0.75 (8-bit)]: https://storage.cloud.google.com/mobilenet_edgetpu/checkpoints/mobilenet_edgetpu_224_0.75.tgz
[MobilenetEdgeTPU dm=1 (8-bit)]: https://storage.cloud.google.com/mobilenet_edgetpu/checkpoints/mobilenet_edgetpu_224_1.0.tgz
### Mobilenet V2 Imagenet Checkpoints
Classification Checkpoint | MACs (M) | Parameters (M) | Top 1 Accuracy | Top 5 Accuracy | Mobile CPU (ms) Pixel 1
---------------------------------------------------------------------------------------------------------- | -------- | -------------- | -------------- | -------------- | -----------------------
[mobilenet_v2_1.4_224](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz) | 582 | 6.06 | 75.0 | 92.5 | 138.0
[mobilenet_v2_1.3_224](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.3_224.tgz) | 509 | 5.34 | 74.4 | 92.1 | 123.0
[mobilenet_v2_1.0_224](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) | 300 | 3.47 | 71.8 | 91.0 | 73.8
[mobilenet_v2_1.0_192](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_192.tgz) | 221 | 3.47 | 70.7 | 90.1 | 55.1
[mobilenet_v2_1.0_160](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_160.tgz) | 154 | 3.47 | 68.8 | 89.0 | 40.2
[mobilenet_v2_1.0_128](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_128.tgz) | 99 | 3.47 | 65.3 | 86.9 | 27.6
[mobilenet_v2_1.0_96](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_96.tgz) | 56 | 3.47 | 60.3 | 83.2 | 17.6
[mobilenet_v2_0.75_224](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.75_224.tgz) | 209 | 2.61 | 69.8 | 89.6 | 55.8
[mobilenet_v2_0.75_192](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.75_192.tgz) | 153 | 2.61 | 68.7 | 88.9 | 41.6
[mobilenet_v2_0.75_160](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.75_160.tgz) | 107 | 2.61 | 66.4 | 87.3 | 30.4
[mobilenet_v2_0.75_128](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.75_128.tgz) | 69 | 2.61 | 63.2 | 85.3 | 21.9
[mobilenet_v2_0.75_96](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.75_96.tgz) | 39 | 2.61 | 58.8 | 81.6 | 14.2
[mobilenet_v2_0.5_224](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.5_224.tgz) | 97 | 1.95 | 65.4 | 86.4 | 28.7
[mobilenet_v2_0.5_192](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.5_192.tgz) | 71 | 1.95 | 63.9 | 85.4 | 21.1
[mobilenet_v2_0.5_160](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.5_160.tgz) | 50 | 1.95 | 61.0 | 83.2 | 14.9
[mobilenet_v2_0.5_128](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.5_128.tgz) | 32 | 1.95 | 57.7 | 80.8 | 9.9
[mobilenet_v2_0.5_96](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.5_96.tgz) | 18 | 1.95 | 51.2 | 75.8 | 6.4
[mobilenet_v2_0.35_224](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.35_224.tgz) | 59 | 1.66 | 60.3 | 82.9 | 19.7
[mobilenet_v2_0.35_192](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.35_192.tgz) | 43 | 1.66 | 58.2 | 81.2 | 14.6
[mobilenet_v2_0.35_160](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.35_160.tgz) | 30 | 1.66 | 55.7 | 79.1 | 10.5
[mobilenet_v2_0.35_128](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.35_128.tgz) | 20 | 1.66 | 50.8 | 75.0 | 6.9
[mobilenet_v2_0.35_96](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.35_96.tgz) | 11 | 1.66 | 45.5 | 70.4 | 4.5
## Training
### V3
TODO: Add V3 hyperparameters
### V2
The numbers above can be reproduced using slim's
[`train_image_classifier`](https://github.com/tensorflow/models/blob/master/research/slim/README.md#training-a-model-from-scratch).
Below is the set of parameters that achieves 72.0% for full size MobileNetV2,
after about 700K when trained on 8 GPU. If trained on a single GPU the full
convergence is after 5.5M steps. Also note that learning rate and
num_epochs_per_decay both need to be adjusted depending on how many GPUs are
being used due to slim's internal averaging.
```bash
--model_name="mobilenet_v2"
--learning_rate=0.045 * NUM_GPUS #slim internally averages clones so we compensate
--preprocessing_name="inception_v2"
--label_smoothing=0.1
--moving_average_decay=0.9999
--batch_size= 96
--num_clones = NUM_GPUS # you can use any number here between 1 and 8 depending on your hardware setup.
--learning_rate_decay_factor=0.98
--num_epochs_per_decay = 2.5 / NUM_GPUS # train_image_classifier does per clone epochs
```
# Example
See this [ipython notebook](mobilenet_example.ipynb) or open and run the network
directly in
[Colaboratory](https://colab.research.google.com/github/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet_example.ipynb).
[MobilenetV2]: https://arxiv.org/abs/1801.04381
[MobilenetV3]: https://arxiv.org/abs/1905.02244
@@ -0,0 +1,475 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Convolution blocks for mobilenet."""
import contextlib
import functools
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
slim = contrib_slim
def _fixed_padding(inputs, kernel_size, rate=1):
"""Pads the input along the spatial dimensions independently of input size.
Pads the input such that if it was used in a convolution with 'VALID' padding,
the output would have the same dimensions as if the unpadded input was used
in a convolution with 'SAME' padding.
Args:
inputs: A tensor of size [batch, height_in, width_in, channels].
kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
rate: An integer, rate for atrous convolution.
Returns:
output: A tensor of size [batch, height_out, width_out, channels] with the
input, either intact (if kernel_size == 1) or padded (if kernel_size > 1).
"""
kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1),
kernel_size[0] + (kernel_size[0] - 1) * (rate - 1)]
pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1]
pad_beg = [pad_total[0] // 2, pad_total[1] // 2]
pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]]
padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]],
[pad_beg[1], pad_end[1]], [0, 0]])
return padded_inputs
def _make_divisible(v, divisor, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
def _split_divisible(num, num_ways, divisible_by=8):
"""Evenly splits num, num_ways so each piece is a multiple of divisible_by."""
assert num % divisible_by == 0
assert num / num_ways >= divisible_by
# Note: want to round down, we adjust each split to match the total.
base = num // num_ways // divisible_by * divisible_by
result = []
accumulated = 0
for i in range(num_ways):
r = base
while accumulated + r < num * (i + 1) / num_ways:
r += divisible_by
result.append(r)
accumulated += r
assert accumulated == num
return result
@contextlib.contextmanager
def _v1_compatible_scope_naming(scope):
"""v1 compatible scope naming."""
if scope is None: # Create uniqified separable blocks.
with tf.compat.v1.variable_scope(None, default_name='separable') as s, \
tf.compat.v1.name_scope(s.original_name_scope):
yield ''
else:
# We use scope_depthwise, scope_pointwise for compatibility with V1 ckpts.
# which provide numbered scopes.
scope += '_'
yield scope
@slim.add_arg_scope
def split_separable_conv2d(input_tensor,
num_outputs,
scope=None,
normalizer_fn=None,
stride=1,
rate=1,
endpoints=None,
use_explicit_padding=False):
"""Separable mobilenet V1 style convolution.
Depthwise convolution, with default non-linearity,
followed by 1x1 depthwise convolution. This is similar to
slim.separable_conv2d, but differs in tha it applies batch
normalization and non-linearity to depthwise. This matches
the basic building of Mobilenet Paper
(https://arxiv.org/abs/1704.04861)
Args:
input_tensor: input
num_outputs: number of outputs
scope: optional name of the scope. Note if provided it will use
scope_depthwise for deptwhise, and scope_pointwise for pointwise.
normalizer_fn: which normalizer function to use for depthwise/pointwise
stride: stride
rate: output rate (also known as dilation rate)
endpoints: optional, if provided, will export additional tensors to it.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
Returns:
output tesnor
"""
with _v1_compatible_scope_naming(scope) as scope:
dw_scope = scope + 'depthwise'
endpoints = endpoints if endpoints is not None else {}
kernel_size = [3, 3]
padding = 'SAME'
if use_explicit_padding:
padding = 'VALID'
input_tensor = _fixed_padding(input_tensor, kernel_size, rate)
net = slim.separable_conv2d(
input_tensor,
None,
kernel_size,
depth_multiplier=1,
stride=stride,
rate=rate,
normalizer_fn=normalizer_fn,
padding=padding,
scope=dw_scope)
endpoints[dw_scope] = net
pw_scope = scope + 'pointwise'
net = slim.conv2d(
net,
num_outputs, [1, 1],
stride=1,
normalizer_fn=normalizer_fn,
scope=pw_scope)
endpoints[pw_scope] = net
return net
def expand_input_by_factor(n, divisible_by=8):
return lambda num_inputs, **_: _make_divisible(num_inputs * n, divisible_by)
def split_conv(input_tensor,
num_outputs,
num_ways,
scope,
divisible_by=8,
**kwargs):
"""Creates a split convolution.
Split convolution splits the input and output into
'num_blocks' blocks of approximately the same size each,
and only connects $i$-th input to $i$ output.
Args:
input_tensor: input tensor
num_outputs: number of output filters
num_ways: num blocks to split by.
scope: scope for all the operators.
divisible_by: make sure that every part is divisiable by this.
**kwargs: will be passed directly into conv2d operator
Returns:
tensor
"""
b = input_tensor.get_shape().as_list()[3]
if num_ways == 1 or min(b // num_ways,
num_outputs // num_ways) < divisible_by:
# Don't do any splitting if we end up with less than 8 filters
# on either side.
return slim.conv2d(input_tensor, num_outputs, [1, 1], scope=scope, **kwargs)
outs = []
input_splits = _split_divisible(b, num_ways, divisible_by=divisible_by)
output_splits = _split_divisible(
num_outputs, num_ways, divisible_by=divisible_by)
inputs = tf.split(input_tensor, input_splits, axis=3, name='split_' + scope)
base = scope
for i, (input_tensor, out_size) in enumerate(zip(inputs, output_splits)):
scope = base + '_part_%d' % (i,)
n = slim.conv2d(input_tensor, out_size, [1, 1], scope=scope, **kwargs)
n = tf.identity(n, scope + '_output')
outs.append(n)
return tf.concat(outs, 3, name=scope + '_concat')
@slim.add_arg_scope
def expanded_conv(input_tensor,
num_outputs,
expansion_size=expand_input_by_factor(6),
stride=1,
rate=1,
kernel_size=(3, 3),
residual=True,
normalizer_fn=None,
split_projection=1,
split_expansion=1,
split_divisible_by=8,
expansion_transform=None,
depthwise_location='expansion',
depthwise_channel_multiplier=1,
endpoints=None,
use_explicit_padding=False,
padding='SAME',
inner_activation_fn=None,
depthwise_activation_fn=None,
project_activation_fn=tf.identity,
depthwise_fn=slim.separable_conv2d,
expansion_fn=split_conv,
projection_fn=split_conv,
scope=None):
"""Depthwise Convolution Block with expansion.
Builds a composite convolution that has the following structure
expansion (1x1) -> depthwise (kernel_size) -> projection (1x1)
Args:
input_tensor: input
num_outputs: number of outputs in the final layer.
expansion_size: the size of expansion, could be a constant or a callable.
If latter it will be provided 'num_inputs' as an input. For forward
compatibility it should accept arbitrary keyword arguments.
Default will expand the input by factor of 6.
stride: depthwise stride
rate: depthwise rate
kernel_size: depthwise kernel
residual: whether to include residual connection between input
and output.
normalizer_fn: batchnorm or otherwise
split_projection: how many ways to split projection operator
(that is conv expansion->bottleneck)
split_expansion: how many ways to split expansion op
(that is conv bottleneck->expansion) ops will keep depth divisible
by this value.
split_divisible_by: make sure every split group is divisible by this number.
expansion_transform: Optional function that takes expansion
as a single input and returns output.
depthwise_location: where to put depthwise covnvolutions supported
values None, 'input', 'output', 'expansion'
depthwise_channel_multiplier: depthwise channel multiplier:
each input will replicated (with different filters)
that many times. So if input had c channels,
output will have c x depthwise_channel_multpilier.
endpoints: An optional dictionary into which intermediate endpoints are
placed. The keys "expansion_output", "depthwise_output",
"projection_output" and "expansion_transform" are always populated, even
if the corresponding functions are not invoked.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
padding: Padding type to use if `use_explicit_padding` is not set.
inner_activation_fn: activation function to use in all inner convolutions.
If none, will rely on slim default scopes.
depthwise_activation_fn: activation function to use for deptwhise only.
If not provided will rely on slim default scopes. If both
inner_activation_fn and depthwise_activation_fn are provided,
depthwise_activation_fn takes precedence over inner_activation_fn.
project_activation_fn: activation function for the project layer.
(note this layer is not affected by inner_activation_fn)
depthwise_fn: Depthwise convolution function.
expansion_fn: Expansion convolution function. If use custom function then
"split_expansion" and "split_divisible_by" will be ignored.
projection_fn: Projection convolution function. If use custom function then
"split_projection" and "split_divisible_by" will be ignored.
scope: optional scope.
Returns:
Tensor of depth num_outputs
Raises:
TypeError: on inval
"""
conv_defaults = {}
dw_defaults = {}
if inner_activation_fn is not None:
conv_defaults['activation_fn'] = inner_activation_fn
dw_defaults['activation_fn'] = inner_activation_fn
if depthwise_activation_fn is not None:
dw_defaults['activation_fn'] = depthwise_activation_fn
# pylint: disable=g-backslash-continuation
with tf.compat.v1.variable_scope(scope, default_name='expanded_conv') as s, \
tf.compat.v1.name_scope(s.original_name_scope), \
slim.arg_scope((slim.conv2d,), **conv_defaults), \
slim.arg_scope((slim.separable_conv2d,), **dw_defaults):
prev_depth = input_tensor.get_shape().as_list()[3]
if depthwise_location not in [None, 'input', 'output', 'expansion']:
raise TypeError('%r is unknown value for depthwise_location' %
depthwise_location)
if use_explicit_padding:
if padding != 'SAME':
raise TypeError('`use_explicit_padding` should only be used with '
'"SAME" padding.')
padding = 'VALID'
depthwise_func = functools.partial(
depthwise_fn,
num_outputs=None,
kernel_size=kernel_size,
depth_multiplier=depthwise_channel_multiplier,
stride=stride,
rate=rate,
normalizer_fn=normalizer_fn,
padding=padding,
scope='depthwise')
# b1 -> b2 * r -> b2
# i -> (o * r) (bottleneck) -> o
input_tensor = tf.identity(input_tensor, 'input')
net = input_tensor
if depthwise_location == 'input':
if use_explicit_padding:
net = _fixed_padding(net, kernel_size, rate)
net = depthwise_func(net, activation_fn=None)
net = tf.identity(net, name='depthwise_output')
if endpoints is not None:
endpoints['depthwise_output'] = net
if callable(expansion_size):
inner_size = expansion_size(num_inputs=prev_depth)
else:
inner_size = expansion_size
if inner_size > net.shape[3]:
if expansion_fn == split_conv:
expansion_fn = functools.partial(
expansion_fn,
num_ways=split_expansion,
divisible_by=split_divisible_by,
stride=1)
net = expansion_fn(
net,
inner_size,
scope='expand',
normalizer_fn=normalizer_fn)
net = tf.identity(net, 'expansion_output')
if endpoints is not None:
endpoints['expansion_output'] = net
if depthwise_location == 'expansion':
if use_explicit_padding:
net = _fixed_padding(net, kernel_size, rate)
net = depthwise_func(net)
net = tf.identity(net, name='depthwise_output')
if endpoints is not None:
endpoints['depthwise_output'] = net
if expansion_transform:
net = expansion_transform(expansion_tensor=net, input_tensor=input_tensor)
# Note in contrast with expansion, we always have
# projection to produce the desired output size.
if projection_fn == split_conv:
projection_fn = functools.partial(
projection_fn,
num_ways=split_projection,
divisible_by=split_divisible_by,
stride=1)
net = projection_fn(
net,
num_outputs,
scope='project',
normalizer_fn=normalizer_fn,
activation_fn=project_activation_fn)
if endpoints is not None:
endpoints['projection_output'] = net
if depthwise_location == 'output':
if use_explicit_padding:
net = _fixed_padding(net, kernel_size, rate)
net = depthwise_func(net, activation_fn=None)
net = tf.identity(net, name='depthwise_output')
if endpoints is not None:
endpoints['depthwise_output'] = net
if callable(residual): # custom residual
net = residual(input_tensor=input_tensor, output_tensor=net)
elif (residual and
# stride check enforces that we don't add residuals when spatial
# dimensions are None
stride == 1 and
# Depth matches
net.get_shape().as_list()[3] ==
input_tensor.get_shape().as_list()[3]):
net += input_tensor
return tf.identity(net, name='output')
@slim.add_arg_scope
def squeeze_excite(input_tensor,
divisible_by=8,
squeeze_factor=3,
inner_activation_fn=tf.nn.relu,
gating_fn=tf.sigmoid,
squeeze_input_tensor=None,
pool=None):
"""Squeeze excite block for Mobilenet V3.
If the squeeze_input_tensor - or the input_tensor if squeeze_input_tensor is
None - contains variable dimensions (Nonetype in tensor shape), perform
average pooling (as the first step in the squeeze operation) by calling
reduce_mean across the H/W of the input tensor.
Args:
input_tensor: input tensor to apply SE block to.
divisible_by: ensures all inner dimensions are divisible by this number.
squeeze_factor: the factor of squeezing in the inner fully connected layer
inner_activation_fn: non-linearity to be used in inner layer.
gating_fn: non-linearity to be used for final gating function
squeeze_input_tensor: custom tensor to use for computing gating activation.
If provided the result will be input_tensor * SE(squeeze_input_tensor)
instead of input_tensor * SE(input_tensor).
pool: if number is provided will average pool with that kernel size
to compute inner tensor, followed by bilinear upsampling.
Returns:
Gated input_tensor. (e.g. X * SE(X))
"""
with tf.compat.v1.variable_scope('squeeze_excite'):
if squeeze_input_tensor is None:
squeeze_input_tensor = input_tensor
input_size = input_tensor.shape.as_list()[1:3]
pool_height, pool_width = squeeze_input_tensor.shape.as_list()[1:3]
stride = 1
if pool is not None and pool_height >= pool:
pool_height, pool_width, stride = pool, pool, pool
input_channels = squeeze_input_tensor.shape.as_list()[3]
output_channels = input_tensor.shape.as_list()[3]
squeeze_channels = _make_divisible(
input_channels / squeeze_factor, divisor=divisible_by)
if pool is None:
pooled = tf.reduce_mean(squeeze_input_tensor, axis=[1, 2], keepdims=True)
else:
pooled = tf.nn.avg_pool(
squeeze_input_tensor, (1, pool_height, pool_width, 1),
strides=(1, stride, stride, 1),
padding='VALID')
squeeze = slim.conv2d(
pooled,
kernel_size=(1, 1),
num_outputs=squeeze_channels,
normalizer_fn=None,
activation_fn=inner_activation_fn)
excite_outputs = output_channels
excite = slim.conv2d(squeeze, num_outputs=excite_outputs,
kernel_size=[1, 1],
normalizer_fn=None,
activation_fn=gating_fn)
if pool is not None:
# Note: As of 03/20/2019 only BILINEAR (the default) with
# align_corners=True has gradients implemented in TPU.
excite = tf.image.resize_images(
excite, input_size,
align_corners=True)
result = input_tensor * excite
return result
@@ -0,0 +1,501 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mobilenet Base Class."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import contextlib
import copy
import os
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim
slim = contrib_slim
@slim.add_arg_scope
def apply_activation(x, name=None, activation_fn=None):
return activation_fn(x, name=name) if activation_fn else x
def _fixed_padding(inputs, kernel_size, rate=1):
"""Pads the input along the spatial dimensions independently of input size.
Pads the input such that if it was used in a convolution with 'VALID' padding,
the output would have the same dimensions as if the unpadded input was used
in a convolution with 'SAME' padding.
Args:
inputs: A tensor of size [batch, height_in, width_in, channels].
kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
rate: An integer, rate for atrous convolution.
Returns:
output: A tensor of size [batch, height_out, width_out, channels] with the
input, either intact (if kernel_size == 1) or padded (if kernel_size > 1).
"""
kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1),
kernel_size[0] + (kernel_size[0] - 1) * (rate - 1)]
pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1]
pad_beg = [pad_total[0] // 2, pad_total[1] // 2]
pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]]
padded_inputs = tf.pad(
tensor=inputs,
paddings=[[0, 0], [pad_beg[0], pad_end[0]], [pad_beg[1], pad_end[1]],
[0, 0]])
return padded_inputs
def _make_divisible(v, divisor, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return int(new_v)
@contextlib.contextmanager
def _set_arg_scope_defaults(defaults):
"""Sets arg scope defaults for all items present in defaults.
Args:
defaults: dictionary/list of pairs, containing a mapping from
function to a dictionary of default args.
Yields:
context manager where all defaults are set.
"""
if hasattr(defaults, 'items'):
items = list(defaults.items())
else:
items = defaults
if not items:
yield
else:
func, default_arg = items[0]
with slim.arg_scope(func, **default_arg):
with _set_arg_scope_defaults(items[1:]):
yield
@slim.add_arg_scope
def depth_multiplier(output_params,
multiplier,
divisible_by=8,
min_depth=8,
**unused_kwargs):
if 'num_outputs' not in output_params:
return
d = output_params['num_outputs']
output_params['num_outputs'] = _make_divisible(d * multiplier, divisible_by,
min_depth)
_Op = collections.namedtuple('Op', ['op', 'params', 'multiplier_func'])
def op(opfunc, multiplier_func=depth_multiplier, **params):
multiplier = params.pop('multiplier_transform', multiplier_func)
return _Op(opfunc, params=params, multiplier_func=multiplier)
class NoOpScope(object):
"""No-op context manager."""
def __enter__(self):
return None
def __exit__(self, exc_type, exc_value, traceback):
return False
def safe_arg_scope(funcs, **kwargs):
"""Returns `slim.arg_scope` with all None arguments removed.
Arguments:
funcs: Functions to pass to `arg_scope`.
**kwargs: Arguments to pass to `arg_scope`.
Returns:
arg_scope or No-op context manager.
Note: can be useful if None value should be interpreted as "do not overwrite
this parameter value".
"""
filtered_args = {name: value for name, value in kwargs.items()
if value is not None}
if filtered_args:
return slim.arg_scope(funcs, **filtered_args)
else:
return NoOpScope()
@slim.add_arg_scope
def mobilenet_base( # pylint: disable=invalid-name
inputs,
conv_defs,
multiplier=1.0,
final_endpoint=None,
output_stride=None,
use_explicit_padding=False,
scope=None,
is_training=False):
"""Mobilenet base network.
Constructs a network from inputs to the given final endpoint. By default
the network is constructed in inference mode. To create network
in training mode use:
with slim.arg_scope(mobilenet.training_scope()):
logits, endpoints = mobilenet_base(...)
Args:
inputs: a tensor of shape [batch_size, height, width, channels].
conv_defs: A list of op(...) layers specifying the net architecture.
multiplier: Float multiplier for the depth (number of channels)
for all convolution ops. The value must be greater than zero. Typical
usage will be to set this value in (0, 1) to reduce the number of
parameters or computation cost of the model.
final_endpoint: The name of last layer, for early termination for
for V1-based networks: last layer is "layer_14", for V2: "layer_20"
output_stride: An integer that specifies the requested ratio of input to
output spatial resolution. If not None, then we invoke atrous convolution
if necessary to prevent the network from reducing the spatial resolution
of the activation maps. Allowed values are 1 or any even number, excluding
zero. Typical values are 8 (accurate fully convolutional mode), 16
(fast fully convolutional mode), and 32 (classification mode).
NOTE- output_stride relies on all consequent operators to support dilated
operators via "rate" parameter. This might require wrapping non-conv
operators to operate properly.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
scope: optional variable scope.
is_training: How to setup batch_norm and other ops. Note: most of the time
this does not need be set directly. Use mobilenet.training_scope() to set
up training instead. This parameter is here for backward compatibility
only. It is safe to set it to the value matching
training_scope(is_training=...). It is also safe to explicitly set
it to False, even if there is outer training_scope set to to training.
(The network will be built in inference mode). If this is set to None,
no arg_scope is added for slim.batch_norm's is_training parameter.
Returns:
tensor_out: output tensor.
end_points: a set of activations for external use, for example summaries or
losses.
Raises:
ValueError: depth_multiplier <= 0, or the target output_stride is not
allowed.
"""
if multiplier <= 0:
raise ValueError('multiplier is not greater than zero.')
# Set conv defs defaults and overrides.
conv_defs_defaults = conv_defs.get('defaults', {})
conv_defs_overrides = conv_defs.get('overrides', {})
if use_explicit_padding:
conv_defs_overrides = copy.deepcopy(conv_defs_overrides)
conv_defs_overrides[
(slim.conv2d, slim.separable_conv2d)] = {'padding': 'VALID'}
if output_stride is not None:
if output_stride == 0 or (output_stride > 1 and output_stride % 2):
raise ValueError('Output stride must be None, 1 or a multiple of 2.')
# a) Set the tensorflow scope
# b) set padding to default: note we might consider removing this
# since it is also set by mobilenet_scope
# c) set all defaults
# d) set all extra overrides.
# pylint: disable=g-backslash-continuation
with _scope_all(scope, default_scope='Mobilenet'), \
safe_arg_scope([slim.batch_norm], is_training=is_training), \
_set_arg_scope_defaults(conv_defs_defaults), \
_set_arg_scope_defaults(conv_defs_overrides):
# The current_stride variable keeps track of the output stride of the
# activations, i.e., the running product of convolution strides up to the
# current network layer. This allows us to invoke atrous convolution
# whenever applying the next convolution would result in the activations
# having output stride larger than the target output_stride.
current_stride = 1
# The atrous convolution rate parameter.
rate = 1
net = inputs
# Insert default parameters before the base scope which includes
# any custom overrides set in mobilenet.
end_points = {}
scopes = {}
for i, opdef in enumerate(conv_defs['spec']):
params = dict(opdef.params)
opdef.multiplier_func(params, multiplier)
stride = params.get('stride', 1)
if output_stride is not None and current_stride == output_stride:
# If we have reached the target output_stride, then we need to employ
# atrous convolution with stride=1 and multiply the atrous rate by the
# current unit's stride for use in subsequent layers.
layer_stride = 1
layer_rate = rate
rate *= stride
else:
layer_stride = stride
layer_rate = 1
current_stride *= stride
# Update params.
params['stride'] = layer_stride
# Only insert rate to params if rate > 1 and kernel size is not [1, 1].
if layer_rate > 1:
if tuple(params.get('kernel_size', [])) != (1, 1):
# We will apply atrous rate in the following cases:
# 1) When kernel_size is not in params, the operation then uses
# default kernel size 3x3.
# 2) When kernel_size is in params, and if the kernel_size is not
# equal to (1, 1) (there is no need to apply atrous convolution to
# any 1x1 convolution).
params['rate'] = layer_rate
# Set padding
if use_explicit_padding:
if 'kernel_size' in params:
net = _fixed_padding(net, params['kernel_size'], layer_rate)
else:
params['use_explicit_padding'] = True
end_point = 'layer_%d' % (i + 1)
try:
net = opdef.op(net, **params)
except Exception:
print('Failed to create op %i: %r params: %r' % (i, opdef, params))
raise
end_points[end_point] = net
scope = os.path.dirname(net.name)
scopes[scope] = end_point
if final_endpoint is not None and end_point == final_endpoint:
break
# Add all tensors that end with 'output' to
# endpoints
for t in net.graph.get_operations():
scope = os.path.dirname(t.name)
bn = os.path.basename(t.name)
if scope in scopes and t.name.endswith('output'):
end_points[scopes[scope] + '/' + bn] = t.outputs[0]
return net, end_points
@contextlib.contextmanager
def _scope_all(scope, default_scope=None):
with tf.compat.v1.variable_scope(scope, default_name=default_scope) as s,\
tf.compat.v1.name_scope(s.original_name_scope):
yield s
@slim.add_arg_scope
def mobilenet(inputs,
num_classes=1001,
prediction_fn=slim.softmax,
reuse=None,
scope='Mobilenet',
base_only=False,
**mobilenet_args):
"""Mobilenet model for classification, supports both V1 and V2.
Note: default mode is inference, use mobilenet.training_scope to create
training network.
Args:
inputs: a tensor of shape [batch_size, height, width, channels].
num_classes: number of predicted classes. If 0 or None, the logits layer
is omitted and the input features to the logits layer (before dropout)
are returned instead.
prediction_fn: a function to get predictions out of logits
(default softmax).
reuse: whether or not the network and its variables should be reused. To be
able to reuse 'scope' must be given.
scope: Optional variable_scope.
base_only: if True will only create the base of the network (no pooling
and no logits).
**mobilenet_args: passed to mobilenet_base verbatim.
- conv_defs: list of conv defs
- multiplier: Float multiplier for the depth (number of channels)
for all convolution ops. The value must be greater than zero. Typical
usage will be to set this value in (0, 1) to reduce the number of
parameters or computation cost of the model.
- output_stride: will ensure that the last layer has at most total stride.
If the architecture calls for more stride than that provided
(e.g. output_stride=16, but the architecture has 5 stride=2 operators),
it will replace output_stride with fractional convolutions using Atrous
Convolutions.
Returns:
logits: the pre-softmax activations, a tensor of size
[batch_size, num_classes]
end_points: a dictionary from components of the network to the corresponding
activation tensor.
Raises:
ValueError: Input rank is invalid.
"""
is_training = mobilenet_args.get('is_training', False)
input_shape = inputs.get_shape().as_list()
if len(input_shape) != 4:
raise ValueError('Expected rank 4 input, was: %d' % len(input_shape))
with tf.compat.v1.variable_scope(scope, 'Mobilenet', reuse=reuse) as scope:
inputs = tf.identity(inputs, 'input')
net, end_points = mobilenet_base(inputs, scope=scope, **mobilenet_args)
if base_only:
return net, end_points
net = tf.identity(net, name='embedding')
with tf.compat.v1.variable_scope('Logits'):
net = global_pool(net)
end_points['global_pool'] = net
if not num_classes:
return net, end_points
# net = slim.dropout(net, scope='Dropout', is_training=is_training)
# 1 x 1 x num_classes
# Note: legacy scope name.
# logits = slim.conv2d(
# net,
# num_classes, [1, 1],
# activation_fn=None,
# normalizer_fn=None,
# biases_initializer=tf.compat.v1.zeros_initializer(),
# scope='Conv2d_1c_1x1')
# logits = tf.squeeze(logits, [1, 2])
# use slim.fully_connected instead
net = tf.squeeze(net)
net = slim.dropout(net, keep_prob=0.8, scope='Dropout', is_training=is_training)
logits = slim.fully_connected(
net,
num_classes,
activation_fn=None,
normalizer_fn=None,
scope='FC'
)
#logits = tf.expand_dims(logits, axis=[])
logits = tf.identity(logits, name='output')
end_points['Logits'] = logits
if prediction_fn:
end_points['Predictions'] = prediction_fn(logits, 'Predictions')
return logits, end_points
def global_pool(input_tensor, pool_op=tf.compat.v2.nn.avg_pool2d):
"""Applies avg pool to produce 1x1 output.
NOTE: This function is funcitonally equivalenet to reduce_mean, but it has
baked in average pool which has better support across hardware.
Args:
input_tensor: input tensor
pool_op: pooling op (avg pool is default)
Returns:
a tensor batch_size x 1 x 1 x depth.
"""
shape = input_tensor.get_shape().as_list()
if shape[1] is None or shape[2] is None:
kernel_size = tf.convert_to_tensor(value=[
1,
tf.shape(input=input_tensor)[1],
tf.shape(input=input_tensor)[2], 1
])
else:
kernel_size = [1, shape[1], shape[2], 1]
output = pool_op(
input_tensor, ksize=kernel_size, strides=[1, 1, 1, 1], padding='VALID')
# Recover output shape, for unknown shape.
output.set_shape([None, 1, 1, None])
return output
def training_scope(is_training=True,
weight_decay=0.00004,
stddev=0.09,
dropout_keep_prob=0.8,
bn_decay=0.997):
"""Defines Mobilenet training scope.
Usage:
with tf.contrib.slim.arg_scope(mobilenet.training_scope()):
logits, endpoints = mobilenet_v2.mobilenet(input_tensor)
# the network created will be trainble with dropout/batch norm
# initialized appropriately.
Args:
is_training: if set to False this will ensure that all customizations are
set to non-training mode. This might be helpful for code that is reused
across both training/evaluation, but most of the time training_scope with
value False is not needed. If this is set to None, the parameters is not
added to the batch_norm arg_scope.
weight_decay: The weight decay to use for regularizing the model.
stddev: Standard deviation for initialization, if negative uses xavier.
dropout_keep_prob: dropout keep probability (not set if equals to None).
bn_decay: decay for the batch norm moving averages (not set if equals to
None).
Returns:
An argument scope to use via arg_scope.
"""
# Note: do not introduce parameters that would change the inference
# model here (for example whether to use bias), modify conv_def instead.
batch_norm_params = {
'decay': bn_decay,
'is_training': is_training
}
#if stddev < 0:
# weight_intitializer = slim.initializers.xavier_initializer()
#else:
# weight_intitializer = tf.compat.v1.truncated_normal_initializer(stddev=stddev)
# modified for NPU
weight_2d = tf.initializers.variance_scaling(scale=2., mode="fan_out", distribution="untruncated_normal")
weight_dw = tf.initializers.variance_scaling(scale=2., mode="fan_in", distribution="untruncated_normal")
weight_pw = tf.initializers.variance_scaling(scale=2., mode="fan_out", distribution="untruncated_normal")
weight_fc = tf.initializers.random_normal(stddev=0.01)
# Set weight_decay for weights in Conv and FC layers.
with slim.arg_scope(
#[slim.conv2d, slim.fully_connected, slim.separable_conv2d],
[slim.conv2d],
#weights_initializer=weight_intitializer,
weights_initializer=weight_2d,
normalizer_fn=slim.batch_norm), \
slim.arg_scope([slim.fully_connected], weights_initializer=weight_fc, normalizer_fn=slim.batch_norm), \
slim.arg_scope([slim.separable_conv2d], weights_initializer=weight_dw, pointwise_initializer=weight_pw, normalizer_fn=slim.batch_norm), \
slim.arg_scope([mobilenet_base, mobilenet], is_training=is_training),\
safe_arg_scope([slim.batch_norm], **batch_norm_params), \
safe_arg_scope([slim.dropout], is_training=is_training,
keep_prob=dropout_keep_prob), \
slim.arg_scope([slim.conv2d], \
weights_regularizer=slim.l2_regularizer(weight_decay)), \
slim.arg_scope([slim.separable_conv2d], weights_regularizer=None) as s:
return s

Some files were not shown because too many files have changed in this diff Show More