[add]上传训练benchmark by z00560161
This commit is contained in:
@@ -0,0 +1 @@
|
||||
#!/bin/bash
|
||||
@@ -0,0 +1,25 @@
|
||||
# MobileNet_pytorch训练说明
|
||||
|
||||
### 1. 模型训练参数配置
|
||||
|
||||
在train/yaml/MobileNet.yaml中修改相应配置, 配置项含义:
|
||||
|
||||
```
|
||||
pytorch_config:
|
||||
data_url: 数据集路径
|
||||
epoches: 跑多少个epoch
|
||||
batch_size: 单p默认768 2p默认1536 4p默认3072 8p默认6144
|
||||
lr: 默认参数1p 0.03 2p 0.06 4p 0.12 8p 0.24
|
||||
seed: 123456
|
||||
docker_image: docker 镜像名称:版本号
|
||||
```
|
||||
|
||||
------
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
# MobileNetV2 NPU训练
|
||||
@@ -0,0 +1,29 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
# Module-level buffers: every hook invocation appends to these lists, so they
# grow for as long as the hooks stay registered.
g_feat_in = []
g_feat_out = []
g_grad_in = []
g_grad_out = []


def forward_hook_fn(module, input, output):
    """Record and print what a module received and produced in its forward pass.

    Args:
        module: The object the hook was fired for.
        input: The forward input handed to the hook; appended to ``g_feat_in``.
        output: The forward output handed to the hook; appended to ``g_feat_out``.
    """
    g_feat_in.append(input)
    g_feat_out.append(output)
    # Dump module, input and output, one per line, in that order.
    for item in (module, input, output):
        print(item)
|
||||
|
||||
|
||||
def backward_hook_fn(module, grad_input, grad_output):
    """Record and print the gradients seen by a module's backward hook.

    Args:
        module: The object the hook was fired for.
        grad_input: Gradient w.r.t. the module inputs; appended to ``g_grad_in``.
        grad_output: Gradient w.r.t. the module outputs; appended to
            ``g_grad_out``.
    """
    g_grad_in.append(grad_input)
    g_grad_out.append(grad_output)
    print(module)
    print(grad_input)
    # Previously grad_input was printed a second time here, so grad_output
    # never appeared in the dump.
    print(grad_output)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
+498
@@ -0,0 +1,498 @@
|
||||
import argparse
|
||||
import os
|
||||
import random
|
||||
import shutil
|
||||
import time
|
||||
import warnings
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.nn.parallel
|
||||
import torch.backends.cudnn as cudnn
|
||||
import torch.distributed as dist
|
||||
import torch.optim
|
||||
import torch.multiprocessing as mp
|
||||
import torch.utils.data
|
||||
import torch.utils.data.distributed
|
||||
import torchvision.transforms as transforms
|
||||
import torchvision.datasets as datasets
|
||||
import torchvision.models as models
|
||||
from mobilenet import mobilenet_v2
|
||||
import torch.npu
|
||||
|
||||
# from torch.utils.tensorboard import SummaryWriter
|
||||
|
||||
from apex import amp
|
||||
import numpy as np
|
||||
|
||||
from hook import *
|
||||
|
||||
from benchmark_log import hwlog
|
||||
from benchmark_log.basic_utils import get_environment_info
|
||||
from benchmark_log.basic_utils import get_model_parameter
|
||||
|
||||
|
||||
# model_names = sorted(name for name in models.__dict__
|
||||
# if name.islower() and not name.startswith("__")
|
||||
# and callable(models.__dict__[name]))
|
||||
|
||||
# Command-line interface of the training script. The architecture is fixed to
# MobileNetV2, so there is no --arch option.
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')

# --- dataset and schedule ---------------------------------------------------
parser.add_argument(
    '--data',
    metavar='DIR',
    default='/dataset/imagenet',
    help='path to dataset',
)
parser.add_argument(
    '-j', '--workers',
    type=int,
    default=4,
    metavar='N',
    help='number of data loading workers (default: 4)',
)
parser.add_argument(
    '--epochs',
    type=int,
    default=90,
    metavar='N',
    help='number of total epochs to run',
)
parser.add_argument(
    '--start-epoch',
    type=int,
    default=0,
    metavar='N',
    help='manual epoch number (useful on restarts)',
)
parser.add_argument(
    '-b', '--batch-size',
    type=int,
    default=256,
    metavar='N',
    help='mini-batch size (default: 256), this is the total '
         'batch size of all GPUs on the current node when '
         'using Data Parallel or Distributed Data Parallel',
)

# --- optimizer --------------------------------------------------------------
parser.add_argument(
    '--lr', '--learning-rate',
    dest='lr',
    type=float,
    default=0.1,
    metavar='LR',
    help='initial learning rate',
)
parser.add_argument(
    '--momentum',
    type=float,
    default=0.9,
    metavar='M',
    help='momentum',
)
parser.add_argument(
    '--wd', '--weight-decay',
    dest='weight_decay',
    type=float,
    default=1e-4,
    metavar='W',
    help='weight decay (default: 1e-4)',
)

# --- logging, checkpoints and run mode --------------------------------------
parser.add_argument(
    '-p', '--print-freq',
    type=int,
    default=10,
    metavar='N',
    help='print frequency (default: 10)',
)
parser.add_argument(
    '--resume',
    type=str,
    default='',
    metavar='PATH',
    help='path to latest checkpoint (default: none)',
)
parser.add_argument(
    '-e', '--evaluate',
    dest='evaluate',
    action='store_true',
    help='evaluate model on validation set',
)
parser.add_argument(
    '--pretrained',
    dest='pretrained',
    action='store_true',
    help='use pre-trained model',
)

# --- distributed options ----------------------------------------------------
parser.add_argument(
    '--world-size',
    type=int,
    default=-1,
    help='number of nodes for distributed training',
)
parser.add_argument(
    '--rank',
    type=int,
    default=-1,
    help='node rank for distributed training',
)
parser.add_argument(
    '--dist-url',
    type=str,
    default='tcp://224.66.41.62:23456',
    help='url used to set up distributed training',
)
parser.add_argument(
    '--dist-backend',
    type=str,
    default='nccl',
    help='distributed backend',
)
parser.add_argument(
    '--seed',
    type=int,
    default=None,
    help='seed for initializing training. ',
)
parser.add_argument(
    '--gpu',
    type=int,
    default=None,
    help='GPU id to use.',
)
parser.add_argument(
    '--multiprocessing-distributed',
    action='store_true',
    help='Use multi-processing distributed training to launch '
         'N processes per node, which has N GPUs. This is the '
         'fastest way to use PyTorch for either single node or '
         'multi node data parallel training',
)

# --- mixed precision (apex amp) ---------------------------------------------
parser.add_argument(
    '--amp',
    action='store_true',
    default=False,
    help='use amp to train the model',
)
parser.add_argument(
    '--opt-level',
    type=str,
    default=None,
    help='apex optimize level',
)
# The string default is converted by argparse through ``type=int``.
parser.add_argument(
    '--loss-scale-value',
    type=int,
    default='1024',
    help='static loss scale value',
)

# --- device, early stop and debugging ---------------------------------------
parser.add_argument(
    '--summary-path',
    type=str,
    default=None,
    help='event file path',
)
parser.add_argument(
    '--stop-step-num',
    type=int,
    default=None,
    help='after the stop-step, killing the training task',
)
parser.add_argument(
    '--device',
    type=str,
    default='npu:0',
    help='device type, cpu or npu:x or cuda:x',
)
parser.add_argument(
    '--eval-freq',
    type=int,
    default=10,
    help='test interval',
)
parser.add_argument(
    '--hook',
    action='store_true',
    default=False,
    help='pytorch hook',
)

# Module-level training state shared between main_worker() and train().
best_acc1 = 0
cur_step = 0
|
||||
|
||||
|
||||
def seed_everything(seed, device):
    """Seed every random number generator the training run relies on.

    Args:
        seed: Integer seed applied to ``random``, NumPy and PyTorch, and
            exported as ``PYTHONHASHSEED``.
        device: Device string; the CUDA/cuDNN settings are applied only when it
            contains ``'cuda'``.
    """
    # NOTE(review): the source lost its indentation; the cuDNN flags are assumed
    # to belong to the CUDA-only branch - confirm against the original file.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if 'cuda' not in device:
        return

    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # ``cudnn`` is the same module object as ``torch.backends.cudnn``.
    cudnn.deterministic = True
    cudnn.benchmark = False
|
||||
|
||||
|
||||
def main():
    """Parse the command line, apply the optional seed and start the worker."""
    args = parser.parse_args()

    seed_requested = args.seed is not None
    if seed_requested:
        seed_everything(args.seed, args.device)

        # NOTE(review): assumed to be emitted only when a seed was given
        # (source indentation was lost) - confirm.
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    main_worker(args)
|
||||
|
||||
|
||||
def main_worker(args):
    """Build model, optimizer and data loaders, then train or evaluate.

    Updates the module-level ``best_acc1`` with the best validation top-1 seen
    and writes a checkpoint after every validation pass.

    Args:
        args: Parsed command-line namespace produced by ``parser``.
    """
    # NOTE(review): block nesting was reconstructed from syntax because the
    # source lost its indentation - confirm against the original file.
    global best_acc1
    global cur_step

    global_step = -1
    device = args.device
    on_accelerator = 'npu' in device or 'cuda' in device

    # Bind the process to the requested accelerator before creating the model.
    if 'npu' in device:
        torch.npu.set_device(device)
    if 'cuda' in device:
        torch.cuda.set_device(device)

    model = mobilenet_v2()

    # Optional debugging hooks on every sub-module.
    if args.hook:
        for _, sub_module in model.named_modules():
            sub_module.register_forward_hook(forward_hook_fn)
            sub_module.register_backward_hook(backward_hook_fn)

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )
    criterion = nn.CrossEntropyLoss()

    if on_accelerator:
        model = model.to(device)
        criterion = criterion.to(device)

    if args.amp:
        model, optimizer = amp.initialize(
            model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale_value)

    # Optionally restore model / optimizer / amp state from a checkpoint.
    if args.resume:
        if not os.path.isfile(args.resume):
            print("=> no checkpoint found at '{}'".format(args.resume))
        else:
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            if args.amp:
                amp.load_state_dict(checkpoint['amp'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))

    # Data pipeline: ImageFolder layout with 'train' and 'val' sub-directories.
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    train_dataset = datasets.ImageFolder(traindir, train_transform)

    train_sampler = None
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        num_workers=args.workers,
        pin_memory=True,
        sampler=train_sampler,
        drop_last=True,
    )

    val_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
    val_dataset = datasets.ImageFolder(valdir, val_transform)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
        drop_last=True,
    )

    # Evaluation-only mode: one validation pass and done.
    if args.evaluate:
        validate(val_loader, model, criterion, args, global_step)
        return

    last_epoch = args.epochs - 1
    for epoch in range(args.start_epoch, args.epochs):
        global_step = train(train_loader, model, criterion, optimizer, epoch, args, global_step)

        # Validate every eval_freq epochs and always after the final epoch.
        if (epoch + 1) % (args.eval_freq) == 0 or epoch == last_epoch:
            acc1 = validate(val_loader, model, criterion, args, global_step)

            is_best = acc1 > best_acc1
            best_acc1 = max(acc1, best_acc1)

            state = {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            }
            if args.amp:
                # The loss-scaler state is stored only for amp runs.
                state['amp'] = amp.state_dict()
            save_checkpoint(state, is_best)

        # train() publishes its progress through the global cur_step.
        if args.stop_step_num is not None and cur_step >= args.stop_step_num:
            break
|
||||
|
||||
# sum_writer.close()
|
||||
|
||||
|
||||
def train(train_loader, model, criterion, optimizer, epoch, args, global_step, sum_writer=None):
    """Train the model for one epoch.

    Args:
        train_loader: Iterable yielding ``(images, target)`` batches.
        model: Network being optimized.
        criterion: Loss callable applied to ``(output, target)``.
        optimizer: Optimizer whose learning rate is reset on every step.
        epoch: Zero-based index of the current epoch.
        args: Parsed command-line namespace.
        global_step: Step counter from the caller; overwritten on every batch.
        sum_writer: Unused; kept for interface compatibility.

    Returns:
        The global step of the last processed batch (or the value passed in if
        the loader yielded nothing).
    """
    # NOTE(review): block nesting was reconstructed from syntax because the
    # source lost its indentation - confirm against the original file.
    global cur_step

    # Re-seed per epoch so each epoch gets its own deterministic stream.
    if args.seed is not None:
        seed_everything(args.seed + epoch, args.device)

    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    learning_rate = AverageMeter('LR', ':2.8f')
    losses = AverageMeter('Loss', ':6.8f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, learning_rate, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    model.train()

    tick = time.time()
    steps_per_epoch = len(train_loader)
    for i, (images, target) in enumerate(train_loader):
        global_step = epoch * steps_per_epoch + i
        cur_step = global_step

        lr = adjust_learning_rate(optimizer, global_step, steps_per_epoch, args)
        learning_rate.update(lr)

        # Time spent waiting for the batch.
        data_time.update(time.time() - tick)

        # Labels are converted to int32 before being moved to an NPU device.
        if 'npu' in args.device:
            target = target.to(torch.int32)

        if 'npu' in args.device or 'cuda' in args.device:
            images = images.to(args.device, non_blocking=True)
            target = target.to(args.device, non_blocking=True)

        # Forward pass.
        output = model(images)
        loss = criterion(output, target)

        # Running statistics.
        batch_n = images.size(0)
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), batch_n)
        top1.update(acc1[0], batch_n)
        top5.update(acc5[0], batch_n)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        if not args.amp:
            loss.backward()
        else:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        optimizer.step()

        # Wall-clock time of the whole step.
        batch_time.update(time.time() - tick)
        tick = time.time()

        if i % args.print_freq == 0:
            progress.display(i)

        # Early stop once the requested number of steps has been reached.
        if args.stop_step_num is not None and cur_step >= args.stop_step_num:
            break

    fps_line = ' * FPS@all {:.3f}'.format(args.batch_size / batch_time.avg)
    print(fps_line)
    hwlog.remark_print(key=hwlog.FPS, value=fps_line)
    return global_step
|
||||
|
||||
|
||||
def validate(val_loader, model, criterion, args, global_step, sum_writer=None):
    """Evaluate the model on the validation loader.

    Args:
        val_loader: Iterable yielding ``(images, target)`` batches.
        model: Network to evaluate; switched to eval mode.
        criterion: Loss callable applied to ``(output, target)``.
        args: Parsed command-line namespace.
        global_step: Unused; kept for interface compatibility.
        sum_writer: Unused; kept for interface compatibility.

    Returns:
        The running average of the top-1 accuracy meter.
    """
    # NOTE(review): block nesting was reconstructed from syntax because the
    # source lost its indentation - confirm against the original file.
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top5],
        prefix='Test: ')

    model.eval()

    with torch.no_grad():
        tick = time.time()
        for i, (images, target) in enumerate(val_loader):
            # Labels are converted to int32 before being moved to an NPU device.
            if 'npu' in args.device:
                target = target.to(torch.int32)

            if 'npu' in args.device or 'cuda' in args.device:
                images = images.to(args.device, non_blocking=True)
                target = target.to(args.device, non_blocking=True)

            output = model(images)
            loss = criterion(output, target)

            batch_n = images.size(0)
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), batch_n)
            top1.update(acc1[0], batch_n)
            top5.update(acc5[0], batch_n)

            batch_time.update(time.time() - tick)
            tick = time.time()

            if i % args.print_freq == 0:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

        hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP1, value="{top1.avg:.3f}".format(top1=top1))
        hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP5, value="{top5.avg:.3f}".format(top5=top5))

    return top1.avg
|
||||
|
||||
|
||||
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Serialize ``state`` and keep a copy when it is the best so far.

    Args:
        state: Object handed to ``torch.save``.
        is_best: When truthy, the saved file is also copied to
            ``model_best.pth.tar`` in the current working directory.
        filename: Destination path of the checkpoint.
    """
    torch.save(state, filename)
    if not is_best:
        return
    shutil.copyfile(filename, 'model_best.pth.tar')
|
||||
|
||||
|
||||
class AverageMeter(object):
    """Track the latest value and the weighted running mean of a metric."""

    def __init__(self, name, fmt=':f'):
        """Create a meter.

        Args:
            name: Label printed in front of the values.
            fmt: Format spec including the leading colon, e.g. ``':6.2f'``.
        """
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        """Clear the current value, sum, count and average."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Register ``val`` observed ``n`` times and refresh the average."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        """Render as ``'<name> <val> (<avg>)'`` using the configured format."""
        template = '{{name}} {{val{0}}} ({{avg{0}}})'.format(self.fmt)
        return template.format(name=self.name, val=self.val, avg=self.avg)
|
||||
|
||||
|
||||
class ProgressMeter(object):
    """Print one progress line per call and forward accuracies to ``hwlog``."""

    def __init__(self, num_batches, meters, prefix=""):
        """Create a progress printer.

        Args:
            num_batches: Total number of batches, used to size the counter.
            meters: Objects whose ``str()`` is shown on the progress line.
            prefix: Text placed in front of the batch counter.
        """
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        """Print the progress line for ``batch`` and log Acc@1 / Acc@5.

        A metric is logged only when one of the meters renders it; previously
        the values were sliced out of ``str(entries)`` and a missing ``Acc@1``
        or ``Acc@5`` meter raised ``IndexError``.
        """
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

        # Log points: the current (not averaged) accuracy values.
        # NOTE(review): validate() also goes through this path, so evaluation
        # progress is reported under the TRAIN_ACCURACY_* keys - confirm intent.
        for label, key_name in (('Acc@1', 'TRAIN_ACCURACY_TOP1'),
                                ('Acc@5', 'TRAIN_ACCURACY_TOP5')):
            value = self._extract_value(entries[1:], label)
            if value is not None:
                hwlog.remark_print(key=getattr(hwlog, key_name), value=value)

    @staticmethod
    def _extract_value(meter_entries, label):
        """Return the first token after ``label`` in the matching entry, or None."""
        for entry in meter_entries:
            if entry.startswith(label):
                fields = entry[len(label):].split()
                return fields[0] if fields else ''
        return None

    def _get_batch_fmtstr(self, num_batches):
        """Build a ``'[{:Nd}/total]'`` template, N being the digits of the total."""
        num_digits = len(str(num_batches))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'
|
||||
|
||||
|
||||
def adjust_learning_rate(optimizer, global_step, steps_per_epoch, args):
    """Apply the step-wise exponential learning-rate schedule.

    The rate is ``args.lr`` multiplied by 0.98 once for every completed window
    of ``int(steps_per_epoch * 2.5)`` steps, i.e. roughly every 2.5 epochs.

    Args:
        optimizer: Object exposing ``param_groups``; each group's ``'lr'`` is
            overwritten.
        global_step: Number of steps taken since the start of training.
        steps_per_epoch: Number of steps in one epoch.
        args: Namespace providing the initial rate as ``args.lr``.

    Returns:
        The learning rate that was written to every parameter group.
    """
    decay_window = int(steps_per_epoch * 2.5)
    completed_windows = global_step // decay_window
    lr = args.lr * (0.98 ** completed_windows)
    for group in optimizer.param_groups:
        group['lr'] = lr
    return lr
|
||||
|
||||
|
||||
def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy, in percent, for each requested k.

    Args:
        output: Score tensor; the top-k search runs along dimension 1.
        target: Label tensor with one entry per row of ``output``.
        topk: Iterable of k values to evaluate.

    Returns:
        A list with one single-element tensor per k holding the percentage of
        rows whose label is among the k highest-scoring classes.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the maxk best classes per row, transposed so that each
        # row of `pred` holds one rank across the whole batch.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # `correct` derives from a transposed tensor and may be
            # non-contiguous, in which case view(-1) raises; reshape copies
            # only when it has to.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Benchmark log files are rooted next to this script.
    hwlog.ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

    cpu_info, npu_info, framework_info, os_info, benchmark_version = get_environment_info("pytorch")
    config_info = get_model_parameter("pytorch_config")

    # NOTE(review): these values are hard-coded rather than taken from the
    # parsed command line - confirm they are meant to be static.
    initial_data = {"base_lr": 0.1, "dataset": "imagenet", "optimizer": "SGD", "loss_scale": 1024}

    # Emit the environment and configuration header before training starts.
    header = (
        (hwlog.CPU_INFO, cpu_info),
        (hwlog.NPU_INFO, npu_info),
        (hwlog.OS_INFO, os_info),
        (hwlog.FRAMEWORK_INFO, framework_info),
        (hwlog.BENCHMARK_VERSION, benchmark_version),
        (hwlog.CONFIG_INFO, config_info),
        (hwlog.BASE_LR, initial_data.get("base_lr")),
        (hwlog.DATASET, initial_data.get("dataset")),
        (hwlog.OPT_NAME, initial_data.get("optimizer")),
        (hwlog.LOSS_SCALE, initial_data.get("loss_scale")),
    )
    for log_key, log_value in header:
        hwlog.remark_print(key=log_key, value=log_value)

    main()
|
||||
+179
@@ -0,0 +1,179 @@
|
||||
from torch import nn
|
||||
# from .utils import load_state_dict_from_url
|
||||
|
||||
|
||||
__all__ = ['MobileNetV2', 'mobilenet_v2']
|
||||
|
||||
|
||||
model_urls = {
|
||||
'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
|
||||
}
|
||||
|
||||
|
||||
def _make_divisible(v, divisor, min_value=None):
    """Round ``v`` to a nearby multiple of ``divisor``.

    Taken from the original TensorFlow MobileNet implementation:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py

    Args:
        v: Value to round (typically a channel count).
        divisor: The result is built from multiples of this number.
        min_value: Lower bound for the result; defaults to ``divisor``.

    Returns:
        The rounded value, bumped up by one ``divisor`` when plain rounding
        would fall below 90% of ``v``.
    """
    lower_bound = divisor if min_value is None else min_value
    nearest = int(v + divisor / 2) // divisor * divisor
    rounded = max(lower_bound, nearest)
    # Make sure that rounding down does not go down by more than 10%.
    if rounded < 0.9 * v:
        rounded += divisor
    return rounded
|
||||
|
||||
|
||||
class ConvBNReLU(nn.Sequential):
    """Bias-free 2-D convolution followed by BatchNorm and ReLU6."""

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        """Assemble the conv -> batch-norm -> ReLU6 stack.

        Args:
            in_planes: Input channel count.
            out_planes: Output channel count.
            kernel_size: Convolution kernel size; padding is
                ``(kernel_size - 1) // 2``.
            stride: Convolution stride.
            groups: Convolution groups.
        """
        pad = (kernel_size - 1) // 2
        conv = nn.Conv2d(in_planes, out_planes, kernel_size,
                         stride=stride, padding=pad, groups=groups, bias=False)
        norm = nn.BatchNorm2d(out_planes)
        activation = nn.ReLU6(inplace=True)
        super(ConvBNReLU, self).__init__(conv, norm, activation)
|
||||
|
||||
|
||||
class InvertedResidual(nn.Module):
    """MobileNetV2 block: optional 1x1 expansion, depthwise 3x3, linear 1x1."""

    def __init__(self, inp, oup, stride, expand_ratio):
        """Build the block.

        Args:
            inp: Input channel count.
            oup: Output channel count.
            stride: Stride of the depthwise convolution; must be 1 or 2.
            expand_ratio: Multiplier giving the hidden width
                ``int(round(inp * expand_ratio))``; a ratio of 1 skips the
                expansion layer.
        """
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = int(round(inp * expand_ratio))
        # The skip connection is used only when input and output shapes match.
        self.use_res_connect = self.stride == 1 and inp == oup

        # Pointwise expansion (absent when expand_ratio == 1).
        expansion = [] if expand_ratio == 1 else [ConvBNReLU(inp, hidden_dim, kernel_size=1)]
        # Depthwise convolution: one group per hidden channel.
        depthwise = ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim)
        # Linear pointwise projection: no activation after the batch norm.
        projection = [
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup),
        ]
        self.conv = nn.Sequential(*(expansion + [depthwise] + projection))

    def forward(self, x):
        """Apply the block, adding the input back when the skip is enabled."""
        if not self.use_res_connect:
            return self.conv(x)
        return x + self.conv(x)
|
||||
|
||||
|
||||
class MobileNetV2(nn.Module):
    """MobileNet V2 classifier: feature extractor plus dropout/linear head."""

    def __init__(self,
                 num_classes=1000,
                 width_mult=1.0,
                 inverted_residual_setting=None,
                 round_nearest=8,
                 block=None):
        """Construct the network.

        Args:
            num_classes (int): Number of classes.
            width_mult (float): Width multiplier - scales the channel count of
                every layer.
            inverted_residual_setting: Network structure as a list of
                ``[t, c, n, s]`` rows (expansion, channels, repeats, stride).
            round_nearest (int): Round channel counts to a multiple of this
                number; 1 turns rounding off.
            block: Module class used as the inverted residual building block.

        Raises:
            ValueError: If ``inverted_residual_setting`` is empty or its first
                row does not have four elements.
        """
        super(MobileNetV2, self).__init__()

        block_cls = InvertedResidual if block is None else block
        stem_channels = 32
        head_channels = 1280

        if inverted_residual_setting is None:
            inverted_residual_setting = [
                # t, c, n, s
                [1, 16, 1, 1],
                [6, 24, 2, 2],
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]

        # Only the first row is checked; the caller is trusted for the rest.
        if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
            raise ValueError("inverted_residual_setting should be non-empty "
                             "or a 4-element list, got {}".format(inverted_residual_setting))

        # Stem convolution.
        in_channels = _make_divisible(stem_channels * width_mult, round_nearest)
        self.last_channel = _make_divisible(head_channels * max(1.0, width_mult), round_nearest)
        layers = [ConvBNReLU(3, in_channels, stride=2)]

        # Inverted residual stages: only the first block of a stage strides.
        for t, c, n, s in inverted_residual_setting:
            out_channels = _make_divisible(c * width_mult, round_nearest)
            for repeat in range(n):
                block_stride = s if repeat == 0 else 1
                layers.append(block_cls(in_channels, out_channels, block_stride, expand_ratio=t))
                in_channels = out_channels

        # Final 1x1 convolution up to the head width.
        layers.append(ConvBNReLU(in_channels, self.last_channel, kernel_size=1))
        self.features = nn.Sequential(*layers)

        # Classifier head (dropout p=0.2).
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(self.last_channel, num_classes),
        )

        # Weight initialization.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def _forward_impl(self, x):
        """Run features, global average pooling and the classifier."""
        # Kept separate from `forward` because TorchScript does not support
        # inheritance; subclasses need a differently named method to call.
        feats = self.features(x)
        # `squeeze` is avoided because the batch size can be 1; reshape on the
        # batch dimension instead.
        pooled = nn.functional.adaptive_avg_pool2d(feats, 1).reshape(feats.shape[0], -1)
        return self.classifier(pooled)

    def forward(self, x):
        """Delegate to ``_forward_impl``."""
        return self._forward_impl(x)
|
||||
|
||||
|
||||
def mobilenet_v2(pretrained=False, progress=True, **kwargs):
    """Construct a MobileNetV2 architecture.

    See `"MobileNetV2: Inverted Residuals and Linear Bottlenecks"
    <https://arxiv.org/abs/1801.04381>`_.

    Args:
        pretrained (bool): Accepted for interface compatibility; weight loading
            is disabled here, so the flag has no effect.
        progress (bool): Accepted for interface compatibility; has no effect.
        **kwargs: Forwarded to the ``MobileNetV2`` constructor.

    Returns:
        A freshly initialized ``MobileNetV2`` instance.
    """
    return MobileNetV2(**kwargs)
|
||||
+18
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"startCfg":
|
||||
[
|
||||
{
|
||||
"jobID": "123456789",
|
||||
"deviceID": ["0"],
|
||||
"features":
|
||||
[
|
||||
{
|
||||
"name": "task_trace"
|
||||
},
|
||||
{
|
||||
"name": "training_trace"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
# MobileNetV2 NPU训练
|
||||
@@ -0,0 +1,22 @@
|
||||
# Ascend toolkit environment for running on an NPU device.
# NOTE(review): $currentDir (appended to PYTHONPATH below) is not set anywhere
# in the visible part of this script - confirm it is exported by the caller.
export ASCEND_HOME=/usr/local/Ascend
|
||||
export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/
|
||||
export PYTHONPATH=$PYTHONPATH:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/te:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/topi:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/hccl:/usr/local/Ascend/ascend-toolkit/latest/tfplugin/python/site-packages:$currentDir
|
||||
export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin
|
||||
export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
|
||||
|
||||
export SLOG_PRINT_TO_STDOUT=0
|
||||
su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 7"
|
||||
|
||||
export TASK_QUEUE_ENABLE=0
|
||||
# Evaluation run (--evaluate with --resume checkpoint.pth.tar) pinned to CPU cores 111-150.
# NOTE(review): this launches densenet121_1p_main.py with --arch densenet121 and
# --npu 7, although this file sits in the MobileNetV2 directory and the
# MobileNetV2 training script shown alongside it defines neither --arch nor
# --npu (it uses --device). Looks copied from a DenseNet script - confirm the
# intended entry point and flags.
taskset -c 111-150 python3 densenet121_1p_main.py \
|
||||
--workers 40 \
|
||||
--arch densenet121 \
|
||||
--npu 7 \
|
||||
--lr 0.1 \
|
||||
--momentum 0.9 \
|
||||
--amp \
|
||||
--batch-size 256 \
|
||||
--epoch 90 \
|
||||
--evaluate \
|
||||
--resume checkpoint.pth.tar \
|
||||
--data /opt/npu/dataset/imagenet
|
||||
@@ -0,0 +1,29 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
# Module-level buffers: every hook invocation appends to these lists, so they
# grow for as long as the hooks stay registered.
g_feat_in = []
g_feat_out = []
g_grad_in = []
g_grad_out = []


def forward_hook_fn(module, input, output):
    """Record and print what a module received and produced in its forward pass.

    Args:
        module: The object the hook was fired for.
        input: The forward input handed to the hook; appended to ``g_feat_in``.
        output: The forward output handed to the hook; appended to ``g_feat_out``.
    """
    g_feat_in.append(input)
    g_feat_out.append(output)
    # Dump module, input and output, one per line, in that order.
    for item in (module, input, output):
        print(item)
|
||||
|
||||
|
||||
def backward_hook_fn(module, grad_input, grad_output):
    """Record and print the gradients seen by a module's backward hook.

    Args:
        module: The object the hook was fired for.
        grad_input: Gradient w.r.t. the module inputs; appended to ``g_grad_in``.
        grad_output: Gradient w.r.t. the module outputs; appended to
            ``g_grad_out``.
    """
    g_grad_in.append(grad_input)
    g_grad_out.append(grad_output)
    print(module)
    print(grad_input)
    # Previously grad_input was printed a second time here, so grad_output
    # never appeared in the dump.
    print(grad_output)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
+556
@@ -0,0 +1,556 @@
|
||||
import argparse
|
||||
import os
|
||||
import random
|
||||
import shutil
|
||||
import time
|
||||
import warnings
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.nn.parallel
|
||||
import torch.backends.cudnn as cudnn
|
||||
import torch.distributed as dist
|
||||
import torch.optim
|
||||
import torch.multiprocessing as mp
|
||||
import torch.utils.data
|
||||
import torch.utils.data.distributed
|
||||
import torchvision.transforms as transforms
|
||||
import torchvision.datasets as datasets
|
||||
import torchvision.models as models
|
||||
from mobilenet import mobilenet_v2
|
||||
import torch.npu
|
||||
import torch.cuda
|
||||
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
|
||||
from apex import amp
|
||||
import numpy as np
|
||||
|
||||
from hook import *
|
||||
|
||||
|
||||
# model_names = sorted(name for name in models.__dict__
|
||||
# if name.islower() and not name.startswith("__")
|
||||
# and callable(models.__dict__[name]))
|
||||
|
||||
# Command-line interface of the training script. Option names, destinations
# and default values are part of the script's contract with its launchers.
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--data', metavar='DIR', default='/dataset/imagenet',
                    help='path to dataset')
# parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
#                     choices=model_names,
#                     help='model architecture: ' +
#                         ' | '.join(model_names) +
#                         ' (default: resnet18)')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                    help='number of data loading workers (default: 4)')
parser.add_argument('--epochs', default=90, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=256, type=int,
                    metavar='N',
                    help='mini-batch size (default: 256), this is the total '
                         'batch size of all GPUs on the current node when '
                         'using Data Parallel or Distributed Data Parallel')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)',
                    dest='weight_decay')
parser.add_argument('-p', '--print-freq', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                    help='evaluate model on validation set')
parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                    help='use pre-trained model')
# parser.add_argument('--world-size', default=-1, type=int,
#                     help='number of nodes for distributed training')
parser.add_argument('--node-nums', default=1, type=int,
                    help='number of nodes for distributed training')
parser.add_argument('--rank', default=0, type=int,
                    help='node rank for distributed training')
parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
                    help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str,
                    help='distributed backend')
parser.add_argument('--seed', default=None, type=int,
                    help='seed for initializing training. ')
parser.add_argument('--gpu', default=None, type=int,
                    help='GPU id to use.')
parser.add_argument('--multiprocessing-distributed', action='store_true',
                    help='Use multi-processing distributed training to launch '
                         'N processes per node, which has N GPUs. This is the '
                         'fastest way to use PyTorch for either single node or '
                         'multi node data parallel training')

parser.add_argument('--addr', default='10.136.181.115', type=str,
                    help='master addr')
# The help text used to be a copy of the --gpu help.
parser.add_argument('--device-id', default=None, type=int,
                    help='device id to use.')

parser.add_argument('--amp', default=False, action='store_true',
                    help='use amp to train the model')
parser.add_argument('--opt-level', default=None, type=str, help='apex optimize level')
# The default is now a real int instead of the string '1024' that argparse
# had to convert through type=int.
parser.add_argument('--loss-scale-value', default=1024, type=int, help='static loss scale value')

parser.add_argument('--summary-path', default=None, type=str, help='event file path')
parser.add_argument('--stop-step-num', default=None, type=int, help='after the stop-step, killing the training task')
# main() only accepts exactly 'npu' or 'cuda'; the help now says so.
parser.add_argument('--device', default='npu', type=str, help='device type: npu or cuda')
parser.add_argument('--eval-freq', default=10, type=int, help='test interval')
parser.add_argument('--hook', default=False, action='store_true', help='pytorch hook')

# Best top-1 accuracy seen so far; updated by main_worker.
best_acc1 = 0
# Latest global training step; written by train(), read by main_worker.
cur_step = 0
|
||||
|
||||
|
||||
def seed_everything(seed, device):
    """Seed the random number generators used by the training script.

    Args:
        seed: integer seed applied to ``random``, NumPy and torch, and
            exported as ``PYTHONHASHSEED``.
        device: device type string; when it contains ``'cuda'`` the CUDA
            generators are seeded too and cuDNN is switched to deterministic,
            non-benchmark mode.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if 'cuda' not in device:
        return

    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # ``cudnn`` is the file's alias for ``torch.backends.cudnn``.
    cudnn.deterministic = True
    cudnn.benchmark = False
|
||||
|
||||
|
||||
def main():
    """Parse the command line and spawn one training process per local device.

    The function only supports the multiprocessing-distributed mode: it
    prints a message and returns when the distributed options are not set or
    when ``--device`` is neither ``npu`` nor ``cuda``.
    """
    args = parser.parse_args()

    if args.seed is not None:
        seed_everything(args.seed, args.device)

        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    # Rendezvous address/port are read from the environment by torch.distributed.
    os.environ['MASTER_ADDR'] = args.addr
    # NOTE(review): 90000 is above the valid TCP port range (max 65535) --
    # confirm how the backend interprets it.
    os.environ['MASTER_PORT'] = '90000'

    args.distributed = args.node_nums > 1 or args.multiprocessing_distributed
    if not args.distributed:
        print('dist param is not correct!')
        return

    if args.device == 'npu':
        # device_nums_per_node = torch.npu.device_count()
        device_nums_per_node = 2
    elif args.device == 'cuda':
        device_nums_per_node = torch.cuda.device_count()
    else:
        print('unknown device type[npu/cuda]!')
        return

    if not args.multiprocessing_distributed:
        print('dist param is not correct!')
        return
        # main_worker(args.device_id, device_nums_per_node, args)

    # world_size is the total number of devices, i.e. of training processes.
    args.world_size = device_nums_per_node * args.node_nums
    # The child processes are spawned here, so the outer launch script does
    # not need to start several processes itself. NPU and CUDA are launched
    # in exactly the same way.
    mp.spawn(main_worker, nprocs=device_nums_per_node, args=(device_nums_per_node, args))
|
||||
|
||||
|
||||
# first param must be the index of PID
|
||||
def main_worker(pid_idx, device_nums_per_node, args):
    """Per-process training entry point started by ``mp.spawn``.

    Args:
        pid_idx: index of this process on the node (supplied by ``mp.spawn``
            as the first argument); also used as the local device index.
        device_nums_per_node: number of processes/devices on this node.
        args: parsed command-line namespace. It is mutated in place
            (``rank``, ``pid_idx``, ``batch_size``, ``workers`` and, when
            resuming, ``start_epoch``).

    The summary writer is closed on every exit path, including the
    ``--evaluate`` early return and exceptions.
    """
    sum_writer = SummaryWriter(args.summary_path)
    try:
        _run_worker(pid_idx, device_nums_per_node, args, sum_writer)
    finally:
        sum_writer.close()


def _run_worker(pid_idx, device_nums_per_node, args, sum_writer):
    """Set up the process group, data, model and optimizer, then train/evaluate."""
    global best_acc1

    global_step = -1

    if args.distributed:
        if args.multiprocessing_distributed:
            # NOTE(review): the node rank given by --rank is ignored here, so
            # ranks repeat across nodes when node_nums > 1. The upstream
            # formula is args.rank * device_nums_per_node + pid_idx -- confirm
            # before enabling multi-node runs.
            args.rank = pid_idx
            args.pid_idx = pid_idx

        if args.device == 'npu':
            dist.init_process_group(backend=args.dist_backend, world_size=args.world_size, rank=args.rank)
        else:
            dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                    world_size=args.world_size, rank=args.rank)

        # Bind this process to its own device so DistributedDataParallel does
        # not try to use every available device.
        if args.device == 'npu':
            torch.npu.set_device('npu:{}'.format(pid_idx))
        else:
            torch.cuda.set_device(pid_idx)

        # --batch-size / --workers are per-node totals; split them per process.
        args.batch_size = int(args.batch_size / device_nums_per_node)
        args.workers = int((args.workers + device_nums_per_node - 1) / device_nums_per_node)

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True)

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True, drop_last=True)

    # define model and train
    model = mobilenet_v2()
    criterion = nn.CrossEntropyLoss()

    loc = None
    if 'npu' == args.device:
        loc = 'npu:{}'.format(pid_idx)
    elif 'cuda' == args.device:
        loc = 'cuda:{}'.format(pid_idx)
    model = model.to(loc)
    criterion = criterion.to(loc)

    optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level,
                                          loss_scale=args.loss_scale_value)

    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[pid_idx], broadcast_buffers=False)
    # model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    # set hook
    if args.hook:
        for _name, module in model.named_modules():
            module.register_forward_hook(forward_hook_fn)
            module.register_backward_hook(backward_hook_fn)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=args.device)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            if args.amp:
                amp.load_state_dict(checkpoint['amp'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.evaluate:
        # device_nums_per_node was missing from this call, which raised a
        # TypeError as soon as --evaluate was used.
        validate(val_loader, model, criterion, args, global_step, sum_writer, device_nums_per_node)
        return

    # All ranks share one checkpoint file name, so only the first process on
    # the node writes it (same rule the progress printing uses).
    is_saving_rank = args.rank % device_nums_per_node == 0

    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        global_step = train(train_loader, model, criterion, optimizer, epoch, args,
                            global_step, sum_writer, device_nums_per_node)

        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            # evaluate on validation set
            acc1 = validate(val_loader, model, criterion, args, global_step, sum_writer,
                            device_nums_per_node)

            # remember best acc@1 and save checkpoint
            is_best = acc1 > best_acc1
            best_acc1 = max(acc1, best_acc1)

            if is_saving_rank:
                state = {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                }
                if args.amp:
                    state['amp'] = amp.state_dict()
                save_checkpoint(state, is_best)

        # cur_step is the module-level step counter maintained by train().
        if args.stop_step_num is not None and cur_step >= args.stop_step_num:
            break
|
||||
|
||||
|
||||
def train(train_loader, model, criterion, optimizer, epoch, args, global_step, sum_writer, device_nums_per_node):
    """Run one training epoch.

    Args:
        train_loader: iterable yielding ``(images, target)`` batches.
        model: network to train.
        criterion: loss function applied to ``(output, target)``.
        optimizer: optimizer whose learning rate is updated every step.
        epoch: zero-based epoch index.
        args: parsed options; ``pid_idx``, ``rank``, ``batch_size`` and
            ``world_size`` are expected to have been set by the caller.
        global_step: step counter returned unchanged if the loader is empty.
        sum_writer: summary writer receiving per-step scalars.
        device_nums_per_node: processes per node; only the first process of
            a node prints progress.

    Returns:
        The global step of the last processed batch.

    Side effects:
        Updates the module-level ``cur_step`` and stops early once it reaches
        ``args.stop_step_num``.
    """
    global cur_step

    if args.seed is not None:
        seed_everything(args.seed + epoch, args.device)

    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    learning_rate = AverageMeter('LR', ':2.8f')
    losses = AverageMeter('Loss', ':6.8f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, learning_rate, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    # Loop invariants, computed once instead of on every batch.
    on_npu = 'npu' in args.device
    if on_npu:
        loc = 'npu:{}'.format(args.pid_idx)
    elif 'cuda' in args.device:
        loc = 'cuda:{}'.format(args.pid_idx)
    else:
        loc = None
    is_display_rank = (not args.multiprocessing_distributed
                       or args.rank % device_nums_per_node == 0)
    # args.batch_size is the per-process batch, so overall throughput scales
    # with the number of processes. The original hard-coded a factor of 8.
    total_batch_size = getattr(args, 'world_size', device_nums_per_node) * args.batch_size

    end = time.time()
    steps_per_epoch = len(train_loader)
    for i, (images, target) in enumerate(train_loader):
        global_step = epoch * steps_per_epoch + i
        cur_step = global_step

        lr = adjust_learning_rate(optimizer, global_step, steps_per_epoch, args)
        learning_rate.update(lr)
        sum_writer.add_scalar('learning rate', lr, global_step)

        # measure data loading time
        data_time.update(time.time() - end)

        if on_npu:
            target = target.to(torch.int32)
        images = images.to(loc, non_blocking=True)
        target = target.to(loc, non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        if args.amp:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        sum_writer.add_scalar('Accuary/train/top1', acc1, global_step)
        sum_writer.add_scalar('Accuary/train/top5', acc5, global_step)
        sum_writer.add_scalar('Loss/train/loss', loss, global_step)

        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0 and is_display_rank:
            progress.display(i)

        # Skip the report when the timer has not registered any time yet,
        # which would otherwise divide by zero.
        if is_display_rank and batch_time.avg > 0:
            print('FPS@all: {:.3f}'.format(total_batch_size / batch_time.avg))

        if args.stop_step_num is not None and cur_step >= args.stop_step_num:
            break

    return global_step
|
||||
|
||||
|
||||
def validate(val_loader, model, criterion, args, global_step, sum_writer, device_nums_per_node=1):
    """Evaluate the model on the validation loader.

    Args:
        val_loader: iterable yielding ``(images, target)`` batches.
        model: network to evaluate (switched to eval mode).
        criterion: loss function applied to ``(output, target)``.
        args: parsed options; ``pid_idx`` and ``rank`` are expected to have
            been set by the caller.
        global_step: step used as the x-axis for the summary scalars.
        sum_writer: summary writer; only written to when not in
            ``--evaluate`` mode.
        device_nums_per_node: processes per node, used to pick the printing
            rank. Defaults to 1 so that callers passing only six arguments
            keep working.

    Returns:
        The average top-1 accuracy over the loader.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top5],
        prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    # Loop invariants, computed once instead of on every batch.
    on_npu = 'npu' in args.device
    if on_npu:
        loc = 'npu:{}'.format(args.pid_idx)
    elif 'cuda' in args.device:
        loc = 'cuda:{}'.format(args.pid_idx)
    else:
        loc = None
    is_display_rank = (not args.multiprocessing_distributed
                       or args.rank % device_nums_per_node == 0)

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            if on_npu:
                target = target.to(torch.int32)
            images = images.to(loc, non_blocking=True)
            target = target.to(loc, non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0 and is_display_rank:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))
        if is_display_rank:
            # args.gpu is None unless --gpu is passed; the device this
            # process runs on is args.pid_idx.
            device_id = getattr(args, 'pid_idx', args.gpu)
            print("[device id:", device_id, "]",
                  '[AVG-ACC] * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1, top5=top5))

    if not args.evaluate:
        # sum_writer.add_scalar('Loss/validation/loss', losses, global_step)
        sum_writer.add_scalar('Accuary/validation/top1', top1.avg, global_step)
        sum_writer.add_scalar('Accuary/validation/top5', top5.avg, global_step)

    return top1.avg
|
||||
|
||||
|
||||
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Serialize ``state`` to ``filename`` and keep a copy of the best one.

    Args:
        state: object handed to ``torch.save``.
        is_best: when true, the saved file is also copied to
            ``model_best.pth.tar`` in the current working directory.
        filename: destination path of the checkpoint.
    """
    torch.save(state, filename)
    if not is_best:
        return
    shutil.copyfile(filename, 'model_best.pth.tar')
|
||||
|
||||
|
||||
class AverageMeter(object):
    """Track the latest value and the running weighted average of a metric."""

    def __init__(self, name, fmt=':f'):
        # ``fmt`` is a format spec (including the leading colon) applied to
        # both the latest value and the average when the meter is printed.
        self.name, self.fmt = name, fmt
        self.reset()

    def reset(self):
        """Zero the latest value, average, weighted sum and sample count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed over ``n`` samples and refresh the average."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        template = '{{name}} {{val{0}}} ({{avg{0}}})'.format(self.fmt)
        return template.format(**vars(self))
|
||||
|
||||
|
||||
class ProgressMeter(object):
    """Print a one-line progress report built from a list of meters."""

    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        """Print the prefix, the ``[batch/total]`` counter and every meter, tab-separated."""
        header = self.prefix + self.batch_fmtstr.format(batch)
        columns = [header]
        columns.extend(str(meter) for meter in self.meters)
        print('\t'.join(columns))

    def _get_batch_fmtstr(self, num_batches):
        """Return a ``[{:Nd}/total]`` template, N being the width of the total."""
        width = len(str(num_batches // 1))
        field = '{{:{}d}}'.format(width)
        return '[{}/{}]'.format(field, field.format(num_batches))
|
||||
|
||||
|
||||
def adjust_learning_rate(optimizer, global_step, steps_per_epoch, args):
    """Apply a staircase exponential decay to the learning rate.

    The rate is ``args.lr`` multiplied by 0.98 once for every completed
    interval of ``int(steps_per_epoch * 2.5)`` steps (i.e. every 2.5 epochs).

    Args:
        optimizer: object whose ``param_groups`` entries get their ``'lr'`` set.
        global_step: number of steps taken since the start of training.
        steps_per_epoch: number of steps in one epoch.
        args: namespace providing the initial rate as ``args.lr``.

    Returns:
        The learning rate that was written into every parameter group.
    """
    # lr = args.lr * (0.98 ** (epoch / 2.5))
    decay_interval = int(steps_per_epoch * 2.5)
    completed_intervals = global_step // decay_interval
    new_lr = args.lr * (0.98 ** completed_intervals)
    for group in optimizer.param_groups:
        group['lr'] = new_lr
    return new_lr
|
||||
|
||||
|
||||
def accuracy(output, target, topk=(1,)):
    """Compute the top-k accuracy (in percent) for each requested k.

    Args:
        output: 2-D score tensor, one row per sample and one column per class.
        target: 1-D tensor with the true class index of every sample.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one single-element float tensor per entry of ``topk``.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # pred becomes (maxk, batch): row j holds every sample's j-th best class.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # ``correct`` inherits the transposed layout of ``pred``, so a
            # slice of it is not contiguous and ``view(-1)`` raises on
            # current PyTorch; ``reshape`` copies only when it has to.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
+179
@@ -0,0 +1,179 @@
|
||||
from torch import nn
|
||||
# from .utils import load_state_dict_from_url
|
||||
|
||||
|
||||
# Public names exported by this module.
__all__ = ['MobileNetV2', 'mobilenet_v2']
|
||||
|
||||
|
||||
# Download location of the pretrained MobileNetV2 weights. The only code that
# reads this mapping is the commented-out loading branch in mobilenet_v2().
model_urls = {
|
||||
'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
|
||||
}
|
||||
|
||||
|
||||
def _make_divisible(v, divisor, min_value=None):
|
||||
"""
|
||||
This function is taken from the original tf repo.
|
||||
It ensures that all layers have a channel number that is divisible by 8
|
||||
It can be seen here:
|
||||
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
|
||||
:param v:
|
||||
:param divisor:
|
||||
:param min_value:
|
||||
:return:
|
||||
"""
|
||||
if min_value is None:
|
||||
min_value = divisor
|
||||
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
|
||||
# Make sure that round down does not go down by more than 10%.
|
||||
if new_v < 0.9 * v:
|
||||
new_v += divisor
|
||||
return new_v
|
||||
|
||||
|
||||
class ConvBNReLU(nn.Sequential):
    """Bias-free ``Conv2d`` followed by ``BatchNorm2d`` and in-place ``ReLU6``."""

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        # Padding of (kernel_size - 1) // 2 keeps the spatial size unchanged
        # for odd kernels at stride 1.
        conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride,
                         (kernel_size - 1) // 2, groups=groups, bias=False)
        norm = nn.BatchNorm2d(out_planes)
        activation = nn.ReLU6(inplace=True)
        # nn.ReLU(inplace=True)
        super(ConvBNReLU, self).__init__(conv, norm, activation)
|
||||
|
||||
|
||||
class InvertedResidual(nn.Module):
    """MobileNetV2 block: optional 1x1 expansion, 3x3 depthwise conv, linear 1x1 projection."""

    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = int(round(inp * expand_ratio))
        # The skip connection is only used when input and output shapes match.
        self.use_res_connect = self.stride == 1 and inp == oup

        # pw: pointwise expansion, skipped when there is nothing to expand
        expansion = [ConvBNReLU(inp, hidden_dim, kernel_size=1)] if expand_ratio != 1 else []
        self.conv = nn.Sequential(
            *expansion,
            # dw
            ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
            # pw-linear
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup)
        )

    def forward(self, x):
        """Apply the block, adding the input back when the residual path is enabled."""
        out = self.conv(x)
        return x + out if self.use_res_connect else out
|
||||
|
||||
|
||||
class MobileNetV2(nn.Module):
    """MobileNet V2 classifier: a stack of inverted residual blocks plus a linear head."""

    def __init__(self,
                 num_classes=1000,
                 width_mult=1.0,
                 inverted_residual_setting=None,
                 round_nearest=8,
                 block=None):
        """
        Build the network.

        Args:
            num_classes (int): Number of classes
            width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
            inverted_residual_setting: Network structure, a list of ``[t, c, n, s]`` rows
                (expansion ratio, output channels, number of repeats, stride of the first repeat)
            round_nearest (int): Round the number of channels in each layer to be a multiple of this number
                Set to 1 to turn off rounding
            block: Module specifying inverted residual building block for mobilenet
        """
        super(MobileNetV2, self).__init__()

        block = InvertedResidual if block is None else block
        input_channel = 32
        last_channel = 1280

        if inverted_residual_setting is None:
            inverted_residual_setting = [
                # t, c, n, s
                [1, 16, 1, 1],
                [6, 24, 2, 2],
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]

        # only the first row is checked; the caller is trusted to supply t, c, n, s everywhere
        if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
            raise ValueError("inverted_residual_setting should be non-empty "
                             "or a 4-element list, got {}".format(inverted_residual_setting))

        # stem
        input_channel = _make_divisible(input_channel * width_mult, round_nearest)
        self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
        stages = [ConvBNReLU(3, input_channel, stride=2)]

        # inverted residual stages: only the first repeat of a stage uses stride s
        for t, c, n, s in inverted_residual_setting:
            output_channel = _make_divisible(c * width_mult, round_nearest)
            for repeat in range(n):
                block_stride = s if repeat == 0 else 1
                stages.append(block(input_channel, output_channel, block_stride, expand_ratio=t))
                input_channel = output_channel

        # final 1x1 convolution
        stages.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
        self.features = nn.Sequential(*stages)

        # classifier head
        self.classifier = nn.Sequential(
            # p=0.2
            nn.Dropout(0.2),
            nn.Linear(self.last_channel, num_classes),
        )

        # weight initialization
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.kaiming_normal_(layer.weight, mode='fan_out')
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)
            elif isinstance(layer, nn.BatchNorm2d):
                nn.init.ones_(layer.weight)
                nn.init.zeros_(layer.bias)
            elif isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.zeros_(layer.bias)

    def _forward_impl(self, x):
        # This exists since TorchScript doesn't support inheritance, so the superclass method
        # (this one) needs to have a name other than `forward` that can be accessed in a subclass
        feature_map = self.features(x)
        # "squeeze" cannot be used because the batch size can be 1, so the
        # pooled map is reshaped with an explicit batch dimension instead.
        pooled = nn.functional.adaptive_avg_pool2d(feature_map, 1)
        flattened = pooled.reshape(feature_map.shape[0], -1)
        return self.classifier(flattened)

    def forward(self, x):
        """Return the class scores for the input batch ``x``."""
        return self._forward_impl(x)
|
||||
|
||||
|
||||
def mobilenet_v2(pretrained=False, progress=True, **kwargs):
    """
    Constructs a MobileNetV2 architecture from
    `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.

    Args:
        pretrained (bool): accepted for interface compatibility; currently
            ignored because the weight-loading code below is commented out
        progress (bool): accepted for interface compatibility; currently
            ignored for the same reason
        **kwargs: forwarded unchanged to ``MobileNetV2``
    """
    # if pretrained:
    #     state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'],
    #                                           progress=progress)
    #     model.load_state_dict(state_dict)
    return MobileNetV2(**kwargs)
|
||||
+638
@@ -0,0 +1,638 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import random
|
||||
import shutil
|
||||
import time
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.parallel
|
||||
import torch.backends.cudnn as cudnn
|
||||
import torch.distributed as dist
|
||||
import torch.optim
|
||||
import torch.multiprocessing as mp
|
||||
import torch.utils.data
|
||||
import torch.utils.data.distributed
|
||||
import torchvision.transforms as transforms
|
||||
import torchvision.datasets as datasets
|
||||
import torchvision.models as models
|
||||
|
||||
from mobilenet import mobilenet_v2
|
||||
from apex import amp
|
||||
from multi_epochs_dataloader import MultiEpochsDataLoader
|
||||
|
||||
from benchmark_log import hwlog
|
||||
from benchmark_log.basic_utils import get_environment_info
|
||||
from benchmark_log.basic_utils import get_model_parameter
|
||||
|
||||
# Default total mini-batch size, used as the --batch-size default.
BATCH_SIZE = 4096
OPTIMIZER_BATCH_SIZE = 4096
# model_names = sorted(name for name in models.__dict__
#                      if name.islower() and not name.startswith("__")
#                      and callable(models.__dict__[name]))

# Command-line interface of the training script. Option names, destinations
# and default values are unchanged; only help texts that contradicted the
# actual defaults were corrected. "%(default)s" lets argparse insert the real
# default so the help cannot drift again.
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--data', metavar='DIR', default='/opt/npu/dataset/imagenet',
                    help='path to dataset')
# parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet50',
#                     choices=model_names,
#                     help='model architecture: ' +
#                         ' | '.join(model_names) +
#                         ' (default: resnet18)')
parser.add_argument('-j', '--workers', default=32, type=int, metavar='N',
                    help='number of data loading workers (default: %(default)s)')
parser.add_argument('--epochs', default=90, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=BATCH_SIZE, type=int,
                    metavar='N',
                    help='mini-batch size (default: %(default)s), this is the total '
                         'batch size of all GPUs on the current node when '
                         'using Data Parallel or Distributed Data Parallel')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)',
                    dest='weight_decay')
parser.add_argument('--workspace', type=str, default='./', metavar='DIR',
                    help='path to directory where checkpoints will be stored')
parser.add_argument('-p', '--print-freq', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('-ef', '--eval-freq', default=5, type=int,
                    metavar='N', help='evaluate frequency (default: 5)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                    help='evaluate model on validation set')
parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                    help='use pre-trained model')
parser.add_argument('--world-size', default=-1, type=int,
                    help='number of nodes for distributed training')
parser.add_argument('--rank', default=-1, type=int,
                    help='node rank for distributed training')
parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
                    help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str,
                    help='distributed backend')
parser.add_argument('--seed', default=None, type=int,
                    help='seed for initializing training. ')
parser.add_argument('--gpu', default=None, type=int,
                    help='GPU id to use.')
parser.add_argument('--multiprocessing-distributed', action='store_true',
                    help='Use multi-processing distributed training to launch '
                         'N processes per node, which has N GPUs. This is the '
                         'fastest way to use PyTorch for either single node or '
                         'multi node data parallel training')
parser.add_argument('-bm', '--benchmark', default=0, type=int,
                    metavar='N', help='set benchmark status (1: run benchmark; default: %(default)s)')
parser.add_argument('--device', default='npu', type=str, help='npu or gpu')
parser.add_argument('--addr', default='10.136.181.115', type=str, help='master addr')
parser.add_argument('--checkpoint-nameprefix', default='checkpoint', type=str, help='checkpoint-nameprefix')
parser.add_argument('--checkpoint-freq', default=0, type=int,
                    metavar='N', help='checkpoint frequency (default: 0). '
                                      '0: save only one file, overwritten every epoch; '
                                      'n: save a different file every n epochs; '
                                      '-1: no checkpoint (not supported)')

# apex
parser.add_argument('--amp', default=False, action='store_true',
                    help='use amp to train the model')
parser.add_argument('--loss-scale', default=64., type=float,
                    help='loss scale used in amp (default: %(default)s; -1 means dynamic)')
# The help text used to be a copy of the --loss-scale help.
parser.add_argument('--opt-level', default='O2', type=str,
                    help='apex amp optimization level (default: %(default)s)')

# NOTE(review): this silences every warning for the whole process, including
# the seeding warning main() issues below -- confirm that is intended.
warnings.filterwarnings('ignore')
# Best top-1 accuracy seen so far.
best_acc1 = 0
|
||||
|
||||
|
||||
def main():
    """Parse the command line, prepare the process environment and start training.

    Either spawns one ``main_worker`` process per device
    (``--multiprocessing-distributed``) or calls ``main_worker`` directly in
    the current process.
    """
    args = parser.parse_args()
    banner = "===============main()================="
    print(banner)
    print(args)
    print(banner)

    os.environ['KERNEL_NAME_ID'] = str(0)
    print("++++++++++++++++++ KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID'])

    seed = args.seed
    if seed is not None:
        random.seed(seed)
        torch.manual_seed(seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    # Rendezvous address for torch.distributed; the port is fixed.
    os.environ['MASTER_ADDR'] = args.addr  # '10.136.181.51'
    os.environ['MASTER_PORT'] = '59629'

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = (torch.npu.device_count() if args.device == 'npu'
                      else torch.cuda.device_count())

    if not args.multiprocessing_distributed:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
        return

    # Since we have ngpus_per_node processes per node, the total world_size
    # needs to be adjusted accordingly
    args.world_size = ngpus_per_node * args.world_size
    # Use torch.multiprocessing.spawn to launch distributed processes: the
    # main_worker process function.
    # The child process uses the environment variables of the parent process,
    # we have to set KERNEL_NAME_ID for every proc (done in main_worker).
    mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
|
||||
|
||||
|
||||
def main_worker(gpu, ngpus_per_node, args):
    """Run training (or evaluation only) in one process bound to one device.

    Args:
        gpu: Device index for this process (supplied by ``mp.spawn`` or taken
            from ``args.gpu`` in the single-process path). It is stored in
            ``args.gpu`` and used to build the ``npu:<gpu>`` device string.
        ngpus_per_node: Number of devices per node; ``args.batch_size`` and
            ``args.workers`` are divided by it.
        args: Parsed command-line namespace. It is mutated in place
            (``gpu``, ``rank``, ``batch_size``, ``workers``, ``start_epoch``).

    Side effects:
        Updates the module-level ``best_acc1`` and the ``KERNEL_NAME_ID``
        environment variable, and writes checkpoints via ``save_checkpoint``.
    """
    global best_acc1
    args.gpu = gpu

    # Each process records its own device index in KERNEL_NAME_ID; the value
    # inherited from the parent process is printed first.
    print("[npu id:", args.gpu, "]", "++++++++++++++++ before set KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID'])
    os.environ['KERNEL_NAME_ID'] = str(gpu)
    print("[npu id:", args.gpu, "]", "++++++++++++++++ KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID'])

    if args.gpu is not None:
        print("[npu id:", args.gpu, "]", "Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu

        if args.device == 'npu':
            # No init_method is passed on NPU; main() exports
            # MASTER_ADDR / MASTER_PORT before the workers start.
            dist.init_process_group(backend=args.dist_backend,  # init_method=args.dist_url,
                                    world_size=args.world_size, rank=args.rank)
        else:
            dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                    world_size=args.world_size, rank=args.rank)

    # NOTE(review): the device string is always 'npu:<id>', even when
    # --device is not 'npu' - confirm whether GPU runs are still supported.
    loc = 'npu:{}'.format(args.gpu)
    torch.npu.set_device(loc)

    # --batch-size and --workers are per-node totals; convert them to
    # per-process values (the worker count is rounded up).
    args.batch_size = int(args.batch_size / ngpus_per_node)
    args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)

    print("[npu id:", args.gpu, "]", "===============main_worker()=================")
    print("[npu id:", args.gpu, "]", args)
    print("[npu id:", args.gpu, "]", "===============main_worker()=================")

    # Data loading code. The loaders are built by the helper functions below;
    # normalisation is applied on the device inside train() / validate().
    train_loader, train_loader_len, train_sampler = get_pytorch_train_loader(args.data,
                                                                             args.batch_size,
                                                                             workers=args.workers,
                                                                             distributed=args.distributed)

    # Validation is never sharded: every process evaluates the full set.
    val_loader = get_pytorch_val_loader(args.data, args.batch_size, args.workers, distributed=False)

    # create model (the architecture is fixed to MobileNetV2)
    print("[npu id:", args.gpu, "]", "=> creating model '{}'".format('mobilenetv2'))
    model = mobilenet_v2()
    model = model.to(loc)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(loc)
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale)

    # NOTE(review): the model is wrapped in DistributedDataParallel
    # unconditionally, i.e. also when args.distributed is False and no process
    # group was initialised above - confirm this path is intended.
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            if args.amp:
                amp.load_state_dict(checkpoint['amp'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Evaluation-only mode: run one validation pass and stop.
    if args.evaluate:
        validate(val_loader, model, criterion, args, ngpus_per_node)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        # The learning rate is adjusted per step inside train().

        # train for one epoch
        train(train_loader, train_loader_len, model, criterion, optimizer, epoch, args, ngpus_per_node)

        # Validate every args.eval_freq epochs and always after the last epoch.
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            # evaluate on validation set
            acc1 = validate(val_loader, model, criterion, args, ngpus_per_node)

            # remember best acc@1 and save checkpoint
            is_best = acc1 > best_acc1
            best_acc1 = max(acc1, best_acc1)

            # Single-process runs save after every evaluation; in
            # multiprocessing mode only the first rank of each node saves,
            # and only after the final epoch.
            if not args.multiprocessing_distributed or \
                    (args.multiprocessing_distributed and args.rank % ngpus_per_node == 0 and epoch == args.epochs - 1):
                if args.amp:
                    # The amp state is stored as well so that a resumed run
                    # can restore it.
                    save_checkpoint({
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'best_acc1': best_acc1,
                        'optimizer': optimizer.state_dict(),
                        'amp': amp.state_dict(),
                    }, is_best)
                else:
                    save_checkpoint({
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'best_acc1': best_acc1,
                        'optimizer': optimizer.state_dict(),
                    }, is_best)
|
||||
|
||||
|
||||
def train(train_loader, train_loader_len, model, criterion, optimizer, epoch, args, ngpus_per_node):
    """Train ``model`` for one epoch and report the measured throughput.

    Args:
        train_loader: Iterable yielding ``(images, target)`` batches; images
            are uint8 tensors as produced by ``fast_collate``.
        train_loader_len: Number of batches per epoch (used for progress
            output and for the learning-rate schedule).
        model: Network to train.
        criterion: Loss function applied to ``(output, target)``.
        optimizer: Optimizer whose parameter groups get their ``lr`` updated
            every step by ``adjust_learning_rate``.
        epoch: Zero-based index of the current epoch.
        args: Parsed command-line namespace (``gpu``, ``benchmark``, ``amp``,
            ``batch_size``, ``print_freq``, ``rank``, ... are read).
        ngpus_per_node: Number of devices per node, used for the rank check
            and for the FPS figure.

    With ``args.benchmark == 0`` the optimizer steps on every batch. With
    ``args.benchmark == 1`` gradients are accumulated until
    ``OPTIMIZER_BATCH_SIZE`` samples have been seen, averaged, and only then
    applied.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        train_loader_len,
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    loc = 'npu:{}'.format(args.gpu)

    # Images arrive as raw uint8 pixels, so the usual ImageNet statistics are
    # scaled to the 0..255 range and applied on the device.
    mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).view(1, 3, 1, 1)
    std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).view(1, 3, 1, 1)
    mean = mean.to(loc, non_blocking=True)
    std = std.to(loc, non_blocking=True)

    # Only the first rank of every node prints when several processes run.
    is_log_rank = (not args.multiprocessing_distributed
                   or args.rank % ngpus_per_node == 0)

    # switch to train mode
    model.train()
    end = time.time()

    accumulation_steps = 1
    if args.benchmark == 1:
        optimizer.zero_grad()
        # Clamp to 1 so a per-device batch larger than OPTIMIZER_BATCH_SIZE
        # cannot produce a zero multiplier (modulo by zero below).
        accumulation_steps = max(1, int(OPTIMIZER_BATCH_SIZE / args.batch_size))

    steps_per_epoch = train_loader_len
    print('==========step per epoch======================', steps_per_epoch)
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        global_step = epoch * steps_per_epoch + i
        adjust_learning_rate(optimizer, global_step, steps_per_epoch, args)

        target = target.to(torch.int32)
        images = images.to(loc, non_blocking=True).to(torch.float).sub(mean).div(std)
        target = target.to(loc, non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        if args.benchmark == 0:
            optimizer.zero_grad()

        if args.amp:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        if args.benchmark == 0:
            optimizer.step()
        elif args.benchmark == 1:
            if (i + 1) % accumulation_steps == 0:
                # Average the accumulated gradients before stepping.
                for param_group in optimizer.param_groups:
                    for param in param_group['params']:
                        # Parameters that took no part in the backward pass
                        # have no gradient to rescale.
                        if param.grad is not None:
                            param.grad /= accumulation_steps
                optimizer.step()
                optimizer.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0 and is_log_rank:
            progress.display(i)

    if is_log_rank:
        # AverageMeter.avg stays 0 until its warm-up batches have passed;
        # report 0 FPS instead of dividing by zero on very short epochs.
        if batch_time.avg > 0:
            fps = ngpus_per_node * args.batch_size / batch_time.avg
        else:
            fps = 0.0
        print("[npu id:", args.gpu, "]", '* FPS@all {:.3f}'.format(fps))
        hwlog.remark_print(key=hwlog.FPS,
                           value=' * FPS@all {:.3f}'.format(fps))
|
||||
|
||||
|
||||
def validate(val_loader, model, criterion, args, ngpus_per_node):
    """Evaluate ``model`` on ``val_loader`` and return the average top-1 accuracy.

    Batches are expected as uint8 images (see ``fast_collate``); they are
    normalised on the device before the forward pass. Progress and the final
    accuracies are reported only by the first rank of each node when
    ``args.multiprocessing_distributed`` is set.
    """

    def on_reporting_rank():
        # Evaluated lazily so the rank is only read in multiprocessing mode.
        return (not args.multiprocessing_distributed
                or args.rank % ngpus_per_node == 0)

    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader),
                             [batch_time, losses, top1, top5],
                             prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        device = 'npu:{}'.format(args.gpu)
        # ImageNet statistics scaled to the 0..255 pixel range.
        mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).view(1, 3, 1, 1)
        std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).view(1, 3, 1, 1)
        mean = mean.to(device, non_blocking=True)
        std = std.to(device, non_blocking=True)

        tick = time.time()
        for step, (images, target) in enumerate(val_loader):
            target = target.to(torch.int32)
            images = images.to(device, non_blocking=True).to(torch.float).sub(mean).div(std)
            target = target.to(device, non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            n_samples = images.size(0)
            losses.update(loss.item(), n_samples)
            top1.update(acc1[0], n_samples)
            top5.update(acc5[0], n_samples)

            # measure elapsed time
            batch_time.update(time.time() - tick)
            tick = time.time()

            if step % args.print_freq == 0 and on_reporting_rank():
                progress.display(step)

        # TODO: this should also be done with the ProgressMeter
        if on_reporting_rank():
            print("[npu id:", args.gpu, "]", '[AVG-ACC] * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
                  .format(top1=top1, top5=top5))
            hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP1, value="{top1.avg:.3f}".format(top1=top1))
            hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP5, value="{top5.avg:.3f}".format(top5=top5))

    return top1.avg
|
||||
|
||||
|
||||
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Serialize ``state`` to ``filename`` and keep a copy of the best model.

    Args:
        state: Checkpoint dictionary; ``state['best_acc1']`` and
            ``state['epoch']`` are used to name the best-model copy.
        is_best: When true, the freshly written file is also copied to
            ``model_best_acc<acc>_epoch<epoch>.pth.tar``.
        filename: Path of the checkpoint file to write.
    """
    torch.save(state, filename)
    if not is_best:
        return
    best_name = 'model_best_acc%.4f_epoch%d.pth.tar' % (state['best_acc1'], state['epoch'])
    shutil.copyfile(filename, best_name)
|
||||
|
||||
|
||||
class AverageMeter(object):
    """Keeps the latest value and a running average that skips a warm-up phase.

    The first ``start_count_index`` batches (measured in units of the size of
    the first batch) are counted but excluded from ``sum`` and ``avg``.
    """

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()
        # Number of leading batches left out of the average.
        self.start_count_index = 10

    def reset(self):
        """Clear the current value and all accumulated statistics."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed over ``n`` samples."""
        if self.count == 0:
            # The first batch defines the batch size used for the warm-up.
            self.batchsize = n

        self.val = val
        self.count += n

        warmup_samples = self.start_count_index * self.batchsize
        if self.count <= warmup_samples:
            return
        self.sum += val * n
        self.avg = self.sum / (self.count - warmup_samples)

    def __str__(self):
        template = ''.join(['{name} {val', self.fmt, '} ({avg', self.fmt, '})'])
        return template.format(**self.__dict__)
|
||||
|
||||
|
||||
class ProgressMeter(object):
    """Prints a one-line progress report built from a list of meters.

    Args:
        num_batches: Total number of batches, used to size the
            ``[current/total]`` counter.
        meters: Objects exposing ``name``, ``fmt`` and ``val`` and a useful
            ``str()`` (e.g. ``AverageMeter``).
        prefix: Text printed before the batch counter.
    """

    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        """Print the report for ``batch`` and log the current accuracies."""
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print("[npu id:", os.environ['KERNEL_NAME_ID'], "]", '\t'.join(entries))

        # Read the accuracies from the meters themselves instead of parsing
        # the printed text, and skip the log entry when a meter is absent.
        train_acc1 = self._current_value("Acc@1")
        if train_acc1 is not None:
            hwlog.remark_print(key=hwlog.TRAIN_ACCURACY_TOP1, value=train_acc1)
        train_acc5 = self._current_value("Acc@5")
        if train_acc5 is not None:
            hwlog.remark_print(key=hwlog.TRAIN_ACCURACY_TOP5, value=train_acc5)

    def _current_value(self, meter_name):
        """Return the formatted current value of the named meter, or None."""
        for meter in self.meters:
            if meter.name == meter_name:
                return ('{val' + meter.fmt + '}').format(val=meter.val).strip()
        return None

    def _get_batch_fmtstr(self, num_batches):
        """Build the ``[{:Nd}/total]`` template, N being the digits of the total."""
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'
|
||||
|
||||
|
||||
# def adjust_learning_rate(optimizer, epoch, args):
|
||||
# """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
|
||||
# lr = args.lr * (0.1 ** (epoch // 30))
|
||||
# for param_group in optimizer.param_groups:
|
||||
# param_group['lr'] = lr
|
||||
|
||||
def adjust_learning_rate(optimizer, global_step, steps_per_epoch, args):
    """Set the learning rate, decayed by a factor of 0.98 every 2.5 epochs.

    Args:
        optimizer: Object with a ``param_groups`` list of dicts; the ``'lr'``
            entry of every group is overwritten.
        global_step: Number of training steps taken so far across all epochs.
        steps_per_epoch: Number of steps in one epoch.
        args: Namespace providing the initial learning rate as ``args.lr``.

    Returns:
        The learning rate that was applied.
    """
    # The decay interval is clamped to one step so that very small values of
    # steps_per_epoch cannot cause a division by zero.
    decay_interval = max(1, int(steps_per_epoch * 2.5))
    lr = args.lr * (0.98 ** (global_step // decay_interval))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr
|
||||
|
||||
|
||||
def accuracy(output, target, topk=(1,)):
    """Compute the top-k accuracy for every k in ``topk``.

    Args:
        output: Score tensor; ``topk`` is taken along dimension 1.
        target: Tensor of class indices, one per row of ``output``.
        topk: Values of k to evaluate.

    Returns:
        A list with one single-element tensor per k, holding the percentage
        of rows whose target is among the k highest-scoring classes.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # `correct` may be non-contiguous after the transpose above, in
            # which case view(-1) raises; reshape copies only when needed.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
|
||||
|
||||
|
||||
def fast_collate(batch):
    """Collate ``(image, label)`` samples into a uint8 image tensor and int64 labels.

    All images are assumed to share the size of the first one. Each image is
    converted to a channel-first uint8 array and written into a pre-allocated
    ``(len(batch), 3, height, width)`` tensor.
    """
    images = [sample[0] for sample in batch]
    labels = torch.tensor([sample[1] for sample in batch], dtype=torch.int64)

    # `size` is (width, height) for the first image.
    width = images[0].size[0]
    height = images[0].size[1]
    out = torch.zeros((len(images), 3, height, width), dtype=torch.uint8)

    for index, image in enumerate(images):
        pixels = np.asarray(image, dtype=np.uint8)
        if pixels.ndim < 3:
            # Single-channel image: add a trailing channel axis.
            pixels = np.expand_dims(pixels, axis=-1)
        # Move the channel axis to the front: (H, W, C) -> (C, H, W).
        channel_first = np.rollaxis(pixels, 2)

        out[index] += torch.from_numpy(channel_first)

    return out, labels
|
||||
|
||||
|
||||
def get_pytorch_train_loader(data_path, batch_size, workers=5, _worker_init_fn=None, distributed=False):
    """Build the training data loader for ``<data_path>/train``.

    Images are randomly cropped to 224 pixels and randomly flipped; they are
    batched by ``fast_collate``. Incomplete final batches are dropped.

    Returns:
        A tuple ``(loader, number_of_batches, sampler)``; ``sampler`` is
        ``None`` unless ``distributed`` is true.
    """
    augmentations = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
    ])
    train_dataset = datasets.ImageFolder(os.path.join(data_path, 'train'), augmentations)

    train_sampler = None
    if distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)

    # Shuffling is delegated to the sampler whenever one is used.
    train_loader = MultiEpochsDataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=(train_sampler is None),
        num_workers=workers,
        worker_init_fn=_worker_init_fn,
        pin_memory=True,
        sampler=train_sampler,
        collate_fn=fast_collate,
        drop_last=True)
    return train_loader, len(train_loader), train_sampler
|
||||
|
||||
|
||||
def get_pytorch_val_loader(data_path, batch_size, workers=5, _worker_init_fn=None, distributed=False):
    """Build the validation data loader for ``<data_path>/val``.

    Images are resized to 256 pixels and centre-cropped to 224; they are
    batched by ``fast_collate``. A ``DistributedSampler`` is used only when
    ``distributed`` is true; otherwise the loader shuffles the data.
    """
    preprocessing = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
    ])
    val_dataset = datasets.ImageFolder(os.path.join(data_path, 'val'), preprocessing)

    val_sampler = None
    if distributed:
        val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)

    return MultiEpochsDataLoader(
        val_dataset,
        sampler=val_sampler,
        batch_size=batch_size,
        shuffle=(val_sampler is None),
        num_workers=workers,
        worker_init_fn=_worker_init_fn,
        pin_memory=True,
        collate_fn=fast_collate)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Benchmark logs are rooted at the directory that contains this script.
    hwlog.ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
    cpu_info, npu_info, framework_info, os_info, benchmark_version = get_environment_info("pytorch")
    config_info = get_model_parameter("pytorch_config")
    initinal_data = {"base_lr": 0.1, "dataset": "imagenet", "optimizer": "SGD", "loss_scale": 1024}

    # Emit the environment and configuration records, in this order, before
    # training starts.
    for log_key, log_value in (
            (hwlog.CPU_INFO, cpu_info),
            (hwlog.NPU_INFO, npu_info),
            (hwlog.OS_INFO, os_info),
            (hwlog.FRAMEWORK_INFO, framework_info),
            (hwlog.BENCHMARK_VERSION, benchmark_version),
            (hwlog.CONFIG_INFO, config_info),
            (hwlog.BASE_LR, initinal_data.get("base_lr")),
            (hwlog.DATASET, initinal_data.get("dataset")),
            (hwlog.OPT_NAME, initinal_data.get("optimizer")),
            (hwlog.LOSS_SCALE, initinal_data.get("loss_scale")),
    ):
        hwlog.remark_print(key=log_key, value=log_value)

    main()
|
||||
+663
@@ -0,0 +1,663 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import random
|
||||
import shutil
|
||||
import time
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.parallel
|
||||
import torch.backends.cudnn as cudnn
|
||||
import torch.distributed as dist
|
||||
import torch.optim
|
||||
import torch.multiprocessing as mp
|
||||
import torch.utils.data
|
||||
import torch.utils.data.distributed
|
||||
import torchvision.transforms as transforms
|
||||
import torchvision.datasets as datasets
|
||||
import torchvision.models as models
|
||||
|
||||
from mobilenet import mobilenet_v2
|
||||
from apex import amp
|
||||
from multi_epochs_dataloader import MultiEpochsDataLoader
|
||||
|
||||
from benchmark_log import hwlog
|
||||
from benchmark_log.basic_utils import get_environment_info
|
||||
from benchmark_log.basic_utils import get_model_parameter
|
||||
# Default total batch size per node, and the number of samples accumulated
# before an optimizer step in benchmark mode (see train()).
BATCH_SIZE = 6144
OPTIMIZER_BATCH_SIZE = 6144

# The architecture is fixed to MobileNetV2, so there is no --arch option.
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--data', metavar='DIR', default='/opt/npu/dataset/imagenet',
                    help='path to dataset')
parser.add_argument('-j', '--workers', default=32, type=int, metavar='N',
                    help='number of data loading workers (default: 32)')
parser.add_argument('--epochs', default=90, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=BATCH_SIZE, type=int,
                    metavar='N',
                    help='mini-batch size (default: {}), this is the total '
                         'batch size of all GPUs on the current node when '
                         'using Data Parallel or Distributed Data Parallel'.format(BATCH_SIZE))
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)',
                    dest='weight_decay')
parser.add_argument('--workspace', type=str, default='./', metavar='DIR',
                    help='path to directory where checkpoints will be stored')
parser.add_argument('-p', '--print-freq', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('-ef', '--eval-freq', default=5, type=int,
                    metavar='N', help='evaluate frequency (default: 5)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                    help='evaluate model on validation set')
parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                    help='use pre-trained model')
parser.add_argument('--world-size', default=-1, type=int,
                    help='number of nodes for distributed training')
parser.add_argument('--rank', default=-1, type=int,
                    help='node rank for distributed training')
parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
                    help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str,
                    help='distributed backend')
parser.add_argument('--seed', default=None, type=int,
                    help='seed for initializing training. ')
parser.add_argument('--gpu', default=None, type=int,
                    help='GPU id to use.')
parser.add_argument('--multiprocessing-distributed', action='store_true',
                    help='Use multi-processing distributed training to launch '
                         'N processes per node, which has N GPUs. This is the '
                         'fastest way to use PyTorch for either single node or '
                         'multi node data parallel training')
# The help text previously claimed "default: 1" although the default is 0.
parser.add_argument('-bm', '--benchmark', default=0, type=int,
                    metavar='N', help='set benchmark status (default: 0; 1: run benchmark)')
parser.add_argument('--device', default='npu', type=str, help='npu or gpu')
parser.add_argument('--addr', default='10.136.181.115', type=str, help='master addr')
parser.add_argument('--checkpoint-nameprefix', default='checkpoint', type=str, help='checkpoint-nameprefix')
# The adjacent literals used to be concatenated without any separator.
parser.add_argument('--checkpoint-freq', default=0, type=int,
                    metavar='N', help='checkpoint frequency (default: 0). '
                                      '0: save only one file per epoch; '
                                      'n: save a different file every n epochs; '
                                      '-1: no checkpoint, not supported')

parser.add_argument('--device-list', default='0,1,2,3,4,5,6,7', type=str, help='device id list')

# apex
parser.add_argument('--amp', default=False, action='store_true',
                    help='use amp to train the model')
parser.add_argument('--loss-scale', default=64., type=float,
                    help='loss scale used in amp (default: 64; -1 means dynamic)')
# The help text used to be a copy of the --loss-scale description.
parser.add_argument('--opt-level', default='O2', type=str,
                    help='apex amp optimization level (default: O2)')

warnings.filterwarnings('ignore')
# Best top-1 validation accuracy seen so far; updated by main_worker().
best_acc1 = 0
|
||||
|
||||
|
||||
def device_id_to_process_device_map(device_list):
    """Map process indices to device ids.

    Args:
        device_list: Comma-separated device ids, e.g. ``"0,1,2,3"``.

    Returns:
        A dict ``{process_index: device_id}`` in which the device ids are
        assigned in ascending order, starting at process index 0.
    """
    ordered_ids = sorted(int(token) for token in device_list.split(","))
    return dict(enumerate(ordered_ids))
|
||||
|
||||
|
||||
def main():
    """Parse the command line, prepare the process environment and start training.

    On NPU the number of worker processes equals the number of ids in
    ``--device-list``; otherwise it is the CUDA device count. Workers are
    spawned with ``mp.spawn`` when ``--multiprocessing-distributed`` is set,
    else ``main_worker`` runs in the current process.
    """
    args = parser.parse_args()
    banner = "===============main()================="
    print(banner)
    print(args)
    print(banner)

    os.environ['KERNEL_NAME_ID'] = str(0)
    print("++++++++++++++++++ KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID'])

    seed = args.seed
    if seed is not None:
        random.seed(seed)
        torch.manual_seed(seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    # Rendezvous address for torch.distributed; the port is fixed.
    os.environ['MASTER_ADDR'] = args.addr  # '10.136.181.51'
    os.environ['MASTER_PORT'] = '59629'

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    # Process index -> device id, derived from --device-list.
    args.process_device_map = device_id_to_process_device_map(args.device_list)

    ngpus_per_node = (len(args.process_device_map) if args.device == 'npu'
                      else torch.cuda.device_count())

    if not args.multiprocessing_distributed:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
        return

    # Since we have ngpus_per_node processes per node, the total world_size
    # needs to be adjusted accordingly
    args.world_size = ngpus_per_node * args.world_size
    # Use torch.multiprocessing.spawn to launch distributed processes: the
    # main_worker process function.
    # The child process uses the environment variables of the parent process,
    # we have to set KERNEL_NAME_ID for every proc (done in main_worker).
    mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
|
||||
|
||||
|
||||
def main_worker(gpu, ngpus_per_node, args):
    """Per-process entry point: bind one NPU, then train and evaluate.

    Args:
        gpu: Local process index. It is the key into
            ``args.process_device_map`` that yields the device id and, in
            multiprocessing mode, the offset used to build the global rank.
        ngpus_per_node: Number of worker processes on this node. The batch
            size and the data-loader worker count are divided by it.
        args: Parsed option namespace. It is mutated in place (``gpu``,
            ``rank``, ``batch_size``, ``workers``, ``start_epoch``).
    """
    global best_acc1
    args.gpu = args.process_device_map[gpu]

    # This first print is purely diagnostic, so read the inherited value with
    # .get(): indexing raised KeyError whenever the parent process had not
    # exported KERNEL_NAME_ID.
    print("[npu id:", args.gpu, "]", "++++++++++++++++ before set KERNEL_NAME_ID:",
          os.environ.get('KERNEL_NAME_ID'))
    os.environ['KERNEL_NAME_ID'] = str(gpu)
    print("[npu id:", args.gpu, "]", "++++++++++++++++ KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID'])

    if args.gpu is not None:
        print("[npu id:", args.gpu, "]", "Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu

        if args.device == 'npu':
            # init_method is intentionally not passed on the NPU path.
            dist.init_process_group(backend=args.dist_backend,
                                    world_size=args.world_size, rank=args.rank)
        else:
            dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                    world_size=args.world_size, rank=args.rank)

    loc = 'npu:{}'.format(args.gpu)
    torch.npu.set_device(loc)

    # args.batch_size and args.workers arrive as per-node totals; convert them
    # to per-process values (workers rounded up).
    args.batch_size = int(args.batch_size / ngpus_per_node)
    args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)

    print("[npu id:", args.gpu, "]", "===============main_worker()=================")
    print("[npu id:", args.gpu, "]", args)
    print("[npu id:", args.gpu, "]", "===============main_worker()=================")

    # Data loading code
    train_loader, train_loader_len, train_sampler = get_pytorch_train_loader(
        args.data,
        args.batch_size,
        workers=args.workers,
        distributed=args.distributed)
    # Validation is not sharded: every process iterates the full validation set.
    val_loader = get_pytorch_val_loader(args.data, args.batch_size, args.workers, distributed=False)

    # create model
    print("[npu id:", args.gpu, "]", "=> creating model '{}'".format('mobilenetv2'))
    model = mobilenet_v2()
    model = model.to(loc)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(loc)
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale)

    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            if args.amp:
                amp.load_state_dict(checkpoint['amp'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    if args.evaluate:
        validate(val_loader, model, criterion, args, ngpus_per_node)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # train for one epoch (the learning rate is adjusted per step in train())
        train(train_loader, train_loader_len, model, criterion, optimizer, epoch, args, ngpus_per_node)

        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            # evaluate on validation set
            acc1 = validate(val_loader, model, criterion, args, ngpus_per_node)

            # remember best acc@1 and save checkpoint
            is_best = acc1 > best_acc1
            best_acc1 = max(acc1, best_acc1)

            # In multiprocessing mode only the first process of each node
            # saves, and only after the final epoch.
            if not args.multiprocessing_distributed or (
                    args.rank % ngpus_per_node == 0 and epoch == args.epochs - 1):
                state = {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                }
                if args.amp:
                    # Loss-scaler state is needed to resume mixed precision.
                    state['amp'] = amp.state_dict()
                save_checkpoint(state, is_best)
|
||||
|
||||
|
||||
def train(train_loader, train_loader_len, model, criterion, optimizer, epoch, args, ngpus_per_node):
    """Train ``model`` for one epoch and report throughput.

    Args:
        train_loader: Iterable yielding ``(images, target)`` batches.
        train_loader_len: Number of steps per epoch; used for the progress
            display and for the global step passed to the LR schedule.
        model: Network to train.
        criterion: Loss callable applied to ``(output, target)``.
        optimizer: Optimizer whose learning rate is updated every step.
        epoch: Zero-based index of the current epoch.
        args: Option namespace (reads ``gpu``, ``benchmark``, ``amp``,
            ``batch_size``, ``print_freq``, ``multiprocessing_distributed``,
            ``rank``).
        ngpus_per_node: Processes per node; used for rank filtering and to
            scale the reported FPS.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        train_loader_len,
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    loc = 'npu:{}'.format(args.gpu)

    # Per-channel normalisation constants scaled by 255; they are applied to
    # the batch after it has been cast to float on the device.
    mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).view(1, 3, 1, 1)
    std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).view(1, 3, 1, 1)
    mean = mean.to(loc, non_blocking=True)
    std = std.to(loc, non_blocking=True)

    # switch to train mode
    model.train()
    end = time.time()
    if args.benchmark == 1:
        optimizer.zero_grad()
        # Loop-invariant: number of micro-batches accumulated per optimizer step.
        batch_size_multiplier = int(OPTIMIZER_BATCH_SIZE / args.batch_size)

    steps_per_epoch = train_loader_len
    print('==========step per epoch======================', steps_per_epoch)
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        global_step = epoch * steps_per_epoch + i
        adjust_learning_rate(optimizer, global_step, steps_per_epoch, args)

        target = target.to(torch.int32)
        images = images.to(loc, non_blocking=True).to(torch.float).sub(mean).div(std)
        target = target.to(loc, non_blocking=True)

        # compute output
        output = model(images)
        torch.npu.current_stream().synchronize()

        loss = criterion(output, target)
        torch.npu.current_stream().synchronize()

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        if args.benchmark == 0:
            optimizer.zero_grad()

        if args.amp:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        torch.npu.current_stream().synchronize()

        if args.benchmark == 0:
            optimizer.step()
        elif args.benchmark == 1:
            # Gradient accumulation: step only every batch_size_multiplier
            # iterations, after averaging the accumulated gradients.
            if ((i + 1) % batch_size_multiplier) == 0:
                for param_group in optimizer.param_groups:
                    for param in param_group['params']:
                        param.grad /= batch_size_multiplier
                optimizer.step()
                optimizer.zero_grad()
        torch.npu.current_stream().synchronize()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            if not args.multiprocessing_distributed or args.rank % ngpus_per_node == 0:
                progress.display(i)

    if not args.multiprocessing_distributed or args.rank % ngpus_per_node == 0:
        # batch_time.avg is still 0 when the epoch had too few steps for the
        # meter to leave its warm-up phase; report 0 instead of dividing by it.
        if batch_time.avg > 0:
            fps = ngpus_per_node * args.batch_size / batch_time.avg
        else:
            fps = 0.0
        print("[npu id:", args.gpu, "]", '* FPS@all {:.3f}'.format(fps))
        hwlog.remark_print(key=hwlog.FPS, value=' * FPS@all {:.3f}'.format(fps))
|
||||
|
||||
|
||||
def validate(val_loader, model, criterion, args, ngpus_per_node):
    """Evaluate ``model`` on ``val_loader`` and return the average top-1 accuracy.

    Args:
        val_loader: Iterable yielding ``(images, target)`` batches; must
            support ``len()``.
        model: Network to evaluate (switched to eval mode here).
        criterion: Loss callable applied to ``(output, target)``.
        args: Option namespace (reads ``gpu``, ``print_freq``,
            ``multiprocessing_distributed``, ``rank``).
        ngpus_per_node: Processes per node, used for rank filtering of output.

    Returns:
        The ``avg`` value of the top-1 accuracy meter.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    tracked = [batch_time, losses, top1, top5]
    progress = ProgressMeter(len(val_loader), tracked, prefix='Test: ')

    def reports_here():
        # Single-process runs always report; otherwise only the first
        # process of each node does.
        return not args.multiprocessing_distributed or args.rank % ngpus_per_node == 0

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        device = 'npu:{}'.format(args.gpu)
        mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).view(1, 3, 1, 1)
        std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).view(1, 3, 1, 1)
        mean = mean.to(device, non_blocking=True)
        std = std.to(device, non_blocking=True)

        tick = time.time()
        for step, (images, target) in enumerate(val_loader):
            target = target.to(torch.int32)
            images = images.to(device, non_blocking=True).to(torch.float).sub(mean).div(std)
            target = target.to(device, non_blocking=True)

            # forward pass and loss
            output = model(images)
            loss = criterion(output, target)

            # update the running statistics
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            samples = images.size(0)
            losses.update(loss.item(), samples)
            top1.update(acc1[0], samples)
            top5.update(acc5[0], samples)

            # per-batch wall-clock time
            now = time.time()
            batch_time.update(now - tick)
            tick = time.time()

            if step % args.print_freq == 0 and reports_here():
                progress.display(step)

        # TODO: this should also be done with the ProgressMeter
        if reports_here():
            print("[npu id:", args.gpu, "]", '[AVG-ACC] * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
                  .format(top1=top1, top5=top5))
            hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP1, value="{top1.avg:.3f}".format(top1=top1))
            hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP5, value="{top5.avg:.3f}".format(top5=top5))

    return top1.avg
|
||||
|
||||
|
||||
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Serialize ``state`` to ``filename``; keep a named copy when it is the best so far.

    Args:
        state: Checkpoint dict; must contain ``'best_acc1'`` and ``'epoch'``
            when ``is_best`` is true (they are used in the copy's file name).
        is_best: Whether to also copy the file to a ``model_best_...`` name.
        filename: Destination path of the regular checkpoint.
    """
    torch.save(state, filename)
    if not is_best:
        return
    best_name = 'model_best_acc%.4f_epoch%d.pth.tar' % (state['best_acc1'], state['epoch'])
    shutil.copyfile(filename, best_name)
|
||||
|
||||
|
||||
class AverageMeter(object):
    """Tracks the latest value and a running average that skips a warm-up phase.

    The first ``start_count_index`` updates (measured in units of the first
    update's ``n``) only set ``val``; they do not contribute to ``avg``.

    Args:
        name: Label printed by ``__str__``.
        fmt: Format spec (for example ``':6.3f'``) applied to ``val`` and ``avg``.
        start_count_index: Number of leading updates excluded from the average.
            Defaults to 10, the value that used to be hard-coded.
    """

    def __init__(self, name, fmt=':f', start_count_index=10):
        self.name = name
        self.fmt = fmt
        self.start_count_index = start_count_index
        self.reset()

    def reset(self):
        """Clear all statistics, including the remembered first batch size."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0
        # Size of the first update after a reset; defines the warm-up threshold.
        self.batchsize = 0

    def update(self, val, n=1):
        """Record ``val`` observed over ``n`` samples.

        Args:
            val: Latest measurement.
            n: Number of samples the measurement covers.
        """
        if self.count == 0:
            self.batchsize = n

        self.val = val
        self.count += n
        warmup = self.start_count_index * self.batchsize
        if self.count > warmup:
            self.sum += val * n
            self.avg = self.sum / (self.count - warmup)

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)
|
||||
|
||||
|
||||
class ProgressMeter(object):
    """Prints one progress line per call and forwards accuracy values to hwlog.

    Args:
        num_batches: Total number of batches, shown as the denominator.
        meters: Objects whose ``str()`` form is appended to the line.
        prefix: Text placed before the batch counter.
    """

    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        """Print the progress line for ``batch`` and log any Acc@1/Acc@5 values.

        NOTE(review): the values are logged under the TRAIN_ACCURACY_* keys
        regardless of which loop calls this method -- confirm that is intended.
        """
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        # .get() so that a missing KERNEL_NAME_ID cannot break progress output.
        print("[npu id:", os.environ.get('KERNEL_NAME_ID'), "]", '\t'.join(entries))

        rendered = str(entries)
        for label, log_key_name in (("Acc@1", "TRAIN_ACCURACY_TOP1"),
                                    ("Acc@5", "TRAIN_ACCURACY_TOP5")):
            # Skip labels that no meter printed; indexing the split result
            # unconditionally raised IndexError for such meter sets.
            if label not in rendered:
                continue
            # First whitespace-delimited token after the label: the current value.
            current = rendered.split(label)[1].strip().split(" ")[0]
            hwlog.remark_print(key=getattr(hwlog, log_key_name), value=current)

    def _get_batch_fmtstr(self, num_batches):
        """Return a template such as ``'[{:3d}/100]'`` sized to ``num_batches``."""
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'
|
||||
|
||||
|
||||
# def adjust_learning_rate(optimizer, epoch, args):
|
||||
# """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
|
||||
# lr = args.lr * (0.1 ** (epoch // 30))
|
||||
# for param_group in optimizer.param_groups:
|
||||
# param_group['lr'] = lr
|
||||
|
||||
# def adjust_learning_rate(optimizer, global_step, steps_per_epoch, args):
|
||||
# """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
|
||||
# # lr = args.lr * (0.98 ** (epoch / 2.5))
|
||||
# lr = args.lr * (0.98 ** (global_step // int(steps_per_epoch * 2.5)))
|
||||
# for param_group in optimizer.param_groups:
|
||||
# param_group['lr'] = lr
|
||||
# return lr
|
||||
|
||||
def adjust_learning_rate(optimizer, global_step, steps_per_epoch, args):
    """Set a staircase-decayed learning rate on every parameter group.

    The rate is ``args.lr * 0.98 ** k`` where ``k`` is the number of completed
    decay intervals, each lasting ``int(steps_per_epoch * 2.5)`` steps
    (i.e. a 2% decay every 2.5 epochs).

    Args:
        optimizer: Object exposing ``param_groups``, a list of dicts.
        global_step: Zero-based step index counted across all epochs.
        steps_per_epoch: Number of optimizer steps in one epoch.
        args: Namespace providing the base learning rate as ``args.lr``.

    Returns:
        The learning rate that was written into the parameter groups.
    """
    # Clamp to one step so a degenerate epoch length cannot divide by zero.
    decay_interval = max(1, int(steps_per_epoch * 2.5))
    lr = args.lr * (0.98 ** (global_step // decay_interval))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr
|
||||
|
||||
|
||||
def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy, in percent, for each k in ``topk``.

    Args:
        output: Score tensor indexed as (sample, class); ``topk`` is taken
            along dimension 1.
        target: Tensor of class indices, one per sample.
        topk: Iterable of k values to evaluate.

    Returns:
        A list with one single-element tensor per entry of ``topk``.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # reshape(), not view(): the slice derives from the transposed
            # prediction tensor and may be non-contiguous, which view() rejects.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
|
||||
|
||||
|
||||
def fast_collate(batch):
    """Collate ``(image, label)`` samples into a uint8 image tensor and int64 labels.

    Every image is converted with ``np.asarray(..., dtype=np.uint8)``, given a
    trailing channel axis if it has fewer than three dimensions, and moved to
    channel-first layout. The output size is taken from the first image's
    ``size`` attribute (read as ``(width, height)``).

    Args:
        batch: Sequence of samples; element 0 is the image, element 1 the label.

    Returns:
        Tuple ``(tensor, targets)``: a ``(len(batch), 3, h, w)`` uint8 tensor
        and a 1-D int64 tensor of labels.
    """
    pictures = [sample[0] for sample in batch]
    labels = torch.tensor([sample[1] for sample in batch], dtype=torch.int64)

    first = pictures[0]
    width, height = first.size[0], first.size[1]
    stacked = torch.zeros((len(pictures), 3, height, width), dtype=torch.uint8)

    for slot, picture in enumerate(pictures):
        pixels = np.asarray(picture, dtype=np.uint8)
        if pixels.ndim < 3:
            # Add a channel axis; the in-place add below broadcasts it.
            pixels = np.expand_dims(pixels, axis=-1)
        channel_first = np.rollaxis(pixels, 2)
        stacked[slot] += torch.from_numpy(channel_first)

    return stacked, labels
|
||||
|
||||
|
||||
def get_pytorch_train_loader(data_path, batch_size, workers=5, _worker_init_fn=None, distributed=False):
    """Build the training loader over ``<data_path>/train``.

    Args:
        data_path: Dataset root containing a ``train`` sub-directory.
        batch_size: Batch size handed to the loader.
        workers: Number of loader worker processes.
        _worker_init_fn: Optional callable forwarded as ``worker_init_fn``.
        distributed: When true, a ``DistributedSampler`` is used instead of
            loader-side shuffling.

    Returns:
        Tuple ``(loader, len(loader), sampler)``; ``sampler`` is ``None`` when
        ``distributed`` is false.
    """
    # Augmentation only; samples are turned into tensors by fast_collate.
    augmentations = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
    ])
    train_dataset = datasets.ImageFolder(os.path.join(data_path, 'train'), augmentations)

    train_sampler = None
    if distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)

    train_loader = MultiEpochsDataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=(train_sampler is None),
        num_workers=workers,
        worker_init_fn=_worker_init_fn,
        pin_memory=True,
        sampler=train_sampler,
        collate_fn=fast_collate,
        drop_last=True,
    )
    steps = len(train_loader)
    return train_loader, steps, train_sampler
|
||||
|
||||
|
||||
def get_pytorch_val_loader(data_path, batch_size, workers=5, _worker_init_fn=None, distributed=False):
    """Build the validation loader over ``<data_path>/val``.

    Args:
        data_path: Dataset root containing a ``val`` sub-directory.
        batch_size: Batch size handed to the loader.
        workers: Number of loader worker processes.
        _worker_init_fn: Optional callable forwarded as ``worker_init_fn``.
        distributed: When true, a ``DistributedSampler`` is used; otherwise
            the loader is created with ``shuffle=True``.

    Returns:
        The validation data loader.
    """
    # Resize + centre crop only; samples are turned into tensors by fast_collate.
    preprocessing = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
    ])
    val_dataset = datasets.ImageFolder(os.path.join(data_path, 'val'), preprocessing)

    val_sampler = None
    if distributed:
        val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)

    loader_options = dict(
        sampler=val_sampler,
        batch_size=batch_size,
        shuffle=(val_sampler is None),
        num_workers=workers,
        worker_init_fn=_worker_init_fn,
        pin_memory=True,
        collate_fn=fast_collate,
    )
    return MultiEpochsDataLoader(val_dataset, **loader_options)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: emit the benchmark header records through hwlog,
    # then start training.
    hwlog.ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
    cpu_info, npu_info, framework_info, os_info, benchmark_version = get_environment_info("pytorch")
    config_info = get_model_parameter("pytorch_config")
    initinal_data = {"base_lr": 0.1, "dataset": "imagenet", "optimizer": "SGD", "loss_scale": 1024}

    # (hwlog key attribute, value) pairs, emitted in this exact order.
    header_records = (
        ("CPU_INFO", cpu_info),
        ("NPU_INFO", npu_info),
        ("OS_INFO", os_info),
        ("FRAMEWORK_INFO", framework_info),
        ("BENCHMARK_VERSION", benchmark_version),
        ("CONFIG_INFO", config_info),
        ("BASE_LR", initinal_data.get("base_lr")),
        ("DATASET", initinal_data.get("dataset")),
        ("OPT_NAME", initinal_data.get("optimizer")),
        ("LOSS_SCALE", initinal_data.get("loss_scale")),
    )
    for key_attr, record_value in header_records:
        hwlog.remark_print(key=getattr(hwlog, key_attr), value=record_value)

    main()
|
||||
+31
@@ -0,0 +1,31 @@
|
||||
import torch
|
||||
|
||||
|
||||
class MultiEpochsDataLoader(torch.utils.data.DataLoader):
    """DataLoader that creates its underlying iterator once and reuses it.

    The batch sampler is wrapped in ``_RepeatSampler`` so the single iterator
    built in ``__init__`` never runs dry; each pass through ``__iter__`` draws
    exactly ``len(self)`` batches from it.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Presumably this flag toggle is what lets batch_sampler be reassigned
        # after DataLoader construction -- TODO confirm against the installed
        # torch version.
        self._DataLoader__initialized = False
        self.batch_sampler = _RepeatSampler(self.batch_sampler)
        self._DataLoader__initialized = True
        # Built once here and shared by every subsequent epoch.
        self.iterator = super().__iter__()

    def __len__(self):
        # Length of the original (wrapped) batch sampler, i.e. batches per epoch.
        original_batch_sampler = self.batch_sampler.sampler
        return len(original_batch_sampler)

    def __iter__(self):
        remaining = len(self)
        while remaining > 0:
            yield next(self.iterator)
            remaining -= 1
|
||||
|
||||
|
||||
class _RepeatSampler(object):
|
||||
""" Sampler that repeats forever.
|
||||
Args:
|
||||
sampler (Sampler)
|
||||
"""
|
||||
|
||||
def __init__(self, sampler):
|
||||
self.sampler = sampler
|
||||
|
||||
def __iter__(self):
|
||||
while True:
|
||||
yield from iter(self.sampler)
|
||||
+18
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"startCfg":
|
||||
[
|
||||
{
|
||||
"jobID": "123456789",
|
||||
"deviceID": ["0"],
|
||||
"features":
|
||||
[
|
||||
{
|
||||
"name": "task_trace"
|
||||
},
|
||||
{
|
||||
"name": "training_trace"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
+19
@@ -0,0 +1,19 @@
|
||||
source set_env_b023.sh
|
||||
|
||||
su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 0"
|
||||
|
||||
export SLOG_PRINT_TO_STDOUT=0
|
||||
export TASK_QUEUE_ENABLE=0
|
||||
nohup taskset -c 1-40 python3.7 densenet121_1p_main.py \
|
||||
--workers 40 \
|
||||
--arch densenet121 \
|
||||
--npu 0 \
|
||||
--lr 0.1 \
|
||||
--momentum 0.9 \
|
||||
--amp \
|
||||
--print-freq 1 \
|
||||
--eval-freq 5\
|
||||
--batch-size 256 \
|
||||
--epoch 45 \
|
||||
--resume checkpoint.pth.tar \
|
||||
--data /home/dataset/imagenet > output_1p.log &
|
||||
+27
@@ -0,0 +1,27 @@
|
||||
source set_env_b023.sh
|
||||
|
||||
su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 0"
|
||||
su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 4"
|
||||
|
||||
export SLOG_PRINT_TO_STDOUT=0
|
||||
export TASK_QUEUE_ENABLE=0
|
||||
nohup python3.7 ./densenet121_8p_main.py \
|
||||
--addr='10.246.246.57' \
|
||||
--seed 49 \
|
||||
--workers 80 \
|
||||
--lr 0.8 \
|
||||
--print-freq 1 \
|
||||
--eval-freq 5\
|
||||
--arch densenet121 \
|
||||
--dist-url 'tcp://127.0.0.1:50000' \
|
||||
--dist-backend 'hccl' \
|
||||
--multiprocessing-distributed \
|
||||
--world-size 1 \
|
||||
--batch-size 2048 \
|
||||
--epochs 45 \
|
||||
--rank 0 \
|
||||
--amp \
|
||||
--benchmark 0 \
|
||||
--resume checkpoint.pth.tar \
|
||||
--data /train/imagenet > resume_8p.log &
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
source set_env_b023.sh
|
||||
|
||||
su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 0"
|
||||
|
||||
export SLOG_PRINT_TO_STDOUT=0
|
||||
export TASK_QUEUE_ENABLE=0
|
||||
nohup taskset -c 1-40 python3.7 densenet121_1p_main.py \
|
||||
--workers 40 \
|
||||
--arch densenet121 \
|
||||
--npu 0 \
|
||||
--lr 0.1 \
|
||||
--momentum 0.9 \
|
||||
--amp \
|
||||
--print-freq 1 \
|
||||
--eval-freq 5\
|
||||
--batch-size 256 \
|
||||
--epoch 90 \
|
||||
--data /opt/npu/dataset/imagenet > output_1p.log &
|
||||
@@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env bash
|
||||
source set_env_b023.sh
|
||||
|
||||
su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 0"
|
||||
su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 4"
|
||||
|
||||
export SLOG_PRINT_TO_STDOUT=0
|
||||
export TASK_QUEUE_ENABLE=0
|
||||
nohup python3.7 ./mobilenetv2_8p_main.py \
|
||||
--addr='10.246.246.76' \
|
||||
--seed 49 \
|
||||
--workers 80 \
|
||||
--lr 0.24 \
|
||||
--print-freq 1 \
|
||||
--eval-freq 5\
|
||||
--dist-url 'tcp://127.0.0.1:50002' \
|
||||
--dist-backend 'hccl' \
|
||||
--multiprocessing-distributed \
|
||||
--world-size 1 \
|
||||
--batch-size 6144 \
|
||||
--epochs 600 \
|
||||
--rank 0 \
|
||||
--amp \
|
||||
--benchmark 0 \
|
||||
--data /opt/npu/dataset/imagenet > output_8p.log &
|
||||
|
||||
+17
@@ -0,0 +1,17 @@
|
||||
############## toolkit situation ################
|
||||
#export ASCEND_HOME=/usr/local/Ascend
|
||||
#export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/
|
||||
#export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/hccl
|
||||
#export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin
|
||||
#export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
|
||||
|
||||
############## nnae situation ################
|
||||
export ASCEND_HOME=/usr/local/Ascend
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/:/usr/local/python3.7.5/lib/:/usr/lib/:/usr/local/Ascend/nnae/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/
|
||||
export PYTHONPATH=/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/hccl
|
||||
export PATH=$PATH:/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin
|
||||
export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp/
|
||||
|
||||
# pip3.7 install --upgrade /usr/local/Ascend/nnae/latest/fwkacllib/lib64/topi-0.4.0-py3-none-any.whl
|
||||
# pip3.7 install --upgrade /usr/local/Ascend/nnae/latest/fwkacllib/lib64/te-0.4.0-py3-none-any.whl
|
||||
|
||||
+18
@@ -0,0 +1,18 @@
|
||||
############## toolkit situation ################
|
||||
export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
|
||||
export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/
|
||||
export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
|
||||
export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
|
||||
export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
|
||||
|
||||
|
||||
############## nnae situation ################
|
||||
# export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
|
||||
# export PATH=$PATH:/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/nnae/latest/toolkit/tools/ide_daemon/bin/
|
||||
# export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp/
|
||||
# export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
|
||||
# export PYTHONPATH=/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
|
||||
|
||||
|
||||
|
||||
# ln -s /usr/local/Ascend/ascend-toolkit/latest/toolkit/bin/adc /usr/local/bin/
|
||||
+12
@@ -0,0 +1,12 @@
|
||||
# main env
|
||||
export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
|
||||
export PATH=$PATH:/usr/local/Ascend/fwkacllib/ccec_compiler/bin
|
||||
export ASCEND_OPP_PATH=/usr/local/Ascend/opp
|
||||
export NEW_GE_FE_ID=1
|
||||
export GE_AICPU_FLAG=1
|
||||
export PYTHONPATH=/usr/local/Ascend/atc/python/site-packages/te.egg:/usr/local/Ascend/atc/python/site-packages/topi.egg:/usr/local/Ascend/atc/python/site-packages/auto_tune.egg:/usr/local/Ascend/atc/python/site-packages/schedule_search.egg:/usr/local
|
||||
export CUSTOM_OP_LIB_PATH=/usr/local/Ascend/ops/framework/built-in/tensorflow
|
||||
export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libaicpu_plugin.so:/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
|
||||
export PLUGIN_LOAD_PATH=/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libaicpu_plugin.so:/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/librts_engine.so
|
||||
export SLOG_PRINT_TO_STDOUT=1
|
||||
|
||||
+31
@@ -0,0 +1,31 @@
|
||||
############## toolkit situation ################
|
||||
#export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
|
||||
#export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/
|
||||
#export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
|
||||
#export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
|
||||
#export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
|
||||
|
||||
|
||||
############## nnae situation ################
|
||||
|
||||
|
||||
# Choose the Ascend software stack: use the nnae install when its directory
# exists, otherwise fall back to ascend-toolkit.
if [ -d /usr/local/Ascend/nnae/latest ]; then
    # The system library directory was misspelled "aarch64_64-linux-gnu";
    # it now matches the aarch64-linux-gnu path used in the else branch.
    export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH
    export PATH=$PATH:/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/nnae/latest/toolkit/tools/ide_daemon/bin/
    export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp/
    export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
    export PYTHONPATH=/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
else
    export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH
    export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/
    export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
    export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
    export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
fi
|
||||
|
||||
# ln -s /usr/local/Ascend/ascend-toolkit/latest/toolkit/bin/adc /usr/local/bin/
|
||||
|
||||
export SLOG_PRINT_TO_STDOUT=0
|
||||
#su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 0"
|
||||
|
||||
export TASK_QUEUE_ENABLE=1
|
||||
@@ -0,0 +1,60 @@
|
||||
#!/usr/bin/env bash
# Launch MobileNetV2 (PyTorch) training on Ascend NPUs.
#
# Usage: <this script> <rank_size> <yaml_path> <tools_path> [cluster] [all_ips]
#   rank_size   number of devices taking part in the job
#   yaml_path   yaml configuration file; its "pytorch_config" section is loaded
#   tools_path  directory that contains get_params_for_yaml.sh
#   cluster     "True" selects the multi-node mpirun launch (read only in docker)
#   all_ips     whitespace-separated node IPs (read only in docker)

if [ $# -lt 3 ];then
    echo "Usage: $0 <rank_size> <yaml_path> <tools_path> [cluster] [all_ips]" >&2
    exit 1
fi

rank_size=$1
yamlPath=$2
toolsPath=$3
# The cluster arguments are only picked up when running inside a container.
if [ -f /.dockerenv ];then
    CLUSTER=$4
    MPIRUN_ALL_IP="$5"
    export CLUSTER=${CLUSTER}
fi

currentDir=$(cd "$(dirname "$0")/.."; pwd)

source "${currentDir}/config/npu_set_env.sh"

# Turn every key of the yaml "pytorch_config" section into a shell variable.
eval $(${toolsPath}/get_params_for_yaml.sh ${yamlPath} "pytorch_config")

rm -rf /var/log/npu/slog/host-0/*
currtime=`date +%Y%m%d%H%M%S`
# Result directory of this job; computed once and reused below (ends with "/").
train_job_dir=${currentDir%train*}/train/result/pt_mobilenet/training_job_${currtime}/
mkdir -p "${train_job_dir}"

echo "[`date +%Y%m%d-%H:%M:%S`] [INFO] ${train_job_dir}"

# Device list: read device_group_<rank_size>p from the loaded configuration.
# When it is not set, or when rank_size is 8 or more, use devices
# 0..rank_size-1 in order.
group_var="device_group_${rank_size}p"
device_group="${!group_var}"
if [ -z "${device_group}" ] || [ "${rank_size}" -ge 8 ];then
    device_group="$(seq 0 "$(expr $rank_size - 1)")"
fi

# First whitespace-separated id of the list (not just its first character,
# which would truncate ids of two or more digits).
first_device_id=$(echo ${device_group} | awk '{print $1}')

rank_id=0

if [ x"${CLUSTER}" == x"True" ];then
    # Expose the log of sub-directory 0 at the top of the job directory.
    ln -snf "${train_job_dir}0/hw_mobilenet.log" "${train_job_dir}"
    this_ip=$(hostname -I |awk '{print $1}')
    # Copy the configuration to every other node before starting mpirun.
    for ip in $MPIRUN_ALL_IP;do
        if [ x"$this_ip" != x"$ip" ];then
            scp "$yamlPath" "root@$ip:$yamlPath"
            # jsonFilePath is not assigned in this script; copy it only when the
            # environment or the yaml configuration provided a value.
            if [ -n "${jsonFilePath}" ];then
                scp "$jsonFilePath" "root@$ip:$jsonFilePath"
            fi
        fi
    done
    export PATH=$PATH:/usr/local/mpirun4.0.2/bin
    mpirun -H ${mpirun_ip} \
        --bind-to none -map-by slot \
        --allow-run-as-root \
        --mca btl_tcp_if_exclude lo,docker0,endvnic,virbr0,vethf40501b,docker_gwbridge,br-f42ac38052b4 \
        --prefix /usr/local/mpirun4.0.2/ \
        "${currentDir}/scripts/train.sh" 0 "$rank_size" "$yamlPath" "$currtime" "${toolsPath}" "${CLUSTER}"
else
    # Expose the log of the first device at the top of the job directory.
    ln -snf "${train_job_dir}${first_device_id}/hw_mobilenet.log" "${train_job_dir}"
    # A single train.sh process is started in the background; the sixth
    # argument is the rank id.
    "${currentDir}/scripts/train.sh" 0 "$rank_size" "$yamlPath" "$currtime" "${toolsPath}" "$rank_id" &
fi
wait
|
||||
@@ -0,0 +1,146 @@
|
||||
#!/usr/bin/env bash
# Run one MobileNetV2 (PyTorch) training process on Ascend NPU(s) and record
# the outcome and elapsed time in the job's log files.
#
# Positional arguments:
#   $1  device id; exported as DEVICE_ID and used as the working sub-directory
#   $2  rank_size, the number of devices taking part in the job
#   $3  path of the yaml configuration file
#   $4  job timestamp used to name the result directory
#   $5  directory that contains get_params_for_yaml.sh
#   $6  "True" for a cluster launch; any other value is exported as RANK_ID

device_id=$1
rank_size=$2
yamlPath=$3
currentDir=$(cd "$(dirname "$0")/.."; pwd)
currtime=$4
toolsPath=$5

export YAML_PATH=$3
export train_job_dir=${currentDir%train*}/train/result/pt_mobilenet/training_job_${currtime}/
mkdir -p "${train_job_dir}"

# Load the yaml "pytorch_config" section as shell variables.
eval $(${toolsPath}/get_params_for_yaml.sh ${yamlPath} "pytorch_config")
export REMARK_LOG_FILE=hw_mobilenet.log
benchmark_log_path=${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils
export PYTHONPATH=$PYTHONPATH:${benchmark_log_path}

source "${currentDir}/config/set_env_b023.sh"

# user env
export HCCL_CONNECT_TIMEOUT=600
export JOB_ID=9999001
export RANK_TABLE_FILE=${currentDir}/config/${rank_size}p.json
export RANK_SIZE=${rank_size}
export SLOG_PRINT_TO_STDOUT=0
export DEVICE_ID=${device_id}
# RANK_INDEX is not assigned in this script; bash arithmetic evaluates an
# unset name as 0.
DEVICE_INDEX=$(( DEVICE_ID + RANK_INDEX * 8 ))
export DEVICE_INDEX=${DEVICE_INDEX}

cd "${train_job_dir}"
curd_dir=${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils/atlasboost
export PYTHONPATH=$PYTHONPATH:${curd_dir}

if [ x"$6" != x"True" ];then
    rank_id=$6
    export RANK_ID=$6
else
    # Cluster launch: ask atlasboost for this process' rank and device. The
    # rank is the last word of the captured output and the device id is the
    # text between "deviceid = " and " phyid=".
    device_id_mo=$(python3.7 -c "import src.tensorflow.mpi_ops as atlasboost;atlasboost.init(); \
        device_id = atlasboost.local_rank();cluster_device_id = str(device_id); \
        atlasboost.set_device_id(device_id);print(atlasboost.rank())")
    # Collapse newlines and repeated blanks into single spaces.
    device_id_mo=`echo $device_id_mo`
    rank_id=${device_id_mo##* }
    echo rank_id is $rank_id
    export RANK_ID=${rank_id}
    device=${device_id_mo##*deviceid = }
    device_id=${device%% phyid=*}
    export DEVICE_ID=${device_id}
    echo device_id is $device_id
    # Left unquoted on purpose so the glob expands when cp runs.
    hccljson=${train_job_dir}/*.json
    cp ${hccljson} ${currentDir}/config/${rank_size}p.json
fi

# Create and enter the per-device working directory.
mkdir -p "${train_job_dir}/${device_id}"
cd "${train_job_dir}/${device_id}"

startTime_s=`date +%s`

# The exit status of the training command is stored explicitly. Testing $?
# after the whole if/elif chain reported success when no branch matched,
# because an if compound without a matching branch exits with status 0.
train_status=0
if [ x"$6" == x"True" ];then
    python3.7 ${currentDir}/code/8p/mobilenetv2_8p_main.py \
        --addr=$(hostname -I |awk '{print $1}') \
        --seed 49 \
        --workers 128 \
        --lr 0.24 \
        --print-freq 1 \
        --eval-freq 5 \
        --dist-url 'tcp://127.0.0.1:50002' \
        --dist-backend 'hccl' \
        --multiprocessing-distributed \
        --world-size 1 \
        --batch-size ${batch_size} \
        --epochs ${epoches} \
        --rank 0 \
        --amp \
        --benchmark 0 \
        --data ${data_url} > ${train_job_dir}/train_${rank_size}p.log 2>&1
    train_status=$?
elif [ x"${rank_size}" == x"1" ];then
    # Single device.
    python3.7 ${currentDir}/code/1p/main_apex.py \
        --workers 128 \
        --seed 123456 \
        --lr 0.03 \
        --amp \
        --opt-level 'O2' \
        --loss-scale-value 64 \
        --momentum 0.9 \
        --batch-size ${batch_size} \
        --weight-decay 1e-5 \
        --epoch ${epoches} \
        --print-freq 1 \
        --device ${device_single} \
        --eval-freq 1 \
        --summary-path './runs/mobilenetv2/npu_O2_ls64_c75b150_0909' \
        --data ${data_url} > ${train_job_dir}/train_${rank_size}p.log 2>&1
    train_status=$?
elif [ ${rank_size} -le 8 ];then
    # Several devices on a single machine.
    python3.7 ${currentDir}/code/8p/mobilenetv2_8p_main_anycard.py \
        --addr=$(hostname -I |awk '{print $1}') \
        --seed 49 \
        --workers 128 \
        --lr ${lr} \
        --print-freq 1 \
        --loss-scale 64 \
        --eval-freq 1 \
        --dist-url 'tcp://127.0.0.1:50002' \
        --dist-backend 'hccl' \
        --multiprocessing-distributed \
        --world-size 1 \
        --batch-size ${batch_size} \
        --epochs ${epoches} \
        --rank 0 \
        --amp \
        --device-list ${device_group_mutli} \
        --benchmark 0 \
        --data ${data_url} > ${train_job_dir}/train_${rank_size}p.log 2>&1
    train_status=$?
else
    # No launch mode matches (for example rank_size above 8 without a cluster
    # launch): nothing was trained, so the run must be reported as failed.
    echo "[ERROR] unsupported rank_size '${rank_size}' for a non-cluster launch" >&2
    echo "[ERROR] unsupported rank_size '${rank_size}' for a non-cluster launch" > ${train_job_dir}/train_${rank_size}p.log
    train_status=1
fi

if [ ${train_status} -eq 0 ];then
    echo ":::ABK 1.0.0 hw_mobilenet train success"
    echo ":::ABK 1.0.0 hw_mobilenet train success" >> ${train_job_dir}/train_${rank_size}p.log
    echo ":::ABK 1.0.0 hw_mobilenet train success" >> ./hw_mobilenet.log
else
    echo ":::ABK 1.0.0 hw_mobilenet train failed"
    echo ":::ABK 1.0.0 hw_mobilenet train failed" >> ${train_job_dir}/train_${rank_size}p.log
    echo ":::ABK 1.0.0 hw_mobilenet train failed" >> ./hw_mobilenet.log
fi

endTime_s=`date +%s`

# Elapsed wall-clock time split into hours, minutes and seconds.
sumTime=$(( endTime_s - startTime_s ))

hour=$(( sumTime/3600 ))
min=$(( (sumTime-hour*3600)/60 ))
sec=$(( sumTime-hour*3600-min*60 ))
echo ":::ABK 1.0.0 mobilenet train total time:${hour}:${min}:${sec}"

echo ":::ABK 1.0.0 mobilenet train total time: ${hour}:${min}:${sec}" >> ./hw_mobilenet.log
|
||||
@@ -0,0 +1,47 @@
|
||||
# MobileNet_tensorflow训练说明
|
||||
|
||||
### 1. 模型训练参数配置
|
||||
|
||||
在train/yaml/MobileNet.yaml中修改相应配置, 配置项含义:
|
||||
|
||||
```
|
||||
tensorflow_config:
|
||||
# 基本参数
|
||||
max_steps: 1000
|
||||
data_url: 数据集路径
|
||||
epoches: 跑多少个epoch
|
||||
|
||||
# 训练(train) 或 评测(evaluate)
|
||||
mode: train
|
||||
batch_size: 256
|
||||
#仅在 mode 为 evaluate 时用到
|
||||
ckpt_path: /opt/0908/benchmark-benchmark_Alpha/train/result/tf_mobilenet/trainingJob_20200905171017/0/results/model.ckpt-123125
|
||||
|
||||
# 仅多机执行需要配置: ip1:卡数量1,ip2:卡数量2
|
||||
mpirun_ip: 90.90.176.152:8,90.90.176.154:8
|
||||
|
||||
# docker 镜像名称:版本号
|
||||
docker_image: c73:b021
|
||||
|
||||
# 指定 device id, 多个 id 使用空格分隔, 数量需与 rank_size 相同
|
||||
device_group_1p: 0
|
||||
device_group_2p: 0 1
|
||||
device_group_4p: 0 1 2 3
|
||||
|
||||
profiling_mode: false
|
||||
profiling_options: training_trace
|
||||
fp_point: L2Loss
|
||||
bp_point: gradients/AddN_30
|
||||
aicpu_profiling_mode: false
|
||||
|
||||
```
|
||||
|
||||
------
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
+211
@@ -0,0 +1,211 @@
|
||||
# MobileNetv2 for Tensorflow
|
||||
|
||||
This repository provides a script and recipe to train the MobileNetv2 model to achieve state-of-the-art accuracy.
|
||||
|
||||
## Table Of Contents
|
||||
|
||||
* [Model overview](#model-overview)
|
||||
* [Model Architecture](#model-architecture)
|
||||
* [Default configuration](#default-configuration)
|
||||
* [Data augmentation](#data-augmentation)
|
||||
* [Setup](#setup)
|
||||
* [Requirements](#requirements)
|
||||
* [Quick start guide](#quick-start-guide)
|
||||
* [Advanced](#advanced)
|
||||
* [Command line arguments](#command-line-arguments)
|
||||
* [Training process](#training-process)
|
||||
* [Performance](#performance)
|
||||
* [Results](#results)
|
||||
* [Training accuracy results](#training-accuracy-results)
|
||||
* [Training performance results](#training-performance-results)
|
||||
|
||||
|
||||
|
||||
|
||||
## Model overview
|
||||
|
||||
In this repository, we implement MobileNetv2 from paper [Sandler, Mark, et al. "Mobilenetv2: Inverted residuals and linear bottlenecks." CVPR 2018.](https://arxiv.org/abs/1801.04381)
|
||||
|
||||
MobileNetv2 is a mobile architecture. It is mainly constructed based on depthwise separable convolutions, linear bottlenecks and inverted residuals.
|
||||
|
||||
### Model architecture
|
||||
|
||||
The model architecture can be found from the reference paper.
|
||||
|
||||
### Default configuration
|
||||
|
||||
The following sections introduce the default configurations and hyperparameters for MobileNetv2 model.
|
||||
|
||||
#### Optimizer
|
||||
|
||||
This model uses Momentum optimizer from Tensorflow with the following hyperparameters:
|
||||
|
||||
- Momentum : 0.9
|
||||
- Learning rate (LR) : 0.8
|
||||
- LR schedule: cosine_annealing
|
||||
- Warmup epoch: 5
|
||||
- Batch size : 256*8
|
||||
- Weight decay : 0.00004
|
||||
- Moving average decay: 0.9999
|
||||
- Label smoothing = 0.1
|
||||
- We train for:
|
||||
- 300 epochs for a standard training process using ImageNet2012
|
||||
|
||||
#### Data augmentation
|
||||
|
||||
This model uses the data augmentation from InceptionV2:
|
||||
|
||||
- For training:
|
||||
- Convert DataType and RandomResizeCrop
|
||||
- RandomHorizontalFlip, prob=0.5
|
||||
- Subtract with 0.5 and multiply with 2.0
|
||||
- For inference:
|
||||
- Convert DataType
|
||||
- CenterCrop 87.5% of the original image and resize to (224, 224)
|
||||
- Subtract with 0.5 and multiply with 2.0
|
||||
|
||||
For more details, see the corresponding source code in Slim.
|
||||
|
||||
## Setup
|
||||
The following section lists the requirements to start training the MobileNetv2 model.
|
||||
### Requirements
|
||||
|
||||
Tensorflow 1.15.0
|
||||
|
||||
## Quick Start Guide
|
||||
|
||||
### 1. Clone the repository
|
||||
|
||||
```shell
|
||||
git clone xxx
|
||||
cd ModelZoo_MobileNetv2_TF
|
||||
```
|
||||
|
||||
### 2. Download and preprocess the dataset
|
||||
|
||||
1. Download the ImageNet2012 dataset
|
||||
2. Generate tfrecord files following [Tensorflow-Slim](https://github.com/tensorflow/models/tree/master/research/slim).
|
||||
3. The train and validation tfrecord files are under the path/data directories.
|
||||
|
||||
### 3. Train
|
||||
- train on a single NPU
|
||||
- **edit** *train_1p.sh* (see example below)
|
||||
- bash run_1p.sh
|
||||
- train on 8 NPUs
|
||||
- **edit** *train_8p.sh* (see example below)
|
||||
- bash run_8p.sh
|
||||
|
||||
Examples:
|
||||
- Case for single NPU
|
||||
- In *train_1p.sh*, the Python command should look as follows. For details on the command-line arguments, see [Command line arguments](#command-line-arguments).
|
||||
```shell
|
||||
python3.7 ${currentDir}/train.py \
|
||||
--dataset_dir=/opt/npu/slimImagenet \
|
||||
--max_train_steps=500 \
|
||||
--iterations_per_loop=50 \
|
||||
--model_name="mobilenet_v2" \
|
||||
--moving_average_decay=0.9999 \
|
||||
--label_smoothing=0.1 \
|
||||
--preprocessing_name="inception_v2" \
|
||||
--weight_decay='0.00004' \
|
||||
--batch_size=256 \
|
||||
--learning_rate_decay_type='cosine_annealing' \
|
||||
--learning_rate=0.4 \
|
||||
--optimizer='momentum' \
|
||||
--momentum='0.9' \
|
||||
--warmup_epochs=5
|
||||
```
|
||||
- Run the program
|
||||
```
|
||||
bash run_1p.sh
|
||||
```
|
||||
- Case for 8 NPUs
|
||||
- In *train_8p.sh*, the Python command should look as follows.
|
||||
```shell
|
||||
python3.7 ${currentDir}/train.py \
|
||||
--dataset_dir=/opt/npu/slimImagenet \
|
||||
--max_epoch=300 \
|
||||
--model_name="mobilenet_v2" \
|
||||
--moving_average_decay=0.9999 \
|
||||
--label_smoothing=0.1 \
|
||||
--preprocessing_name="inception_v2" \
|
||||
--weight_decay='0.00004' \
|
||||
--batch_size=256 \
|
||||
--learning_rate_decay_type='cosine_annealing' \
|
||||
--learning_rate=0.8 \
|
||||
--optimizer='momentum' \
|
||||
--momentum='0.9' \
|
||||
--warmup_epochs=5
|
||||
```
|
||||
- Run the program
|
||||
```
|
||||
bash run_8p.sh
|
||||
```
|
||||
|
||||
### 4. Test
|
||||
- We evaluate results by using following commands:
|
||||
```shell
|
||||
python3.7 eval_image_classifier_mobilenet.py --dataset_dir=/opt/npu/slimImagenet \
|
||||
--checkpoint_path=result/8p/0/results/model.ckpt-187500
|
||||
```
|
||||
Remember to modify the dataset path and checkpoint path, then run the command.
|
||||
|
||||
|
||||
## Advanced
|
||||
### Command line arguments
|
||||
|
||||
We list those important parameters to train this network here. For more details of all the parameters, please read *train.py* and other related files.
|
||||
|
||||
```
|
||||
--dataset_dir directory of dataset (default: /opt/npu/models/slimImagenet)
|
||||
--max_epoch number of epochs to train the model (default: 200)
|
||||
--max_train_steps max number of training steps (default: 500)
|
||||
--iterations_per_loop number of steps to run in devices each iteration (default: None)
|
||||
--model_name name of the model to train (default: mobilenet_v2_140)
|
||||
--moving_average_decay the decay to use for the moving average (default: None)
|
||||
--label_smoothing use label smooth in cross entropy (default: 0.1)
|
||||
--preprocessing_name preprocessing method for training (default: inception_v2)
|
||||
--weight_decay weight decay for regularization loss (default: 0)
|
||||
--batch_size batch size per npu (default: 96)
|
||||
--learning_rate_decay_type learning rate decay type (default: fixed)
|
||||
--learning_rate initial learning rate (default: 0.1)
|
||||
--optimizer the name of optimizer (default: sgd)
|
||||
--momentum momentum value used in optimizer (default: 0.9)
|
||||
--warmup_epochs warmup epochs for learning rate (default: 5)
|
||||
```
|
||||
|
||||
### Training process
|
||||
|
||||
All the results of the training will be stored in the directory `result`.
|
||||
|
||||
## Performance
|
||||
|
||||
### Results
|
||||
|
||||
Our results were obtained by running the applicable training script. To achieve the same results, follow the steps in the Quick Start Guide.
|
||||
|
||||
#### Training accuracy results
|
||||
|
||||
| **epochs** | Top1 |
|
||||
| :--------: | :------------: |
|
||||
| 300 | 72.47% |
|
||||
|
||||
#### Training performance results
|
||||
| **NPUs** | train performance |
|
||||
| :------: | :---------------: |
|
||||
| 1 | 1400 img/s |
|
||||
|
||||
| **NPUs** | train performance |
|
||||
| :------: | :---------------: |
|
||||
| 8 | 11000 img/s |
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
+240
@@ -0,0 +1,240 @@
|
||||
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
"""Functions to read, decode and pre-process input data for the Model.
|
||||
"""
|
||||
import collections
|
||||
import sys
|
||||
import tensorflow as tf
|
||||
|
||||
from tensorflow.python.data.experimental.ops import threadpool
|
||||
|
||||
# from tensorflow.contrib import slim
|
||||
|
||||
InputEndpoints = collections.namedtuple(
|
||||
'InputEndpoints', ['images', 'images_orig', 'labels', 'labels_one_hot'])
|
||||
ShuffleBatchConfig = collections.namedtuple('ShuffleBatchConfig', [
|
||||
'num_batching_threads', 'queue_capacity', 'min_after_dequeue'
|
||||
])
|
||||
|
||||
DEFAULT_SHUFFLE_CONFIG = ShuffleBatchConfig(
|
||||
num_batching_threads=8, queue_capacity=3000, min_after_dequeue=1000)
|
||||
|
||||
|
||||
def get_data_files(data_sources):
    """Expand ``data_sources`` into a flat list of file paths.

    Args:
      data_sources: a single path or glob pattern, or a (possibly nested)
        list/tuple of them. A string containing ``*``, ``?`` or ``[`` is
        expanded with ``gfile.Glob``; any other string is kept as is.

    Returns:
      A list with the paths of all sources.

    Raises:
      ValueError: if the expansion yields no file at all.
    """
    from tensorflow.python.platform import gfile

    if isinstance(data_sources, (list, tuple)):
        # Flatten recursively; every element is expanded on its own.
        matched = []
        for entry in data_sources:
            matched += get_data_files(entry)
    elif any(wildcard in data_sources for wildcard in ('*', '?', '[')):
        matched = gfile.Glob(data_sources)
    else:
        matched = [data_sources]

    if not matched:
        raise ValueError('No data files found in %s' % (data_sources,))
    return matched
|
||||
|
||||
|
||||
def preprocess_image(image, location, label_one_hot, height=224, width=224):
    """Prepare one image for evaluation.

    The image is converted to ``tf.float32``, resized bilinearly to
    ``height`` x ``width`` and then shifted and scaled with ``(x - 0.5) * 2``.

    Args:
      image: 3-D image tensor. It is passed through
        ``tf.image.convert_image_dtype`` (see that function for how integer
        inputs are scaled).
      location: returned unchanged.
      label_one_hot: returned unchanged.
      height: integer, output height.
      width: integer, output width.

    Returns:
      A tuple ``(image, location, label_one_hot)`` where ``image`` is the
      prepared 3-D float tensor.
    """
    as_float = tf.image.convert_image_dtype(image, dtype=tf.float32)

    # resize_bilinear works on batches, so add a batch axis and drop it again.
    batched = tf.expand_dims(as_float, 0)
    resized = tf.image.resize_bilinear(batched, [height, width], align_corners=False)
    unbatched = tf.squeeze(resized, [0])

    rescaled = tf.multiply(tf.subtract(unbatched, 0.5), 2.0)
    return rescaled, location, label_one_hot
|
||||
|
||||
|
||||
def _int64_feature(value):
    """Build a ``tf.train.Feature`` holding ``value`` as an int64 list.

    A value that is not already a ``list`` is wrapped in a one-element list.
    """
    values = value if isinstance(value, list) else [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))
|
||||
|
||||
|
||||
def parse_example_proto(example_serialized, num_classes, labels_offset, image_preprocessing_fn):
    """Parse one serialized Example into a preprocessed image and a one-hot label.

    Args:
      example_serialized: serialized ``tf.train.Example`` record.
      num_classes: number of classes before ``labels_offset`` is subtracted.
      labels_offset: value subtracted from the stored label and from
        ``num_classes``.
      image_preprocessing_fn: callable invoked as ``fn(image, 224, 224)``; when
        it is falsy the decoded image is only resized to 224x224.

    Returns:
      A tuple ``(image, label)`` with ``label`` one-hot encoded over
      ``num_classes - labels_offset`` classes.
    """
    features = {
        'image/encoded': tf.FixedLenFeature([], tf.string, ''),
        'image/class/label': tf.FixedLenFeature([1], tf.int64, -1),
        'image/class/text': tf.FixedLenFeature([], tf.string, ''),
    }
    # The four bounding-box coordinates are variable-length float lists.
    for corner in ('xmin', 'ymin', 'xmax', 'ymax'):
        features['image/object/bbox/' + corner] = tf.VarLenFeature(dtype=tf.float32)

    with tf.compat.v1.name_scope('deserialize_image_record'):
        parsed = tf.io.parse_single_example(serialized=example_serialized, features=features)
        decoded = tf.image.decode_jpeg(
            parsed['image/encoded'],
            channels=3,
            fancy_upscaling=False,
            dct_method='INTEGER_FAST')
        if image_preprocessing_fn:
            image = image_preprocessing_fn(decoded, 224, 224)
        else:
            image = tf.image.resize(decoded, [224, 224])

        class_index = tf.squeeze(tf.cast(parsed['image/class/label'], tf.int32))
        class_index = class_index - labels_offset
        one_hot_label = tf.one_hot(class_index, num_classes - labels_offset)
        return image, one_hot_label
|
||||
|
||||
|
||||
def parse_example_decode(example_serialized):
    """Parse one serialized Example and decode its JPEG image.

    Args:
      example_serialized: serialized ``tf.train.Example`` record.

    Returns:
      A tuple ``(image, label)``: the decoded 3-channel image and the raw
      ``image/class/label`` feature. No resizing or label encoding is done.
    """
    features = {
        'image/encoded': tf.FixedLenFeature([], tf.string, ''),
        'image/class/label': tf.FixedLenFeature([1], tf.int64, -1),
        'image/class/text': tf.FixedLenFeature([], tf.string, ''),
    }
    # The four bounding-box coordinates are variable-length float lists.
    for corner in ('xmin', 'ymin', 'xmax', 'ymax'):
        features['image/object/bbox/' + corner] = tf.VarLenFeature(dtype=tf.float32)

    with tf.compat.v1.name_scope('deserialize_image_record'):
        parsed = tf.io.parse_single_example(serialized=example_serialized, features=features)
        decoded = tf.image.decode_jpeg(
            parsed['image/encoded'],
            channels=3,
            fancy_upscaling=False,
            dct_method='INTEGER_FAST')

        return decoded, parsed['image/class/label']
|
||||
|
||||
|
||||
def parse_example(image, label, num_classes, labels_offset, image_preprocessing_fn):
    """Preprocess an already decoded image and one-hot encode its label.

    Args:
      image: decoded image tensor.
      label: label tensor; it is cast to int32 and squeezed.
      num_classes: number of classes before ``labels_offset`` is subtracted.
      labels_offset: value subtracted from the label and from ``num_classes``.
      image_preprocessing_fn: callable invoked as ``fn(image, 224, 224)``; when
        it is falsy the image is only resized to 224x224.

    Returns:
      A tuple ``(image, label)`` with ``label`` one-hot encoded over
      ``num_classes - labels_offset`` classes.
    """
    with tf.compat.v1.name_scope('deserialize_image_record'):
        if image_preprocessing_fn:
            prepared = image_preprocessing_fn(image, 224, 224)
        else:
            prepared = tf.image.resize(image, [224, 224])

        class_index = tf.squeeze(tf.cast(label, tf.int32))
        class_index = class_index - labels_offset
        one_hot_label = tf.one_hot(class_index, num_classes - labels_offset)
        return prepared, one_hot_label
|
||||
|
||||
|
||||
def parse_example1(example_serialized, image_preprocessing_fn1):
    """Parse one serialized Example, decode the JPEG and preprocess the image.

    Args:
      example_serialized: serialized ``tf.train.Example`` record.
      image_preprocessing_fn1: callable invoked as ``fn(image, 224, 224)``; it
        is always called (there is no resize-only fallback).

    Returns:
      A tuple ``(image, label)``: the preprocessed image and the raw
      ``image/class/label`` feature.
    """
    features = {
        'image/encoded': tf.FixedLenFeature([], tf.string, ''),
        'image/class/label': tf.FixedLenFeature([1], tf.int64, -1),
        'image/class/text': tf.FixedLenFeature([], tf.string, ''),
    }
    # The four bounding-box coordinates are variable-length float lists.
    for corner in ('xmin', 'ymin', 'xmax', 'ymax'):
        features['image/object/bbox/' + corner] = tf.VarLenFeature(dtype=tf.float32)

    with tf.compat.v1.name_scope('deserialize_image_record'):
        parsed = tf.io.parse_single_example(serialized=example_serialized, features=features)
        decoded = tf.image.decode_jpeg(
            parsed['image/encoded'],
            channels=3,
            fancy_upscaling=False,
            dct_method='INTEGER_FAST')

        prepared = image_preprocessing_fn1(decoded, 224, 224)
        return prepared, parsed['image/class/label']
|
||||
|
||||
|
||||
def parse_example2(image, label, num_classes, labels_offset, image_preprocessing_fn2):
    """Preprocess an image and one-hot encode its label.

    Args:
      image: image tensor handed to ``image_preprocessing_fn2``.
      label: label tensor; it is cast to int32 and squeezed.
      num_classes: number of classes before ``labels_offset`` is subtracted.
      labels_offset: value subtracted from the label and from ``num_classes``.
      image_preprocessing_fn2: callable invoked as ``fn(image, 224, 224)``; it
        is always called.

    Returns:
      A tuple ``(image, label)`` with ``label`` one-hot encoded over
      ``num_classes - labels_offset`` classes.
    """
    with tf.compat.v1.name_scope('deserialize_image_record'):
        prepared = image_preprocessing_fn2(image, 224, 224)

        class_index = tf.squeeze(tf.cast(label, tf.int32))
        class_index = class_index - labels_offset
        one_hot_label = tf.one_hot(class_index, num_classes - labels_offset)
        return prepared, one_hot_label
|
||||
|
||||
|
||||
def get_data(dataset, batch_size, num_classes, labels_offset, is_training,
             preprocessing_name=None, use_grayscale=None, add_image_summaries=False):
    """Build the input pipeline; forwards every argument to ``get_data_united``.

    Returns:
      Whatever ``get_data_united`` returns for the same arguments.
    """
    return get_data_united(
        dataset,
        batch_size,
        num_classes,
        labels_offset,
        is_training,
        preprocessing_name=preprocessing_name,
        use_grayscale=use_grayscale,
        add_image_summaries=add_image_summaries)
|
||||
|
||||
|
||||
def create_ds(data_sources, is_training):
    """Create a dataset of ``(serialized_record, counter)`` pairs.

    Args:
      data_sources: path(s) or glob pattern(s), expanded with
        ``get_data_files``.
      is_training: when true the file names are shuffled and the resulting
        dataset repeats; otherwise at most 50000 file names are kept and the
        dataset is not repeated.

    Returns:
      A ``tf.data.Dataset`` that zips the interleaved TFRecord stream with a
      running counter.
    """
    file_names = tf.data.Dataset.from_tensor_slices(get_data_files(data_sources))

    if is_training:
        file_names = file_names.shuffle(1000)
    else:
        # NOTE(review): take() is applied to the dataset of file names, not to
        # the records read from them - confirm that this is the intent.
        file_names = file_names.take(50000)

    # Read several files concurrently, taking one record from each in turn.
    reader_count = 10
    records = file_names.interleave(
        tf.data.TFRecordDataset,
        cycle_length=reader_count,
        block_length=1,
        num_parallel_calls=tf.data.experimental.AUTOTUNE)
    running_index = tf.data.Dataset.range(sys.maxsize)
    paired = tf.data.Dataset.zip((records, running_index))

    return paired.repeat() if is_training else paired
|
||||
|
||||
|
||||
def get_data_united(dataset, batch_size, num_classes, labels_offset, is_training,
                    preprocessing_name=None, use_grayscale=None, add_image_summaries=False):
    """Build the batched input pipeline and an initializable iterator for it.

    Args:
      dataset: object whose ``data_sources`` attribute names the TFRecord
        files (passed to ``create_ds``).
      batch_size: number of examples per batch; incomplete batches are dropped.
      num_classes: number of classes before ``labels_offset`` is subtracted.
      labels_offset: value subtracted from every label.
      is_training: forwarded to the preprocessing factory and to ``create_ds``.
      preprocessing_name: accepted for interface compatibility but not used;
        the ``'inception_v2'`` preprocessing is always requested.
      use_grayscale: forwarded to the preprocessing factory.
      add_image_summaries: forwarded to the preprocessing factory.

    Returns:
      A tuple ``(iterator, ds)``: an initializable iterator and the dataset
      wrapped with a private 128-thread pool.
    """
    from preprocessing import preprocessing_factory

    # NOTE(review): the name is hard-coded, so `preprocessing_name` has no
    # effect - confirm whether callers expect their value to be honoured.
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        name='inception_v2',
        is_training=is_training,
        use_grayscale=use_grayscale,
        add_image_summaries=add_image_summaries
    )

    ds = create_ds(dataset.data_sources, is_training)

    # create_ds yields (record, counter) pairs; the counter is not needed here.
    ds = ds.map(
        lambda example, counter: parse_example_proto(
            example, num_classes, labels_offset, image_preprocessing_fn),
        num_parallel_calls=24)

    ds = ds.batch(batch_size, drop_remainder=True)

    # Same AUTOTUNE constant that create_ds uses, instead of the tf.contrib
    # alias.
    ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

    # The iterator is created before the thread-pool override below, so the
    # override is only part of the returned dataset, not of this iterator.
    iterator = ds.make_initializable_iterator()

    ds = threadpool.override_threadpool(
        ds, threadpool.PrivateThreadPool(128, display_name='input_pipeline_thread_pool'))

    return iterator, ds
|
||||
+1
@@ -0,0 +1 @@
|
||||
|
||||
+705
@@ -0,0 +1,705 @@
|
||||
# Copyright 2016 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Converts ImageNet data to TFRecords file format with Example protos.
|
||||
|
||||
The raw ImageNet data set is expected to reside in JPEG files located in the
|
||||
following directory structure.
|
||||
|
||||
data_dir/n01440764/ILSVRC2012_val_00000293.JPEG
|
||||
data_dir/n01440764/ILSVRC2012_val_00000543.JPEG
|
||||
...
|
||||
|
||||
where 'n01440764' is the unique synset label associated with
|
||||
these images.
|
||||
|
||||
The training data set consists of 1000 sub-directories (i.e. labels)
|
||||
each containing 1200 JPEG images for a total of 1.2M JPEG images.
|
||||
|
||||
The evaluation data set consists of 1000 sub-directories (i.e. labels)
|
||||
each containing 50 JPEG images for a total of 50K JPEG images.
|
||||
|
||||
This TensorFlow script converts the training and evaluation data into
|
||||
a sharded data set consisting of 1024 and 128 TFRecord files, respectively.
|
||||
|
||||
train_directory/train-00000-of-01024
|
||||
train_directory/train-00001-of-01024
|
||||
...
|
||||
train_directory/train-01023-of-01024
|
||||
|
||||
and
|
||||
|
||||
validation_directory/validation-00000-of-00128
|
||||
validation_directory/validation-00001-of-00128
|
||||
...
|
||||
validation_directory/validation-00127-of-00128
|
||||
|
||||
Each validation TFRecord file contains ~390 records. Each training TFRecord
|
||||
file contains ~1250 records. Each record within the TFRecord file is a
|
||||
serialized Example proto. The Example proto contains the following fields:
|
||||
|
||||
image/encoded: string containing JPEG encoded image in RGB colorspace
|
||||
image/height: integer, image height in pixels
|
||||
image/width: integer, image width in pixels
|
||||
image/colorspace: string, specifying the colorspace, always 'RGB'
|
||||
image/channels: integer, specifying the number of channels, always 3
|
||||
image/format: string, specifying the format, always 'JPEG'
|
||||
|
||||
image/filename: string containing the basename of the image file
|
||||
e.g. 'n01440764_10026.JPEG' or 'ILSVRC2012_val_00000293.JPEG'
|
||||
image/class/label: integer specifying the index in a classification layer.
|
||||
The label ranges from [1, 1000] where 0 is not used.
|
||||
image/class/synset: string specifying the unique ID of the label,
|
||||
e.g. 'n01440764'
|
||||
image/class/text: string specifying the human-readable version of the label
|
||||
e.g. 'red fox, Vulpes vulpes'
|
||||
|
||||
image/object/bbox/xmin: list of integers specifying the 0+ human annotated
|
||||
bounding boxes
|
||||
image/object/bbox/xmax: list of integers specifying the 0+ human annotated
|
||||
bounding boxes
|
||||
image/object/bbox/ymin: list of integers specifying the 0+ human annotated
|
||||
bounding boxes
|
||||
image/object/bbox/ymax: list of integers specifying the 0+ human annotated
|
||||
bounding boxes
|
||||
image/object/bbox/label: integer specifying the index in a classification
|
||||
layer. The label ranges from [1, 1000] where 0 is not used. Note this is
|
||||
always identical to the image label.
|
||||
|
||||
Note that the length of xmin is identical to the length of xmax, ymin and ymax
|
||||
for each example.
|
||||
|
||||
Running this script using 16 threads may take around ~2.5 hours on a HP Z420.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from datetime import datetime
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
import threading
|
||||
|
||||
import numpy as np
|
||||
from six.moves import xrange # pylint: disable=redefined-builtin
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
# Input and output locations.
tf.app.flags.DEFINE_string(
    'train_directory', '/tmp/', 'Training data directory')
tf.app.flags.DEFINE_string(
    'validation_directory', '/tmp/', 'Validation data directory')
tf.app.flags.DEFINE_string(
    'output_directory', '/tmp/', 'Output data directory')

# Number of TFRecord files written for each split.
tf.app.flags.DEFINE_integer(
    'train_shards', 1024, 'Number of shards in training TFRecord files.')
tf.app.flags.DEFINE_integer(
    'validation_shards', 128,
    'Number of shards in validation TFRecord files.')

tf.app.flags.DEFINE_integer(
    'num_threads', 8, 'Number of threads to preprocess the images.')

# The labels file holds the list of valid labels, one synset per line, e.g.:
#   n01440764
#   n01443537
#   n01484850
# Each synset contained in the file is mapped to an integer (based on the
# alphabetical ordering). See below for details.
tf.app.flags.DEFINE_string(
    'labels_file', 'imagenet_lsvrc_2015_synsets.txt', 'Labels file')

# The metadata file maps each synset to a human-readable label. Every line is
# formatted as <synset>\t<human readable label>, e.g.:
#
#   n02119247    black fox
#   n02119359    silver fox
#   n02119477    red fox, Vulpes fulva
#
# and each line corresponds to a unique mapping.
tf.app.flags.DEFINE_string(
    'imagenet_metadata_file', 'imagenet_metadata.txt',
    'ImageNet metadata file')

# The bounding box file is the output of process_bounding_boxes.py. Every line
# holds one bounding box annotation for one image, e.g.:
#
#   n00007846_64193.JPEG,0.0060,0.2620,0.7545,0.9940
#
# and is parsed as:
#
#   <JPEG file name>, <xmin>, <ymin>, <xmax>, <ymax>
#
# Note that an image file may have multiple bounding box annotations.
tf.app.flags.DEFINE_string(
    'bounding_box_file', './imagenet_2012_bounding_boxes.csv',
    'Bounding box file')

FLAGS = tf.app.flags.FLAGS
|
||||
|
||||
|
||||
def _int64_feature(value):
  """Wrap integer data in an int64 `tf.train.Feature` for an Example proto.

  Args:
    value: a single value or a `list` of values. Anything that is not a
      `list` is placed in a one-element list first.

  Returns:
    A `tf.train.Feature` whose `int64_list` holds the value(s).
  """
  values = value if isinstance(value, list) else [value]
  int64_list = tf.train.Int64List(value=values)
  return tf.train.Feature(int64_list=int64_list)
|
||||
|
||||
|
||||
def _float_feature(value):
  """Wrap float data in a float `tf.train.Feature` for an Example proto.

  Args:
    value: a single value or a `list` of values. Anything that is not a
      `list` is placed in a one-element list first.

  Returns:
    A `tf.train.Feature` whose `float_list` holds the value(s).
  """
  values = value if isinstance(value, list) else [value]
  float_list = tf.train.FloatList(value=values)
  return tf.train.Feature(float_list=float_list)
|
||||
|
||||
|
||||
def _bytes_feature(value):
  """Wrap a single string in a bytes `tf.train.Feature` for an Example proto.

  Args:
    value: the payload, either raw bytes (e.g. an encoded image) or text
      (e.g. a file name or label). Text is encoded as UTF-8.

  Returns:
    A `tf.train.Feature` whose `bytes_list` holds the single value.
  """
  # `BytesList` only accepts byte strings. `type(u'')` is `unicode` on
  # Python 2 and `str` on Python 3, so text is encoded on both while values
  # that are already bytes pass through untouched.
  if isinstance(value, type(u'')):
    value = value.encode('utf-8')
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
|
||||
|
||||
|
||||
def _convert_to_example(filename, image_buffer, label, synset, human, bbox,
                        height, width):
  """Assemble the Example proto describing one image.

  Args:
    filename: string, path to an image file, e.g., '/path/to/example.JPG'.
      Only its basename is stored.
    image_buffer: string, JPEG encoding of RGB image
    label: integer, identifier for the ground truth for the network
    synset: string, unique WordNet ID specifying the label, e.g., 'n02323233'
    human: string, human-readable label, e.g., 'red fox, Vulpes vulpes'
    bbox: list of bounding boxes; each box is a list of four coordinates
      [xmin, ymin, xmax, ymax], stored as float features. All boxes are
      assumed to belong to the same label as the image label.
    height: integer, image height in pixels
    width: integer, image width in pixels

  Returns:
    Example proto
  """
  # Transpose the per-box rows into one column per coordinate.
  xmin, ymin, xmax, ymax = [], [], [], []
  columns = (xmin, ymin, xmax, ymax)
  for box in bbox:
    assert len(box) == 4
    for column, coordinate in zip(columns, box):
      column.append(coordinate)

  feature = {
      'image/height': _int64_feature(height),
      'image/width': _int64_feature(width),
      'image/colorspace': _bytes_feature('RGB'),
      'image/channels': _int64_feature(3),
      'image/class/label': _int64_feature(label),
      'image/class/synset': _bytes_feature(synset),
      'image/class/text': _bytes_feature(human),
      'image/object/bbox/xmin': _float_feature(xmin),
      'image/object/bbox/xmax': _float_feature(xmax),
      'image/object/bbox/ymin': _float_feature(ymin),
      'image/object/bbox/ymax': _float_feature(ymax),
      # Every box carries the image-level label.
      'image/object/bbox/label': _int64_feature([label] * len(xmin)),
      'image/format': _bytes_feature('JPEG'),
      'image/filename': _bytes_feature(os.path.basename(filename)),
      'image/encoded': _bytes_feature(image_buffer),
  }
  return tf.train.Example(features=tf.train.Features(feature=feature))
|
||||
|
||||
|
||||
class ImageCoder(object):
  """Bundles the TensorFlow ops used to convert and decode image data.

  All ops are built once in the constructor and executed through a single
  shared session.
  """

  def __init__(self):
    # One Session serves every image coding call.
    self._sess = tf.Session()

    # PNG data -> RGB JPEG data.
    self._png_data = tf.placeholder(dtype=tf.string)
    png_image = tf.image.decode_png(self._png_data, channels=3)
    self._png_to_jpeg = tf.image.encode_jpeg(
        png_image, format='rgb', quality=100)

    # CMYK JPEG data -> RGB JPEG data.
    self._cmyk_data = tf.placeholder(dtype=tf.string)
    cmyk_image = tf.image.decode_jpeg(self._cmyk_data, channels=0)
    self._cmyk_to_rgb = tf.image.encode_jpeg(
        cmyk_image, format='rgb', quality=100)

    # RGB JPEG data -> decoded image.
    self._decode_jpeg_data = tf.placeholder(dtype=tf.string)
    self._decode_jpeg = tf.image.decode_jpeg(
        self._decode_jpeg_data, channels=3)

  def png_to_jpeg(self, image_data):
    """Runs the PNG-to-JPEG op on `image_data` and returns the result."""
    feed = {self._png_data: image_data}
    return self._sess.run(self._png_to_jpeg, feed_dict=feed)

  def cmyk_to_rgb(self, image_data):
    """Runs the CMYK-to-RGB JPEG op on `image_data` and returns the result."""
    feed = {self._cmyk_data: image_data}
    return self._sess.run(self._cmyk_to_rgb, feed_dict=feed)

  def decode_jpeg(self, image_data):
    """Decodes JPEG `image_data`, asserting a rank-3 result with 3 channels."""
    feed = {self._decode_jpeg_data: image_data}
    image = self._sess.run(self._decode_jpeg, feed_dict=feed)
    assert len(image.shape) == 3
    assert image.shape[2] == 3
    return image
|
||||
|
||||
|
||||
def _is_png(filename):
|
||||
"""Determine if a file contains a PNG format image.
|
||||
|
||||
Args:
|
||||
filename: string, path of the image file.
|
||||
|
||||
Returns:
|
||||
boolean indicating if the image is a PNG.
|
||||
"""
|
||||
# File list from:
|
||||
# https://groups.google.com/forum/embed/?place=forum/torch7#!topic/torch7/fOSTXHIESSU
|
||||
return 'n02105855_2933.JPEG' in filename
|
||||
|
||||
|
||||
def _is_cmyk(filename):
|
||||
"""Determine if file contains a CMYK JPEG format image.
|
||||
|
||||
Args:
|
||||
filename: string, path of the image file.
|
||||
|
||||
Returns:
|
||||
boolean indicating if the image is a JPEG encoded with CMYK color space.
|
||||
"""
|
||||
# File list from:
|
||||
# https://github.com/cytsai/ilsvrc-cmyk-image-list
|
||||
blacklist = ['n01739381_1309.JPEG', 'n02077923_14822.JPEG',
|
||||
'n02447366_23489.JPEG', 'n02492035_15739.JPEG',
|
||||
'n02747177_10752.JPEG', 'n03018349_4028.JPEG',
|
||||
'n03062245_4620.JPEG', 'n03347037_9675.JPEG',
|
||||
'n03467068_12171.JPEG', 'n03529860_11437.JPEG',
|
||||
'n03544143_17228.JPEG', 'n03633091_5218.JPEG',
|
||||
'n03710637_5125.JPEG', 'n03961711_5286.JPEG',
|
||||
'n04033995_2932.JPEG', 'n04258138_17003.JPEG',
|
||||
'n04264628_27969.JPEG', 'n04336792_7448.JPEG',
|
||||
'n04371774_5854.JPEG', 'n04596742_4225.JPEG',
|
||||
'n07583066_647.JPEG', 'n13037406_4650.JPEG']
|
||||
return filename.split('/')[-1] in blacklist
|
||||
|
||||
|
||||
def _process_image(filename, coder):
  """Process a single image file.

  Args:
    filename: string, path to an image file e.g., '/path/to/example.JPG'.
    coder: instance of ImageCoder to provide TensorFlow image coding utils.
  Returns:
    image_buffer: string, JPEG encoding of RGB image.
    height: integer, image height in pixels.
    width: integer, image width in pixels.
  """
  # Read the image file. It must be opened in binary mode: in text mode
  # Python 3 tries to decode the JPEG bytes as text and fails. The `with`
  # block also guarantees the handle is closed.
  with tf.gfile.GFile(filename, 'rb') as f:
    image_data = f.read()

  # Clean the dirty data.
  if _is_png(filename):
    # 1 image is a PNG.
    print('Converting PNG to JPEG for %s' % filename)
    image_data = coder.png_to_jpeg(image_data)
  elif _is_cmyk(filename):
    # 22 JPEG images are in CMYK colorspace.
    print('Converting CMYK to RGB for %s' % filename)
    image_data = coder.cmyk_to_rgb(image_data)

  # Decode the RGB JPEG; only the shape of the result is used.
  image = coder.decode_jpeg(image_data)

  # Check that image converted to RGB
  assert len(image.shape) == 3
  height = image.shape[0]
  width = image.shape[1]
  assert image.shape[2] == 3

  return image_data, height, width
|
||||
|
||||
|
||||
def _process_image_files_batch(coder, thread_index, ranges, name, filenames,
                               synsets, labels, humans, bboxes, num_shards):
  """Processes and saves list of images as TFRecord in 1 thread.

  Args:
    coder: instance of ImageCoder to provide TensorFlow image coding utils.
    thread_index: integer, unique batch to run index is within [0, len(ranges)).
    ranges: list of pairs of integers specifying ranges of each batches to
      analyze in parallel.
    name: string, unique identifier specifying the data set
    filenames: list of strings; each string is a path to an image file
    synsets: list of strings; each string is a unique WordNet ID
    labels: list of integer; each integer identifies the ground truth
    humans: list of strings; each string is a human-readable label
    bboxes: list of bounding boxes for each image. Note that each entry in this
      list might contain from 0+ entries corresponding to the number of bounding
      box annotations for the image.
    num_shards: integer number of shards for this data set.
  """
  # Each thread produces N shards where N = num_shards // num_threads.
  # For instance, if num_shards = 128, and the num_threads = 2, then the first
  # thread would produce shards [0, 64).
  num_threads = len(ranges)
  assert not num_shards % num_threads
  num_shards_per_batch = num_shards // num_threads

  batch_start = ranges[thread_index][0]
  batch_end = ranges[thread_index][1]
  # Boundaries that split this thread's file range evenly across its shards.
  shard_ranges = np.linspace(batch_start, batch_end,
                             num_shards_per_batch + 1).astype(int)
  num_files_in_thread = batch_end - batch_start

  counter = 0
  for s in xrange(num_shards_per_batch):
    # Generate a sharded version of the file name, e.g. 'train-00002-of-00010'
    shard = thread_index * num_shards_per_batch + s
    output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards)
    output_file = os.path.join(FLAGS.output_directory, output_filename)
    writer = tf.python_io.TFRecordWriter(output_file)

    shard_counter = 0
    files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int)
    try:
      for i in files_in_shard:
        filename = filenames[i]
        image_buffer, height, width = _process_image(filename, coder)
        example = _convert_to_example(filename, image_buffer, labels[i],
                                      synsets[i], humans[i], bboxes[i],
                                      height, width)
        writer.write(example.SerializeToString())
        shard_counter += 1
        counter += 1

        if not counter % 1000:
          print('%s [thread %d]: Processed %d of %d images in thread batch.' %
                (datetime.now(), thread_index, counter, num_files_in_thread))
          sys.stdout.flush()
    finally:
      # Close the shard even if an image fails, so the file is not left open.
      writer.close()

    print('%s [thread %d]: Wrote %d images to %s' %
          (datetime.now(), thread_index, shard_counter, output_file))
    sys.stdout.flush()

  # Report the number of shards written by this thread (previously the file
  # count was printed in the shard slot).
  print('%s [thread %d]: Wrote %d images to %d shards.' %
        (datetime.now(), thread_index, counter, num_shards_per_batch))
  sys.stdout.flush()
|
||||
|
||||
|
||||
def _process_image_files(name, filenames, synsets, labels, humans,
                         bboxes, num_shards):
  """Process and save list of images as TFRecord of Example protos.

  The images are split into `FLAGS.num_threads` contiguous batches and one
  thread is launched per batch; the call returns once all threads are joined.

  Args:
    name: string, unique identifier specifying the data set
    filenames: list of strings; each string is a path to an image file
    synsets: list of strings; each string is a unique WordNet ID
    labels: list of integer; each integer identifies the ground truth
    humans: list of strings; each string is a human-readable label
    bboxes: list of bounding boxes for each image. Note that each entry in this
      list might contain from 0+ entries corresponding to the number of bounding
      box annotations for the image.
    num_shards: integer number of shards for this data set.
  """
  assert len(filenames) == len(synsets)
  assert len(filenames) == len(labels)
  assert len(filenames) == len(humans)
  assert len(filenames) == len(bboxes)

  # Break all images into batches with a [ranges[i][0], ranges[i][1]].
  # `np.int` was only an alias of the builtin `int` and has been removed from
  # NumPy, so the builtin is used directly.
  spacing = np.linspace(0, len(filenames), FLAGS.num_threads + 1).astype(int)
  ranges = [[start, end] for start, end in zip(spacing[:-1], spacing[1:])]

  # Launch a thread for each batch.
  print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges))
  sys.stdout.flush()

  # Create a mechanism for monitoring when all threads are finished.
  coord = tf.train.Coordinator()

  # Create a generic TensorFlow-based utility for converting all image codings.
  coder = ImageCoder()

  threads = []
  for thread_index in xrange(len(ranges)):
    args = (coder, thread_index, ranges, name, filenames,
            synsets, labels, humans, bboxes, num_shards)
    t = threading.Thread(target=_process_image_files_batch, args=args)
    t.start()
    threads.append(t)

  # Wait for all the threads to terminate.
  coord.join(threads)
  print('%s: Finished writing all %d images in data set.' %
        (datetime.now(), len(filenames)))
  sys.stdout.flush()
|
||||
|
||||
|
||||
def _find_image_files(data_dir, labels_file):
  """Build a list of all images files and labels in the data set.

  Args:
    data_dir: string, path to the root directory of images.

      Assumes that the ImageNet data set resides in JPEG files located in
      the following directory structure.

        data_dir/n01440764/ILSVRC2012_val_00000293.JPEG
        data_dir/n01440764/ILSVRC2012_val_00000543.JPEG

      where 'n01440764' is the unique synset label associated with these images.

    labels_file: string, path to the labels file.

      The list of valid labels are held in this file. Assumes that the file
      contains entries as such:
        n01440764
        n01443537
        n01484850
      where each line corresponds to a label expressed as a synset. We map
      each synset contained in the file to an integer (based on the alphabetical
      ordering) starting with the integer 1 corresponding to the synset
      contained in the first line.

      The reason we start the integer labels at 1 is to reserve label 0 as an
      unused background class.

  Returns:
    filenames: list of strings; each string is a path to an image file.
    synsets: list of strings; each string is a unique WordNet ID.
    labels: list of integer; each integer identifies the ground truth.
  """
  print('Determining list of input files and labels from %s.' % data_dir)
  # Read the labels inside a `with` block so the handle is closed.
  with tf.gfile.GFile(labels_file, 'r') as f:
    challenge_synsets = [line.strip() for line in f.readlines()]

  labels = []
  filenames = []
  synsets = []

  # Construct the list of JPEG files and labels. Label indices start at 1,
  # leaving label index 0 empty as a background class.
  for label_index, synset in enumerate(challenge_synsets, 1):
    jpeg_file_path = '%s/%s/*.JPEG' % (data_dir, synset)
    matching_files = tf.gfile.Glob(jpeg_file_path)

    labels.extend([label_index] * len(matching_files))
    synsets.extend([synset] * len(matching_files))
    filenames.extend(matching_files)

    if not label_index % 100:
      print('Finished finding files in %d of %d classes.' % (
          label_index, len(challenge_synsets)))

  # Shuffle the ordering of all image files in order to guarantee
  # random ordering of the images with respect to label in the
  # saved TFRecord files. Make the randomization repeatable.
  # `random.shuffle` needs a mutable sequence; on Python 3 `range` is not
  # one, so it is materialized as a list first.
  shuffled_index = list(range(len(filenames)))
  random.seed(12345)
  random.shuffle(shuffled_index)

  filenames = [filenames[i] for i in shuffled_index]
  synsets = [synsets[i] for i in shuffled_index]
  labels = [labels[i] for i in shuffled_index]

  print('Found %d JPEG files across %d labels inside %s.' %
        (len(filenames), len(challenge_synsets), data_dir))
  return filenames, synsets, labels
|
||||
|
||||
|
||||
def _find_human_readable_labels(synsets, synset_to_human):
|
||||
"""Build a list of human-readable labels.
|
||||
|
||||
Args:
|
||||
synsets: list of strings; each string is a unique WordNet ID.
|
||||
synset_to_human: dict of synset to human labels, e.g.,
|
||||
'n02119022' --> 'red fox, Vulpes vulpes'
|
||||
|
||||
Returns:
|
||||
List of human-readable strings corresponding to each synset.
|
||||
"""
|
||||
humans = []
|
||||
for s in synsets:
|
||||
assert s in synset_to_human, ('Failed to find: %s' % s)
|
||||
humans.append(synset_to_human[s])
|
||||
return humans
|
||||
|
||||
|
||||
def _find_image_bounding_boxes(filenames, image_to_bboxes):
|
||||
"""Find the bounding boxes for a given image file.
|
||||
|
||||
Args:
|
||||
filenames: list of strings; each string is a path to an image file.
|
||||
image_to_bboxes: dictionary mapping image file names to a list of
|
||||
bounding boxes. This list contains 0+ bounding boxes.
|
||||
Returns:
|
||||
List of bounding boxes for each image. Note that each entry in this
|
||||
list might contain from 0+ entries corresponding to the number of bounding
|
||||
box annotations for the image.
|
||||
"""
|
||||
num_image_bbox = 0
|
||||
bboxes = []
|
||||
for f in filenames:
|
||||
basename = os.path.basename(f)
|
||||
if basename in image_to_bboxes:
|
||||
bboxes.append(image_to_bboxes[basename])
|
||||
num_image_bbox += 1
|
||||
else:
|
||||
bboxes.append([])
|
||||
print('Found %d images with bboxes out of %d images' % (
|
||||
num_image_bbox, len(filenames)))
|
||||
return bboxes
|
||||
|
||||
|
||||
def _process_dataset(name, directory, num_shards, synset_to_human,
                     image_to_bboxes):
  """Convert one complete data set into sharded TFRecord files.

  Args:
    name: string, unique identifier specifying the data set.
    directory: string, root path to the data set.
    num_shards: integer number of shards for this data set.
    synset_to_human: dict of synset to human labels, e.g.,
      'n02119022' --> 'red fox, Vulpes vulpes'
    image_to_bboxes: dictionary mapping image file names to a list of
      bounding boxes. This list contains 0+ bounding boxes.
  """
  # Discover the images, then attach readable labels and boxes to each one.
  image_paths, wnids, label_ids = _find_image_files(
      directory, FLAGS.labels_file)
  readable_labels = _find_human_readable_labels(wnids, synset_to_human)
  boxes = _find_image_bounding_boxes(image_paths, image_to_bboxes)
  _process_image_files(
      name, image_paths, wnids, label_ids, readable_labels, boxes, num_shards)
|
||||
|
||||
|
||||
def _build_synset_lookup(imagenet_metadata_file):
  """Build lookup for synset to human-readable label.

  Args:
    imagenet_metadata_file: string, path to file containing mapping from
      synset to human-readable label.

      Assumes each line of the file looks like:

        n02119247    black fox
        n02119359    silver fox
        n02119477    red fox, Vulpes fulva

      where each line corresponds to a unique mapping. Note that each line is
      formatted as <synset>\t<human readable label>. Blank lines are skipped.

  Returns:
    Dictionary of synset to human labels, such as:
      'n02119022' --> 'red fox, Vulpes vulpes'
  """
  # The `with` block guarantees the file handle is closed.
  with tf.gfile.GFile(imagenet_metadata_file, 'r') as f:
    lines = f.readlines()

  synset_to_human = {}
  for line in lines:
    # Strip before testing: a blank line still holds its '\n' and would
    # otherwise reach the assert below.
    stripped = line.strip()
    if not stripped:
      continue
    parts = stripped.split('\t')
    assert len(parts) == 2, ('Failed to parse: %s' % line)
    synset, human = parts
    synset_to_human[synset] = human
  return synset_to_human
|
||||
|
||||
|
||||
def _build_bounding_box_lookup(bounding_box_file):
  """Build a lookup from image file to bounding boxes.

  Args:
    bounding_box_file: string, path to file with bounding boxes annotations.

      Assumes each line of the file looks like:

        n00007846_64193.JPEG,0.0060,0.2620,0.7545,0.9940

      where each line corresponds to one bounding box annotation associated
      with an image. Each line can be parsed as:

        <JPEG file name>, <xmin>, <ymin>, <xmax>, <ymax>

      Note that there might exist multiple bounding box annotations associated
      with an image file. This file is the output of process_bounding_boxes.py.
      Blank lines are skipped.

  Returns:
    Dictionary mapping image file names to a list of bounding boxes. This list
    contains 0+ bounding boxes.
  """
  # The `with` block guarantees the file handle is closed.
  with tf.gfile.GFile(bounding_box_file, 'r') as f:
    lines = f.readlines()

  images_to_bboxes = {}
  num_bbox = 0
  for line in lines:
    # A blank line still holds its '\n', so test the stripped text.
    if not line.strip():
      continue
    parts = line.split(',')
    assert len(parts) == 5, ('Failed to parse: %s' % line)
    filename = parts[0]
    # Coordinates are in file order: xmin, ymin, xmax, ymax. `float` ignores
    # the trailing newline on the last field.
    box = [float(coordinate) for coordinate in parts[1:]]
    images_to_bboxes.setdefault(filename, []).append(box)
    num_bbox += 1
  num_image = len(images_to_bboxes)

  print('Successfully read %d bounding boxes '
        'across %d images.' % (num_bbox, num_image))
  return images_to_bboxes
|
||||
|
||||
|
||||
def main(unused_argv):
  """Convert the validation split, then the training split, to TFRecords.

  Args:
    unused_argv: ignored.
  """
  # Each thread writes a whole number of shards, so the shard counts must be
  # multiples of the thread count.
  assert not FLAGS.train_shards % FLAGS.num_threads, (
      'Please make the FLAGS.num_threads commensurate with FLAGS.train_shards')
  assert not FLAGS.validation_shards % FLAGS.num_threads, (
      'Please make the FLAGS.num_threads commensurate with '
      'FLAGS.validation_shards')
  print('Saving results to %s' % FLAGS.output_directory)

  # Lookups shared by both splits: synset -> readable label, image -> boxes.
  synset_to_human = _build_synset_lookup(FLAGS.imagenet_metadata_file)
  image_to_bboxes = _build_bounding_box_lookup(FLAGS.bounding_box_file)

  # Run it!
  splits = (
      ('validation', FLAGS.validation_directory, FLAGS.validation_shards),
      ('train', FLAGS.train_directory, FLAGS.train_shards),
  )
  for split_name, directory, shards in splits:
    _process_dataset(split_name, directory, shards,
                     synset_to_human, image_to_bboxes)
|
||||
|
||||
|
||||
# Script entry point: tf.app.run parses the flags and then calls main.
if __name__ == '__main__':
  tf.app.run(main)
|
||||
+100
@@ -0,0 +1,100 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Provides data for the Cifar10 dataset.
|
||||
|
||||
The dataset scripts used to create the dataset can be found at:
|
||||
tensorflow/models/research/slim/datasets/download_and_convert_cifar10.py
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
from datasets import dataset_utils
|
||||
|
||||
slim = contrib_slim

# Template for the record file of a split; '%s' receives the split name.
_FILE_PATTERN = 'cifar10_%s.tfrecord'

# Number of samples in each split.
SPLITS_TO_SIZES = {
    'train': 50000,
    'test': 10000,
}

_NUM_CLASSES = 10

# Descriptions of the items a decoded sample provides.
_ITEMS_TO_DESCRIPTIONS = {
    'image': 'A [32 x 32 x 3] color image.',
    'label': 'A single integer between 0 and 9',
}
|
||||
|
||||
|
||||
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
  """Builds the `Dataset` describing how to read one cifar10 split.

  Args:
    split_name: A train/test split name.
    dataset_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted. Defaults to the module's own pattern.
    reader: The TensorFlow reader type. Defaults to `tf.TFRecordReader`.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid train/test split.
  """
  if split_name not in SPLITS_TO_SIZES:
    raise ValueError('split name %s was not recognized.' % split_name)

  pattern = file_pattern or _FILE_PATTERN
  data_sources = os.path.join(dataset_dir, pattern % split_name)

  # None is allowed in the signature so that dataset_factory can use the
  # default reader.
  reader = reader or tf.TFRecordReader

  # How each serialized Example is parsed ...
  keys_to_features = {
      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),
      'image/class/label': tf.FixedLenFeature(
          [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
  }
  # ... and how the parsed features become the 'image' and 'label' items.
  items_to_handlers = {
      'image': slim.tfexample_decoder.Image(shape=[32, 32, 3]),
      'label': slim.tfexample_decoder.Tensor('image/class/label'),
  }
  decoder = slim.tfexample_decoder.TFExampleDecoder(
      keys_to_features, items_to_handlers)

  # Label names are optional; they are read only when dataset_dir has them.
  if dataset_utils.has_labels(dataset_dir):
    labels_to_names = dataset_utils.read_label_file(dataset_dir)
  else:
    labels_to_names = None

  return slim.dataset.Dataset(
      data_sources=data_sources,
      reader=reader,
      decoder=decoder,
      num_samples=SPLITS_TO_SIZES[split_name],
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      num_classes=_NUM_CLASSES,
      labels_to_names=labels_to_names,
  )
|
||||
+59
@@ -0,0 +1,59 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""A factory-pattern class which returns classification image/label pairs."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from datasets import cifar10
|
||||
from datasets import flowers
|
||||
from datasets import imagenet
|
||||
from datasets import mnist
|
||||
from datasets import visualwakewords
|
||||
|
||||
# Registry of the supported datasets: dataset name -> module implementing it.
# get_dataset() below calls `get_split` on the selected module.
datasets_map = dict(
    cifar10=cifar10,
    flowers=flowers,
    imagenet=imagenet,
    mnist=mnist,
    visualwakewords=visualwakewords,
)
|
||||
|
||||
|
||||
def get_dataset(name, split_name, dataset_dir, file_pattern=None, reader=None):
  """Looks up a dataset by name and returns the requested split of it.

  Args:
    name: String, the name of the dataset.
    split_name: A train/test split name.
    dataset_dir: The directory where the dataset files are stored.
    file_pattern: The file pattern to use for matching the dataset source files.
    reader: The subclass of tf.ReaderBase. If left as `None`, then the default
      reader defined by each dataset is used.

  Returns:
    A `Dataset` class.

  Raises:
    ValueError: If the dataset `name` is unknown.
  """
  if name not in datasets_map:
    raise ValueError('Name of dataset unknown %s' % name)
  # Delegate to the module registered for this dataset.
  dataset_module = datasets_map[name]
  return dataset_module.get_split(
      split_name, dataset_dir, file_pattern, reader)
|
||||
+240
@@ -0,0 +1,240 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains utilities for downloading and converting datasets."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tarfile
|
||||
import zipfile
|
||||
|
||||
from six.moves import urllib
|
||||
import tensorflow as tf
|
||||
|
||||
# Default name of the labels file.
LABELS_FILENAME = 'labels.txt'
|
||||
|
||||
|
||||
def int64_feature(values):
  """Wraps one or more integers in a `tf.train.Feature`.

  Args:
    values: A single value, or a tuple/list of values. A value that is not a
      tuple or list is wrapped in a one-element list first.

  Returns:
    A `tf.train.Feature` whose `int64_list` holds the given values.
  """
  is_sequence = isinstance(values, (tuple, list))
  int64_values = values if is_sequence else [values]
  return tf.train.Feature(int64_list=tf.train.Int64List(value=int64_values))
|
||||
|
||||
|
||||
def bytes_list_feature(values):
  """Wraps a collection of byte strings in a `tf.train.Feature`.

  Args:
    values: The byte strings to store. They are forwarded unchanged as the
      `value` of a `tf.train.BytesList`; no scalar-to-list wrapping is done
      here.

  Returns:
    A `tf.train.Feature` whose `bytes_list` holds `values`.
  """
  bytes_list = tf.train.BytesList(value=values)
  return tf.train.Feature(bytes_list=bytes_list)
|
||||
|
||||
|
||||
def float_list_feature(values):
  """Wraps a collection of floats in a `tf.train.Feature`.

  Args:
    values: The floats to store. They are forwarded unchanged as the `value`
      of a `tf.train.FloatList`; no scalar-to-list wrapping is done here.

  Returns:
    A `tf.train.Feature` whose `float_list` holds `values`.
  """
  float_list = tf.train.FloatList(value=values)
  return tf.train.Feature(float_list=float_list)
|
||||
|
||||
|
||||
def bytes_feature(values):
  """Wraps a single byte string in a `tf.train.Feature`.

  Args:
    values: One byte string; it is stored as the only element of the list.

  Returns:
    A `tf.train.Feature` whose `bytes_list` contains exactly `values`.
  """
  single_item_list = tf.train.BytesList(value=[values])
  return tf.train.Feature(bytes_list=single_item_list)
|
||||
|
||||
|
||||
def float_feature(values):
  """Wraps one or more floats in a `tf.train.Feature`.

  Args:
    values: A scalar, or a tuple/list of values. A value that is not a tuple
      or list is wrapped in a one-element list first.

  Returns:
    A `tf.train.Feature` whose `float_list` holds the given values.
  """
  is_sequence = isinstance(values, (tuple, list))
  float_values = values if is_sequence else [values]
  return tf.train.Feature(float_list=tf.train.FloatList(value=float_values))
|
||||
|
||||
|
||||
def image_to_tfexample(image_data, image_format, height, width, class_id):
  """Builds a `tf.train.Example` describing one encoded image.

  Args:
    image_data: The encoded image, stored under 'image/encoded'.
    image_format: The image format tag, stored under 'image/format'.
    height: The image height, stored under 'image/height'.
    width: The image width, stored under 'image/width'.
    class_id: The class label, stored under 'image/class/label'.

  Returns:
    A `tf.train.Example` holding the five features above.
  """
  feature_map = {
      'image/encoded': bytes_feature(image_data),
      'image/format': bytes_feature(image_format),
      'image/class/label': int64_feature(class_id),
      'image/height': int64_feature(height),
      'image/width': int64_feature(width),
  }
  return tf.train.Example(features=tf.train.Features(feature=feature_map))
|
||||
|
||||
|
||||
def download_url(url, dataset_dir):
  """Downloads the file at `url` into `dataset_dir`, printing progress.

  The local file name is the last '/'-separated component of `url`.

  Args:
    url: The URL of a tarball or zip file.
    dataset_dir: The directory where the downloaded file is stored.

  Returns:
    filepath: path where the file is downloaded.
  """
  filename = url.split('/')[-1]
  filepath = os.path.join(dataset_dir, filename)

  def _progress(count, block_size, total_size):
    # urlretrieve reports a non-positive total_size when the size is unknown
    # or the file is empty; skip the percentage then instead of dividing by it.
    if total_size > 0:
      # The last block usually overshoots the real size, so cap at 100%.
      percent = min(
          float(count * block_size) / float(total_size) * 100.0, 100.0)
      sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename, percent))
    else:
      sys.stdout.write('\r>> Downloading %s' % filename)
    sys.stdout.flush()

  filepath, _ = urllib.request.urlretrieve(url, filepath, _progress)
  print()
  statinfo = os.stat(filepath)
  print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
  return filepath
|
||||
|
||||
|
||||
def download_and_uncompress_tarball(tarball_url, dataset_dir):
  """Downloads the `tarball_url` and uncompresses it locally.

  Args:
    tarball_url: The URL of a gzip-compressed tarball file.
    dataset_dir: The directory where the tarball is stored and extracted.
  """
  filepath = download_url(tarball_url, dataset_dir)
  # Open the archive in a `with` block so its file handle is closed even when
  # extraction fails.
  # NOTE(review): extractall() trusts the member paths stored in the archive;
  # only use this with archives from a trusted source.
  with tarfile.open(filepath, 'r:gz') as tar:
    tar.extractall(dataset_dir)
|
||||
|
||||
|
||||
def download_and_uncompress_zipfile(zip_url, dataset_dir):
  """Fetches the zip archive at `zip_url` (if needed) and unpacks it.

  The download is skipped when a file with the archive's name already exists
  in `dataset_dir`. Archive members that already exist on disk are not
  extracted again.

  Args:
    zip_url: The URL of a zip file.
    dataset_dir: The directory where the archive is stored and unpacked.
  """
  filename = zip_url.split('/')[-1]
  filepath = os.path.join(dataset_dir, filename)

  already_downloaded = tf.gfile.Exists(filepath)
  if not already_downloaded:
    filepath = download_url(zip_url, dataset_dir)
  else:
    print('File {filename} has been already downloaded at {filepath}. '
          'Unzipping it....'.format(filename=filename, filepath=filepath))

  with zipfile.ZipFile(filepath, 'r') as archive:
    for entry in archive.namelist():
      target = os.path.join(dataset_dir, entry)
      # Members that are already present on disk are left untouched.
      if os.path.exists(target) or os.path.isfile(target):
        continue
      archive.extract(entry, dataset_dir)
|
||||
|
||||
|
||||
def write_label_file(labels_to_class_names,
                     dataset_dir,
                     filename=LABELS_FILENAME):
  """Writes the label map to a text file, one `<label>:<class name>` per line.

  Args:
    labels_to_class_names: A map of (integer) labels to class names.
    dataset_dir: The directory in which the labels file should be written.
    filename: The name of the labels file inside `dataset_dir`.
  """
  labels_filename = os.path.join(dataset_dir, filename)
  with tf.gfile.Open(labels_filename, 'w') as label_file:
    for label, class_name in labels_to_class_names.items():
      label_file.write('%d:%s\n' % (label, class_name))
|
||||
|
||||
|
||||
def has_labels(dataset_dir, filename=LABELS_FILENAME):
  """Reports whether the dataset directory contains a label map file.

  Args:
    dataset_dir: The directory in which the labels file is looked up.
    filename: The name of the labels file inside `dataset_dir`.

  Returns:
    The result of `tf.gfile.Exists` for the labels file path.
  """
  labels_path = os.path.join(dataset_dir, filename)
  return tf.gfile.Exists(labels_path)
|
||||
|
||||
|
||||
def read_label_file(dataset_dir, filename=LABELS_FILENAME):
  """Parses the labels file into a mapping from label ID to class name.

  Each non-empty line is split at its first ':'; the text before it is
  converted with `int()` and used as the key, the rest is the class name.

  Args:
    dataset_dir: The directory in which the labels file is found.
    filename: The name of the labels file inside `dataset_dir`.

  Returns:
    A map from a label (integer) to class name.

  Raises:
    ValueError: If a non-empty line has no ':' or a non-integer label.
  """
  labels_filename = os.path.join(dataset_dir, filename)
  with tf.gfile.Open(labels_filename, 'rb') as f:
    contents = f.read().decode()

  labels_to_class_names = {}
  for entry in contents.split('\n'):
    if not entry:
      continue  # Skip blank lines, e.g. the one after the final newline.
    separator = entry.index(':')
    labels_to_class_names[int(entry[:separator])] = entry[separator + 1:]
  return labels_to_class_names
|
||||
|
||||
|
||||
def open_sharded_output_tfrecords(exit_stack, base_path, num_shards):
  """Opens one TFRecord writer per shard and registers each on an exit stack.

  Shard k is written to `<base_path>-<k>-of-<num_shards>`, with both numbers
  zero-padded to five digits.

  Args:
    exit_stack: An object with an `enter_context` method (such as an
      `ExitStack`) that takes ownership of every writer opened here.
    base_path: The base path for all shards.
    num_shards: The number of shards.

  Returns:
    The list of opened TFRecord writers. Position k in the list corresponds
    to shard k.
  """
  writers = []
  for shard_index in range(num_shards):
    shard_path = '{}-{:05d}-of-{:05d}'.format(
        base_path, shard_index, num_shards)
    writers.append(
        exit_stack.enter_context(tf.python_io.TFRecordWriter(shard_path)))
  return writers
|
||||
+198
@@ -0,0 +1,198 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
r"""Downloads and converts cifar10 data to TFRecords of TF-Example protos.
|
||||
|
||||
This module downloads the cifar10 data, uncompresses it, reads the files
|
||||
that make up the cifar10 data and creates two TFRecord datasets: one for train
|
||||
and one for test. Each TFRecord dataset is comprised of a set of TF-Example
|
||||
protocol buffers, each of which contain a single image and label.
|
||||
|
||||
The script should take several minutes to run.
|
||||
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tarfile
|
||||
|
||||
import numpy as np
|
||||
from six.moves import cPickle
|
||||
from six.moves import urllib
|
||||
import tensorflow as tf
|
||||
|
||||
from datasets import dataset_utils
|
||||
|
||||
# The URL where the CIFAR data can be downloaded.
|
||||
_DATA_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
|
||||
|
||||
# The number of training files.
|
||||
_NUM_TRAIN_FILES = 5
|
||||
|
||||
# The height and width of each image.
|
||||
_IMAGE_SIZE = 32
|
||||
|
||||
# The names of the classes.
|
||||
_CLASS_NAMES = [
|
||||
'airplane',
|
||||
'automobile',
|
||||
'bird',
|
||||
'cat',
|
||||
'deer',
|
||||
'dog',
|
||||
'frog',
|
||||
'horse',
|
||||
'ship',
|
||||
'truck',
|
||||
]
|
||||
|
||||
|
||||
def _add_to_tfrecord(filename, tfrecord_writer, offset=0):
  """Converts one cifar10 pickle file into PNG TF-Examples and writes them.

  Args:
    filename: The filename of the cifar10 pickle file.
    tfrecord_writer: The TFRecord writer to use for writing.
    offset: The number of images already written; only used for the progress
      display and the return value.

  Returns:
    The new offset, i.e. `offset` plus the number of images in this file.
  """
  # NOTE(review): unpickling executes code embedded in the file; only feed
  # this function archives from a trusted source.
  load_kwargs = {} if sys.version_info < (3,) else {'encoding': 'bytes'}
  with tf.gfile.Open(filename, 'rb') as f:
    data = cPickle.load(f, **load_kwargs)

  raw_images = data[b'data']
  num_images = raw_images.shape[0]
  labels = data[b'labels']

  # One row per image, viewed as (channels, height, width).
  images = raw_images.reshape((num_images, 3, 32, 32))

  with tf.Graph().as_default():
    image_placeholder = tf.placeholder(dtype=tf.uint8)
    encoded_image = tf.image.encode_png(image_placeholder)

    with tf.Session('') as sess:
      for index in range(num_images):
        sys.stdout.write('\r>> Reading file [%s] image %d/%d' % (
            filename, offset + index + 1, offset + num_images))
        sys.stdout.flush()

        # Move the channel axis last before PNG encoding.
        image = np.squeeze(images[index]).transpose((1, 2, 0))
        label = labels[index]

        png_string = sess.run(encoded_image,
                              feed_dict={image_placeholder: image})

        example = dataset_utils.image_to_tfexample(
            png_string, b'png', _IMAGE_SIZE, _IMAGE_SIZE, label)
        tfrecord_writer.write(example.SerializeToString())

  return offset + num_images
|
||||
|
||||
|
||||
def _get_output_filename(dataset_dir, split_name):
|
||||
"""Creates the output filename.
|
||||
|
||||
Args:
|
||||
dataset_dir: The dataset directory where the dataset is stored.
|
||||
split_name: The name of the train/test split.
|
||||
|
||||
Returns:
|
||||
An absolute file path.
|
||||
"""
|
||||
return '%s/cifar10_%s.tfrecord' % (dataset_dir, split_name)
|
||||
|
||||
|
||||
def _download_and_uncompress_dataset(dataset_dir):
  """Downloads cifar10 and uncompresses it locally.

  The archive is only fetched when no file with its name exists in
  `dataset_dir` yet.

  Args:
    dataset_dir: The directory where the temporary files are stored.
  """
  filename = _DATA_URL.split('/')[-1]
  filepath = os.path.join(dataset_dir, filename)

  if not os.path.exists(filepath):
    def _progress(count, block_size, total_size):
      # A non-positive total_size means the size is unknown or the file is
      # empty; skip the percentage then instead of dividing by it.
      if total_size > 0:
        percent = min(
            float(count * block_size) / float(total_size) * 100.0, 100.0)
        sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename, percent))
      else:
        sys.stdout.write('\r>> Downloading %s' % filename)
      sys.stdout.flush()
    filepath, _ = urllib.request.urlretrieve(_DATA_URL, filepath, _progress)
    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    # NOTE(review): extraction is kept inside the download branch, matching
    # the upstream layout of this helper -- confirm against the original file.
    # The `with` block closes the archive handle even if extraction fails.
    with tarfile.open(filepath, 'r:gz') as tar:
      tar.extractall(dataset_dir)
|
||||
|
||||
|
||||
def _clean_up_temporary_files(dataset_dir):
  """Deletes the downloaded cifar10 archive and its extracted directory.

  Args:
    dataset_dir: The directory where the temporary files are stored.
  """
  archive_name = _DATA_URL.split('/')[-1]
  tf.gfile.Remove(os.path.join(dataset_dir, archive_name))

  extracted_dir = os.path.join(dataset_dir, 'cifar-10-batches-py')
  tf.gfile.DeleteRecursively(extracted_dir)
|
||||
|
||||
|
||||
def run(dataset_dir):
  """Downloads cifar10 and converts it to train/test TFRecord files.

  Returns early, without downloading anything, when both output files already
  exist.

  Args:
    dataset_dir: The dataset directory where the dataset is stored.
  """
  if not tf.gfile.Exists(dataset_dir):
    tf.gfile.MakeDirs(dataset_dir)

  training_filename = _get_output_filename(dataset_dir, 'train')
  testing_filename = _get_output_filename(dataset_dir, 'test')

  outputs_present = (tf.gfile.Exists(training_filename) and
                     tf.gfile.Exists(testing_filename))
  if outputs_present:
    print('Dataset files already exist. Exiting without re-creating them.')
    return

  dataset_utils.download_and_uncompress_tarball(_DATA_URL, dataset_dir)
  batches_dir = os.path.join(dataset_dir, 'cifar-10-batches-py')

  # Training data: the batch files are numbered starting at 1, and the running
  # offset is threaded through so the progress display counts across files.
  with tf.python_io.TFRecordWriter(training_filename) as tfrecord_writer:
    offset = 0
    for batch_number in range(1, _NUM_TRAIN_FILES + 1):
      batch_path = os.path.join(batches_dir, 'data_batch_%d' % batch_number)
      offset = _add_to_tfrecord(batch_path, tfrecord_writer, offset)

  # Testing data: a single batch file.
  with tf.python_io.TFRecordWriter(testing_filename) as tfrecord_writer:
    _add_to_tfrecord(os.path.join(batches_dir, 'test_batch'), tfrecord_writer)

  # Labels file: class index -> class name.
  labels_to_class_names = dict(enumerate(_CLASS_NAMES))
  dataset_utils.write_label_file(labels_to_class_names, dataset_dir)

  _clean_up_temporary_files(dataset_dir)
  print('\nFinished converting the Cifar10 dataset!')
|
||||
+211
@@ -0,0 +1,211 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
r"""Downloads and converts Flowers data to TFRecords of TF-Example protos.
|
||||
|
||||
This module downloads the Flowers data, uncompresses it, reads the files
|
||||
that make up the Flowers data and creates two TFRecord datasets: one for train
|
||||
and one for test. Each TFRecord dataset is comprised of a set of TF-Example
|
||||
protocol buffers, each of which contain a single image and label.
|
||||
|
||||
The script should take about a minute to run.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from datasets import dataset_utils
|
||||
|
||||
# The URL where the Flowers data can be downloaded.
|
||||
_DATA_URL = 'http://download.tensorflow.org/example_images/flower_photos.tgz'
|
||||
|
||||
# The number of images in the validation set.
|
||||
_NUM_VALIDATION = 350
|
||||
|
||||
# Seed for repeatability.
|
||||
_RANDOM_SEED = 0
|
||||
|
||||
# The number of shards per dataset split.
|
||||
_NUM_SHARDS = 5
|
||||
|
||||
|
||||
class ImageReader(object):
  """Decodes JPEG bytes through a small TensorFlow graph."""

  def __init__(self):
    # Placeholder fed with raw JPEG bytes, and the op decoding it to 3 channels.
    jpeg_input = tf.placeholder(dtype=tf.string)
    self._decode_jpeg_data = jpeg_input
    self._decode_jpeg = tf.image.decode_jpeg(jpeg_input, channels=3)

  def read_image_dims(self, sess, image_data):
    """Returns the first two dimensions of the decoded image as a tuple."""
    decoded = self.decode_jpeg(sess, image_data)
    dims = decoded.shape
    return dims[0], dims[1]

  def decode_jpeg(self, sess, image_data):
    """Runs the decode op in `sess` and checks the result is rank 3 with 3 channels."""
    feed = {self._decode_jpeg_data: image_data}
    image = sess.run(self._decode_jpeg, feed_dict=feed)
    assert len(image.shape) == 3
    assert image.shape[2] == 3
    return image
|
||||
|
||||
|
||||
def _get_filenames_and_classes(dataset_dir):
|
||||
"""Returns a list of filenames and inferred class names.
|
||||
|
||||
Args:
|
||||
dataset_dir: A directory containing a set of subdirectories representing
|
||||
class names. Each subdirectory should contain PNG or JPG encoded images.
|
||||
|
||||
Returns:
|
||||
A list of image file paths, relative to `dataset_dir` and the list of
|
||||
subdirectories, representing class names.
|
||||
"""
|
||||
flower_root = os.path.join(dataset_dir, 'flower_photos')
|
||||
directories = []
|
||||
class_names = []
|
||||
for filename in os.listdir(flower_root):
|
||||
path = os.path.join(flower_root, filename)
|
||||
if os.path.isdir(path):
|
||||
directories.append(path)
|
||||
class_names.append(filename)
|
||||
|
||||
photo_filenames = []
|
||||
for directory in directories:
|
||||
for filename in os.listdir(directory):
|
||||
path = os.path.join(directory, filename)
|
||||
photo_filenames.append(path)
|
||||
|
||||
return photo_filenames, sorted(class_names)
|
||||
|
||||
|
||||
def _get_dataset_filename(dataset_dir, split_name, shard_id):
  """Returns the TFRecord path of shard `shard_id` for the given split.

  The file name is `flowers_<split>_<shard>-of-<_NUM_SHARDS>.tfrecord`, with
  both numbers zero-padded to five digits.
  """
  shard_basename = 'flowers_%s_%05d-of-%05d.tfrecord' % (
      split_name, shard_id, _NUM_SHARDS)
  shard_path = os.path.join(dataset_dir, shard_basename)
  return shard_path
|
||||
|
||||
|
||||
def _convert_dataset(split_name, filenames, class_names_to_ids, dataset_dir):
  """Converts the given filenames to a TFRecord dataset.

  The images are distributed over `_NUM_SHARDS` output files in order; the
  class of each image is the name of the directory that contains it.

  Args:
    split_name: The name of the dataset, either 'train' or 'validation'.
    filenames: A list of paths to jpg images.
    class_names_to_ids: A dictionary from class names (strings) to ids
      (integers).
    dataset_dir: The directory where the converted datasets are stored.
  """
  assert split_name in ['train', 'validation']

  num_per_shard = int(math.ceil(len(filenames) / float(_NUM_SHARDS)))

  with tf.Graph().as_default():
    image_reader = ImageReader()

    with tf.Session('') as sess:

      for shard_id in range(_NUM_SHARDS):
        output_filename = _get_dataset_filename(
            dataset_dir, split_name, shard_id)

        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
          start_ndx = shard_id * num_per_shard
          # The last shard may be shorter than the others.
          end_ndx = min((shard_id+1) * num_per_shard, len(filenames))
          for i in range(start_ndx, end_ndx):
            sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
                i+1, len(filenames), shard_id))
            sys.stdout.flush()

            # Read the file inside a `with` block so each image's handle is
            # closed right away instead of leaking until garbage collection.
            with tf.gfile.GFile(filenames[i], 'rb') as image_file:
              image_data = image_file.read()
            height, width = image_reader.read_image_dims(sess, image_data)

            class_name = os.path.basename(os.path.dirname(filenames[i]))
            class_id = class_names_to_ids[class_name]

            example = dataset_utils.image_to_tfexample(
                image_data, b'jpg', height, width, class_id)
            tfrecord_writer.write(example.SerializeToString())

  sys.stdout.write('\n')
  sys.stdout.flush()
|
||||
|
||||
|
||||
def _clean_up_temporary_files(dataset_dir):
  """Deletes the downloaded Flowers archive and its extracted directory.

  Args:
    dataset_dir: The directory where the temporary files are stored.
  """
  archive_name = _DATA_URL.split('/')[-1]
  tf.gfile.Remove(os.path.join(dataset_dir, archive_name))

  extracted_dir = os.path.join(dataset_dir, 'flower_photos')
  tf.gfile.DeleteRecursively(extracted_dir)
|
||||
|
||||
|
||||
def _dataset_exists(dataset_dir):
  """Returns True only if every train and validation shard file exists."""
  return all(
      tf.gfile.Exists(_get_dataset_filename(dataset_dir, split_name, shard_id))
      for split_name in ('train', 'validation')
      for shard_id in range(_NUM_SHARDS))
|
||||
|
||||
|
||||
def run(dataset_dir):
  """Downloads the Flowers photos and converts them to sharded TFRecords.

  Returns early, without downloading anything, when all shard files already
  exist.

  Args:
    dataset_dir: The dataset directory where the dataset is stored.
  """
  if not tf.gfile.Exists(dataset_dir):
    tf.gfile.MakeDirs(dataset_dir)

  if _dataset_exists(dataset_dir):
    print('Dataset files already exist. Exiting without re-creating them.')
    return

  dataset_utils.download_and_uncompress_tarball(_DATA_URL, dataset_dir)
  photo_filenames, class_names = _get_filenames_and_classes(dataset_dir)
  class_names_to_ids = {
      class_name: class_id for class_id, class_name in enumerate(class_names)
  }

  # Shuffle with a fixed seed, then hold out the first _NUM_VALIDATION photos
  # for validation and train on the rest.
  random.seed(_RANDOM_SEED)
  random.shuffle(photo_filenames)
  validation_filenames = photo_filenames[:_NUM_VALIDATION]
  training_filenames = photo_filenames[_NUM_VALIDATION:]

  # Convert the training split first, then the validation split.
  splits = (('train', training_filenames),
            ('validation', validation_filenames))
  for split_name, split_filenames in splits:
    _convert_dataset(split_name, split_filenames, class_names_to_ids,
                     dataset_dir)

  # Labels file: class index -> class name.
  labels_to_class_names = dict(enumerate(class_names))
  dataset_utils.write_label_file(labels_to_class_names, dataset_dir)

  _clean_up_temporary_files(dataset_dir)
  print('\nFinished converting the Flowers dataset!')
|
||||
+103
@@ -0,0 +1,103 @@
|
||||
#!/bin/bash
|
||||
# Copyright 2016 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
# Script to download and preprocess ImageNet Challenge 2012
|
||||
# training and validation data set.
|
||||
#
|
||||
# The final output of this script are sharded TFRecord files containing
|
||||
# serialized Example protocol buffers. See build_imagenet_data.py for
|
||||
# details of how the Example protocol buffers contain the ImageNet data.
|
||||
#
|
||||
# The final output of this script appears as such:
|
||||
#
|
||||
# data_dir/train-00000-of-01024
|
||||
# data_dir/train-00001-of-01024
|
||||
# ...
|
||||
# data_dir/train-01023-of-01024
|
||||
#
|
||||
# and
|
||||
#
|
||||
# data_dir/validation-00000-of-00128
|
||||
# data_dir/validation-00001-of-00128
|
||||
# ...
|
||||
# data_dir/validation-00127-of-00128
|
||||
#
|
||||
# Note that this script may take several hours to run to completion. The
|
||||
# conversion of the ImageNet data to TFRecords alone takes 2-3 hours depending
|
||||
# on the speed of your machine. Please be patient.
|
||||
#
|
||||
# **IMPORTANT**
|
||||
# To download the raw images, the user must create an account with image-net.org
|
||||
# and generate a username and access_key. The latter two are required for
|
||||
# downloading the raw images.
|
||||
#
|
||||
# usage:
|
||||
# cd research/slim
|
||||
# bazel build :download_and_convert_imagenet
|
||||
# ./bazel-bin/download_and_convert_imagenet.sh [data-dir]
|
||||
set -e

# A data directory argument is mandatory. Report misuse on stderr and exit
# with a failing status so callers can detect the error.
if [ -z "$1" ]; then
  echo "usage download_and_convert_imagenet.sh [data dir]" >&2
  exit 1
fi

# Create the output and temporary directories.
# DATA_DIR has any single trailing slash removed; SCRATCH_DIR ends with "/".
DATA_DIR="${1%/}"
SCRATCH_DIR="${DATA_DIR}/raw-data/"
mkdir -p "${DATA_DIR}"
mkdir -p "${SCRATCH_DIR}"
# Helper scripts and data files are resolved relative to this runfiles tree.
WORK_DIR="$0.runfiles/__main__"

# Download the ImageNet data.
LABELS_FILE="${WORK_DIR}/datasets/imagenet_lsvrc_2015_synsets.txt"
DOWNLOAD_SCRIPT="${WORK_DIR}/datasets/download_imagenet.sh"
"${DOWNLOAD_SCRIPT}" "${SCRATCH_DIR}" "${LABELS_FILE}"

# Note the locations of the train and validation data.
TRAIN_DIRECTORY="${SCRATCH_DIR}train/"
VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/"

# Preprocess the validation data by moving the images into the appropriate
# sub-directory based on the label (synset) of the image.
echo "Organizing the validation data into sub-directories."
PREPROCESS_VAL_SCRIPT="${WORK_DIR}/datasets/preprocess_imagenet_validation_data.py"
VAL_LABELS_FILE="${WORK_DIR}/datasets/imagenet_2012_validation_synset_labels.txt"

"${PREPROCESS_VAL_SCRIPT}" "${VALIDATION_DIRECTORY}" "${VAL_LABELS_FILE}"

# Convert the XML files for bounding box annotations into a single CSV.
echo "Extracting bounding box information from XML."
BOUNDING_BOX_SCRIPT="${WORK_DIR}/datasets/process_bounding_boxes.py"
# SCRATCH_DIR already ends with "/", so no extra separator is added here.
BOUNDING_BOX_FILE="${SCRATCH_DIR}imagenet_2012_bounding_boxes.csv"
BOUNDING_BOX_DIR="${SCRATCH_DIR}bounding_boxes/"

"${BOUNDING_BOX_SCRIPT}" "${BOUNDING_BOX_DIR}" "${LABELS_FILE}" \
  | sort >"${BOUNDING_BOX_FILE}"
echo "Finished downloading and preprocessing the ImageNet data."

# Build the TFRecords version of the ImageNet data.
BUILD_SCRIPT="${WORK_DIR}/build_imagenet_data"
OUTPUT_DIRECTORY="${DATA_DIR}"
IMAGENET_METADATA_FILE="${WORK_DIR}/datasets/imagenet_metadata.txt"

"${BUILD_SCRIPT}" \
  --train_directory="${TRAIN_DIRECTORY}" \
  --validation_directory="${VALIDATION_DIRECTORY}" \
  --output_directory="${OUTPUT_DIRECTORY}" \
  --imagenet_metadata_file="${IMAGENET_METADATA_FILE}" \
  --labels_file="${LABELS_FILE}" \
  --bounding_box_file="${BOUNDING_BOX_FILE}"
|
||||
+221
@@ -0,0 +1,221 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
r"""Downloads and converts MNIST data to TFRecords of TF-Example protos.
|
||||
|
||||
This module downloads the MNIST data, uncompresses it, reads the files
|
||||
that make up the MNIST data and creates two TFRecord datasets: one for train
|
||||
and one for test. Each TFRecord dataset is comprised of a set of TF-Example
|
||||
protocol buffers, each of which contain a single image and label.
|
||||
|
||||
The script should take about a minute to run.
|
||||
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import gzip
|
||||
import os
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
from six.moves import urllib
|
||||
import tensorflow as tf
|
||||
|
||||
from datasets import dataset_utils
|
||||
|
||||
# The URLs where the MNIST data can be downloaded.
|
||||
_DATA_URL = 'http://yann.lecun.com/exdb/mnist/'
|
||||
_TRAIN_DATA_FILENAME = 'train-images-idx3-ubyte.gz'
|
||||
_TRAIN_LABELS_FILENAME = 'train-labels-idx1-ubyte.gz'
|
||||
_TEST_DATA_FILENAME = 't10k-images-idx3-ubyte.gz'
|
||||
_TEST_LABELS_FILENAME = 't10k-labels-idx1-ubyte.gz'
|
||||
|
||||
_IMAGE_SIZE = 28
|
||||
_NUM_CHANNELS = 1
|
||||
|
||||
# The names of the classes.
|
||||
_CLASS_NAMES = [
|
||||
'zero',
|
||||
'one',
|
||||
'two',
|
||||
'three',
|
||||
'four',
|
||||
'five',
|
||||
'size',
|
||||
'seven',
|
||||
'eight',
|
||||
'nine',
|
||||
]
|
||||
|
||||
|
||||
def _extract_images(filename, num_images):
  """Reads the images of a gzipped MNIST images file into a numpy array.

  Args:
    filename: The path to an MNIST images file.
    num_images: The number of images in the file.

  Returns:
    A uint8 numpy array of shape [num_images, height, width, channels].
  """
  print('Extracting images from: ', filename)
  bytes_to_read = _IMAGE_SIZE * _IMAGE_SIZE * num_images * _NUM_CHANNELS
  with gzip.open(filename) as bytestream:
    # The first 16 bytes are skipped (presumably the file header).
    bytestream.read(16)
    pixel_bytes = bytestream.read(bytes_to_read)
  pixels = np.frombuffer(pixel_bytes, dtype=np.uint8)
  return pixels.reshape(num_images, _IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS)
|
||||
|
||||
|
||||
def _extract_labels(filename, num_labels):
|
||||
"""Extract the labels into a vector of int64 label IDs.
|
||||
|
||||
Args:
|
||||
filename: The path to an MNIST labels file.
|
||||
num_labels: The number of labels in the file.
|
||||
|
||||
Returns:
|
||||
A numpy array of shape [number_of_labels]
|
||||
"""
|
||||
print('Extracting labels from: ', filename)
|
||||
with gzip.open(filename) as bytestream:
|
||||
bytestream.read(8)
|
||||
buf = bytestream.read(1 * num_labels)
|
||||
labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64)
|
||||
return labels
|
||||
|
||||
|
||||
def _add_to_tfrecord(data_filename, labels_filename, num_images,
                     tfrecord_writer):
  """Encodes MNIST images as PNG TF-Examples and writes them to a TFRecord.

  Args:
    data_filename: The filename of the MNIST images.
    labels_filename: The filename of the MNIST labels.
    num_images: The number of images in the dataset.
    tfrecord_writer: The TFRecord writer to use for writing.
  """
  images = _extract_images(data_filename, num_images)
  labels = _extract_labels(labels_filename, num_images)

  image_shape = (_IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS)
  with tf.Graph().as_default():
    image_input = tf.placeholder(dtype=tf.uint8, shape=image_shape)
    png_encoder = tf.image.encode_png(image_input)

    with tf.Session('') as sess:
      for index in range(num_images):
        sys.stdout.write(
            '\r>> Converting image %d/%d' % (index + 1, num_images))
        sys.stdout.flush()

        png_string = sess.run(png_encoder,
                              feed_dict={image_input: images[index]})

        example = dataset_utils.image_to_tfexample(
            png_string, 'png'.encode(), _IMAGE_SIZE, _IMAGE_SIZE,
            labels[index])
        tfrecord_writer.write(example.SerializeToString())
|
||||
|
||||
|
||||
def _get_output_filename(dataset_dir, split_name):
|
||||
"""Creates the output filename.
|
||||
|
||||
Args:
|
||||
dataset_dir: The directory where the temporary files are stored.
|
||||
split_name: The name of the train/test split.
|
||||
|
||||
Returns:
|
||||
An absolute file path.
|
||||
"""
|
||||
return '%s/mnist_%s.tfrecord' % (dataset_dir, split_name)
|
||||
|
||||
|
||||
def _download_dataset(dataset_dir):
  """Fetches the four MNIST archives that are not yet present locally.

  Args:
    dataset_dir: The directory where the temporary files are stored.
  """

  def _progress(count, block_size, total_size):
    # Report hook for urlretrieve: prints the percentage fetched so far.
    sys.stdout.write('\r>> Downloading %.1f%%' % (
        float(count * block_size) / float(total_size) * 100.0))
    sys.stdout.flush()

  archive_names = (
      _TRAIN_DATA_FILENAME,
      _TRAIN_LABELS_FILENAME,
      _TEST_DATA_FILENAME,
      _TEST_LABELS_FILENAME,
  )
  for filename in archive_names:
    target = os.path.join(dataset_dir, filename)
    if os.path.exists(target):
      # Already downloaded on a previous run.
      continue

    print('Downloading file %s...' % filename)
    target, _ = urllib.request.urlretrieve(
        _DATA_URL + filename, target, _progress)
    print()
    with tf.gfile.GFile(target) as downloaded:
      size = downloaded.size()
    print('Successfully downloaded', filename, size, 'bytes.')
|
||||
|
||||
|
||||
def _clean_up_temporary_files(dataset_dir):
  """Deletes the downloaded MNIST archives from `dataset_dir`.

  Args:
    dataset_dir: The directory where the temporary files are stored.
  """
  archive_names = (
      _TRAIN_DATA_FILENAME,
      _TRAIN_LABELS_FILENAME,
      _TEST_DATA_FILENAME,
      _TEST_LABELS_FILENAME,
  )
  for archive_name in archive_names:
    tf.gfile.Remove(os.path.join(dataset_dir, archive_name))
|
||||
|
||||
|
||||
def run(dataset_dir):
  """Downloads MNIST and converts it to train/test TFRecord files.

  Does nothing beyond printing a message when both output files already exist.

  Args:
    dataset_dir: The dataset directory where the dataset is stored.
  """
  if not tf.gfile.Exists(dataset_dir):
    tf.gfile.MakeDirs(dataset_dir)

  training_filename = _get_output_filename(dataset_dir, 'train')
  testing_filename = _get_output_filename(dataset_dir, 'test')

  outputs_present = all(
      tf.gfile.Exists(path) for path in (training_filename, testing_filename))
  if outputs_present:
    print('Dataset files already exist. Exiting without re-creating them.')
    return

  _download_dataset(dataset_dir)

  # (output file, image archive, label archive, number of examples);
  # the training split is converted first, then the test split.
  splits = (
      (training_filename, _TRAIN_DATA_FILENAME, _TRAIN_LABELS_FILENAME, 60000),
      (testing_filename, _TEST_DATA_FILENAME, _TEST_LABELS_FILENAME, 10000),
  )
  for output_filename, data_name, labels_name, num_images in splits:
    with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
      _add_to_tfrecord(
          os.path.join(dataset_dir, data_name),
          os.path.join(dataset_dir, labels_name),
          num_images,
          tfrecord_writer)

  # Finally, write the labels file (class index -> class name).
  dataset_utils.write_label_file(dict(enumerate(_CLASS_NAMES)), dataset_dir)

  _clean_up_temporary_files(dataset_dir)
  print('\nFinished converting the MNIST dataset!')
|
||||
+158
@@ -0,0 +1,158 @@
|
||||
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
r"""Downloads and converts VisualWakewords data to TFRecords of TF-Example protos.
|
||||
|
||||
This module downloads the COCO dataset, uncompresses it, derives the
|
||||
VisualWakeWords dataset to create two TFRecord datasets: one for
|
||||
train and one for test. Each TFRecord dataset is comprised of a set of
|
||||
TF-Example protocol buffers, each of which contain a single image and label.
|
||||
|
||||
The script should take several minutes to run.
|
||||
Please note that this tool creates sharded output files.
|
||||
|
||||
VisualWakeWords dataset is used to design tiny models classifying two classes,
|
||||
such as person/not-person. The two steps to generate the VisualWakeWords
|
||||
dataset from the COCO dataset are given below:
|
||||
|
||||
1. Use COCO annotations to create VisualWakeWords annotations:
|
||||
|
||||
Note: A bounding box is 'valid' if it has the foreground_class_of_interest
|
||||
(e.g. person) and its area is greater than 0.5% of the image area.
|
||||
|
||||
The resulting annotations file has the following fields, where 'images' are
|
||||
the same as COCO dataset. 'categories' only contains information about the
|
||||
foreground_class_of_interest (e.g. person) and 'annotations' maps an image to
|
||||
objects (a list of valid bounding boxes) and label (value is 1 if it has
|
||||
at least one valid bounding box, otherwise 0)
|
||||
|
||||
images[{
|
||||
"id", "width", "height", "file_name", "flickr_url", "coco_url",
|
||||
"license", "date_captured",
|
||||
}]
|
||||
|
||||
categories{
|
||||
"id": {"id", "name", "supercategory"}
|
||||
}
|
||||
|
||||
annotations{
|
||||
"image_id": {"objects":[{"area", "bbox" : [x,y,width,height]}], "label"}
|
||||
}
|
||||
|
||||
2. Use VisualWakeWords annotations to create TFRecords:
|
||||
|
||||
The resulting TFRecord file contains the following features:
|
||||
{ image/height, image/width, image/source_id, image/encoded,
|
||||
image/class/label_text, image/class/label,
|
||||
image/object/class/text,
|
||||
image/object/bbox/ymin, image/object/bbox/xmin, image/object/bbox/ymax,
|
||||
image/object/bbox/xmax, image/object/area
|
||||
image/filename, image/format, image/key/sha256}
|
||||
For classification models, you need the image/encoded and image/class/label.
|
||||
|
||||
Example usage:
|
||||
Run download_and_convert_data.py in the parent directory as follows:
|
||||
|
||||
python download_and_convert_data.py --logtostderr \
|
||||
--dataset_name=visualwakewords \
|
||||
--dataset_dir="${DATASET_DIR}" \
|
||||
--small_object_area_threshold=0.005 \
|
||||
--foreground_class_of_interest='person'
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import tensorflow as tf
|
||||
from datasets import download_and_convert_visualwakewords_lib
|
||||
|
||||
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
|
||||
|
||||
# Name of the sub-directory (relative to the visualwakewords dataset
# directory) that run() joins onto dataset_dir to locate the COCO download.
# The help text is built from adjacent string literals, so the first one must
# end with a space or the words run together ("directorycontaining").
tf.compat.v1.app.flags.DEFINE_string(
    'coco_dirname', 'coco_dataset',
    'A subdirectory in visualwakewords dataset directory '
    'containing the coco dataset')
|
||||
|
||||
FLAGS = tf.compat.v1.app.flags.FLAGS
|
||||
|
||||
|
||||
def run(dataset_dir, small_object_area_threshold, foreground_class_of_interest):
  """Downloads COCO and derives the VisualWakeWords TFRecords from it.

  Args:
    dataset_dir: The dataset directory where the dataset is stored.
    small_object_area_threshold: Threshold of fraction of image area below which
      small objects are filtered
    foreground_class_of_interest: Build a binary classifier based on the
      presence or absence of this object in the image.
  """
  vww_lib = download_and_convert_visualwakewords_lib

  # Step 1: fetch COCO into a sub-directory of the visualwakewords directory.
  coco_dir = os.path.join(dataset_dir, FLAGS.coco_dirname)
  if not tf.gfile.IsDirectory(coco_dir):
    tf.gfile.MakeDirs(coco_dir)
  vww_lib.download_coco_dataset(coco_dir)

  # Inputs: COCO annotation files and image directories.
  coco_annotations_dir = os.path.join(coco_dir, 'annotations')
  train_annotations_file = os.path.join(
      coco_annotations_dir, 'instances_train2014.json')
  val_annotations_file = os.path.join(
      coco_annotations_dir, 'instances_val2014.json')
  train_image_dir = os.path.join(coco_dir, 'train2014')
  val_image_dir = os.path.join(coco_dir, 'val2014')

  # Outputs: VisualWakeWords annotations, labels file and TFRecords.
  visualwakewords_annotations_train = os.path.join(
      dataset_dir, 'instances_visualwakewords_train2014.json')
  visualwakewords_annotations_val = os.path.join(
      dataset_dir, 'instances_visualwakewords_val2014.json')
  visualwakewords_labels_filename = os.path.join(dataset_dir, 'labels.txt')
  train_output_path = os.path.join(dataset_dir, 'train.record')
  val_output_path = os.path.join(dataset_dir, 'val.record')

  # Step 2: write the labels file.
  tf.logging.info('Creating a labels file...')
  vww_lib.create_labels_file(
      foreground_class_of_interest, visualwakewords_labels_filename)

  # Step 3: turn the COCO annotations into VisualWakeWords annotations.
  tf.logging.info('Creating train VisualWakeWords annotations...')
  vww_lib.create_visual_wakeword_annotations(
      train_annotations_file,
      visualwakewords_annotations_train,
      small_object_area_threshold,
      foreground_class_of_interest)
  tf.logging.info('Creating validation VisualWakeWords annotations...')
  vww_lib.create_visual_wakeword_annotations(
      val_annotations_file,
      visualwakewords_annotations_val,
      small_object_area_threshold,
      foreground_class_of_interest)

  # Step 4: serialize images plus VisualWakeWords annotations to TFRecords.
  tf.logging.info('Creating train TFRecords for VisualWakeWords dataset...')
  vww_lib.create_tf_record_for_visualwakewords_dataset(
      visualwakewords_annotations_train,
      train_image_dir,
      train_output_path,
      num_shards=100)

  tf.logging.info(
      'Creating validation TFRecords for VisualWakeWords dataset...')
  vww_lib.create_tf_record_for_visualwakewords_dataset(
      visualwakewords_annotations_val,
      val_image_dir,
      val_output_path,
      num_shards=10)
|
||||
+286
@@ -0,0 +1,286 @@
|
||||
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
r"""Helper functions to generate the Visual WakeWords dataset.
|
||||
|
||||
It filters raw COCO annotations file to Visual WakeWords Dataset
|
||||
annotations. The resulting annotations and COCO images are then converted
|
||||
to TF records.
|
||||
See download_and_convert_visualwakewords.py for the sample usage.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import collections
|
||||
import hashlib
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import contextlib2
|
||||
|
||||
import PIL.Image
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from datasets import dataset_utils
|
||||
|
||||
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
|
||||
|
||||
tf.compat.v1.app.flags.DEFINE_string(
|
||||
'coco_train_url',
|
||||
'http://images.cocodataset.org/zips/train2014.zip',
|
||||
'Link to zip file containing coco training data')
|
||||
tf.compat.v1.app.flags.DEFINE_string(
|
||||
'coco_validation_url',
|
||||
'http://images.cocodataset.org/zips/val2014.zip',
|
||||
'Link to zip file containing coco validation data')
|
||||
tf.compat.v1.app.flags.DEFINE_string(
|
||||
'coco_annotations_url',
|
||||
'http://images.cocodataset.org/annotations/annotations_trainval2014.zip',
|
||||
'Link to zip file containing coco annotation data')
|
||||
|
||||
FLAGS = tf.compat.v1.app.flags.FLAGS
|
||||
|
||||
|
||||
def download_coco_dataset(dataset_dir):
  """Downloads and unpacks the three COCO zip archives.

  The archives are fetched in this order: training images, validation images,
  annotations. Their URLs come from the corresponding command-line flags.

  Args:
    dataset_dir: Path where coco dataset should be downloaded.
  """
  archive_urls = (
      FLAGS.coco_train_url,
      FLAGS.coco_validation_url,
      FLAGS.coco_annotations_url,
  )
  for archive_url in archive_urls:
    dataset_utils.download_and_uncompress_zipfile(archive_url, dataset_dir)
|
||||
|
||||
|
||||
def create_labels_file(foreground_class_of_interest,
                       visualwakewords_labels_file):
  """Writes the two-line visualwakewords labels file.

  The file contains '0:background' followed by
  '1:<foreground_class_of_interest>', one entry per line.

  Args:
    foreground_class_of_interest: category from COCO dataset that is filtered by
      the visualwakewords dataset
    visualwakewords_labels_file: output visualwakewords label file
  """
  class_names = ('background', foreground_class_of_interest)
  with open(visualwakewords_labels_file, 'w') as fp:
    fp.writelines(
        '%s:%s\n' % (label, name) for label, name in enumerate(class_names))
|
||||
|
||||
|
||||
def create_visual_wakeword_annotations(annotations_file,
                                       visualwakewords_annotations_file,
                                       small_object_area_threshold,
                                       foreground_class_of_interest):
  """Derives a visual wakewords annotations file from a COCO annotations file.

  The output JSON has three entries: 'images' (copied from the input),
  'annotations' (image id -> result of `_filter_annotations`) and 'categories'
  (only the categories whose name equals `foreground_class_of_interest`).

  Args:
    annotations_file: JSON file containing COCO bounding box annotations
    visualwakewords_annotations_file: path to output annotations file
    small_object_area_threshold: threshold on fraction of image area below which
      small object bounding boxes are filtered
    foreground_class_of_interest: category from COCO dataset that is filtered by
      the visual wakewords dataset
  """
  with tf.gfile.GFile(annotations_file, 'r') as fid:
    coco_data = json.load(fid)
  images = coco_data['images']

  # Category id used when no category name matches; the original code
  # describes this default (1) as the "person" class.
  target_category_id = 1
  category_index = {}
  for category in coco_data['categories']:
    if category['name'] != foreground_class_of_interest:
      continue
    target_category_id = category['id']
    category_index[target_category_id] = category

  # Group the raw annotations per image:
  # { image_id: {"objects": [annotations of that image]} }
  tf.logging.info('Building annotations index...')
  per_image = collections.defaultdict(lambda: collections.defaultdict(list))
  for entry in coco_data['annotations']:
    per_image[entry['image_id']]['objects'].append(entry)
  # Must be computed before the loop below, which adds an (empty) entry to
  # the defaultdict for every image that had no annotations.
  tf.logging.info('%d images are missing annotations.',
                  len(images) - len(per_image))

  filtered_index = {}
  num_images = len(images)
  for position, image in enumerate(images):
    if position % 100 == 0:
      tf.logging.info('On image %d of %d', position, num_images)
    filtered_index[image['id']] = _filter_annotations(
        per_image[image['id']], image, small_object_area_threshold,
        target_category_id)

  output = {
      'images': images,
      'annotations': filtered_index,
      'categories': category_index
  }
  with open(visualwakewords_annotations_file, 'w') as fp:
    json.dump(output, fp)
|
||||
|
||||
|
||||
def _filter_annotations(annotations, image, small_object_area_threshold,
|
||||
foreground_class_of_interest_id):
|
||||
"""Filters COCO annotations to visual wakewords annotations.
|
||||
|
||||
Args:
|
||||
annotations: dicts with keys: {
|
||||
u'objects': [{u'id', u'image_id', u'category_id', u'segmentation',
|
||||
u'area', u'bbox' : [x,y,width,height], u'iscrowd'}] } Notice
|
||||
that bounding box coordinates in the official COCO dataset
|
||||
are given as [x, y, width, height] tuples using absolute
|
||||
coordinates where x, y represent the top-left (0-indexed)
|
||||
corner.
|
||||
image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
|
||||
u'width', u'date_captured', u'flickr_url', u'id']
|
||||
small_object_area_threshold: threshold on fraction of image area below which
|
||||
small objects are filtered
|
||||
foreground_class_of_interest_id: category of COCO dataset which visual
|
||||
wakewords filters
|
||||
|
||||
Returns:
|
||||
annotations_filtered: dict with keys: {
|
||||
u'objects': [{"area", "bbox" : [x,y,width,height]}],
|
||||
u'label',
|
||||
}
|
||||
"""
|
||||
objects = []
|
||||
image_area = image['height'] * image['width']
|
||||
for annotation in annotations['objects']:
|
||||
normalized_object_area = annotation['area'] / image_area
|
||||
category_id = int(annotation['category_id'])
|
||||
# Filter valid bounding boxes
|
||||
if category_id == foreground_class_of_interest_id and \
|
||||
normalized_object_area > small_object_area_threshold:
|
||||
objects.append({
|
||||
u'area': annotation['area'],
|
||||
u'bbox': annotation['bbox'],
|
||||
})
|
||||
label = 1 if objects else 0
|
||||
return {
|
||||
'objects': objects,
|
||||
'label': label,
|
||||
}
|
||||
|
||||
|
||||
def create_tf_record_for_visualwakewords_dataset(annotations_file, image_dir,
                                                 output_path, num_shards):
  """Loads Visual WakeWords annotations/images and converts to tf.Record format.

  Images are distributed round-robin over the output shards (image `idx` goes
  to shard `idx % num_shards`).

  Args:
    annotations_file: JSON file containing bounding box annotations.
    image_dir: Directory containing the image files.
    output_path: Path to output tf.Record file.
    num_shards: number of output file shards.
  """
  with contextlib2.ExitStack() as tf_record_close_stack, \
      tf.gfile.GFile(annotations_file, 'r') as fid:
    # The shard writers are registered on the ExitStack, so they are closed
    # when this `with` block exits.
    output_tfrecords = dataset_utils.open_sharded_output_tfrecords(
        tf_record_close_stack, output_path, num_shards)
    groundtruth_data = json.load(fid)
    images = groundtruth_data['images']
    # JSON object keys are always strings, so the keys of the parsed
    # annotations are converted to int before being looked up with
    # image['id']. dict.items() is used because dict.iteritems() does not
    # exist on Python 3; items() works on both Python 2 and 3.
    annotations_index = {
        int(k): v for k, v in groundtruth_data['annotations'].items()}

    for idx, image in enumerate(images):
      if idx % 100 == 0:
        tf.logging.info('On image %d of %d', idx, len(images))
      annotations = annotations_index[image['id']]
      tf_example = _create_tf_example(image, annotations, image_dir)
      shard_idx = idx % num_shards
      output_tfrecords[shard_idx].write(tf_example.SerializeToString())
|
||||
|
||||
|
||||
def _create_tf_example(image, annotations, image_dir):
  """Builds a tf.Example proto from one image and its filtered annotations.

  COCO boxes are stored as [x, y, width, height] in absolute pixels with
  (x, y) the top-left corner. They are converted here to xmin/xmax/ymin/ymax
  values divided by the image width or height.

  Args:
    image: dict describing the image; the keys read here are 'height',
      'width', 'file_name' and 'id'.
    annotations: dict with 'objects' (a list of dicts with 'area' and 'bbox')
      and 'label'.
    image_dir: directory containing the image files.

  Returns:
    tf_example: The converted tf.Example
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  with tf.gfile.GFile(os.path.join(image_dir, filename), 'rb') as fid:
    encoded_jpg = fid.read()
  # The opened image is not used further; the call is kept because it fails
  # on bytes that PIL cannot identify as an image.
  PIL.Image.open(io.BytesIO(encoded_jpg))
  key = hashlib.sha256(encoded_jpg).hexdigest()

  objects = annotations['objects']
  boxes = [tuple(obj['bbox']) for obj in objects]
  xmin = [float(x) / image_width for (x, _, _, _) in boxes]
  xmax = [float(x + width) / image_width for (x, _, width, _) in boxes]
  ymin = [float(y) / image_height for (_, y, _, _) in boxes]
  ymax = [float(y + height) / image_height for (_, y, _, height) in boxes]
  area = [obj['area'] for obj in objects]

  int64_feature = dataset_utils.int64_feature
  bytes_feature = dataset_utils.bytes_feature
  float_list_feature = dataset_utils.float_list_feature
  feature_dict = {
      'image/height': int64_feature(image_height),
      'image/width': int64_feature(image_width),
      'image/filename': bytes_feature(filename.encode('utf8')),
      'image/source_id': bytes_feature(str(image_id).encode('utf8')),
      'image/key/sha256': bytes_feature(key.encode('utf8')),
      'image/encoded': bytes_feature(encoded_jpg),
      'image/format': bytes_feature('jpeg'.encode('utf8')),
      'image/class/label': int64_feature(annotations['label']),
      'image/object/bbox/xmin': float_list_feature(xmin),
      'image/object/bbox/xmax': float_list_feature(xmax),
      'image/object/bbox/ymin': float_list_feature(ymin),
      'image/object/bbox/ymax': float_list_feature(ymax),
      'image/object/area': float_list_feature(area),
  }
  return tf.train.Example(features=tf.train.Features(feature=feature_dict))
|
||||
+99
@@ -0,0 +1,99 @@
|
||||
#!/bin/bash
|
||||
# Copyright 2016 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
# Script to download ImageNet Challenge 2012 training and validation data set.
|
||||
#
|
||||
# Downloads and decompresses raw images and bounding boxes.
|
||||
#
|
||||
# **IMPORTANT**
|
||||
# To download the raw images, the user must create an account with image-net.org
|
||||
# and generate a username and access_key. The latter two are required for
|
||||
# downloading the raw images.
|
||||
#
|
||||
# usage:
|
||||
# ./download_imagenet.sh [dirname]
|
||||
set -e
|
||||
|
||||
if [ "x$IMAGENET_ACCESS_KEY" == x -o "x$IMAGENET_USERNAME" == x ]; then
|
||||
cat <<END
|
||||
In order to download the imagenet data, you have to create an account with
|
||||
image-net.org. This will get you a username and an access key. You can set the
|
||||
IMAGENET_USERNAME and IMAGENET_ACCESS_KEY environment variables, or you can
|
||||
enter the credentials here.
|
||||
END
|
||||
read -p "Username: " IMAGENET_USERNAME
|
||||
read -p "Access key: " IMAGENET_ACCESS_KEY
|
||||
fi
|
||||
|
||||
OUTDIR="${1:-./imagenet-data}"
|
||||
SYNSETS_FILE="${2:-./synsets.txt}"
|
||||
|
||||
echo "Saving downloaded files to $OUTDIR"
|
||||
mkdir -p "${OUTDIR}"
|
||||
CURRENT_DIR=$(pwd)
|
||||
BBOX_DIR="${OUTDIR}bounding_boxes"
|
||||
mkdir -p "${BBOX_DIR}"
|
||||
cd "${OUTDIR}"
|
||||
|
||||
# Download and process all of the ImageNet bounding boxes.
|
||||
BASE_URL="http://www.image-net.org/challenges/LSVRC/2012/nnoupb"
|
||||
|
||||
# See here for details: http://www.image-net.org/download-bboxes
|
||||
BOUNDING_BOX_ANNOTATIONS="${BASE_URL}/ILSVRC2012_bbox_train_v2.tar.gz"
|
||||
BBOX_TAR_BALL="${BBOX_DIR}/annotations.tar.gz"
|
||||
echo "Downloading bounding box annotations."
|
||||
wget "${BOUNDING_BOX_ANNOTATIONS}" -O "${BBOX_TAR_BALL}"
|
||||
echo "Uncompressing bounding box annotations ..."
|
||||
tar xzf "${BBOX_TAR_BALL}" -C "${BBOX_DIR}"
|
||||
|
||||
LABELS_ANNOTATED="${BBOX_DIR}/*"
|
||||
NUM_XML=$(ls -1 ${LABELS_ANNOTATED} | wc -l)
|
||||
echo "Identified ${NUM_XML} bounding box annotations."
|
||||
|
||||
# Download and uncompress all images from the ImageNet 2012 validation dataset.
|
||||
VALIDATION_TARBALL="ILSVRC2012_img_val.tar"
|
||||
OUTPUT_PATH="${OUTDIR}validation/"
|
||||
mkdir -p "${OUTPUT_PATH}"
|
||||
cd "${OUTDIR}/.."
|
||||
echo "Downloading ${VALIDATION_TARBALL} to ${OUTPUT_PATH}."
|
||||
wget -nd -c "${BASE_URL}/${VALIDATION_TARBALL}"
|
||||
tar xf "${VALIDATION_TARBALL}" -C "${OUTPUT_PATH}"
|
||||
|
||||
# Download all images from the ImageNet 2012 train dataset.
|
||||
TRAIN_TARBALL="ILSVRC2012_img_train.tar"
|
||||
OUTPUT_PATH="${OUTDIR}train/"
|
||||
mkdir -p "${OUTPUT_PATH}"
|
||||
cd "${OUTDIR}/.."
|
||||
echo "Downloading ${TRAIN_TARBALL} to ${OUTPUT_PATH}."
|
||||
wget -nd -c "${BASE_URL}/${TRAIN_TARBALL}"
|
||||
|
||||
# Un-compress the individual tar-files within the train tar-file.
|
||||
echo "Uncompressing individual train tar-balls in the training data."
|
||||
|
||||
# For every synset listed in SYNSETS_FILE, extract that synset's tar-ball from
# the train tar-file and unpack it into its own directory under OUTPUT_PATH.
while read SYNSET; do
  # Skip blank lines: with an empty SYNSET the cleanup below would target
  # everything directly under OUTPUT_PATH.
  if [ -z "${SYNSET}" ]; then
    continue
  fi

  echo "Processing: ${SYNSET}"

  # Create a directory and delete anything there.
  # The glob is kept outside the quotes on purpose: a quoted "*" is passed to
  # rm as a literal file name and never matches the directory's contents.
  mkdir -p "${OUTPUT_PATH}/${SYNSET}"
  rm -rf "${OUTPUT_PATH}/${SYNSET}"/*

  # Uncompress into the directory.
  tar xf "${TRAIN_TARBALL}" "${SYNSET}.tar"
  tar xf "${SYNSET}.tar" -C "${OUTPUT_PATH}/${SYNSET}/"
  rm -f "${SYNSET}.tar"

  echo "Finished processing: ${SYNSET}"
done < "${SYNSETS_FILE}"
|
||||
+99
@@ -0,0 +1,99 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Provides data for the flowers dataset.
|
||||
|
||||
The dataset scripts used to create the dataset can be found at:
|
||||
tensorflow/models/research/slim/datasets/download_and_convert_flowers.py
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
from datasets import dataset_utils
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
_FILE_PATTERN = 'flowers_%s_*.tfrecord'
|
||||
|
||||
SPLITS_TO_SIZES = {'train': 3320, 'validation': 350}
|
||||
|
||||
_NUM_CLASSES = 5
|
||||
|
||||
_ITEMS_TO_DESCRIPTIONS = {
|
||||
'image': 'A color image of varying size.',
|
||||
'label': 'A single integer between 0 and 4',
|
||||
}
|
||||
|
||||
|
||||
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
  """Describes how to read one split of the flowers dataset.

  Args:
    split_name: A train/validation split name.
    dataset_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It must contain a '%s' placeholder, which is replaced by the split name.
      Falls back to the module default when empty or None.
    reader: The TensorFlow reader type. `tf.TFRecordReader` is used when None.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid train/validation split.
  """
  if split_name not in SPLITS_TO_SIZES:
    raise ValueError('split name %s was not recognized.' % split_name)

  pattern = file_pattern or _FILE_PATTERN
  data_sources = os.path.join(dataset_dir, pattern % split_name)

  # None is accepted in the signature so that dataset_factory can rely on the
  # default reader.
  record_reader = tf.TFRecordReader if reader is None else reader

  # How the serialized tf.Example fields are parsed ...
  keys_to_features = {
      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),
      'image/class/label': tf.FixedLenFeature(
          [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
  }
  # ... and how the parsed fields map onto the items of the dataset.
  items_to_handlers = {
      'image': slim.tfexample_decoder.Image(),
      'label': slim.tfexample_decoder.Tensor('image/class/label'),
  }
  decoder = slim.tfexample_decoder.TFExampleDecoder(
      keys_to_features, items_to_handlers)

  if dataset_utils.has_labels(dataset_dir):
    labels_to_names = dataset_utils.read_label_file(dataset_dir)
  else:
    labels_to_names = None

  return slim.dataset.Dataset(
      data_sources=data_sources,
      reader=record_reader,
      decoder=decoder,
      num_samples=SPLITS_TO_SIZES[split_name],
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      num_classes=_NUM_CLASSES,
      labels_to_names=labels_to_names)
|
||||
+199
@@ -0,0 +1,199 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Provides data for the ImageNet ILSVRC 2012 Dataset plus some bounding boxes.
|
||||
|
||||
Some images have one or more bounding boxes associated with the label of the
|
||||
image. See details here: http://image-net.org/download-bboxes
|
||||
|
||||
ImageNet is based upon WordNet 3.0. To uniquely identify a synset, we use
|
||||
"WordNet ID" (wnid), which is a concatenation of POS ( i.e. part of speech )
|
||||
and SYNSET OFFSET of WordNet. For more information, please refer to the
|
||||
WordNet documentation[http://wordnet.princeton.edu/wordnet/documentation/].
|
||||
|
||||
"There are bounding boxes for over 3000 popular synsets available.
|
||||
For each synset, there are on average 150 images with bounding boxes."
|
||||
|
||||
WARNING: Don't use for object detection, in this case all the bounding boxes
|
||||
of the image belong to just one class.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
from six.moves import urllib
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
from datasets import dataset_utils
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
# TODO(nsilberman): Add tfrecord file type once the script is updated.
|
||||
_FILE_PATTERN = '%s-*'
|
||||
|
||||
_SPLITS_TO_SIZES = {
|
||||
'train': 1281167,
|
||||
'validation': 50000,
|
||||
}
|
||||
|
||||
_ITEMS_TO_DESCRIPTIONS = {
|
||||
'image': 'A color image of varying height and width.',
|
||||
'label': 'The label id of the image, integer between 0 and 999',
|
||||
'label_text': 'The text of the label.',
|
||||
'object/bbox': 'A list of bounding boxes.',
|
||||
'object/label': 'A list of labels, one per each object.',
|
||||
}
|
||||
|
||||
_NUM_CLASSES = 1001
|
||||
|
||||
# If set to false, will not try to set label_to_names in dataset
|
||||
# by reading them from labels.txt or github.
|
||||
LOAD_READABLE_NAMES = True
|
||||
|
||||
|
||||
def create_readable_names_for_imagenet_labels():
  """Create a dict mapping label id to human readable string.

  Two text files are downloaded:
    * a synset file listing the synset labels used by the ILSVRC competition,
      one synset per line, e.g.
          #   n01440764
          #   n01443537
    * a synset-to-human file mapping every ImageNet synset to a readable name,
      stored as tab-separated values, e.g.
          #   n02119247    black fox
          #   n02119359    silver fox

  Each synset is assigned an integer in the order it appears in the synset
  file, starting from 1 (0 is reserved for the background class).

  Code is based on
  https://github.com/tensorflow/models/blob/master/research/inception/inception/data/build_imagenet_data.py#L463

  Returns:
    labels_to_names: dictionary where keys are integers from 0 to 1000
      and values are human-readable names.

  Raises:
    AssertionError: if a downloaded file does not have the expected number of
      lines (1000 and 21842) or a metadata line is not a two-column TSV row.
    KeyError: if a synset from the synset file is missing from the metadata.
  """

  # pylint: disable=g-line-too-long
  base_url = 'https://raw.githubusercontent.com/tensorflow/models/master/research/inception/inception/data/'
  # base_url already ends with '/', so the file names are appended directly
  # (the previous '{}/name' form produced a double slash in the URL).
  synset_url = '{}imagenet_lsvrc_2015_synsets.txt'.format(base_url)
  synset_to_human_url = '{}imagenet_metadata.txt'.format(base_url)

  filename, _ = urllib.request.urlretrieve(synset_url)
  # Use a context manager so the downloaded file is closed deterministically.
  with open(filename) as synset_file:
    synset_list = [line.strip() for line in synset_file]
  num_synsets_in_ilsvrc = len(synset_list)
  assert num_synsets_in_ilsvrc == 1000

  filename, _ = urllib.request.urlretrieve(synset_to_human_url)
  with open(filename) as metadata_file:
    synset_to_human_list = metadata_file.readlines()
  num_synsets_in_all_imagenet = len(synset_to_human_list)
  assert num_synsets_in_all_imagenet == 21842

  synset_to_human = {}
  for line in synset_to_human_list:
    parts = line.strip().split('\t')
    assert len(parts) == 2
    synset, human = parts
    synset_to_human[synset] = human

  labels_to_names = {0: 'background'}
  for label_index, synset in enumerate(synset_list, 1):
    labels_to_names[label_index] = synset_to_human[synset]

  return labels_to_names
|
||||
|
||||
|
||||
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
  """Describes one split of the ImageNet TFRecord data as a slim `Dataset`.

  Args:
    split_name: Name of the split; must be a key of `_SPLITS_TO_SIZES`.
    dataset_dir: Directory holding the dataset files.
    file_pattern: Optional file name pattern containing a '%s' placeholder
      that receives the split name. A falsy value selects `_FILE_PATTERN`.
    reader: Optional TensorFlow reader type; `tf.TFRecordReader` when None.

  Returns:
    A `slim.dataset.Dataset` for the requested split.

  Raises:
    ValueError: if `split_name` is not a key of `_SPLITS_TO_SIZES`.
  """
  if split_name not in _SPLITS_TO_SIZES:
    raise ValueError('split name %s was not recognized.' % split_name)

  pattern = file_pattern or _FILE_PATTERN
  data_sources = os.path.join(dataset_dir, pattern % split_name)

  # None is accepted in the signature so dataset_factory can use the default.
  record_reader = tf.TFRecordReader if reader is None else reader

  keys_to_features = {
      'image/encoded':
          tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format':
          tf.FixedLenFeature((), tf.string, default_value='jpeg'),
      'image/class/label':
          tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
      'image/class/text':
          tf.FixedLenFeature([], dtype=tf.string, default_value=''),
      'image/object/bbox/xmin':
          tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/ymin':
          tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/xmax':
          tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/ymax':
          tf.VarLenFeature(dtype=tf.float32),
      'image/object/class/label':
          tf.VarLenFeature(dtype=tf.int64),
  }

  tfexample_decoder = slim.tfexample_decoder
  items_to_handlers = {
      'image':
          tfexample_decoder.Image('image/encoded', 'image/format'),
      'label':
          tfexample_decoder.Tensor('image/class/label'),
      'label_text':
          tfexample_decoder.Tensor('image/class/text'),
      'object/bbox':
          tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                        'image/object/bbox/'),
      'object/label':
          tfexample_decoder.Tensor('image/object/class/label'),
  }
  example_decoder = tfexample_decoder.TFExampleDecoder(
      keys_to_features, items_to_handlers)

  # Readable names come from the label file in dataset_dir when it exists;
  # otherwise they are generated once and written there for later calls.
  labels_to_names = None
  if LOAD_READABLE_NAMES:
    if not dataset_utils.has_labels(dataset_dir):
      labels_to_names = create_readable_names_for_imagenet_labels()
      dataset_utils.write_label_file(labels_to_names, dataset_dir)
    else:
      labels_to_names = dataset_utils.read_label_file(dataset_dir)

  dataset_kwargs = dict(
      data_sources=data_sources,
      reader=record_reader,
      decoder=example_decoder,
      num_samples=_SPLITS_TO_SIZES[split_name],
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      num_classes=_NUM_CLASSES,
      labels_to_names=labels_to_names)
  return slim.dataset.Dataset(**dataset_kwargs)
|
||||
+50000
File diff suppressed because it is too large
Load Diff
+1000
File diff suppressed because it is too large
Load Diff
+21842
File diff suppressed because it is too large
Load Diff
+99
@@ -0,0 +1,99 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Provides data for the MNIST dataset.
|
||||
|
||||
The dataset scripts used to create the dataset can be found at:
|
||||
tensorflow/models/research/slim/datasets/download_and_convert_mnist.py
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
from datasets import dataset_utils
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
_FILE_PATTERN = 'mnist_%s.tfrecord'
|
||||
|
||||
_SPLITS_TO_SIZES = {'train': 60000, 'test': 10000}
|
||||
|
||||
_NUM_CLASSES = 10
|
||||
|
||||
_ITEMS_TO_DESCRIPTIONS = {
|
||||
'image': 'A [28 x 28 x 1] grayscale image.',
|
||||
'label': 'A single integer between 0 and 9',
|
||||
}
|
||||
|
||||
|
||||
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
  """Describes one split of the MNIST TFRecord data as a slim `Dataset`.

  Args:
    split_name: Name of the split; must be a key of `_SPLITS_TO_SIZES`.
    dataset_dir: Directory holding the dataset files.
    file_pattern: Optional file name pattern containing a '%s' placeholder
      that receives the split name. A falsy value selects `_FILE_PATTERN`.
    reader: Optional TensorFlow reader type; `tf.TFRecordReader` when None.

  Returns:
    A `slim.dataset.Dataset` for the requested split.

  Raises:
    ValueError: if `split_name` is not a key of `_SPLITS_TO_SIZES`.
  """
  if split_name not in _SPLITS_TO_SIZES:
    raise ValueError('split name %s was not recognized.' % split_name)

  pattern = file_pattern or _FILE_PATTERN
  data_sources = os.path.join(dataset_dir, pattern % split_name)

  # None is accepted in the signature so dataset_factory can use the default.
  record_reader = tf.TFRecordReader if reader is None else reader

  keys_to_features = {
      'image/encoded':
          tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format':
          tf.FixedLenFeature((), tf.string, default_value='raw'),
      'image/class/label':
          tf.FixedLenFeature(
              [1], tf.int64, default_value=tf.zeros([1], dtype=tf.int64)),
  }

  tfexample_decoder = slim.tfexample_decoder
  items_to_handlers = {
      'image': tfexample_decoder.Image(shape=[28, 28, 1], channels=1),
      'label': tfexample_decoder.Tensor('image/class/label', shape=[]),
  }
  example_decoder = tfexample_decoder.TFExampleDecoder(
      keys_to_features, items_to_handlers)

  # Label names are only available when dataset_dir contains a label file.
  if dataset_utils.has_labels(dataset_dir):
    labels_to_names = dataset_utils.read_label_file(dataset_dir)
  else:
    labels_to_names = None

  dataset_kwargs = dict(
      data_sources=data_sources,
      reader=record_reader,
      decoder=example_decoder,
      num_samples=_SPLITS_TO_SIZES[split_name],
      num_classes=_NUM_CLASSES,
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      labels_to_names=labels_to_names)
  return slim.dataset.Dataset(**dataset_kwargs)
|
||||
+83
@@ -0,0 +1,83 @@
|
||||
#!/usr/bin/python
|
||||
# Copyright 2016 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
r"""Reorganize the ImageNet 2012 Challenge validation images by synset label.
|
||||
|
||||
Associate the ImageNet 2012 Challenge validation data set with labels.
|
||||
|
||||
The raw ImageNet validation data set is expected to reside in JPEG files
|
||||
located in the following directory structure.
|
||||
|
||||
data_dir/ILSVRC2012_val_00000001.JPEG
|
||||
data_dir/ILSVRC2012_val_00000002.JPEG
|
||||
...
|
||||
data_dir/ILSVRC2012_val_00050000.JPEG
|
||||
|
||||
This script moves the files into a directory structure like such:
|
||||
data_dir/n01440764/ILSVRC2012_val_00000293.JPEG
|
||||
data_dir/n01440764/ILSVRC2012_val_00000543.JPEG
|
||||
...
|
||||
where 'n01440764' is the unique synset label associated with
|
||||
these images.
|
||||
|
||||
This directory reorganization requires a mapping from validation image
|
||||
number (i.e. suffix of the original file) to the associated label. This
|
||||
is provided in the ImageNet development kit via a Matlab file.
|
||||
|
||||
In order to make life easier and divorce ourselves from Matlab, we instead
|
||||
supply a custom text file that provides this mapping for us.
|
||||
|
||||
Sample usage:
|
||||
./preprocess_imagenet_validation_data.py ILSVRC2012_img_val \
|
||||
imagenet_2012_validation_synset_labels.txt
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from six.moves import xrange # pylint: disable=redefined-builtin
|
||||
|
||||
|
||||
if __name__ == '__main__':
  # Usage: preprocess_imagenet_validation_data.py <data dir> <labels file>
  # Moves every validation JPEG in <data dir> into a sub-directory named after
  # its synset label; line i of <labels file> is the label of image number i.
  if len(sys.argv) < 3:
    print('Invalid usage\n'
          'usage: preprocess_imagenet_validation_data.py '
          '<validation data dir> <validation labels file>')
    sys.exit(-1)
  data_dir = sys.argv[1]
  validation_labels_file = sys.argv[2]

  # Read in the 50000 synsets associated with the validation data set.
  # The context manager closes the labels file once it has been read.
  with open(validation_labels_file) as labels_file:
    labels = [line.strip() for line in labels_file]
  unique_labels = set(labels)

  # Make all sub-directories in the validation data dir. Directories left
  # over from an earlier (possibly interrupted) run are reused, not an error.
  for label in unique_labels:
    labeled_data_dir = os.path.join(data_dir, label)
    if not os.path.isdir(labeled_data_dir):
      os.makedirs(labeled_data_dir)

  # Move all of the image to the appropriate sub-directory.
  for i, label in enumerate(labels):
    basename = 'ILSVRC2012_val_000%.5d.JPEG' % (i + 1)
    original_filename = os.path.join(data_dir, basename)
    new_filename = os.path.join(data_dir, label, basename)
    if not os.path.exists(original_filename):
      if os.path.exists(new_filename):
        # Already moved by an earlier run; nothing left to do for this image.
        continue
      print('Failed to find: ', original_filename)
      sys.exit(-1)
    os.rename(original_filename, new_filename)
|
||||
+253
@@ -0,0 +1,253 @@
|
||||
#!/usr/bin/python
|
||||
# Copyright 2016 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Process the ImageNet Challenge bounding boxes for TensorFlow model training.
|
||||
|
||||
This script is called as
|
||||
|
||||
process_bounding_boxes.py <dir> [synsets-file]
|
||||
|
||||
Where <dir> is a directory containing the downloaded and unpacked bounding box
|
||||
data. If [synsets-file] is supplied, then only the bounding boxes whose
|
||||
synsets are contained within this file are returned. Note that the
|
||||
[synsets-file] file contains synset ids, one per line.
|
||||
|
||||
The script dumps out a CSV text file in which each line contains an entry.
|
||||
n00007846_64193.JPEG,0.0060,0.2620,0.7545,0.9940
|
||||
|
||||
The entry can be read as:
|
||||
<JPEG file name>, <xmin>, <ymin>, <xmax>, <ymax>
|
||||
|
||||
The bounding box for <JPEG file name> contains two points (xmin, ymin) and
|
||||
(xmax, ymax) specifying the lower-left corner and upper-right corner of a
|
||||
bounding box in *relative* coordinates.
|
||||
|
||||
The user supplies a directory where the XML files reside. The directory
|
||||
structure in the directory <dir> is assumed to look like this:
|
||||
|
||||
<dir>/nXXXXXXXX/nXXXXXXXX_YYYY.xml
|
||||
|
||||
Each XML file contains a bounding box annotation. The script:
|
||||
|
||||
(1) Parses the XML file and extracts the filename, label and bounding box info.
|
||||
|
||||
(2) The bounding box is specified in the XML files as integer (xmin, ymin) and
|
||||
(xmax, ymax) *relative* to image size displayed to the human annotator. The
|
||||
size of the image displayed to the human annotator is stored in the XML file
|
||||
as integer (height, width).
|
||||
|
||||
Note that the displayed size will differ from the actual size of the image
|
||||
downloaded from image-net.org. To make the bounding box annotation useable,
|
||||
we convert bounding box to floating point numbers relative to displayed
|
||||
height and width of the image.
|
||||
|
||||
Note that each XML file might contain N bounding box annotations.
|
||||
|
||||
Note that the points are all clamped at a range of [0.0, 1.0] because some
|
||||
human annotations extend outside the range of the supplied image.
|
||||
|
||||
See details here: http://image-net.org/download-bboxes
|
||||
|
||||
(3) By default, the script outputs all valid bounding boxes. If a
|
||||
[synsets-file] is supplied, only the subset of bounding boxes associated
|
||||
with those synsets are outputted. Importantly, one can supply a list of
|
||||
synsets in the ImageNet Challenge and output the list of bounding boxes
|
||||
associated with the training images of the ILSVRC.
|
||||
|
||||
We use these bounding boxes to inform the random distortion of images
|
||||
supplied to the network.
|
||||
|
||||
If you run this script successfully, you will see the following output
|
||||
to stderr:
|
||||
> Finished processing 544546 XML files.
|
||||
> Skipped 0 XML files not in ImageNet Challenge.
|
||||
> Skipped 0 bounding boxes not in ImageNet Challenge.
|
||||
> Wrote 615299 bounding boxes from 544546 annotated images.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import glob
|
||||
import os.path
|
||||
import sys
|
||||
import xml.etree.ElementTree as ET
|
||||
from six.moves import xrange # pylint: disable=redefined-builtin
|
||||
|
||||
|
||||
class BoundingBox(object):
  """Empty attribute container for a single bounding-box annotation.

  The class defines no fields of its own; ProcessXMLAnnotation assigns the
  raw and scaled coordinates, image size, filename and label as attributes.
  """
|
||||
|
||||
|
||||
def GetItem(name, root, index=0):
  """Returns the text of the `index`-th element tagged `name` under `root`.

  Args:
    name: Tag name to search for.
    root: Element whose subtree is searched with `root.iter(name)`.
    index: Zero-based position of the wanted match in iteration order.

  Returns:
    The `text` of the matching element (which may be None), or the integer
    -1 when there is no match at position `index`.
  """
  for position, element in enumerate(root.iter(name)):
    if position == index:
      return element.text
  # Failed to find "index" occurrence of item.
  return -1
|
||||
|
||||
|
||||
def GetInt(name, root, index=0):
  """Returns the `index`-th `name` item under `root` converted with int().

  Because GetItem returns -1 when the item is missing, a missing item yields
  -1 here as well.
  """
  item = GetItem(name, root, index)
  return int(item)
|
||||
|
||||
|
||||
def FindNumberBoundingBoxes(root):
  """Returns the number of bounding boxes annotated under `root`.

  ProcessXMLAnnotation reads the i-th `xmin`, `ymin`, `xmax` and `ymax`
  elements together as box i, so the box count is the number of `xmin`
  elements in the tree.

  The elements are counted directly in one pass. The previous implementation
  probed GetInt('xmin', root, index) until it returned the "not found"
  sentinel -1, which re-scanned the tree for every index and stopped early
  whenever a real annotation had an xmin of -1, dropping that box and every
  later one. As a consequence, the xmin text is no longer converted with
  int() here; a malformed value is reported when the box is actually read.

  Args:
    root: Root element of a parsed annotation XML tree.

  Returns:
    The number of `xmin` elements found in the tree (0 if there are none).
  """
  return sum(1 for _ in root.iter('xmin'))
|
||||
|
||||
|
||||
def ProcessXMLAnnotation(xml_file):
  """Parses one annotation XML file into a list of BoundingBox objects.

  Args:
    xml_file: Path of the XML annotation file.

  Returns:
    A list with one BoundingBox per annotated box, each carrying the raw
    integer coordinates, the image width/height, filename, label and the
    coordinates scaled by width/height and clamped to [0.0, 1.0]. Returns
    None (after printing a message to stderr) if the file cannot be parsed.
  """

  def _clamp_unit(value):
    """Clamps `value` to the closed interval [0.0, 1.0]."""
    return min(max(value, 0.0), 1.0)

  # pylint: disable=broad-except
  try:
    tree = ET.parse(xml_file)
  except Exception:
    print('Failed to parse: ' + xml_file, file=sys.stderr)
    return None
  # pylint: enable=broad-except
  root = tree.getroot()

  boxes = []
  for index in xrange(FindNumberBoundingBoxes(root)):
    box = BoundingBox()
    # Grab the 'index' annotation.
    box.xmin = GetInt('xmin', root, index)
    box.ymin = GetInt('ymin', root, index)
    box.xmax = GetInt('xmax', root, index)
    box.ymax = GetInt('ymax', root, index)

    # Image-level fields are always read from their first occurrence.
    box.width = GetInt('width', root)
    box.height = GetInt('height', root)
    box.filename = GetItem('filename', root) + '.JPEG'
    box.label = GetItem('name', root)

    # Express the corners as fractions of the displayed image size.
    rel_xmin = float(box.xmin) / float(box.width)
    rel_xmax = float(box.xmax) / float(box.width)
    rel_ymin = float(box.ymin) / float(box.height)
    rel_ymax = float(box.ymax) / float(box.height)

    # Some images contain bounding box annotations that
    # extend outside of the supplied image. See, e.g.
    # n03127925/n03127925_147.xml
    # Additionally, for some bounding boxes, the min > max
    # or the box is entirely outside of the image.
    # Ordering each pair first and then clamping handles all of these.
    box.xmin_scaled = _clamp_unit(min(rel_xmin, rel_xmax))
    box.xmax_scaled = _clamp_unit(max(rel_xmin, rel_xmax))
    box.ymin_scaled = _clamp_unit(min(rel_ymin, rel_ymax))
    box.ymax_scaled = _clamp_unit(max(rel_ymin, rel_ymax))

    boxes.append(box)

  return boxes
|
||||
|
||||
if __name__ == '__main__':
  # Usage: process_bounding_boxes.py <dir> [synsets-file]
  # Writes one CSV line per accepted bounding box to stdout and progress and
  # summary statistics to stderr.
  if len(sys.argv) < 2 or len(sys.argv) > 3:
    print('Invalid usage\n'
          'usage: process_bounding_boxes.py <dir> [synsets-file]',
          file=sys.stderr)
    sys.exit(-1)

  xml_files = glob.glob(sys.argv[1] + '/*/*.xml')
  print('Identified %d XML files in %s' % (len(xml_files), sys.argv[1]),
        file=sys.stderr)

  if len(sys.argv) == 3:
    # The context manager closes the synsets file once it has been read.
    with open(sys.argv[2]) as synsets_file:
      labels = set(line.strip() for line in synsets_file)
    print('Identified %d synset IDs in %s' % (len(labels), sys.argv[2]),
          file=sys.stderr)
  else:
    labels = None

  skipped_boxes = 0
  skipped_files = 0
  saved_boxes = 0
  saved_files = 0
  for file_index, one_file in enumerate(xml_files):
    # Example: <...>/n06470073/n00141669_6790.xml
    label = os.path.basename(os.path.dirname(one_file))

    # Determine if the annotation is from an ImageNet Challenge label.
    if labels is not None and label not in labels:
      skipped_files += 1
      continue

    bboxes = ProcessXMLAnnotation(one_file)
    assert bboxes is not None, 'No bounding boxes found in ' + one_file

    found_box = False
    for bbox in bboxes:
      # Note: There is a slight bug in the bounding box annotation data.
      # Many of the dog labels have the human label 'Scottish_deerhound'
      # instead of the synset ID 'n02092002' in the bbox.label field. As a
      # simple hack to overcome this issue, we only exclude bbox labels
      # *which are synset ID's* that do not match original synset label for
      # the XML file.
      if (labels is not None and bbox.label != label and
          bbox.label in labels):
        skipped_boxes += 1
        continue

      # Guard against improperly specified boxes.
      if (bbox.xmin_scaled >= bbox.xmax_scaled or
          bbox.ymin_scaled >= bbox.ymax_scaled):
        skipped_boxes += 1
        continue

      # Note bbox.filename occasionally contains '%s' in the name. This is
      # data set noise that is fixed by just using the basename of the XML file.
      image_filename = os.path.splitext(os.path.basename(one_file))[0]
      print('%s.JPEG,%.4f,%.4f,%.4f,%.4f' %
            (image_filename,
             bbox.xmin_scaled, bbox.ymin_scaled,
             bbox.xmax_scaled, bbox.ymax_scaled))

      saved_boxes += 1
      found_box = True
    if found_box:
      saved_files += 1
    else:
      skipped_files += 1

    # Report progress on every 5000th file index (starting with index 0).
    if not file_index % 5000:
      print('--> processed %d of %d XML files.' %
            (file_index + 1, len(xml_files)),
            file=sys.stderr)
      print('--> skipped %d boxes and %d XML files.' %
            (skipped_boxes, skipped_files), file=sys.stderr)

  print('Finished processing %d XML files.' % len(xml_files), file=sys.stderr)
  print('Skipped %d XML files not in ImageNet Challenge.' % skipped_files,
        file=sys.stderr)
  print('Skipped %d bounding boxes not in ImageNet Challenge.' % skipped_boxes,
        file=sys.stderr)
  print('Wrote %d bounding boxes from %d annotated images.' %
        (saved_boxes, saved_files),
        file=sys.stderr)
  print('Finished.', file=sys.stderr)
|
||||
+129
@@ -0,0 +1,129 @@
|
||||
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Provides data for Visual WakeWords Dataset with images+labels.
|
||||
|
||||
Visual WakeWords Dataset derives from the COCO dataset to design tiny models
|
||||
classifying two classes, such as person/not-person. The COCO annotations
|
||||
are filtered to two classes: person and not-person (or another user-defined
|
||||
category). Bounding boxes for small objects with area less than 5% of the image
|
||||
area are filtered out.
|
||||
See build_visualwakewords_data.py which generates the Visual WakeWords dataset
|
||||
annotations from the raw COCO dataset and converts them to TFRecord.
|
||||
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
from datasets import dataset_utils
|
||||
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
_FILE_PATTERN = '%s.record-*'
|
||||
|
||||
_SPLITS_TO_SIZES = {
|
||||
'train': 82783,
|
||||
'val': 40504,
|
||||
}
|
||||
|
||||
|
||||
_ITEMS_TO_DESCRIPTIONS = {
|
||||
'image': 'A color image of varying height and width.',
|
||||
'label': 'The label id of the image, an integer in {0, 1}',
|
||||
'object/bbox': 'A list of bounding boxes.',
|
||||
}
|
||||
|
||||
_NUM_CLASSES = 2
|
||||
|
||||
# labels file
|
||||
LABELS_FILENAME = 'labels.txt'
|
||||
|
||||
|
||||
def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
  """Describes one split of the Visual WakeWords data as a slim `Dataset`.

  Args:
    split_name: Name of the split; must be a key of `_SPLITS_TO_SIZES`.
    dataset_dir: Directory holding the dataset files.
    file_pattern: Optional file name pattern containing a '%s' placeholder
      that receives the split name. A falsy value selects `_FILE_PATTERN`.
    reader: Optional TensorFlow reader type; `tf.TFRecordReader` when None.

  Returns:
    A `slim.dataset.Dataset` for the requested split.

  Raises:
    ValueError: if `split_name` is not a key of `_SPLITS_TO_SIZES`.
  """
  if split_name not in _SPLITS_TO_SIZES:
    raise ValueError('split name %s was not recognized.' % split_name)

  pattern = file_pattern or _FILE_PATTERN
  data_sources = os.path.join(dataset_dir, pattern % split_name)

  # None is accepted in the signature so dataset_factory can use the default.
  record_reader = tf.TFRecordReader if reader is None else reader

  keys_to_features = {
      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature(
          (), tf.string, default_value='jpeg'),
      'image/class/label': tf.FixedLenFeature(
          [], dtype=tf.int64, default_value=-1),
      'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
      'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
  }

  tfexample_decoder = slim.tfexample_decoder
  items_to_handlers = {
      'image': tfexample_decoder.Image('image/encoded', 'image/format'),
      'label': tfexample_decoder.Tensor('image/class/label'),
      'object/bbox': tfexample_decoder.BoundingBox(
          ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
  }
  example_decoder = tfexample_decoder.TFExampleDecoder(
      keys_to_features, items_to_handlers)

  # Label names are only loaded when the labels file exists in dataset_dir.
  labels_path = os.path.join(dataset_dir, LABELS_FILENAME)
  if tf.gfile.Exists(labels_path):
    labels_to_names = dataset_utils.read_label_file(dataset_dir)
  else:
    labels_to_names = None

  dataset_kwargs = dict(
      data_sources=data_sources,
      reader=record_reader,
      decoder=example_decoder,
      num_samples=_SPLITS_TO_SIZES[split_name],
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      num_classes=_NUM_CLASSES,
      labels_to_names=labels_to_names)
  return slim.dataset.Dataset(**dataset_kwargs)
|
||||
+1
@@ -0,0 +1 @@
|
||||
|
||||
+677
@@ -0,0 +1,677 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Deploy Slim models across multiple clones and replicas.
|
||||
|
||||
# TODO(sguada) docstring paragraph by (a) motivating the need for the file and
|
||||
# (b) defining clones.
|
||||
|
||||
# TODO(sguada) describe the high-level components of model deployment.
|
||||
# E.g. "each model deployment is composed of several parts: a DeploymentConfig,
|
||||
# which captures A, B and C, an input_fn which loads data.. etc
|
||||
|
||||
To easily train a model on multiple GPUs or across multiple machines this
|
||||
module provides a set of helper functions: `create_clones`,
|
||||
`optimize_clones` and `deploy`.
|
||||
|
||||
Usage:
|
||||
|
||||
g = tf.Graph()
|
||||
|
||||
# Set up DeploymentConfig
|
||||
config = model_deploy.DeploymentConfig(num_clones=2, clone_on_cpu=True)
|
||||
|
||||
# Create the global step on the device storing the variables.
|
||||
with tf.device(config.variables_device()):
|
||||
global_step = slim.create_global_step()
|
||||
|
||||
# Define the inputs
|
||||
with tf.device(config.inputs_device()):
|
||||
images, labels = LoadData(...)
|
||||
inputs_queue = slim.data.prefetch_queue((images, labels))
|
||||
|
||||
# Define the optimizer.
|
||||
with tf.device(config.optimizer_device()):
|
||||
optimizer = tf.train.MomentumOptimizer(FLAGS.learning_rate, FLAGS.momentum)
|
||||
|
||||
# Define the model including the loss.
|
||||
def model_fn(inputs_queue):
|
||||
images, labels = inputs_queue.dequeue()
|
||||
predictions = CreateNetwork(images)
|
||||
slim.losses.log_loss(predictions, labels)
|
||||
|
||||
model_dp = model_deploy.deploy(config, model_fn, [inputs_queue],
|
||||
optimizer=optimizer)
|
||||
|
||||
# Run training.
|
||||
slim.learning.train(model_dp.train_op, my_log_dir,
|
||||
summary_op=model_dp.summary_op)
|
||||
|
||||
The Clone namedtuple holds together the values associated with each call to
|
||||
model_fn:
|
||||
* outputs: The return values of the calls to `model_fn()`.
|
||||
* scope: The scope used to create the clone.
|
||||
* device: The device used to create the clone.
|
||||
|
||||
DeployedModel namedtuple, holds together the values needed to train multiple
|
||||
clones:
|
||||
* train_op: An operation that runs the optimizer training op and includes
|
||||
all the update ops created by `model_fn`. Present only if an optimizer
|
||||
was specified.
|
||||
* summary_op: An operation that runs the summaries created by `model_fn`
|
||||
and process_gradients.
|
||||
* total_loss: A `Tensor` that contains the sum of all losses created by
|
||||
`model_fn` plus the regularization losses.
|
||||
* clones: List of `Clone` tuples returned by `create_clones()`.
|
||||
|
||||
DeploymentConfig parameters:
|
||||
* num_clones: Number of model clones to deploy in each replica.
|
||||
* clone_on_cpu: True if clones should be placed on CPU.
|
||||
* replica_id: Integer. Index of the replica for which the model is
|
||||
deployed. Usually 0 for the chief replica.
|
||||
* num_replicas: Number of replicas to use.
|
||||
* num_ps_tasks: Number of tasks for the `ps` job. 0 to not use replicas.
|
||||
* worker_job_name: A name for the worker job.
|
||||
* ps_job_name: A name for the parameter server job.
|
||||
|
||||
TODO(sguada):
|
||||
- describe side effect to the graph.
|
||||
- what happens to summaries and update_ops.
|
||||
- which graph collections are altered.
|
||||
- write a tutorial on how to use this.
|
||||
- analyze the possibility of calling deploy more than once.
|
||||
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import collections
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
# Names exported by `from <this module> import *`.
__all__ = [
    'create_clones',
    'deploy',
    'optimize_clones',
    'DeployedModel',
    'DeploymentConfig',
    'Clone',
]
|
||||
|
||||
# Record describing a single clone built during deployment:
#   outputs: whatever model_fn() returned for this clone.
#   scope:   the name scope the clone was created in.
#   device:  the device the clone was created on.
Clone = collections.namedtuple('Clone', ['outputs', 'scope', 'device'])
|
||||
|
||||
# Record returned by deploy():
#   train_op:   the training op (None when no optimizer was given).
#   summary_op: the merged summary op (None when there are no summaries).
#   total_loss: the loss `Tensor` (None when no loss was collected).
#   clones:     the list of `Clone` tuples that were created.
DeployedModel = collections.namedtuple(
    'DeployedModel', ['train_op', 'summary_op', 'total_loss', 'clones'])
|
||||
|
||||
# Default values for the DeploymentConfig parameters.
_deployment_params = dict(
    num_clones=1,
    clone_on_cpu=False,
    replica_id=0,
    num_replicas=1,
    num_ps_tasks=0,
    worker_job_name='worker',
    ps_job_name='ps',
)
|
||||
|
||||
|
||||
def create_clones(config, model_fn, args=None, kwargs=None):
  """Builds `config.num_clones` clones of a model by calling `model_fn`.

  For every clone, `model_fn(*args, **kwargs)` is called inside the clone's
  name scope and device. Its return value is stored, together with that scope
  and device, in a namedtuple `Clone(outputs, scope, device)`.

  Note: it is assumed that any loss created by `model_fn` is added to the
  tf.GraphKeys.LOSSES collection.

  To recover the losses, summaries or update_ops created by a clone use:
  ```python
    losses = tf.get_collection(tf.GraphKeys.LOSSES, clone.scope)
    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, clone.scope)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, clone.scope)
  ```

  While the clones are built, `slim.model_variable` and `slim.variable` are
  given `config.variables_device()` as their device, and each call to
  `model_fn` runs under `config.clone_device(i)`.

  Args:
    config: A DeploymentConfig object.
    model_fn: A callable. Called as `model_fn(*args, **kwargs)`.
    args: Optional sequence of positional arguments to pass to `model_fn`.
    kwargs: Optional dict of keyword arguments to pass to `model_fn`.

  Returns:
    A list of namedtuples `Clone`, one per clone, in creation order.
  """
  positional = args or []
  keywords = kwargs or {}
  built = []
  with slim.arg_scope([slim.model_variable, slim.variable],
                      device=config.variables_device()):
    for index in range(config.num_clones):
      # Only the first clone creates variables; later clones reuse them.
      reuse = True if index > 0 else None
      with tf.name_scope(config.clone_scope(index)) as clone_scope:
        clone_device = config.clone_device(index)
        with tf.device(clone_device):
          with tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
            outputs = model_fn(*positional, **keywords)
          built.append(Clone(outputs, clone_scope, clone_device))
  return built
|
||||
|
||||
|
||||
def _gather_clone_loss(clone, num_clones, regularization_losses):
  """Collects and sums the losses that belong to one clone.

  The losses registered under `clone.scope` in tf.GraphKeys.LOSSES are added
  together and, when more than one clone is deployed, divided by
  `num_clones`. The given regularization losses (if any) are summed and added
  on top. Scalar summaries are emitted for both components.

  Args:
    clone: A Clone namedtuple.
    num_clones: The number of clones being deployed.
    regularization_losses: Possibly empty list of regularization_losses
      to add to the clone losses.

  Returns:
    A tensor for the total loss for the clone. Can be None.
  """
  total = None
  # Kept separately because each component gets its own summary below.
  model_loss = None
  reg_loss = None
  with tf.device(clone.device):
    components = []
    collected = tf.get_collection(tf.GraphKeys.LOSSES, clone.scope)
    if collected:
      model_loss = tf.add_n(collected, name='clone_loss')
      if num_clones > 1:
        model_loss = tf.div(model_loss, 1.0 * num_clones,
                            name='scaled_clone_loss')
      components.append(model_loss)
    if regularization_losses:
      reg_loss = tf.add_n(regularization_losses, name='regularization_loss')
      components.append(reg_loss)
    if components:
      total = tf.add_n(components)
  # The summaries are created outside of the clone device block.
  if model_loss is not None:
    # An empty clone scope is dropped from the summary tag.
    tag_parts = [part for part in ('Losses', clone.scope, 'clone_loss')
                 if part]
    tf.summary.scalar('/'.join(tag_parts), model_loss)
  if reg_loss is not None:
    tf.summary.scalar('Losses/regularization_loss', reg_loss)
  return total
|
||||
|
||||
|
||||
def _optimize_clone(optimizer, clone, num_clones, regularization_losses,
                    **kwargs):
  """Computes the loss and the gradients for one clone.

  Args:
    optimizer: A tf.Optimizer object.
    clone: A Clone namedtuple.
    num_clones: The number of clones being deployed.
    regularization_losses: Possibly empty list of regularization_losses
      to add to the clone losses.
    **kwargs: Keyword arguments forwarded to `compute_gradients()`.

  Returns:
    A tuple (clone_loss, clone_grads_and_vars).
      - clone_loss: A tensor for the total loss for the clone. Can be None.
      - clone_grads_and_vars: The result of `optimizer.compute_gradients()`
        for the clone loss, or None when the clone has no loss.
  """
  clone_loss = _gather_clone_loss(clone, num_clones, regularization_losses)
  if clone_loss is None:
    return clone_loss, None
  # NOTE: the gradients are computed without entering
  # `tf.device(clone.device)`; the device-scoped variant of this call was
  # disabled in this file.
  grads_and_vars = optimizer.compute_gradients(clone_loss, **kwargs)
  return clone_loss, grads_and_vars
|
||||
|
||||
|
||||
def optimize_clones(clones, optimizer, regularization_losses=None, **kwargs):
  """Computes the losses and gradients for a list of `Clones`.

  Note: The regularization_losses are only added to the loss of the first
  clone.

  Args:
    clones: List of `Clones` created by `create_clones()`.
    optimizer: An `Optimizer` object.
    regularization_losses: Optional list of regularization losses. If None it
      will gather them from tf.GraphKeys.REGULARIZATION_LOSSES. Pass `[]` to
      exclude them.
    **kwargs: Keyword arguments forwarded to `compute_gradients`.

  Returns:
    A tuple (total_loss, grads_and_vars).
      - total_loss: A Tensor with the sum of the per-clone losses (each one
        as returned by `_gather_clone_loss()`), including the regularization
        loss.
      - grads_and_vars: A List of tuples (gradient, variable) containing the
        sum of the gradients for each variable.
  """
  clone_count = len(clones)
  if regularization_losses is None:
    regularization_losses = tf.get_collection(
        tf.GraphKeys.REGULARIZATION_LOSSES)
  losses = []
  per_clone_grads = []
  for clone in clones:
    with tf.name_scope(clone.scope):
      loss, grads = _optimize_clone(
          optimizer, clone, clone_count, regularization_losses, **kwargs)
      if loss is not None:
        losses.append(loss)
        per_clone_grads.append(grads)
      # Every clone after the first one gets no regularization losses.
      regularization_losses = None
  total_loss = tf.add_n(losses, name='total_loss')
  summed_grads = _sum_clones_gradients(per_clone_grads)
  return total_loss, summed_grads
|
||||
|
||||
|
||||
def deploy(config,
           model_fn,
           args=None,
           kwargs=None,
           optimizer=None,
           summarize_gradients=False):
  """Deploys a Slim-constructed model across multiple clones.

  `model_fn(*args, **kwargs)` is called `config.num_clones` times (through
  `create_clones()`) to build the clones; devices and scopes are taken from
  `config`.

  When `optimizer` is given, the clone gradients are computed, summed and
  applied, and a `train_op` is created that runs those updates together with
  the update ops of the first clone and then yields the total loss. Without
  an optimizer only the total loss is assembled and `train_op` stays None.

  Args:
    config: A `DeploymentConfig` object.
    model_fn: A callable. Called as `model_fn(*args, **kwargs)`.
    args: Optional sequence of positional arguments to pass to `model_fn`.
    kwargs: Optional dict of keyword arguments to pass to `model_fn`.
    optimizer: Optional `Optimizer` object. If passed the model is deployed
      for training with that optimizer.
    summarize_gradients: Whether or not to add summaries for the gradients.

  Returns:
    A `DeployedModel` namedtuple.
  """
  # Summaries that already exist before any clone is created.
  summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

  clones = create_clones(config, model_fn, args, kwargs)
  first_clone = clones[0]

  # Update ops of the first clone, e.g. the batch_norm statistics updates
  # created by model_fn.
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone.scope)

  train_op = None
  total_loss = None
  with tf.device(config.optimizer_device()):
    if optimizer:
      # The global step lives on the device that stores the variables.
      with tf.device(config.variables_device()):
        global_step = slim.get_or_create_global_step()

      total_loss, clones_gradients = optimize_clones(clones, optimizer)

      if clones_gradients:
        if summarize_gradients:
          summaries |= set(_add_gradients_summaries(clones_gradients))

        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        # train_op evaluates to the total loss after all updates have run.
        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
          train_op = tf.identity(total_loss, name='train_op')
    else:
      gathered = []
      regularization_losses = tf.get_collection(
          tf.GraphKeys.REGULARIZATION_LOSSES)
      clone_count = len(clones)
      for clone in clones:
        with tf.name_scope(clone.scope):
          loss = _gather_clone_loss(clone, clone_count,
                                    regularization_losses)
          if loss is not None:
            gathered.append(loss)
          # Every clone after the first one gets no regularization losses.
          regularization_losses = None
      if gathered:
        total_loss = tf.add_n(gathered, name='total_loss')

    # Summaries of the first clone: those created by model_fn and by either
    # optimize_clones() or _gather_clone_loss().
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                       first_clone.scope))

    if total_loss is not None:
      summaries.add(tf.summary.scalar('total_loss', total_loss))

    # Merge everything into a single op, if there is anything to merge.
    summary_op = (tf.summary.merge(list(summaries), name='summary_op')
                  if summaries else None)

  return DeployedModel(train_op, summary_op, total_loss, clones)
|
||||
|
||||
|
||||
def _sum_clones_gradients(clone_grads):
  """Sums, per shared variable, the gradients coming from all clones.

  This function assumes that the clone_grads has been scaled appropriately by
  1 / num_clones.

  Args:
    clone_grads: A List of List of tuples (gradient, variable), one list per
      `Clone`.

  Returns:
    List of tuples of (gradient, variable) where the gradient has been summed
    across all clones. Variables for which every clone reported a `None`
    gradient are left out.
  """
  summed = []
  # zip(*clone_grads) regroups the per-clone lists so that every item holds
  # the pairs of one variable across the clones:
  #   ((grad_var0_clone0, var0), ..., (grad_var0_cloneN, var0))
  for pairs in zip(*clone_grads):
    shared_var = pairs[0][1]
    present = []
    for grad, candidate in pairs:
      assert candidate == shared_var
      if grad is not None:
        present.append(grad)
    if not present:
      continue
    if len(present) == 1:
      # A single gradient needs no extra add op.
      total = present[0]
    else:
      total = tf.add_n(present, name=shared_var.op.name + '/sum_grads')
    summed.append((total, shared_var))
  return summed
|
||||
|
||||
|
||||
def _add_gradients_summaries(grads_and_vars):
  """Creates histogram summaries for the given gradients.

  For every pair with a gradient, one histogram of the gradient values and
  one of their global norm is created. Pairs without a gradient are only
  logged.

  Note: The summaries are also added to the SUMMARIES collection.

  Args:
    grads_and_vars: A list of gradient to variable pairs (tuples).

  Returns:
    The _list_ of the added summaries for grads_and_vars.
  """
  added = []
  for grad, var in grads_and_vars:
    if grad is None:
      tf.logging.info('Var %s has no gradient', var.op.name)
      continue
    # For IndexedSlices only the values are summarized.
    values = grad.values if isinstance(grad, tf.IndexedSlices) else grad
    added.append(tf.summary.histogram(var.op.name + ':gradient', values))
    added.append(tf.summary.histogram(var.op.name + ':gradient_norm',
                                      tf.global_norm([values])))
  return added
|
||||
|
||||
|
||||
class DeploymentConfig(object):
  """Configuration for deploying a model with `deploy()`.

  Pass an instance of this class to `deploy()` (or `create_clones()`) to
  specify how many clones to build and which devices and name scopes to use
  for the clones, the inputs, the optimizer and the variables.
  """

  def __init__(self,
               num_clones=1,
               clone_on_cpu=False,
               replica_id=0,
               num_replicas=1,
               num_ps_tasks=0,
               worker_job_name='worker',
               ps_job_name='ps'):
    """Create a DeploymentConfig.

    The config describes how to deploy a model across multiple clones and
    replicas. The model will be replicated `num_clones` times in each replica.
    If `clone_on_cpu` is True, each clone will be placed on CPU.

    If `num_ps_tasks` is 0, no job prefix is added to any device string, so
    `worker_job_name` and `ps_job_name` have no effect on the devices.

    If `num_replicas` is greater than 1, then `num_ps_tasks` must be positive
    and both `worker_job_name` and `ps_job_name` must be non-empty.

    Args:
      num_clones: Number of model clones to deploy in each replica.
      clone_on_cpu: If True clones would be placed on CPU.
      replica_id: Integer.  Index of the replica for which the model is
        deployed.  Usually 0 for the chief replica.
      num_replicas: Number of replicas to use.
      num_ps_tasks: Number of tasks for the `ps` job. 0 to not use replicas.
      worker_job_name: A name for the worker job.
      ps_job_name: A name for the parameter server job.

    Raises:
      ValueError: If the arguments are invalid.
    """
    if num_replicas > 1:
      if num_ps_tasks < 1:
        raise ValueError('When using replicas num_ps_tasks must be positive')
    if num_replicas > 1 or num_ps_tasks > 0:
      if not worker_job_name:
        raise ValueError('Must specify worker_job_name when using replicas')
      if not ps_job_name:
        raise ValueError('Must specify ps_job_name when using parameter server')
    if replica_id >= num_replicas:
      raise ValueError('replica_id must be less than num_replicas')
    self._num_clones = num_clones
    self._clone_on_cpu = clone_on_cpu
    self._replica_id = replica_id
    self._num_replicas = num_replicas
    self._num_ps_tasks = num_ps_tasks
    # The job prefixes are only used when parameter-server tasks exist.
    self._ps_device = '/job:' + ps_job_name if num_ps_tasks > 0 else ''
    self._worker_device = '/job:' + worker_job_name if num_ps_tasks > 0 else ''

  @property
  def num_clones(self):
    return self._num_clones

  @property
  def clone_on_cpu(self):
    return self._clone_on_cpu

  @property
  def replica_id(self):
    return self._replica_id

  @property
  def num_replicas(self):
    return self._num_replicas

  @property
  def num_ps_tasks(self):
    return self._num_ps_tasks

  @property
  def ps_device(self):
    return self._ps_device

  @property
  def worker_device(self):
    return self._worker_device

  def _check_clone_index(self, clone_index):
    """Raises ValueError unless `0 <= clone_index < num_clones`."""
    # A negative index would otherwise yield strings such as
    # '/device:GPU:-1' or 'clone_-1'.
    if clone_index < 0:
      raise ValueError('clone_index must be non-negative')
    if clone_index >= self._num_clones:
      raise ValueError('clone_index must be less than num_clones')

  def caching_device(self):
    """Returns the device to use for caching variables.

    Returns:
      When parameter-server tasks are used, a function that maps an op to
      that op's own device; otherwise None, meaning the variables do not need
      to be cached.
    """
    if self._num_ps_tasks > 0:
      return lambda op: op.device
    else:
      return None

  def clone_device(self, clone_index):
    """Device used to create the clone and all the ops inside the clone.

    Args:
      clone_index: Int, representing the clone_index.

    Returns:
      A value suitable for `tf.device()`.

    Raises:
      ValueError: if `clone_index` is negative, or greater or equal to the
        number of clones.
    """
    self._check_clone_index(clone_index)
    device = ''
    if self._num_ps_tasks > 0:
      device += self._worker_device
    if self._clone_on_cpu:
      device += '/device:CPU:0'
    else:
      device += '/device:GPU:%d' % clone_index
    return device

  def clone_scope(self, clone_index):
    """Name scope to create the clone.

    Args:
      clone_index: Int, representing the clone_index.

    Returns:
      A name_scope suitable for `tf.name_scope()`. It is the empty string
      when there is only a single clone.

    Raises:
      ValueError: if `clone_index` is negative, or greater or equal to the
        number of clones.
    """
    self._check_clone_index(clone_index)
    scope = ''
    if self._num_clones > 1:
      scope = 'clone_%d' % clone_index
    return scope

  def optimizer_device(self):
    """Device to use with the optimizer.

    Returns:
      A value suitable for `tf.device()`.
    """
    if self._num_ps_tasks > 0 or self._num_clones > 0:
      return self._worker_device + '/device:CPU:0'
    else:
      return ''

  def inputs_device(self):
    """Device to use to build the inputs.

    Returns:
      A value suitable for `tf.device()`.
    """
    device = ''
    if self._num_ps_tasks > 0:
      device += self._worker_device
    device += '/device:CPU:0'
    return device

  def variables_device(self):
    """Returns the device to use for variables created inside the clone.

    Without parameter-server tasks this is a plain CPU device string. With
    parameter-server tasks it is a device function that assigns ops whose
    type starts with 'Variable' to the ps tasks in round-robin order and
    leaves every other op on the device it already has.

    Returns:
      A value suitable for `tf.device()`.
    """
    device = ''
    if self._num_ps_tasks > 0:
      device += self._ps_device
    device += '/device:CPU:0'

    if not self._num_ps_tasks:
      return device

    class _PSDeviceChooser(object):
      """Slim device chooser for variables when using PS."""

      def __init__(self, device, tasks):
        self._device = device
        self._tasks = tasks
        self._task = 0  # Index of the ps task that gets the next variable.

      def choose(self, op):
        # An op that already has a device keeps it.
        if op.device:
          return op.device
        node_def = op if isinstance(op, tf.NodeDef) else op.node_def
        if node_def.op.startswith('Variable'):
          t = self._task
          self._task = (self._task + 1) % self._tasks
          d = '%s/task:%d' % (self._device, t)
          return d
        else:
          return op.device

    chooser = _PSDeviceChooser(device, self._num_ps_tasks)
    return chooser.choose
|
||||
+574
@@ -0,0 +1,574 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for model_deploy."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import framework as contrib_framework
|
||||
from tensorflow.contrib import layers as contrib_layers
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
from deployment import model_deploy
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
|
||||
class DeploymentConfigTest(tf.test.TestCase):
  """Checks the devices and scopes produced by `DeploymentConfig`."""

  def testDefaults(self):
    """Default config: one GPU clone, everything else on CPU."""
    config = model_deploy.DeploymentConfig()

    self.assertEqual(slim.get_variables(), [])
    self.assertEqual(config.caching_device(), None)
    self.assertDeviceEqual(config.clone_device(0), 'GPU:0')
    self.assertEqual(config.clone_scope(0), '')
    self.assertDeviceEqual(config.optimizer_device(), 'CPU:0')
    self.assertDeviceEqual(config.inputs_device(), 'CPU:0')
    self.assertDeviceEqual(config.variables_device(), 'CPU:0')

  def testCPUonly(self):
    """With clone_on_cpu the clone is placed on CPU as well."""
    config = model_deploy.DeploymentConfig(clone_on_cpu=True)

    self.assertEqual(config.caching_device(), None)
    self.assertDeviceEqual(config.clone_device(0), 'CPU:0')
    self.assertEqual(config.clone_scope(0), '')
    self.assertDeviceEqual(config.optimizer_device(), 'CPU:0')
    self.assertDeviceEqual(config.inputs_device(), 'CPU:0')
    self.assertDeviceEqual(config.variables_device(), 'CPU:0')

  def testMultiGPU(self):
    """Two clones get one GPU and one `clone_<i>` scope each."""
    config = model_deploy.DeploymentConfig(num_clones=2)

    self.assertEqual(config.caching_device(), None)
    self.assertDeviceEqual(config.clone_device(0), 'GPU:0')
    self.assertDeviceEqual(config.clone_device(1), 'GPU:1')
    self.assertEqual(config.clone_scope(0), 'clone_0')
    self.assertEqual(config.clone_scope(1), 'clone_1')
    self.assertDeviceEqual(config.optimizer_device(), 'CPU:0')
    self.assertDeviceEqual(config.inputs_device(), 'CPU:0')
    self.assertDeviceEqual(config.variables_device(), 'CPU:0')

  def testPS(self):
    """One ps task: worker devices for ops, ps task 0 for variables."""
    config = model_deploy.DeploymentConfig(num_clones=1, num_ps_tasks=1)

    self.assertDeviceEqual(config.clone_device(0),
                           '/job:worker/device:GPU:0')
    self.assertEqual(config.clone_scope(0), '')
    self.assertDeviceEqual(config.optimizer_device(),
                           '/job:worker/device:CPU:0')
    self.assertDeviceEqual(config.inputs_device(),
                           '/job:worker/device:CPU:0')
    with tf.device(config.variables_device()):
      a = tf.Variable(0)
      b = tf.Variable(0)
      c = tf.no_op()
      d = slim.variable('a', [], caching_device=config.caching_device())
    self.assertDeviceEqual(a.device, '/job:ps/task:0/device:CPU:0')
    self.assertDeviceEqual(a.device, a.value().device)
    self.assertDeviceEqual(b.device, '/job:ps/task:0/device:CPU:0')
    self.assertDeviceEqual(b.device, b.value().device)
    self.assertDeviceEqual(c.device, '')
    self.assertDeviceEqual(d.device, '/job:ps/task:0/device:CPU:0')
    self.assertDeviceEqual(d.value().device, '')

  def testMultiGPUPS(self):
    """Two clones with one ps task."""
    config = model_deploy.DeploymentConfig(num_clones=2, num_ps_tasks=1)

    self.assertEqual(config.caching_device()(tf.no_op()), '')
    self.assertDeviceEqual(config.clone_device(0),
                           '/job:worker/device:GPU:0')
    self.assertDeviceEqual(config.clone_device(1),
                           '/job:worker/device:GPU:1')
    self.assertEqual(config.clone_scope(0), 'clone_0')
    self.assertEqual(config.clone_scope(1), 'clone_1')
    self.assertDeviceEqual(config.optimizer_device(),
                           '/job:worker/device:CPU:0')
    self.assertDeviceEqual(config.inputs_device(),
                           '/job:worker/device:CPU:0')

  def testReplicasPS(self):
    """Two replicas with two ps tasks and a single clone."""
    config = model_deploy.DeploymentConfig(num_replicas=2, num_ps_tasks=2)

    self.assertDeviceEqual(config.clone_device(0),
                           '/job:worker/device:GPU:0')
    self.assertEqual(config.clone_scope(0), '')
    self.assertDeviceEqual(config.optimizer_device(),
                           '/job:worker/device:CPU:0')
    self.assertDeviceEqual(config.inputs_device(),
                           '/job:worker/device:CPU:0')

  def testReplicasMultiGPUPS(self):
    """Two replicas with two ps tasks and two clones."""
    config = model_deploy.DeploymentConfig(num_replicas=2,
                                           num_clones=2,
                                           num_ps_tasks=2)
    self.assertDeviceEqual(config.clone_device(0),
                           '/job:worker/device:GPU:0')
    self.assertDeviceEqual(config.clone_device(1),
                           '/job:worker/device:GPU:1')
    self.assertEqual(config.clone_scope(0), 'clone_0')
    self.assertEqual(config.clone_scope(1), 'clone_1')
    self.assertDeviceEqual(config.optimizer_device(),
                           '/job:worker/device:CPU:0')
    self.assertDeviceEqual(config.inputs_device(),
                           '/job:worker/device:CPU:0')

  def testVariablesPS(self):
    """With two ps tasks, tf.Variables alternate between the tasks."""
    config = model_deploy.DeploymentConfig(num_ps_tasks=2)

    with tf.device(config.variables_device()):
      a = tf.Variable(0)
      b = tf.Variable(0)
      c = tf.no_op()
      d = slim.variable('a', [], caching_device=config.caching_device())

    self.assertDeviceEqual(a.device, '/job:ps/task:0/device:CPU:0')
    self.assertDeviceEqual(a.device, a.value().device)
    self.assertDeviceEqual(b.device, '/job:ps/task:1/device:CPU:0')
    self.assertDeviceEqual(b.device, b.value().device)
    self.assertDeviceEqual(c.device, '')
    self.assertDeviceEqual(d.device, '/job:ps/task:0/device:CPU:0')
    self.assertDeviceEqual(d.value().device, '')
|
||||
|
||||
|
||||
def LogisticClassifier(inputs, labels, scope=None, reuse=None):
  """Single sigmoid unit on `inputs` with a log loss against `labels`.

  The log loss is registered through `slim.losses.log_loss`.

  Args:
    inputs: Input tensor fed to the fully connected layer.
    labels: Target tensor for the log loss.
    scope: Optional variable scope; defaults to 'LogisticClassifier'.
    reuse: Passed on to `tf.variable_scope`.

  Returns:
    The predictions tensor (output of the sigmoid).
  """
  with tf.variable_scope(
      scope, 'LogisticClassifier', [inputs, labels], reuse=reuse):
    outputs = slim.fully_connected(
        inputs, 1, activation_fn=tf.sigmoid, scope='fully_connected')
    slim.losses.log_loss(outputs, labels)
    return outputs
|
||||
|
||||
|
||||
def BatchNormClassifier(inputs, labels, scope=None, reuse=None):
  """Batch norm followed by a single sigmoid unit, with a log loss.

  The log loss is registered through `slim.losses.log_loss`.

  Args:
    inputs: Input tensor; it is batch-normalized before the dense layer.
    labels: Target tensor for the log loss.
    scope: Optional variable scope; defaults to 'BatchNormClassifier'.
    reuse: Passed on to `tf.variable_scope`.

  Returns:
    The predictions tensor (output of the sigmoid).
  """
  with tf.variable_scope(
      scope, 'BatchNormClassifier', [inputs, labels], reuse=reuse):
    normalized = slim.batch_norm(inputs, decay=0.1, fused=True)
    outputs = slim.fully_connected(
        normalized, 1, activation_fn=tf.sigmoid, scope='fully_connected')
    slim.losses.log_loss(outputs, labels)
    return outputs
|
||||
|
||||
|
||||
class CreatecloneTest(tf.test.TestCase):
  """Tests for `model_deploy.create_clones` under several deployment configs."""

  def setUp(self):
    # Create an easy training set:
    np.random.seed(0)

    self._inputs = np.zeros((16, 4))
    self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32)
    self._logdir = self.get_temp_dir()

    for i in range(16):
      # `_labels` has shape (16, 1); pick the scalar explicitly so int() is
      # not handed a 1-element array (a conversion recent NumPy deprecates).
      j = int(2 * self._labels[i, 0] + np.random.randint(0, 2))
      self._inputs[i, j] = 1

  def _graph_inputs(self):
    """Seeds the default graph and returns (inputs, labels) as constants.

    Must be called inside the graph context of the test being built.
    """
    tf.set_random_seed(0)
    tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
    tf_labels = tf.constant(self._labels, dtype=tf.float32)
    return tf_inputs, tf_labels

  def testCreateLogisticClassifier(self):
    """Single LogisticClassifier clone: 2 variables, no update ops."""
    with tf.Graph().as_default():
      clone_args = self._graph_inputs()
      model_fn = LogisticClassifier
      deploy_config = model_deploy.DeploymentConfig(num_clones=1)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      clone = clones[0]
      self.assertEqual(len(slim.get_variables()), 2)
      for v in slim.get_variables():
        self.assertDeviceEqual(v.device, 'CPU:0')
        self.assertDeviceEqual(v.value().device, 'CPU:0')
      self.assertEqual(clone.outputs.op.name,
                       'LogisticClassifier/fully_connected/Sigmoid')
      self.assertEqual(clone.scope, '')
      self.assertDeviceEqual(clone.device, 'GPU:0')
      self.assertEqual(len(slim.losses.get_losses()), 1)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(update_ops, [])

  def testCreateSingleclone(self):
    """Single BatchNormClassifier clone: 5 variables, 2 update ops."""
    with tf.Graph().as_default():
      clone_args = self._graph_inputs()
      model_fn = BatchNormClassifier
      deploy_config = model_deploy.DeploymentConfig(num_clones=1)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      clone = clones[0]
      self.assertEqual(len(slim.get_variables()), 5)
      for v in slim.get_variables():
        self.assertDeviceEqual(v.device, 'CPU:0')
        self.assertDeviceEqual(v.value().device, 'CPU:0')
      self.assertEqual(clone.outputs.op.name,
                       'BatchNormClassifier/fully_connected/Sigmoid')
      self.assertEqual(clone.scope, '')
      self.assertDeviceEqual(clone.device, 'GPU:0')
      self.assertEqual(len(slim.losses.get_losses()), 1)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(len(update_ops), 2)

  def testCreateMulticlone(self):
    """Four clones share 5 variables; each clone has its own scope/device."""
    with tf.Graph().as_default():
      clone_args = self._graph_inputs()
      model_fn = BatchNormClassifier
      num_clones = 4
      deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      self.assertEqual(len(slim.get_variables()), 5)
      for v in slim.get_variables():
        self.assertDeviceEqual(v.device, 'CPU:0')
        self.assertDeviceEqual(v.value().device, 'CPU:0')
      self.assertEqual(len(clones), num_clones)
      for i, clone in enumerate(clones):
        self.assertEqual(
            clone.outputs.op.name,
            'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, clone.scope)
        self.assertEqual(len(update_ops), 2)
        self.assertEqual(clone.scope, 'clone_%d/' % i)
        self.assertDeviceEqual(clone.device, 'GPU:%d' % i)

  def testCreateOnecloneWithPS(self):
    """One clone with one PS task: variables on the ps job, clone on worker."""
    with tf.Graph().as_default():
      clone_args = self._graph_inputs()
      model_fn = BatchNormClassifier
      deploy_config = model_deploy.DeploymentConfig(num_clones=1,
                                                    num_ps_tasks=1)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      self.assertEqual(len(clones), 1)
      clone = clones[0]
      self.assertEqual(clone.outputs.op.name,
                       'BatchNormClassifier/fully_connected/Sigmoid')
      self.assertDeviceEqual(clone.device, '/job:worker/device:GPU:0')
      self.assertEqual(clone.scope, '')
      self.assertEqual(len(slim.get_variables()), 5)
      for v in slim.get_variables():
        self.assertDeviceEqual(v.device, '/job:ps/task:0/CPU:0')
        self.assertDeviceEqual(v.device, v.value().device)

  def testCreateMulticloneWithPS(self):
    """Two clones with two PS tasks: variables alternate between ps tasks."""
    with tf.Graph().as_default():
      clone_args = self._graph_inputs()
      model_fn = BatchNormClassifier
      deploy_config = model_deploy.DeploymentConfig(num_clones=2,
                                                    num_ps_tasks=2)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      self.assertEqual(len(slim.get_variables()), 5)
      for i, v in enumerate(slim.get_variables()):
        t = i % 2
        self.assertDeviceEqual(v.device, '/job:ps/task:%d/device:CPU:0' % t)
        self.assertDeviceEqual(v.device, v.value().device)
      self.assertEqual(len(clones), 2)
      for i, clone in enumerate(clones):
        self.assertEqual(
            clone.outputs.op.name,
            'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i)
        self.assertEqual(clone.scope, 'clone_%d/' % i)
        self.assertDeviceEqual(clone.device, '/job:worker/device:GPU:%d' % i)
|
||||
|
||||
|
||||
class OptimizeclonesTest(tf.test.TestCase):
  """Tests for `model_deploy.optimize_clones` on clones from `create_clones`."""

  def setUp(self):
    # Create an easy training set:
    np.random.seed(0)

    self._inputs = np.zeros((16, 4))
    self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32)
    self._logdir = self.get_temp_dir()

    for i in range(16):
      # `_labels` has shape (16, 1); pick the scalar explicitly so int() is
      # not handed a 1-element array (a conversion recent NumPy deprecates).
      j = int(2 * self._labels[i, 0] + np.random.randint(0, 2))
      self._inputs[i, j] = 1

  def _graph_inputs(self):
    """Seeds the default graph and returns (inputs, labels) as constants.

    Must be called inside the graph context of the test being built.
    """
    tf.set_random_seed(0)
    tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
    tf_labels = tf.constant(self._labels, dtype=tf.float32)
    return tf_inputs, tf_labels

  def _check_optimize_clones(self, clones, grad_device, var_device):
    """Runs `optimize_clones` with plain SGD and checks loss name and devices.

    Args:
      clones: Clones returned by `model_deploy.create_clones`.
      grad_device: Device every gradient is expected to be on.
      var_device: Device every variable is expected to be on.
    """
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
    total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
                                                              optimizer)
    self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
    self.assertEqual(total_loss.op.name, 'total_loss')
    for grad, var in grads_and_vars:
      self.assertDeviceEqual(grad.device, grad_device)
      self.assertDeviceEqual(var.device, var_device)

  def testCreateLogisticClassifier(self):
    """LogisticClassifier, one clone: gradients on GPU:0, variables on CPU:0."""
    with tf.Graph().as_default():
      clone_args = self._graph_inputs()
      model_fn = LogisticClassifier
      deploy_config = model_deploy.DeploymentConfig(num_clones=1)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      self.assertEqual(len(slim.get_variables()), 2)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(update_ops, [])

      self._check_optimize_clones(clones, 'GPU:0', 'CPU:0')

  def testCreateSingleclone(self):
    """BatchNormClassifier, one clone: gradients on GPU:0, variables on CPU:0."""
    with tf.Graph().as_default():
      clone_args = self._graph_inputs()
      model_fn = BatchNormClassifier
      deploy_config = model_deploy.DeploymentConfig(num_clones=1)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      self.assertEqual(len(slim.get_variables()), 5)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(len(update_ops), 2)

      self._check_optimize_clones(clones, 'GPU:0', 'CPU:0')

  def testCreateMulticlone(self):
    """Four clones: gradients carry no device, variables stay on CPU:0."""
    with tf.Graph().as_default():
      clone_args = self._graph_inputs()
      model_fn = BatchNormClassifier
      num_clones = 4
      deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      self.assertEqual(len(slim.get_variables()), 5)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(len(update_ops), num_clones * 2)

      self._check_optimize_clones(clones, '', 'CPU:0')

  def testCreateMulticloneCPU(self):
    """Four CPU clones: gradients carry no device, variables stay on CPU:0."""
    with tf.Graph().as_default():
      model_args = self._graph_inputs()
      model_fn = BatchNormClassifier
      num_clones = 4
      deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones,
                                                    clone_on_cpu=True)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, model_args)
      self.assertEqual(len(slim.get_variables()), 5)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(len(update_ops), num_clones * 2)

      self._check_optimize_clones(clones, '', 'CPU:0')

  def testCreateOnecloneWithPS(self):
    """One clone with a PS task: gradients on worker GPU, variables on ps."""
    with tf.Graph().as_default():
      model_args = self._graph_inputs()
      model_fn = BatchNormClassifier
      deploy_config = model_deploy.DeploymentConfig(num_clones=1,
                                                    num_ps_tasks=1)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, model_args)
      self.assertEqual(len(slim.get_variables()), 5)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(len(update_ops), 2)

      self._check_optimize_clones(clones, '/job:worker/device:GPU:0',
                                  '/job:ps/task:0/CPU:0')
|
||||
|
||||
|
||||
class DeployTest(tf.test.TestCase):
  """Tests for `model_deploy.deploy`: training ops and summary placement."""

  def setUp(self):
    # Create an easy training set:
    np.random.seed(0)

    self._inputs = np.zeros((16, 4))
    self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32)
    self._logdir = self.get_temp_dir()

    for i in range(16):
      # `_labels` has shape (16, 1); pick the scalar explicitly so int() is
      # not handed a 1-element array (a conversion recent NumPy deprecates).
      j = int(2 * self._labels[i, 0] + np.random.randint(0, 2))
      self._inputs[i, j] = 1

  def _addBesselsCorrection(self, sample_size, expected_var):
    """Scales a variance by sample_size / (sample_size - 1).

    Args:
      sample_size: Number of samples the variance was computed from.
      expected_var: Variance value(s) to scale.

    Returns:
      A new scaled value; `expected_var` itself is left untouched.
    """
    # float() keeps this a true division regardless of division semantics.
    correction_factor = float(sample_size) / (sample_size - 1)
    return expected_var * correction_factor

  def testLocalTrainOp(self):
    """Trains two CPU clones for 10 steps and checks loss and BN statistics."""
    g = tf.Graph()
    with g.as_default():
      tf.set_random_seed(0)
      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
      tf_labels = tf.constant(self._labels, dtype=tf.float32)

      model_fn = BatchNormClassifier
      model_args = (tf_inputs, tf_labels)
      deploy_config = model_deploy.DeploymentConfig(num_clones=2,
                                                    clone_on_cpu=True)

      optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)

      self.assertEqual(slim.get_variables(), [])
      model = model_deploy.deploy(deploy_config, model_fn, model_args,
                                  optimizer=optimizer)

      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(len(update_ops), 4)
      self.assertEqual(len(model.clones), 2)
      self.assertEqual(model.total_loss.op.name, 'total_loss')
      self.assertEqual(model.summary_op.op.name, 'summary_op/summary_op')
      self.assertEqual(model.train_op.op.name, 'train_op')

      with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        moving_mean = contrib_framework.get_variables_by_name('moving_mean')[0]
        moving_variance = contrib_framework.get_variables_by_name(
            'moving_variance')[0]
        initial_loss = sess.run(model.total_loss)
        initial_mean, initial_variance = sess.run([moving_mean,
                                                   moving_variance])
        self.assertAllClose(initial_mean, [0.0, 0.0, 0.0, 0.0])
        self.assertAllClose(initial_variance, [1.0, 1.0, 1.0, 1.0])
        for _ in range(10):
          sess.run(model.train_op)
        final_loss = sess.run(model.total_loss)
        self.assertLess(final_loss, initial_loss / 5.0)

        final_mean, final_variance = sess.run([moving_mean,
                                               moving_variance])
        expected_mean = np.array([0.125, 0.25, 0.375, 0.25])
        expected_var = np.array([0.109375, 0.1875, 0.234375, 0.1875])
        expected_var = self._addBesselsCorrection(16, expected_var)
        self.assertAllClose(final_mean, expected_mean)
        self.assertAllClose(final_variance, expected_var)

  def testNoSummariesOnGPU(self):
    """With an optimizer, every input of the summary op is on the CPU."""
    with tf.Graph().as_default():
      deploy_config = model_deploy.DeploymentConfig(num_clones=2)

      # clone function creates a fully_connected layer with a regularizer loss.
      def ModelFn():
        inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)
        reg = contrib_layers.l2_regularizer(0.001)
        contrib_layers.fully_connected(inputs, 30, weights_regularizer=reg)

      model = model_deploy.deploy(
          deploy_config, ModelFn,
          optimizer=tf.train.GradientDescentOptimizer(1.0))
      # The model summary op should have a few summary inputs and all of them
      # should be on the CPU.
      self.assertTrue(model.summary_op.op.inputs)
      for inp in model.summary_op.op.inputs:
        self.assertEqual('/device:CPU:0', inp.device)

  def testNoSummariesOnGPUForEvals(self):
    """Without an optimizer, every input of the summary op is on the CPU."""
    with tf.Graph().as_default():
      deploy_config = model_deploy.DeploymentConfig(num_clones=2)

      # clone function creates a fully_connected layer with a regularizer loss.
      def ModelFn():
        inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)
        reg = contrib_layers.l2_regularizer(0.001)
        contrib_layers.fully_connected(inputs, 30, weights_regularizer=reg)

      # No optimizer here, it's an eval.
      model = model_deploy.deploy(deploy_config, ModelFn)
      # The model summary op should have a few summary inputs and all of them
      # should be on the CPU.
      self.assertTrue(model.summary_op.op.inputs)
      for inp in model.summary_op.op.inputs:
        self.assertEqual('/device:CPU:0', inp.device)
|
||||
|
||||
|
||||
if __name__ == '__main__':
  # Script entry point: hand control to the TensorFlow test runner.
  tf.test.main()
|
||||
+94
@@ -0,0 +1,94 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
r"""Downloads and converts a particular dataset.
|
||||
|
||||
Usage:
|
||||
```shell
|
||||
|
||||
$ python download_and_convert_data.py \
|
||||
--dataset_name=flowers \
|
||||
--dataset_dir=/tmp/flowers
|
||||
|
||||
$ python download_and_convert_data.py \
|
||||
--dataset_name=cifar10 \
|
||||
--dataset_dir=/tmp/cifar10
|
||||
|
||||
$ python download_and_convert_data.py \
|
||||
--dataset_name=mnist \
|
||||
--dataset_dir=/tmp/mnist
|
||||
|
||||
$ python download_and_convert_data.py \
|
||||
--dataset_name=visualwakewords \
|
||||
--dataset_dir=/tmp/visualwakewords
|
||||
|
||||
```
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from datasets import download_and_convert_cifar10
|
||||
from datasets import download_and_convert_flowers
|
||||
from datasets import download_and_convert_mnist
|
||||
from datasets import download_and_convert_visualwakewords
|
||||
|
||||
# Command-line flags for the conversion script. Every definition goes through
# `tf.compat.v1.app.flags` so the module does not rely on the bare `tf.flags`
# alias, which is not available in TF 2.x.
FLAGS = tf.compat.v1.app.flags.FLAGS

tf.compat.v1.app.flags.DEFINE_string(
    'dataset_name',
    None,
    'The name of the dataset to convert, one of "flowers", "cifar10", "mnist", '
    '"visualwakewords"')

tf.compat.v1.app.flags.DEFINE_string(
    'dataset_dir',
    None,
    'The directory where the output TFRecords and temporary files are saved.')

tf.compat.v1.app.flags.DEFINE_float(
    'small_object_area_threshold', 0.005,
    'For --dataset_name=visualwakewords only. Threshold of fraction of image '
    'area below which small objects are filtered')

tf.compat.v1.app.flags.DEFINE_string(
    'foreground_class_of_interest', 'person',
    'For --dataset_name=visualwakewords only. Build a binary classifier based '
    'on the presence or absence of this object in the image.')
|
||||
|
||||
|
||||
def main(_):
  """Validates the flags and runs the converter for the requested dataset.

  Args:
    _: Unused positional argument supplied by the app runner.

  Raises:
    ValueError: If --dataset_name or --dataset_dir is missing, or if the
      dataset name is not one of the supported datasets.
  """
  name = FLAGS.dataset_name
  if not name:
    raise ValueError('You must supply the dataset name with --dataset_name')

  target_dir = FLAGS.dataset_dir
  if not target_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  if name == 'flowers':
    download_and_convert_flowers.run(target_dir)
    return
  if name == 'cifar10':
    download_and_convert_cifar10.run(target_dir)
    return
  if name == 'mnist':
    download_and_convert_mnist.run(target_dir)
    return
  if name == 'visualwakewords':
    # Only this converter takes the two extra visualwakewords-specific flags.
    download_and_convert_visualwakewords.run(
        target_dir, FLAGS.small_object_area_threshold,
        FLAGS.foreground_class_of_interest)
    return

  raise ValueError('dataset_name [%s] was not recognized.' % name)
|
||||
|
||||
if __name__ == '__main__':
  # Script entry point: the app runner is started with its default arguments.
  tf.compat.v1.app.run()
|
||||
@@ -0,0 +1,182 @@
|
||||
import tensorflow as tf
|
||||
from time import gmtime, strftime
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
from gpu_helper import get_custom_getter
|
||||
import random
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
# Seed NumPy, Python's `random` module and TensorFlow with the same fixed
# value at import time.
np.random.seed(0)
random.seed(0)
tf.set_random_seed(0)
|
||||
|
||||
|
||||
class Env:
    """Training helpers driven by a parsed FLAGS object.

    Groups the construction of the optimizer, learning-rate schedule, loss
    tensors and the NPU train op.
    """

    def __init__(self, FLAGS):
        """Stores the flags object and the slim module used by the helpers.

        Args:
            FLAGS: Object exposing the training flags as attributes.
        """
        self.FLAGS = FLAGS

        self.slim = contrib_slim
        # Number of training samples used to derive steps per epoch.
        # NOTE(review): presumably the ImageNet-1k train split size - confirm.
        self.num_samples = 1281167

    @staticmethod
    def _rank_size():
        """Returns RANK_SIZE from the environment as an int (1 when unset)."""
        return int(os.getenv('RANK_SIZE', '1'))

    def _configure_optimizer(self, learning_rate):
        """Configures the optimizer used for training.

        Args:
            learning_rate: A scalar or `Tensor` learning rate.

        Returns:
            An instance of an optimizer.

        Raises:
            ValueError: if FLAGS.optimizer is not recognized.
        """
        flags = self.FLAGS
        name = flags.optimizer
        if name == 'adadelta':
            optimizer = tf.train.AdadeltaOptimizer(
                learning_rate,
                rho=flags.adadelta_rho,
                epsilon=flags.opt_epsilon)
        elif name == 'adagrad':
            optimizer = tf.train.AdagradOptimizer(
                learning_rate,
                initial_accumulator_value=flags.adagrad_initial_accumulator_value)
        elif name == 'adam':
            optimizer = tf.train.AdamOptimizer(
                learning_rate,
                beta1=flags.adam_beta1,
                beta2=flags.adam_beta2,
                epsilon=flags.opt_epsilon)
        elif name == 'ftrl':
            optimizer = tf.train.FtrlOptimizer(
                learning_rate,
                learning_rate_power=flags.ftrl_learning_rate_power,
                initial_accumulator_value=flags.ftrl_initial_accumulator_value,
                l1_regularization_strength=flags.ftrl_l1,
                l2_regularization_strength=flags.ftrl_l2)
        elif name == 'momentum':
            optimizer = tf.train.MomentumOptimizer(
                learning_rate,
                momentum=flags.momentum,
                name='Momentum')
        elif name == 'rmsprop':
            optimizer = tf.train.RMSPropOptimizer(
                learning_rate,
                decay=flags.rmsprop_decay,
                momentum=flags.rmsprop_momentum,
                epsilon=flags.opt_epsilon)
        elif name == 'sgd':
            optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        else:
            raise ValueError('Optimizer [%s] was not recognized' % name)

        return optimizer

    def create_logdir(self):
        """Creates the 'results' directory if needed and returns its path."""
        logdir = "results"
        os.makedirs(logdir, exist_ok=True)
        return logdir

    def calc_logits(self, network_fn, images):
        """Runs `network_fn` on `images` and returns only the logits.

        Args:
            network_fn: Callable returning a (logits, end_points) pair.
            images: Input passed to `network_fn`.

        Returns:
            The logits; the end points are discarded.
        """
        logits, _ = network_fn(images, reuse=tf.AUTO_REUSE)
        return logits

    def calc_loss(self, logits_train, labels_train):
        """Builds the cross-entropy loss and the regularized total loss.

        Args:
            logits_train: Logits passed to `softmax_cross_entropy`.
            labels_train: Labels passed to `softmax_cross_entropy`.

        Returns:
            A (loss, total_loss) pair: `loss` is the cross-entropy alone
            (named 'loss'); `total_loss` adds every tensor found in the
            REGULARIZATION_LOSSES collection (named 'total_loss').
        """
        base_loss = self.slim.losses.softmax_cross_entropy(
            logits_train, labels_train,
            label_smoothing=self.FLAGS.label_smoothing, weights=1.0)

        reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([base_loss] + reg_losses, name='total_loss')

        loss = tf.add_n([base_loss])
        loss = tf.identity(loss, name='loss')

        return loss, total_loss

    def calc_steps_per_epoch(self):
        """Returns num_samples // (batch_size * RANK_SIZE).

        RANK_SIZE is read from the environment and treated as 1 when unset.
        """
        global_batch = self.FLAGS.batch_size * self._rank_size()
        return self.num_samples // global_batch

    def _configure_learning_rate(self, global_step):
        """Builds the learning-rate tensor selected by the flags.

        Args:
            global_step: The global step tensor driving the schedule.

        Returns:
            A tensor named 'learning_rate'. When FLAGS.warmup_epochs is set,
            it is the minimum of the schedule and a linear warm-up ramp.

        Raises:
            ValueError: if FLAGS.learning_rate_decay_type is not recognized.
        """
        flags = self.FLAGS
        steps_per_epoch = self.calc_steps_per_epoch()
        decay_steps = int(steps_per_epoch * flags.num_epochs_per_decay)
        decay_type = flags.learning_rate_decay_type

        if decay_type == 'exponential':
            learning_rate = tf.train.exponential_decay(
                flags.learning_rate,
                global_step,
                decay_steps,
                flags.learning_rate_decay_factor,
                staircase=True,
                name='exponential_decay_learning_rate')
        elif decay_type == 'fixed':
            learning_rate = tf.constant(flags.learning_rate,
                                        name='fixed_learning_rate')
        elif decay_type == 'cosine_annealing':
            # Round the step down to an epoch boundary so the cosine schedule
            # changes once per epoch rather than every step.
            current_step_epoch = global_step // steps_per_epoch * steps_per_epoch
            learning_rate = tf.train.cosine_decay(flags.learning_rate,
                                                  current_step_epoch,
                                                  flags.max_number_of_steps)
        elif decay_type == 'polynomial':
            learning_rate = tf.train.polynomial_decay(
                flags.learning_rate, global_step,
                decay_steps,
                flags.end_learning_rate,
                power=1.0,
                cycle=False,
                name='polynomial_decay_learning_rate')
        else:
            raise ValueError('learning_rate_decay_type [%s] was not recognized' %
                             decay_type)

        if flags.warmup_epochs:
            # Linear ramp from 0 to the base rate over the warm-up epochs.
            warmup_lr = (
                flags.learning_rate * tf.cast(global_step, tf.float32) /
                (steps_per_epoch * flags.warmup_epochs))
            learning_rate = tf.minimum(warmup_lr, learning_rate)

        learning_rate = tf.identity(learning_rate, name='learning_rate')
        return learning_rate

    def create_train_op(self, global_step, summaries, loss):
        """Builds the train op: update ops, loss scaling and gradient step.

        Args:
            global_step: Global step tensor, incremented by the train op.
            summaries: Set that receives the learning-rate scalar summary.
            loss: Loss tensor to differentiate.

        Returns:
            The op returned by the wrapped optimizer's `apply_gradients`.
        """
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) or []

        #################################
        # Configure the moving averages #
        #################################
        if self.FLAGS.moving_average_decay:
            moving_average_variables = self.slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                self.FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        #########################################
        # Configure the optimization procedure. #
        #########################################
        learning_rate = self._configure_learning_rate(global_step)
        summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if self.FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(variable_averages.apply(moving_average_variables))

        opt = self._configure_optimizer(learning_rate)

        # Imported here so the module can be loaded without npu_bridge.
        from npu_bridge.estimator.npu.npu_optimizer import NPUDistributedOptimizer
        from npu_bridge.estimator.npu.npu_loss_scale_optimizer import NPULossScaleOptimizer
        from npu_bridge.estimator.npu.npu_loss_scale_manager import FixedLossScaleManager

        loss_scale_manager = FixedLossScaleManager(loss_scale=4096)
        if self._rank_size() == 1:
            opt = NPULossScaleOptimizer(opt, loss_scale_manager)
        else:
            opt = NPULossScaleOptimizer(opt, loss_scale_manager, is_distributed=True)
            # NOTE(review): the distributed wrapper is applied only for
            # RANK_SIZE > 1 here; confirm against the original file's nesting.
            opt = NPUDistributedOptimizer(opt)

        update_op = tf.group(*update_ops)
        # NOTE(review): both optimizer calls are kept inside the dependency
        # block; confirm against the original file's nesting.
        with tf.control_dependencies([update_op]):
            grads_and_vars = opt.compute_gradients(loss)
            train_op = opt.apply_gradients(grads_and_vars, global_step=global_step)

        return train_op
|
||||
+133
@@ -0,0 +1,133 @@
|
||||
import tensorflow as tf
|
||||
from dataloader import data_provider
|
||||
from datasets import dataset_factory
|
||||
from nets import nets_factory
|
||||
import os
|
||||
|
||||
|
||||
class EstimatorImpl:
|
||||
def __init__(self, env):
|
||||
self.env = env
|
||||
|
||||
def model_fn(self, features, labels, mode, params):
|
||||
num_classes = 1001
|
||||
|
||||
summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
|
||||
|
||||
if mode == tf.estimator.ModeKeys.TRAIN:
|
||||
network_fn = nets_factory.get_network_fn(
|
||||
self.env.FLAGS.model_name,
|
||||
num_classes=(num_classes - self.env.FLAGS.labels_offset),
|
||||
weight_decay=self.env.FLAGS.weight_decay,
|
||||
is_training=True)
|
||||
|
||||
logits = self.env.calc_logits(network_fn, features)
|
||||
loss, total_loss = self.env.calc_loss(logits, labels)
|
||||
|
||||
# ### accuracy ### #
|
||||
predictions = tf.argmax(logits, 1)
|
||||
accuracy_ops = tf.metrics.accuracy(tf.argmax(labels, 1), predictions)
|
||||
tf.identity(accuracy_ops[1], name='train_accuracy')
|
||||
# ### accuracy ### #
|
||||
|
||||
tf.identity(total_loss, 'train_loss')
|
||||
|
||||
global_step = tf.train.get_or_create_global_step()
|
||||
train_op = self.env.create_train_op(global_step, summaries, total_loss)
|
||||
|
||||
estimator_spec = tf.estimator.EstimatorSpec(
|
||||
mode=tf.estimator.ModeKeys.TRAIN, loss=total_loss, train_op=train_op)
|
||||
|
||||
elif mode == tf.estimator.ModeKeys.EVAL:
|
||||
network_fn = nets_factory.get_network_fn(
|
||||
self.env.FLAGS.model_name,
|
||||
num_classes=(num_classes - self.env.FLAGS.labels_offset),
|
||||
weight_decay=self.env.FLAGS.weight_decay,
|
||||
is_training=False)
|
||||
|
||||
logits = self.env.calc_logits(network_fn, features)
|
||||
loss, total_loss = self.env.calc_loss(logits, labels)
|
||||
predictions = tf.argmax(logits, 1)
|
||||
accuracy_ops = tf.metrics.accuracy(tf.argmax(labels, 1), predictions)
|
||||
tf.identity(accuracy_ops[1], name='eval_accuracy')
|
||||
estimator_spec = tf.estimator.EstimatorSpec(
|
||||
mode=tf.estimator.ModeKeys.EVAL,
|
||||
loss=total_loss, eval_metric_ops={'accuracy': accuracy_ops})
|
||||
|
||||
return estimator_spec
|
||||
|
||||
def main(self):
|
||||
logdir = self.env.create_logdir()
|
||||
|
||||
from logger import LogSessionRunHook
|
||||
|
||||
config = {
|
||||
'num_training_samples': self.env.num_samples,
|
||||
# for 1p, just per loop print, for 8p, print each epoch
|
||||
'display_every': 1,
|
||||
'log_name': 'train_log.log',
|
||||
'log_dir': logdir,
|
||||
'global_batch_size': self.env.FLAGS.batch_size * int(os.getenv('RANK_SIZE')),
|
||||
'iterations_per_loop': self.env.FLAGS.iterations_per_loop if self.env.FLAGS.iterations_per_loop is not None else self.env.calc_steps_per_epoch()
|
||||
}
|
||||
|
||||
hooks = [LogSessionRunHook(config, warmup_steps=self.env.FLAGS.warmup_epochs * self.env.calc_steps_per_epoch())]
|
||||
|
||||
#################################################################
|
||||
from npu_bridge.estimator.npu.npu_config import NPURunConfig
|
||||
from npu_bridge.estimator.npu.npu_estimator import NPUEstimator
|
||||
|
||||
self.estimator_config = tf.ConfigProto(
|
||||
inter_op_parallelism_threads=10,
|
||||
intra_op_parallelism_threads=10,
|
||||
allow_soft_placement=True)
|
||||
|
||||
self.estimator_config.gpu_options.allow_growth = True
|
||||
|
||||
gpu_thread_count = 2
|
||||
os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
|
||||
os.environ['TF_GPU_THREAD_COUNT'] = str(gpu_thread_count)
|
||||
os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = '1'
|
||||
os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
|
||||
|
||||
run_config = NPURunConfig(
|
||||
hcom_parallel=True,
|
||||
precision_mode="allow_mix_precision",
|
||||
enable_data_pre_proc=True,
|
||||
save_checkpoints_steps=self.env.calc_steps_per_epoch(),
|
||||
session_config=self.estimator_config,
|
||||
model_dir=logdir,
|
||||
iterations_per_loop=config['iterations_per_loop'],
|
||||
keep_checkpoint_max=5)
|
||||
|
||||
classifier = NPUEstimator(
|
||||
model_fn=self.model_fn,
|
||||
config=run_config
|
||||
)
|
||||
###################################################################
|
||||
|
||||
classifier.train(
|
||||
input_fn=self.train_data,
|
||||
max_steps=self.env.FLAGS.max_number_of_steps,
|
||||
hooks=hooks,
|
||||
)
|
||||
|
||||
def train_data(self):
    """Build the input pipeline for the 'train' split.

    Reads the dataset name/directory, batch size, label offset, preprocessing
    name and grayscale switch from ``self.env.FLAGS``, and returns the second
    element of the pair produced by ``data_provider.get_data``.
    """
    flags = self.env.FLAGS
    train_split = dataset_factory.get_dataset(
        flags.dataset_name, 'train', flags.dataset_dir)

    # Without an explicit preprocessing name, fall back to the model name.
    preprocessing = flags.preprocessing_name or flags.model_name

    # The fifth positional argument is True here and False in eval_data;
    # presumably the is_training switch -- confirm against data_provider.
    _, train_ds = data_provider.get_data(
        train_split,
        flags.batch_size,
        train_split.num_classes,
        flags.labels_offset,
        True,
        preprocessing,
        flags.use_grayscale)
    return train_ds
|
||||
|
||||
def eval_data(self):
    """Build the input pipeline for the 'validation' split.

    Reads the dataset name/directory, batch size, label offset, preprocessing
    name and grayscale switch from ``self.env.FLAGS``, and returns the second
    element of the pair produced by ``data_provider.get_data``.
    """
    flags = self.env.FLAGS
    validation_split = dataset_factory.get_dataset(
        flags.dataset_name, 'validation', flags.dataset_dir)

    # Without an explicit preprocessing name, fall back to the model name.
    preprocessing = flags.preprocessing_name or flags.model_name

    # The fifth positional argument is False here and True in train_data;
    # presumably the is_training switch -- confirm against data_provider.
    _, eval_ds = data_provider.get_data(
        validation_split,
        flags.batch_size,
        validation_split.num_classes,
        flags.labels_offset,
        False,
        preprocessing,
        flags.use_grayscale)
    return eval_ds
|
||||
+174
@@ -0,0 +1,174 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Generic evaluation script that evaluates a model using a given dataset."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import math
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import quantize as contrib_quantize
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
from benchmark_log import hwlog
|
||||
from datasets import dataset_factory
|
||||
from nets import nets_factory
|
||||
import os
|
||||
# NOTE(review): this runs at import time and replaces any CUDA_VISIBLE_DEVICES
# value already present in the environment -- confirm that pinning to device
# '4' is intended for every user of this script.
os.environ["CUDA_VISIBLE_DEVICES"] = '4'

slim = contrib_slim

# ---------------------------------------------------------------------------
# Batching
# ---------------------------------------------------------------------------
tf.app.flags.DEFINE_integer(
    'batch_size',
    100,
    'The number of samples in each batch.')

tf.app.flags.DEFINE_integer(
    'max_num_batches',
    None,
    'Max number of batches to evaluate by default use all.')

tf.app.flags.DEFINE_string(
    'master',
    '',
    'The address of the TensorFlow master to use.')

# ---------------------------------------------------------------------------
# Checkpoint / output locations
# ---------------------------------------------------------------------------
# Default run directory; used as the default for both --checkpoint_path and
# --eval_dir below.
ckpt_path = './results/0526023335_train_hvdTrue_mnmobilenet_v2_augmentedTrue_mixedpFalse_lr0.4_optmomentum_me200_lrdtcosine_annealing_nepd0.3125_lrdf0.98_b256_me_param'
# Alternative run directory kept for reference:
# ckpt_path = './results/0523130615_train_hvdTrue_mnmobilenet_v2_augmentedTrue_mixedpFalse_lr0.4_optmomentum_me200_lrdtcosine_annealing_nepd0.3125_lrdf0.98_b256_me_param'
tf.app.flags.DEFINE_string(
    'checkpoint_path',
    ckpt_path,
    'The directory where the model was written to or an absolute path to a '
    'checkpoint file.')

tf.app.flags.DEFINE_string(
    'eval_dir',
    ckpt_path,
    'Directory where the results are saved to.')

# ---------------------------------------------------------------------------
# Dataset
# ---------------------------------------------------------------------------
tf.app.flags.DEFINE_integer(
    'num_preprocessing_threads',
    4,
    'The number of threads used to create the batches.')

tf.app.flags.DEFINE_string(
    'dataset_name',
    'imagenet',
    'The name of the dataset to load.')

tf.app.flags.DEFINE_string(
    'dataset_split_name',
    'validation',
    'The name of the train/test split.')

tf.app.flags.DEFINE_string(
    'dataset_dir',
    '/data/Datasets/imagenet_TF',
    'The directory where the dataset files are stored.')

tf.app.flags.DEFINE_integer(
    'labels_offset',
    0,
    'An offset for the labels in the dataset. This flag is primarily used to '
    'evaluate the VGG and ResNet architectures which do not use a background '
    'class for the ImageNet dataset.')

# ---------------------------------------------------------------------------
# Model / preprocessing
# ---------------------------------------------------------------------------
tf.app.flags.DEFINE_string(
    'model_name',
    'mobilenet_v2',
    'The name of the architecture to evaluate.')

tf.app.flags.DEFINE_string(
    'preprocessing_name',
    None,
    'The name of the preprocessing to use. If left '
    'as `None`, then the model_name flag is used.')

tf.app.flags.DEFINE_float(
    'moving_average_decay',
    None,
    'The decay to use for the moving average.'
    'If left as None, then moving averages are not used.')

tf.app.flags.DEFINE_integer(
    'eval_image_size',
    None,
    'Eval image size')

tf.app.flags.DEFINE_bool(
    'quantize',
    False,
    'whether to use quantized graph or not.')

tf.app.flags.DEFINE_bool(
    'use_grayscale',
    False,
    'Whether to convert input images to grayscale.')

# Parsed flag values, read by main().
FLAGS = tf.app.flags.FLAGS
|
||||
|
||||
|
||||
def main(_):
    """Evaluate a checkpoint on the configured dataset split.

    Builds the network named by --model_name in inference mode, restores the
    checkpoint, streams batches through ``tf.metrics.accuracy`` and reports
    the final accuracy via ``print`` and ``hwlog``.

    Args:
      _: unused positional argument supplied by ``tf.app.run``.

    Raises:
      ValueError: if --dataset_dir is empty, or if --checkpoint_path is a
        directory in which no checkpoint can be found.
    """
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # The return value is not needed; the call is kept because the
        # original made it on this graph before building the model.
        slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)

        from dataloader import data_provider
        # Without an explicit preprocessing name, fall back to the model name.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        iterator, _ = data_provider.get_data(
            dataset, FLAGS.batch_size,
            dataset.num_classes, FLAGS.labels_offset, is_training=False,
            preprocessing_name=preprocessing_name,
            use_grayscale=FLAGS.use_grayscale,
            hvd=None, enable_hvd=None)
        images, labels = iterator.get_next()
        # Fixed input shape: every fetched batch must hold exactly
        # FLAGS.batch_size images of 224x224x3.
        images = tf.reshape(images, [FLAGS.batch_size, 224, 224, 3])
        # Labels arrive with a per-class axis (presumably one-hot -- confirm
        # against data_provider); reduce them to class indices.
        labels = tf.argmax(labels, axis=1)
        logits, _ = network_fn(images)

        if FLAGS.quantize:
            contrib_quantize.create_eval_graph()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)
        eval_accuracy, metric_update_op = tf.metrics.accuracy(labels, predictions)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = int(
                math.ceil(dataset.num_samples / float(FLAGS.batch_size)))

        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            # latest_checkpoint returns None when the directory holds no
            # checkpoint; fail here with a clear message instead of trying to
            # restore from the literal string 'None'.
            if checkpoint_path is None:
                raise ValueError(
                    'No checkpoint found in directory %s' % FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        ##### evaluate #####
        tf.logging.info('Evaluating %s' % checkpoint_path)
        saver = tf.train.Saver()
        from time import gmtime, strftime
        # Timestamped directory that receives the graph dump below.
        logdir = "results/%s" % strftime("%m%d%H%M%S_evel", gmtime())
        with tf.Session() as sess:
            sess.run(iterator.initializer)
            sess.run(tf.global_variables_initializer())
            # tf.metrics.accuracy keeps its counters in local variables.
            sess.run(tf.local_variables_initializer())
            saver.restore(sess, f'{checkpoint_path}')
            tf.train.write_graph(sess.graph, logdir, 'graph.pbtxt')

            for step in range(num_batches):
                _metric_update_op = sess.run([metric_update_op])
                print(f'{step}, _metric_update_op: {_metric_update_op}')

            # Read the accumulated accuracy once all batches were consumed.
            acc = sess.run([eval_accuracy])
            print(f'acc: {acc}')
            hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP1, value=f'{acc}')


if __name__ == '__main__':
    tf.app.run()
|
||||
+181
@@ -0,0 +1,181 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Generic evaluation script that evaluates a model using a given dataset."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import math
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import quantize as contrib_quantize
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
from datasets import dataset_factory
|
||||
from nets import nets_factory
|
||||
from benchmark_log import hwlog
|
||||
import os
|
||||
# NOTE(review): this runs at import time and replaces any CUDA_VISIBLE_DEVICES
# value already present in the environment -- confirm that pinning to device
# '4' is intended for every user of this script.
os.environ["CUDA_VISIBLE_DEVICES"] = '4'

slim = contrib_slim

# ---------------------------------------------------------------------------
# Batching
# ---------------------------------------------------------------------------
tf.app.flags.DEFINE_integer(
    'batch_size',
    256,
    'The number of samples in each batch.')

tf.app.flags.DEFINE_integer(
    'max_num_batches',
    None,
    'Max number of batches to evaluate by default use all.')

tf.app.flags.DEFINE_string(
    'master',
    '',
    'The address of the TensorFlow master to use.')

# ---------------------------------------------------------------------------
# Checkpoint / output locations
# ---------------------------------------------------------------------------
# NOTE(review): the defaults below are the literal string 'ckpt_path', not a
# variable -- callers are presumably expected to pass real paths; confirm.
tf.app.flags.DEFINE_string(
    'checkpoint_path',
    'ckpt_path',
    'The directory where the model was written to or an absolute path to a '
    'checkpoint file.')

tf.app.flags.DEFINE_string(
    'eval_dir',
    'ckpt_path',
    'Directory where the results are saved to.')

# ---------------------------------------------------------------------------
# Dataset
# ---------------------------------------------------------------------------
tf.app.flags.DEFINE_integer(
    'num_preprocessing_threads',
    4,
    'The number of threads used to create the batches.')

tf.app.flags.DEFINE_string(
    'dataset_name',
    'imagenet',
    'The name of the dataset to load.')

tf.app.flags.DEFINE_string(
    'dataset_split_name',
    'validation',
    'The name of the train/test split.')

tf.app.flags.DEFINE_string(
    'dataset_dir',
    '/opt/npu/slimImagenet',
    'The directory where the dataset files are stored.')

tf.app.flags.DEFINE_integer(
    'labels_offset',
    0,
    'An offset for the labels in the dataset. This flag is primarily used to '
    'evaluate the VGG and ResNet architectures which do not use a background '
    'class for the ImageNet dataset.')

# ---------------------------------------------------------------------------
# Model / preprocessing
# ---------------------------------------------------------------------------
tf.app.flags.DEFINE_string(
    'model_name',
    'mobilenet_v2',
    'The name of the architecture to evaluate.')

tf.app.flags.DEFINE_string(
    'preprocessing_name',
    None,
    'The name of the preprocessing to use. If left '
    'as `None`, then the model_name flag is used.')

tf.app.flags.DEFINE_float(
    'moving_average_decay',
    None,
    'The decay to use for the moving average.'
    'If left as None, then moving averages are not used.')

tf.app.flags.DEFINE_integer(
    'eval_image_size',
    None,
    'Eval image size')

tf.app.flags.DEFINE_bool(
    'quantize',
    False,
    'whether to use quantized graph or not.')

tf.app.flags.DEFINE_bool(
    'use_grayscale',
    False,
    'Whether to convert input images to grayscale.')

# Parsed flag values, read by main().
FLAGS = tf.app.flags.FLAGS
|
||||
|
||||
|
||||
def main(_):
    """Evaluate a checkpoint on the configured dataset split.

    Builds the network named by --model_name in inference mode, restores the
    checkpoint, streams batches through ``tf.metrics.accuracy`` and reports
    per-step and final accuracy via ``print`` and ``hwlog``.

    Args:
      _: unused positional argument supplied by ``tf.app.run``.

    Raises:
      ValueError: if --dataset_dir is empty, or if --checkpoint_path is a
        directory in which no checkpoint can be found.
    """
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # The return value is not needed; the call is kept because the
        # original made it on this graph before building the model.
        slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)

        from dataloader import data_provider
        # Without an explicit preprocessing name, fall back to the model name.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name

        iterator, _ = data_provider.get_data(
            dataset, FLAGS.batch_size,
            dataset.num_classes, FLAGS.labels_offset, is_training=False,
            preprocessing_name=preprocessing_name,
            use_grayscale=FLAGS.use_grayscale)
        images, labels = iterator.get_next()
        # Fixed input shape: every fetched batch must hold exactly
        # FLAGS.batch_size images of 224x224x3.
        images = tf.reshape(images, [FLAGS.batch_size, 224, 224, 3])
        # Labels arrive with a per-class axis (presumably one-hot -- confirm
        # against data_provider); reduce them to class indices.
        labels = tf.argmax(labels, axis=1)
        logits, _ = network_fn(images)

        if FLAGS.quantize:
            contrib_quantize.create_eval_graph()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)
        eval_accuracy, metric_update_op = tf.metrics.accuracy(labels, predictions)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # One batch fewer than a full pass. NOTE(review): presumably this
            # skips a trailing partial batch that the fixed-size reshape above
            # could not accept -- confirm.
            num_batches = int(
                math.ceil(dataset.num_samples / float(FLAGS.batch_size))) - 1

        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            # latest_checkpoint returns None when the directory holds no
            # checkpoint; fail here with a clear message instead of trying to
            # restore from the literal string 'None'.
            if checkpoint_path is None:
                raise ValueError(
                    'No checkpoint found in directory %s' % FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        print(dataset.num_samples)
        print(FLAGS.batch_size)
        hwlog.remark_print(key=hwlog.GLOBAL_BATCH_SIZE, value=FLAGS.batch_size)

        ##### evaluate #####
        tf.logging.info('Evaluating %s' % checkpoint_path)
        saver = tf.train.Saver()
        from time import gmtime, strftime
        # Timestamped directory that receives the graph dump below.
        logdir = "ckpt/%s" % strftime("%m%d%H%M%S_evel", gmtime())
        with tf.Session() as sess:
            sess.run(iterator.initializer)
            sess.run(tf.global_variables_initializer())
            # tf.metrics.accuracy keeps its counters in local variables.
            sess.run(tf.local_variables_initializer())
            saver.restore(sess, f'{checkpoint_path}')
            tf.train.write_graph(sess.graph, logdir, 'graph.pbtxt')

            for step in range(num_batches):
                _metric_update_op = sess.run([metric_update_op])
                print(f'{step}, _metric_update_op: {_metric_update_op}')
                hwlog.remark_print(key=hwlog.GLOBAL_STEP, value=f'{step}')
                hwlog.remark_print(key=hwlog.EVAL_ACCURACY, value=f'{_metric_update_op}')

            # Read the accumulated accuracy once all batches were consumed.
            acc = sess.run([eval_accuracy])
            print(f'acc: {acc}')
            hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP1, value=f'{acc}')


if __name__ == '__main__':
    tf.app.run()
|
||||
+164
@@ -0,0 +1,164 @@
|
||||
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
r"""Saves out a GraphDef containing the architecture of the model.
|
||||
|
||||
To use it, run something like this, with a model name defined by slim:
|
||||
|
||||
bazel build tensorflow_models/research/slim:export_inference_graph
|
||||
bazel-bin/tensorflow_models/research/slim/export_inference_graph \
|
||||
--model_name=inception_v3 --output_file=/tmp/inception_v3_inf_graph.pb
|
||||
|
||||
If you then want to use the resulting model with your own or pretrained
|
||||
checkpoints as part of a mobile model, you can run freeze_graph to get a graph
|
||||
def with the variables inlined as constants using:
|
||||
|
||||
bazel build tensorflow/python/tools:freeze_graph
|
||||
bazel-bin/tensorflow/python/tools/freeze_graph \
|
||||
--input_graph=/tmp/inception_v3_inf_graph.pb \
|
||||
--input_checkpoint=/tmp/checkpoints/inception_v3.ckpt \
|
||||
--input_binary=true --output_graph=/tmp/frozen_inception_v3.pb \
|
||||
--output_node_names=InceptionV3/Predictions/Reshape_1
|
||||
|
||||
The output node names will vary depending on the model, but you can inspect and
|
||||
estimate them using the summarize_graph tool:
|
||||
|
||||
bazel build tensorflow/tools/graph_transforms:summarize_graph
|
||||
bazel-bin/tensorflow/tools/graph_transforms/summarize_graph \
|
||||
--in_graph=/tmp/inception_v3_inf_graph.pb
|
||||
|
||||
To run the resulting graph in C++, you can look at the label_image sample code:
|
||||
|
||||
bazel build tensorflow/examples/label_image:label_image
|
||||
bazel-bin/tensorflow/examples/label_image/label_image \
|
||||
--image=${HOME}/Pictures/flowers.jpg \
|
||||
--input_layer=input \
|
||||
--output_layer=InceptionV3/Predictions/Reshape_1 \
|
||||
--graph=/tmp/frozen_inception_v3.pb \
|
||||
--labels=/tmp/imagenet_slim_labels.txt \
|
||||
--input_mean=0 \
|
||||
--input_std=255
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
import os
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import quantize as contrib_quantize
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
from tensorflow.python.platform import gfile
|
||||
from datasets import dataset_factory
|
||||
from nets import nets_factory
|
||||
|
||||
|
||||
slim = contrib_slim

# ---------------------------------------------------------------------------
# Model selection
# ---------------------------------------------------------------------------
tf.app.flags.DEFINE_string(
    'model_name',
    'inception_v3',
    'The name of the architecture to save.')

tf.app.flags.DEFINE_boolean(
    'is_training',
    False,
    'Whether to save out a training-focused version of the model.')

# ---------------------------------------------------------------------------
# Input placeholder geometry
# ---------------------------------------------------------------------------
tf.app.flags.DEFINE_integer(
    'image_size',
    None,
    'The image size to use, otherwise use the model default_image_size.')

tf.app.flags.DEFINE_integer(
    'batch_size',
    None,
    'Batch size for the exported model. Defaulted to "None" so batch size can '
    'be specified at model runtime.')

# ---------------------------------------------------------------------------
# Dataset (used to look up the number of classes)
# ---------------------------------------------------------------------------
tf.app.flags.DEFINE_string(
    'dataset_name',
    'imagenet',
    'The name of the dataset to use with the model.')

tf.app.flags.DEFINE_integer(
    'labels_offset',
    0,
    'An offset for the labels in the dataset. This flag is primarily used to '
    'evaluate the VGG and ResNet architectures which do not use a background '
    'class for the ImageNet dataset.')

# ---------------------------------------------------------------------------
# Output
# ---------------------------------------------------------------------------
tf.app.flags.DEFINE_string(
    'output_file',
    '',
    'Where to save the resulting file to.')

tf.app.flags.DEFINE_string(
    'dataset_dir',
    '',
    'Directory to save intermediate dataset files to')

# ---------------------------------------------------------------------------
# Graph variants
# ---------------------------------------------------------------------------
tf.app.flags.DEFINE_bool(
    'quantize',
    False,
    'whether to use quantized graph or not.')

tf.app.flags.DEFINE_bool(
    'is_video_model',
    False,
    'whether to use 5-D inputs for video model.')

tf.app.flags.DEFINE_integer(
    'num_frames',
    None,
    'The number of frames to use. Only used if is_video_model is True.')

tf.app.flags.DEFINE_bool(
    'write_text_graphdef',
    False,
    'Whether to write a text version of graphdef.')

tf.app.flags.DEFINE_bool(
    'use_grayscale',
    False,
    'Whether to convert input images to grayscale.')

# Parsed flag values, read by main().
FLAGS = tf.app.flags.FLAGS
|
||||
|
||||
|
||||
def main(_):
    """Write the GraphDef of the selected architecture to --output_file.

    The network is instantiated on a float32 placeholder named 'input'; the
    resulting graph is saved either as text (--write_text_graphdef) or as a
    serialized binary proto.

    Args:
      _: unused positional argument supplied by ``tf.app.run``.

    Raises:
      ValueError: if --output_file is empty, or if --is_video_model is set
        without --num_frames.
    """
    if not FLAGS.output_file:
        raise ValueError('You must supply the path to save to with --output_file')
    if FLAGS.is_video_model and not FLAGS.num_frames:
        raise ValueError(
            'Number of frames must be specified for video models with --num_frames')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default() as graph:
        # The dataset is only consulted for its class count.
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, 'train', FLAGS.dataset_dir)
        class_count = dataset.num_classes - FLAGS.labels_offset
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=class_count,
            is_training=FLAGS.is_training)

        side = FLAGS.image_size or network_fn.default_image_size
        if FLAGS.use_grayscale:
            channels = 1
        else:
            channels = 3

        # Video models get an extra frames axis right after the batch axis.
        frame_axis = [FLAGS.num_frames] if FLAGS.is_video_model else []
        input_shape = [FLAGS.batch_size] + frame_axis + [side, side, channels]

        model_input = tf.placeholder(
            name='input', dtype=tf.float32, shape=input_shape)
        network_fn(model_input)

        if FLAGS.quantize:
            contrib_quantize.create_eval_graph()

        graph_def = graph.as_graph_def()
        if not FLAGS.write_text_graphdef:
            with gfile.GFile(FLAGS.output_file, 'wb') as out:
                out.write(graph_def.SerializeToString())
        else:
            target_dir = os.path.dirname(FLAGS.output_file)
            target_name = os.path.basename(FLAGS.output_file)
            tf.io.write_graph(graph_def, target_dir, target_name, as_text=True)


if __name__ == '__main__':
    tf.app.run()
|
||||
+44
@@ -0,0 +1,44 @@
|
||||
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
"""Tests for export_inference_graph."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from tensorflow.python.platform import gfile
|
||||
import export_inference_graph
|
||||
|
||||
|
||||
class ExportInferenceGraphTest(tf.test.TestCase):
    """Smoke test for export_inference_graph.main."""

    def testExportInferenceGraph(self):
        """Exporting inception_v3 must create the requested output file."""
        scratch_dir = self.get_temp_dir()
        graph_path = os.path.join(scratch_dir, 'inception_v3.pb')

        # main() reads its configuration from the global flag values.
        flag_values = tf.app.flags.FLAGS
        flag_values.output_file = graph_path
        flag_values.model_name = 'inception_v3'
        flag_values.dataset_dir = scratch_dir

        export_inference_graph.main(None)

        self.assertTrue(gfile.Exists(graph_path))


if __name__ == '__main__':
    tf.test.main()
|
||||
+44
@@ -0,0 +1,44 @@
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
|
||||
def float32_variable_storage_getter(getter, name, shape=None, dtype=None,
                                    initializer=None, regularizer=None,
                                    trainable=True,
                                    *args, **kwargs):
    """Custom variable getter that keeps trainable variables in float32.

    Trainable variables are created through `getter` with float32 storage and
    then cast to the requested `dtype` when that differs from float32.
    Non-trainable variables are created directly with `dtype` and returned
    uncast.

    Args:
      getter: the underlying getter; called as
        ``getter(name, shape, dtype=..., initializer=..., regularizer=...,
        trainable=..., *args, **kwargs)``.
      name, shape, initializer, regularizer, trainable: forwarded to `getter`.
      dtype: the dtype the caller wants to compute in.
      *args, **kwargs: forwarded to `getter` unchanged.

    Returns:
      The variable from `getter`, or a cast of it for trainable variables
      whose requested dtype is not float32.
    """
    if trainable:
        storage_dtype = tf.float32
    else:
        storage_dtype = dtype

    stored = getter(name, shape, dtype=storage_dtype,
                    initializer=initializer, regularizer=regularizer,
                    trainable=trainable,
                    *args, **kwargs)

    needs_cast = trainable and dtype != tf.float32
    return tf.cast(stored, dtype) if needs_cast else stored
|
||||
|
||||
def get_custom_getter(compute_type):
    """Return the float32-storage getter for float16 compute, else None."""
    if compute_type == tf.float16:
        return float32_variable_storage_getter
    return None
|
||||
|
||||
|
||||
|
||||
def float32_variable_storage_getter_1(getter, name, shape=None, dtype=None,
                                      initializer=None, regularizer=None,
                                      trainable=True,
                                      *args, **kwargs):
    """Custom variable getter with float32 storage and forced float16 compute.

    The `dtype` passed by the caller is ignored: it is always replaced by
    float16. Trainable variables are created through `getter` in float32 and
    returned as a float16 cast; non-trainable variables are created directly
    in float16 and returned uncast.

    Args:
      getter: the underlying getter; called as
        ``getter(name, shape, dtype=..., initializer=..., regularizer=...,
        trainable=..., *args, **kwargs)``.
      name, shape, initializer, regularizer, trainable: forwarded to `getter`.
      dtype: accepted for signature compatibility; overridden with float16.
      *args, **kwargs: forwarded to `getter` unchanged.

    Returns:
      The variable from `getter`, cast to float16 when it is trainable.
    """
    compute_dtype = tf.float16
    if trainable:
        storage_dtype = tf.float32
    else:
        storage_dtype = compute_dtype

    stored = getter(name, shape, dtype=storage_dtype,
                    initializer=initializer, regularizer=regularizer,
                    trainable=trainable,
                    *args, **kwargs)

    # The compute dtype is fixed to float16, which never equals float32, so
    # every trainable variable is cast.
    if trainable:
        stored = tf.cast(stored, compute_dtype)
    return stored
|
||||
|
||||
def get_custom_getter_1(compute_type):
    """Return the forced-float16 storage getter for float16 compute, else None."""
    if compute_type == tf.float16:
        return float32_variable_storage_getter_1
    return None
|
||||
|
||||
@@ -0,0 +1,86 @@
|
||||
from __future__ import print_function
|
||||
|
||||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from benchmark_log import hwlog
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
class LogSessionRunHook(tf.train.SessionRunHook):
    """Session hook that times every run call and logs training progress.

    Each ``after_run`` records the wall-clock duration of the preceding run
    call. When the global step is 1 or a multiple of ``display_every``, the
    hook logs step, epoch, throughput, losses, learning rate and training
    accuracy, and then resets its averaging window.
    """

    def __init__(self, config, warmup_steps=5):
        """Store the logging configuration and open the logger.

        Args:
          config: mapping providing 'global_batch_size',
            'iterations_per_loop', 'num_training_samples', 'display_every',
            'log_name' and 'log_dir'.
          warmup_steps: number of leading timing samples that
            ``get_average_speed`` discards.
        """
        self.global_batch_size = config['global_batch_size']
        self.iterations_per_loop = config['iterations_per_loop']
        self.warmup_steps = warmup_steps
        self.iter_times = []
        self.num_records = config['num_training_samples']
        self.display_every = config['display_every']
        # Averaging-window state; also reset in after_create_session. Set
        # here so the attributes exist from construction onwards.
        self.elapsed_secs = 0.
        self.count = 0
        self.logger = get_logger(config['log_name'], config['log_dir'])
        rank0log(self.logger, 'PY' + str(sys.version) + 'TF' + str(tf.__version__))

    def after_create_session(self, session, coord):
        """Log the column header and reset the averaging window."""
        rank0log(self.logger, 'Step Epoch Speed Loss FinLoss LR')
        self.elapsed_secs = 0.
        self.count = 0

    def before_run(self, run_context):
        """Start the timer and request the tensors that after_run reports."""
        self.t0 = time.time()
        return tf.train.SessionRunArgs(
            fetches=[tf.train.get_global_step(), 'loss:0', 'total_loss:0', 'learning_rate:0',
                     'train_accuracy:0'])

    def after_run(self, run_context, run_values):
        """Record the run duration and log progress when a display is due."""
        batch_time = time.time() - self.t0
        self.iter_times.append(batch_time)
        self.elapsed_secs += batch_time
        self.count += 1
        global_step, loss, total_loss, lr, train_accuracy = run_values.results
        if global_step == 1 or global_step % self.display_every == 0:
            # Mean duration of the run calls since the last display.
            dt = self.elapsed_secs / self.count
            # One run call is counted as iterations_per_loop global batches.
            img_per_sec = self.global_batch_size * self.iterations_per_loop / dt
            epoch = global_step * self.global_batch_size / self.num_records
            self.logger.info(f'step:{global_step} epoch:{epoch} ips:{img_per_sec} '
                             f'loss:{loss} total_loss:{total_loss} lr:{lr}, '
                             f'train_accuracy:{train_accuracy}')

            hwlog.remark_print(key=hwlog.GLOBAL_STEP, value=f"{global_step}")
            hwlog.remark_print(key=hwlog.CURRENT_EPOCH, value=f"{epoch}")
            hwlog.remark_print(key=hwlog.TRAIN_ACCURACY, value=f"{train_accuracy}")
            hwlog.remark_print(key=hwlog.FPS, value=f"{img_per_sec}")
            # Start a fresh averaging window.
            self.elapsed_secs = 0.
            self.count = 0

    def get_average_speed(self):
        """Return the mean throughput in images per second after warmup.

        The first ``warmup_steps`` timing samples are discarded. The result
        uses the same images-per-run accounting as ``after_run``
        (``global_batch_size * iterations_per_loop`` per timed run call), so
        both figures agree. Returns 0.0 when no post-warmup sample exists.

        NOTE(review): ``iter_times`` holds one entry per run call, so
        ``warmup_steps`` effectively counts run calls here -- confirm that
        callers pass a value in that unit.
        """
        steady_times = self.iter_times[self.warmup_steps:]
        if not steady_times:
            # Nothing measured yet; avoid a NaN from the mean of an empty list.
            return 0.0
        avg_time = np.mean(steady_times)
        speed = self.global_batch_size * self.iterations_per_loop / avg_time
        return speed
|
||||
|
||||
|
||||
def rank0log(logger, *args, **kwargs):
    """Emit a message through `logger`, or via print when there is none.

    With a truthy `logger`, the positional arguments are converted with
    ``str`` and concatenated without a separator into one INFO record
    (`kwargs` are ignored). Otherwise everything is forwarded to ``print``.
    """
    if not logger:
        print(*args, **kwargs)
        return
    message = ''.join(map(str, args))
    logger.info(message)
|
||||
|
||||
|
||||
def get_logger(log_name, log_dir):
    """Return the INFO-level logger `log_name`, writing to console and file.

    The logger gets a console handler (INFO) and a file handler (DEBUG) for
    ``os.path.join(log_dir, log_name)``, both with a bare '%(message)s'
    format. `log_dir` is created if missing.

    ``logging.getLogger`` hands back the same logger object for the same
    name, so handlers are attached only when an equivalent one is not already
    present; calling this function repeatedly no longer duplicates every
    log line.

    Args:
      log_name: logger name, also used as the log file name.
      log_dir: directory that holds the log file.

    Returns:
      The configured ``logging.Logger``.
    """
    logger = logging.getLogger(log_name)
    logger.setLevel(logging.INFO)  # INFO, ERROR

    # exist_ok tolerates a directory created concurrently by another process.
    os.makedirs(log_dir, exist_ok=True)

    formatter = logging.Formatter('%(message)s')
    log_path = os.path.join(log_dir, log_name)
    # FileHandler stores its target as an absolute path in baseFilename.
    abs_log_path = os.path.abspath(log_path)

    # FileHandler subclasses StreamHandler, hence the exact type check for
    # the console handler.
    has_console = any(type(h) is logging.StreamHandler for h in logger.handlers)
    has_file = any(
        isinstance(h, logging.FileHandler) and h.baseFilename == abs_log_path
        for h in logger.handlers)

    if not has_console:
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    if not has_file:
        fh = logging.FileHandler(log_path)
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(formatter)
        logger.addHandler(fh)

    return logger
|
||||
+1
@@ -0,0 +1 @@
|
||||
|
||||
+148
@@ -0,0 +1,148 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains a model definition for AlexNet.
|
||||
|
||||
This work was first described in:
|
||||
ImageNet Classification with Deep Convolutional Neural Networks
|
||||
Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton
|
||||
|
||||
and later refined in:
|
||||
One weird trick for parallelizing convolutional neural networks
|
||||
Alex Krizhevsky, 2014
|
||||
|
||||
Here we provide the implementation proposed in "One weird trick" and not
|
||||
"ImageNet Classification", as per the paper, the LRN layers have been removed.
|
||||
|
||||
Usage:
|
||||
with slim.arg_scope(alexnet.alexnet_v2_arg_scope()):
|
||||
outputs, end_points = alexnet.alexnet_v2(inputs)
|
||||
|
||||
@@alexnet_v2
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
def trunc_normal(stddev):
    """Return a truncated-normal initializer with mean 0.0 and the given stddev."""
    return tf.compat.v1.truncated_normal_initializer(0.0, stddev)
|
||||
|
||||
|
||||
def alexnet_v2_arg_scope(weight_decay=0.0005):
    """Return the slim arg scope used to build AlexNet v2.

    The scope gives conv2d and fully_connected layers a ReLU activation, a
    constant 0.1 bias initializer and an L2 weight regularizer; conv2d uses
    'SAME' padding and max_pool2d uses 'VALID' padding.

    Args:
      weight_decay: scale passed to ``slim.l2_regularizer``.

    Returns:
      The innermost arg scope, captured while all three scopes are active.
    """
    dense_defaults = slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
        activation_fn=tf.nn.relu,
        biases_initializer=tf.compat.v1.constant_initializer(0.1),
        weights_regularizer=slim.l2_regularizer(weight_decay))
    with dense_defaults:
        with slim.arg_scope([slim.conv2d], padding='SAME'):
            with slim.arg_scope([slim.max_pool2d], padding='VALID') as scope:
                return scope
|
||||
|
||||
|
||||
def alexnet_v2(inputs,
               num_classes=1000,
               is_training=True,
               dropout_keep_prob=0.5,
               spatial_squeeze=True,
               scope='alexnet_v2',
               global_pool=False):
  """Builds AlexNet version 2.

  Described in: http://arxiv.org/pdf/1404.5997v2.pdf
  Parameters from:
  github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
  layers-imagenet-1gpu.cfg

  Note: every fully_connected layer is expressed as a conv2d layer. For
        classification, resize the input to 224x224 or pass
        global_pool=True. For fully convolutional use, pass
        spatial_squeeze=False. The LRN layers have been removed and the
        initializers changed from random_normal_initializer to
        xavier_initializer.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes. If 0 or None, the logits layer
      is skipped and the features feeding it are returned instead.
    is_training: whether the model is being trained (controls dropout).
    dropout_keep_prob: probability that an activation is kept by the dropout
      layers during training.
    spatial_squeeze: whether to squeeze the spatial dimensions of the logits,
      which removes the size-1 dimensions in classification mode.
    scope: optional scope for the variables.
    global_pool: optional boolean. If True, the input to the classification
      layer is average-pooled to 1x1 for any input size. (Not part of the
      original AlexNet.)

  Returns:
    net: the logits (if num_classes is a non-zero integer), or the
      non-dropped-out input to the logits layer (if num_classes is 0 or
      None).
    end_points: a dict of tensors with intermediate activations.
  """
  with tf.compat.v1.variable_scope(scope, 'alexnet_v2', [inputs]) as var_scope:
    collection_key = var_scope.original_name_scope + '_end_points'
    # Outputs of conv2d, fully_connected and max_pool2d are gathered into the
    # collection named by collection_key.
    with slim.arg_scope(
        [slim.conv2d, slim.fully_connected, slim.max_pool2d],
        outputs_collections=[collection_key]):
      net = slim.conv2d(
          inputs, 64, [11, 11], stride=4, padding='VALID', scope='conv1')
      net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
      net = slim.conv2d(net, 192, [5, 5], scope='conv2')
      net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool2')
      net = slim.conv2d(net, 384, [3, 3], scope='conv3')
      net = slim.conv2d(net, 384, [3, 3], scope='conv4')
      net = slim.conv2d(net, 256, [3, 3], scope='conv5')
      net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool5')

      # The classifier head: "fully connected" layers written as convolutions.
      with slim.arg_scope(
          [slim.conv2d],
          weights_initializer=trunc_normal(0.005),
          biases_initializer=tf.compat.v1.constant_initializer(0.1)):
        net = slim.conv2d(
            net, 4096, [5, 5], padding='VALID', scope='fc6')
        net = slim.dropout(
            net, dropout_keep_prob, is_training=is_training, scope='dropout6')
        net = slim.conv2d(net, 4096, [1, 1], scope='fc7')

        # Snapshot the collected outputs into a dict. Anything created after
        # this point has to be added to the dict by hand.
        end_points = slim.utils.convert_collection_to_dict(collection_key)

        if global_pool:
          net = tf.reduce_mean(
              input_tensor=net, axis=[1, 2], keepdims=True,
              name='global_pool')
          end_points['global_pool'] = net

        if num_classes:
          net = slim.dropout(
              net, dropout_keep_prob, is_training=is_training,
              scope='dropout7')
          net = slim.conv2d(
              net,
              num_classes,
              [1, 1],
              activation_fn=None,
              normalizer_fn=None,
              biases_initializer=tf.compat.v1.zeros_initializer(),
              scope='fc8')
          if spatial_squeeze:
            net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
          end_points[var_scope.name + '/fc8'] = net
      return net, end_points
|
||||
|
||||
|
||||
# Attribute attached to the builder function; matches the 224x224 input size
# that the alexnet_v2 docstring recommends for classification mode.
alexnet_v2.default_image_size = 224
|
||||
+181
@@ -0,0 +1,181 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for slim.nets.alexnet."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
from nets import alexnet
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
|
||||
class AlexnetV2Test(tf.test.TestCase):
  """Graph-construction and forward-pass tests for alexnet.alexnet_v2."""

  def testBuild(self):
    """Classification mode yields squeezed [batch, classes] logits."""
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      inputs = tf.random.uniform((batch_size, height, width, 3))
      logits, _ = alexnet.alexnet_v2(inputs, num_classes)
      # assertEqual: the assertEquals alias is deprecated and was removed in
      # Python 3.12.
      self.assertEqual(logits.op.name, 'alexnet_v2/fc8/squeezed')
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])

  def testFullyConvolutional(self):
    """Without spatial squeeze a 300x400 input gives a 4x7 logits map."""
    batch_size = 1
    height, width = 300, 400
    num_classes = 1000
    with self.test_session():
      inputs = tf.random.uniform((batch_size, height, width, 3))
      logits, _ = alexnet.alexnet_v2(inputs, num_classes, spatial_squeeze=False)
      self.assertEqual(logits.op.name, 'alexnet_v2/fc8/BiasAdd')
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, 4, 7, num_classes])

  def testGlobalPool(self):
    """global_pool=True collapses the spatial dimensions to 1x1."""
    batch_size = 1
    height, width = 256, 256
    num_classes = 1000
    with self.test_session():
      inputs = tf.random.uniform((batch_size, height, width, 3))
      logits, _ = alexnet.alexnet_v2(inputs, num_classes, spatial_squeeze=False,
                                     global_pool=True)
      self.assertEqual(logits.op.name, 'alexnet_v2/fc8/BiasAdd')
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, 1, 1, num_classes])

  def testEndPoints(self):
    """end_points contains exactly the expected layer names."""
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      inputs = tf.random.uniform((batch_size, height, width, 3))
      _, end_points = alexnet.alexnet_v2(inputs, num_classes)
      expected_names = ['alexnet_v2/conv1',
                        'alexnet_v2/pool1',
                        'alexnet_v2/conv2',
                        'alexnet_v2/pool2',
                        'alexnet_v2/conv3',
                        'alexnet_v2/conv4',
                        'alexnet_v2/conv5',
                        'alexnet_v2/pool5',
                        'alexnet_v2/fc6',
                        'alexnet_v2/fc7',
                        'alexnet_v2/fc8'
                       ]
      self.assertSetEqual(set(end_points.keys()), set(expected_names))

  def testNoClasses(self):
    """num_classes=None omits fc8 and returns the fc7 features."""
    batch_size = 5
    height, width = 224, 224
    num_classes = None
    with self.test_session():
      inputs = tf.random.uniform((batch_size, height, width, 3))
      net, end_points = alexnet.alexnet_v2(inputs, num_classes)
      expected_names = ['alexnet_v2/conv1',
                        'alexnet_v2/pool1',
                        'alexnet_v2/conv2',
                        'alexnet_v2/pool2',
                        'alexnet_v2/conv3',
                        'alexnet_v2/conv4',
                        'alexnet_v2/conv5',
                        'alexnet_v2/pool5',
                        'alexnet_v2/fc6',
                        'alexnet_v2/fc7'
                       ]
      self.assertSetEqual(set(end_points.keys()), set(expected_names))
      self.assertTrue(net.op.name.startswith('alexnet_v2/fc7'))
      self.assertListEqual(net.get_shape().as_list(),
                           [batch_size, 1, 1, 4096])

  def testModelVariables(self):
    """The model variables are the weights and biases of each layer."""
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      inputs = tf.random.uniform((batch_size, height, width, 3))
      alexnet.alexnet_v2(inputs, num_classes)
      expected_names = ['alexnet_v2/conv1/weights',
                        'alexnet_v2/conv1/biases',
                        'alexnet_v2/conv2/weights',
                        'alexnet_v2/conv2/biases',
                        'alexnet_v2/conv3/weights',
                        'alexnet_v2/conv3/biases',
                        'alexnet_v2/conv4/weights',
                        'alexnet_v2/conv4/biases',
                        'alexnet_v2/conv5/weights',
                        'alexnet_v2/conv5/biases',
                        'alexnet_v2/fc6/weights',
                        'alexnet_v2/fc6/biases',
                        'alexnet_v2/fc7/weights',
                        'alexnet_v2/fc7/biases',
                        'alexnet_v2/fc8/weights',
                        'alexnet_v2/fc8/biases',
                       ]
      model_variables = [v.op.name for v in slim.get_model_variables()]
      self.assertSetEqual(set(model_variables), set(expected_names))

  def testEvaluation(self):
    """In eval mode logits and argmax predictions have the expected shapes."""
    batch_size = 2
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      eval_inputs = tf.random.uniform((batch_size, height, width, 3))
      logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False)
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])
      predictions = tf.argmax(input=logits, axis=1)
      self.assertListEqual(predictions.get_shape().as_list(), [batch_size])

  def testTrainEvalWithReuse(self):
    """Variables built for training are reused by a larger eval graph."""
    train_batch_size = 2
    eval_batch_size = 1
    train_height, train_width = 224, 224
    eval_height, eval_width = 300, 400
    num_classes = 1000
    with self.test_session():
      train_inputs = tf.random.uniform(
          (train_batch_size, train_height, train_width, 3))
      logits, _ = alexnet.alexnet_v2(train_inputs)
      self.assertListEqual(logits.get_shape().as_list(),
                           [train_batch_size, num_classes])
      # From here on, variable creation reuses the ones built above.
      tf.compat.v1.get_variable_scope().reuse_variables()
      eval_inputs = tf.random.uniform(
          (eval_batch_size, eval_height, eval_width, 3))
      logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False,
                                     spatial_squeeze=False)
      self.assertListEqual(logits.get_shape().as_list(),
                           [eval_batch_size, 4, 7, num_classes])
      logits = tf.reduce_mean(input_tensor=logits, axis=[1, 2])
      predictions = tf.argmax(input=logits, axis=1)
      self.assertEqual(predictions.get_shape().as_list(), [eval_batch_size])

  def testForward(self):
    """A real forward pass produces at least one non-zero logit."""
    batch_size = 1
    height, width = 224, 224
    with self.test_session() as sess:
      inputs = tf.random.uniform((batch_size, height, width, 3))
      logits, _ = alexnet.alexnet_v2(inputs)
      sess.run(tf.compat.v1.global_variables_initializer())
      output = sess.run(logits)
      self.assertTrue(output.any())
|
||||
|
||||
# Run the tests in this module when it is executed as a script.
if __name__ == '__main__':
  tf.test.main()
|
||||
+123
@@ -0,0 +1,123 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains a variant of the CIFAR-10 model definition."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
# pylint: disable=g-long-lambda
|
||||
# Shorthand for a truncated-normal initializer with the given standard
# deviation (all other initializer arguments are left at their defaults).
trunc_normal = lambda stddev: tf.compat.v1.truncated_normal_initializer(
    stddev=stddev)
|
||||
|
||||
|
||||
def cifarnet(images, num_classes=10, is_training=False,
             dropout_keep_prob=0.5,
             prediction_fn=slim.softmax,
             scope='CifarNet'):
  """Creates a variant of the CifarNet model.

  The returned logits are unnormalised and fall in the interval
  (-infinity, infinity). To obtain a probability distribution over the
  classes, pass them through the softmax function:

        logits = cifarnet.cifarnet(images, is_training=False)
        probabilities = tf.nn.softmax(logits)
        predictions = tf.argmax(logits, 1)

  Args:
    images: A batch of `Tensors` of size [batch_size, height, width, channels].
    num_classes: the number of classes in the dataset. If 0 or None, the logits
      layer is omitted and the input features to the logits layer are returned
      instead.
    is_training: specifies whether or not we're currently training the model.
      This determines the behaviour of the dropout layer.
    dropout_keep_prob: the fraction of activation values that are retained.
    prediction_fn: a function to get predictions out of logits.
    scope: Optional variable_scope.

  Returns:
    net: a 2D Tensor with the logits (pre-softmax activations) if num_classes
      is a non-zero integer, or the input to the logits layer if num_classes
      is 0 or None.
    end_points: a dictionary from components of the network to the corresponding
      activation.
  """
  def _local_response_norm(tensor, name):
    # Both normalisation layers share the same hyper-parameters.
    return tf.nn.lrn(
        tensor, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name=name)

  end_points = {}

  with tf.compat.v1.variable_scope(scope, 'CifarNet', [images]):
    end_points['conv1'] = net = slim.conv2d(
        images, 64, [5, 5], scope='conv1')
    end_points['pool1'] = net = slim.max_pool2d(
        net, [2, 2], stride=2, scope='pool1')
    net = _local_response_norm(net, 'norm1')

    end_points['conv2'] = net = slim.conv2d(net, 64, [5, 5], scope='conv2')
    net = _local_response_norm(net, 'norm2')
    end_points['pool2'] = net = slim.max_pool2d(
        net, [2, 2], stride=2, scope='pool2')

    end_points['Flatten'] = net = slim.flatten(net)
    end_points['fc3'] = net = slim.fully_connected(net, 384, scope='fc3')
    net = slim.dropout(
        net, dropout_keep_prob, is_training=is_training, scope='dropout3')
    end_points['fc4'] = net = slim.fully_connected(net, 192, scope='fc4')

    # Feature-extractor mode: stop before the logits layer.
    if not num_classes:
      return net, end_points

    logits = slim.fully_connected(
        net,
        num_classes,
        biases_initializer=tf.compat.v1.zeros_initializer(),
        weights_initializer=trunc_normal(1 / 192.0),
        weights_regularizer=None,
        activation_fn=None,
        scope='logits')
    end_points['Logits'] = logits
    end_points['Predictions'] = prediction_fn(logits, scope='Predictions')

  return logits, end_points
|
||||
# Attribute attached to the builder function recording its default input
# image size.
cifarnet.default_image_size = 32
|
||||
|
||||
|
||||
def cifarnet_arg_scope(weight_decay=0.004):
  """Defines the default cifarnet argument scope.

  Args:
    weight_decay: The weight decay to use for regularizing the
      fully_connected layers.

  Returns:
    An `arg_scope` to use for the cifarnet model.
  """
  conv_defaults = dict(
      weights_initializer=tf.compat.v1.truncated_normal_initializer(
          stddev=5e-2),
      activation_fn=tf.nn.relu)
  with slim.arg_scope([slim.conv2d], **conv_defaults):
    fc_defaults = dict(
        biases_initializer=tf.compat.v1.constant_initializer(0.1),
        weights_initializer=trunc_normal(0.04),
        weights_regularizer=slim.l2_regularizer(weight_decay),
        activation_fn=tf.nn.relu)
    with slim.arg_scope([slim.fully_connected], **fc_defaults) as scope:
      return scope
|
||||
+280
@@ -0,0 +1,280 @@
|
||||
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Defines the CycleGAN generator and discriminator networks."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
from six.moves import xrange # pylint: disable=redefined-builtin
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import framework as contrib_framework
|
||||
from tensorflow.contrib import layers as contrib_layers
|
||||
from tensorflow.contrib import util as contrib_util
|
||||
|
||||
layers = contrib_layers
|
||||
|
||||
|
||||
def cyclegan_arg_scope(instance_norm_center=True,
                       instance_norm_scale=True,
                       instance_norm_epsilon=0.001,
                       weights_init_stddev=0.02,
                       weight_decay=0.0):
  """Returns a default argument scope for all generators and discriminators.

  Args:
    instance_norm_center: Whether instance normalization applies centering.
    instance_norm_scale: Whether instance normalization applies scaling.
    instance_norm_epsilon: Small float added to the variance in the instance
      normalization to avoid dividing by zero.
    weights_init_stddev: Standard deviation of the random values to initialize
      the convolution kernels with.
    weight_decay: Magnitude of weight decay applied to all convolution kernel
      variables of the generator. A falsy or non-positive value disables the
      regularizer.

  Returns:
    An arg-scope.
  """
  # L2 regularisation is attached only for a strictly positive weight decay.
  use_decay = weight_decay and weight_decay > 0.0
  regularizer = layers.l2_regularizer(weight_decay) if use_decay else None

  norm_params = dict(
      center=instance_norm_center,
      scale=instance_norm_scale,
      epsilon=instance_norm_epsilon)

  with contrib_framework.arg_scope(
      [layers.conv2d],
      normalizer_fn=layers.instance_norm,
      normalizer_params=norm_params,
      weights_initializer=tf.compat.v1.random_normal_initializer(
          0, weights_init_stddev),
      weights_regularizer=regularizer) as scope:
    return scope
|
||||
|
||||
|
||||
def cyclegan_upsample(net, num_outputs, stride, method='conv2d_transpose',
                      pad_mode='REFLECT', align_corners=False):
  """Upsamples the given inputs.

  Args:
    net: A Tensor of size [batch_size, height, width, filters].
    num_outputs: The number of output filters.
    stride: A list of 2 scalars or a 1x2 Tensor indicating the scale,
      relative to the inputs, of the output dimensions. For example, if the
      stride is [2, 3], then the output height and width will be twice and
      three times the input size.
    method: The upsampling method: 'nn_upsample_conv', 'bilinear_upsample_conv',
      or 'conv2d_transpose'.
    pad_mode: mode for tf.pad, one of "CONSTANT", "REFLECT", or "SYMMETRIC".
    align_corners: option for method, 'bilinear_upsample_conv'. If true, the
      centers of the 4 corner pixels of the input and output tensors are
      aligned, preserving the values at the corner pixels.

  Returns:
    A Tensor which was upsampled using the specified method.

  Raises:
    ValueError: if `method` is not recognized.
  """
  with tf.compat.v1.variable_scope('upconv'):
    dynamic_shape = tf.shape(input=net)
    in_height = dynamic_shape[1]
    in_width = dynamic_shape[2]

    # Padding of 1 on each side of the spatial axes (1, 2 = h, w) so that a
    # 3x3 'valid' convolution keeps the spatial size of its input.
    one_pixel_border = np.array([[0, 0], [1, 1], [1, 1], [0, 0]])

    if method == 'conv2d_transpose':
      # This corrects 1 pixel offset for images with even width and height.
      # conv2d is left aligned and conv2d_transpose is right aligned for even
      # sized images (while doing 'SAME' padding).
      # Note: This doesn't reflect actual model in paper.
      upsampled = layers.conv2d_transpose(
          net, num_outputs, kernel_size=[3, 3], stride=stride, padding='valid')
      return upsampled[:, 1:, 1:, :]

    # The remaining methods resize first, then pad and apply a 3x3 conv.
    if method == 'nn_upsample_conv':
      resized = tf.image.resize(
          net, [stride[0] * in_height, stride[1] * in_width],
          method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    elif method == 'bilinear_upsample_conv':
      resized = tf.compat.v1.image.resize_bilinear(
          net, [stride[0] * in_height, stride[1] * in_width],
          align_corners=align_corners)
    else:
      raise ValueError('Unknown method: [%s]' % method)

    padded = tf.pad(tensor=resized, paddings=one_pixel_border, mode=pad_mode)
    return layers.conv2d(
        padded, num_outputs, kernel_size=[3, 3], padding='valid')
|
||||
|
||||
|
||||
def _dynamic_or_static_shape(tensor):
  """Returns the shape of `tensor`, preferring a constant value.

  Args:
    tensor: the tensor whose shape is wanted.

  Returns:
    The result of `contrib_util.constant_value` on the shape tensor when that
    is not None, otherwise the shape tensor produced by `tf.shape`.
  """
  dynamic_shape = tf.shape(input=tensor)
  known_shape = contrib_util.constant_value(dynamic_shape)
  if known_shape is None:
    return dynamic_shape
  return known_shape
|
||||
|
||||
|
||||
def cyclegan_generator_resnet(images,
                              arg_scope_fn=cyclegan_arg_scope,
                              num_resnet_blocks=6,
                              num_filters=64,
                              upsample_fn=cyclegan_upsample,
                              kernel_size=3,
                              tanh_linear_slope=0.0,
                              is_training=False):
  """Defines the cyclegan resnet network architecture.

  As closely as possible following
  https://github.com/junyanz/CycleGAN/blob/master/models/architectures.lua#L232

  FYI: This network requires input height and width to be divisible by 4 in
  order to generate an output with shape equal to input shape. A ValueError
  is raised if the input dimensions are known at graph construction time and
  violate this, but there's no protection if they are unknown at graph
  construction time (you'll see an error).

  Args:
    images: Input image tensor of shape [batch_size, h, w, 3].
    arg_scope_fn: Function to create the global arg_scope for the network.
    num_resnet_blocks: Number of ResNet blocks in the middle of the generator.
    num_filters: Number of filters of the first hidden layer.
    upsample_fn: Upsampling function for the decoder part of the generator.
    kernel_size: Size w or list/tuple [h, w] of the filter kernels for all inner
      layers.
    tanh_linear_slope: Slope of the linear function to add to the tanh over the
      logits.
    is_training: Whether the network is created in training mode or inference
      only mode. Not actually needed, just for compliance with other generator
      network functions.

  Returns:
    A `Tensor` representing the model output and a dictionary of model end
    points.

  Raises:
    ValueError: If the input height or width is known at graph construction time
      and not a multiple of 4.
  """
  # The network has neither dropout nor batch norm, so the flag is unused.
  del is_training

  def _reflect_pad(tensor, pads):
    # Reflection padding, used before every 'VALID' convolution below.
    return tf.pad(tensor=tensor, paddings=pads, mode='REFLECT')

  end_points = {}

  static_shape = images.shape.as_list()
  height, width = static_shape[1], static_shape[2]
  if height and height % 4 != 0:
    raise ValueError('The input height must be a multiple of 4.')
  if width and width % 4 != 0:
    raise ValueError('The input width must be a multiple of 4.')
  num_outputs = static_shape[3]

  if not isinstance(kernel_size, (list, tuple)):
    kernel_size = [kernel_size, kernel_size]

  # Padding for the inner kernels: for even kernel sizes the extra pixel goes
  # to the bottom / right.
  k_height = kernel_size[0]
  k_width = kernel_size[1]
  kernel_pad = np.array(
      [[0, 0],
       [(k_height - 1) // 2, k_height // 2],
       [(k_width - 1) // 2, k_width // 2],
       [0, 0]],
      dtype=np.int32)
  # Padding of 3 on each spatial side for the 7x7 input and output stages.
  pad_for_7x7 = np.array([[0, 0], [3, 3], [3, 3], [0, 0]])

  with contrib_framework.arg_scope(arg_scope_fn()):

    ###########
    # Encoder #
    ###########
    with tf.compat.v1.variable_scope('input'):
      # 7x7 input stage
      net = _reflect_pad(images, pad_for_7x7)
      net = layers.conv2d(net, num_filters, kernel_size=[7, 7], padding='VALID')
      end_points['encoder_0'] = net

    with tf.compat.v1.variable_scope('encoder'):
      with contrib_framework.arg_scope(
          [layers.conv2d],
          kernel_size=kernel_size,
          stride=2,
          activation_fn=tf.nn.relu,
          padding='VALID'):
        net = _reflect_pad(net, kernel_pad)
        net = layers.conv2d(net, num_filters * 2)
        end_points['encoder_1'] = net

        net = _reflect_pad(net, kernel_pad)
        net = layers.conv2d(net, num_filters * 4)
        end_points['encoder_2'] = net

    ###################
    # Residual Blocks #
    ###################
    with tf.compat.v1.variable_scope('residual_blocks'):
      with contrib_framework.arg_scope(
          [layers.conv2d],
          kernel_size=kernel_size,
          stride=1,
          activation_fn=tf.nn.relu,
          padding='VALID'):
        for block_id in xrange(num_resnet_blocks):
          with tf.compat.v1.variable_scope('block_{}'.format(block_id)):
            residual = _reflect_pad(net, kernel_pad)
            residual = layers.conv2d(residual, num_filters * 4)
            residual = _reflect_pad(residual, kernel_pad)
            residual = layers.conv2d(
                residual, num_filters * 4, activation_fn=None)
            net += residual

            end_points['resnet_block_%d' % block_id] = net

    ###########
    # Decoder #
    ###########
    with tf.compat.v1.variable_scope('decoder'):
      with contrib_framework.arg_scope(
          [layers.conv2d],
          kernel_size=kernel_size,
          stride=1,
          activation_fn=tf.nn.relu):

        with tf.compat.v1.variable_scope('decoder1'):
          net = upsample_fn(net, num_outputs=num_filters * 2, stride=[2, 2])
        end_points['decoder1'] = net

        with tf.compat.v1.variable_scope('decoder2'):
          net = upsample_fn(net, num_outputs=num_filters, stride=[2, 2])
        end_points['decoder2'] = net

    with tf.compat.v1.variable_scope('output'):
      net = _reflect_pad(net, pad_for_7x7)
      logits = layers.conv2d(
          net,
          num_outputs,
          [7, 7],
          activation_fn=None,
          normalizer_fn=None,
          padding='valid')
      # Reshape the logits to the shape of the input images.
      logits = tf.reshape(logits, _dynamic_or_static_shape(images))

      end_points['logits'] = logits
      end_points['predictions'] = tf.tanh(logits) + logits * tanh_linear_slope

  return end_points['predictions'], end_points
|
||||
+110
@@ -0,0 +1,110 @@
|
||||
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for tensorflow.contrib.slim.nets.cyclegan."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import cyclegan
|
||||
|
||||
|
||||
# TODO(joelshor): Add a test to check generator endpoints.
|
||||
class CycleganTest(tf.test.TestCase):
|
||||
|
||||
def test_generator_inference(self):
|
||||
"""Check one inference step."""
|
||||
img_batch = tf.zeros([2, 32, 32, 3])
|
||||
model_output, _ = cyclegan.cyclegan_generator_resnet(img_batch)
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.compat.v1.global_variables_initializer())
|
||||
sess.run(model_output)
|
||||
|
||||
def _test_generator_graph_helper(self, shape):
|
||||
"""Check that generator can take small and non-square inputs."""
|
||||
output_imgs, _ = cyclegan.cyclegan_generator_resnet(tf.ones(shape))
|
||||
self.assertAllEqual(shape, output_imgs.shape.as_list())
|
||||
|
||||
def test_generator_graph_small(self):
|
||||
self._test_generator_graph_helper([4, 32, 32, 3])
|
||||
|
||||
def test_generator_graph_medium(self):
|
||||
self._test_generator_graph_helper([3, 128, 128, 3])
|
||||
|
||||
def test_generator_graph_nonsquare(self):
|
||||
self._test_generator_graph_helper([2, 80, 400, 3])
|
||||
|
||||
def test_generator_unknown_batch_dim(self):
|
||||
"""Check that generator can take unknown batch dimension inputs."""
|
||||
img = tf.compat.v1.placeholder(tf.float32, shape=[None, 32, None, 3])
|
||||
output_imgs, _ = cyclegan.cyclegan_generator_resnet(img)
|
||||
|
||||
self.assertAllEqual([None, 32, None, 3], output_imgs.shape.as_list())
|
||||
|
||||
def _input_and_output_same_shape_helper(self, kernel_size):
|
||||
img_batch = tf.compat.v1.placeholder(tf.float32, shape=[None, 32, 32, 3])
|
||||
output_img_batch, _ = cyclegan.cyclegan_generator_resnet(
|
||||
img_batch, kernel_size=kernel_size)
|
||||
|
||||
self.assertAllEqual(img_batch.shape.as_list(),
|
||||
output_img_batch.shape.as_list())
|
||||
|
||||
def input_and_output_same_shape_kernel3(self):
|
||||
self._input_and_output_same_shape_helper(3)
|
||||
|
||||
def input_and_output_same_shape_kernel4(self):
|
||||
self._input_and_output_same_shape_helper(4)
|
||||
|
||||
def input_and_output_same_shape_kernel5(self):
|
||||
self._input_and_output_same_shape_helper(5)
|
||||
|
||||
def input_and_output_same_shape_kernel6(self):
|
||||
self._input_and_output_same_shape_helper(6)
|
||||
|
||||
def _error_if_height_not_multiple_of_four_helper(self, height):
|
||||
self.assertRaisesRegexp(
|
||||
ValueError, 'The input height must be a multiple of 4.',
|
||||
cyclegan.cyclegan_generator_resnet,
|
||||
tf.compat.v1.placeholder(tf.float32, shape=[None, height, 32, 3]))
|
||||
|
||||
def test_error_if_height_not_multiple_of_four_height29(self):
|
||||
self._error_if_height_not_multiple_of_four_helper(29)
|
||||
|
||||
def test_error_if_height_not_multiple_of_four_height30(self):
|
||||
self._error_if_height_not_multiple_of_four_helper(30)
|
||||
|
||||
def test_error_if_height_not_multiple_of_four_height31(self):
|
||||
self._error_if_height_not_multiple_of_four_helper(31)
|
||||
|
||||
def _error_if_width_not_multiple_of_four_helper(self, width):
  """Asserts that building the generator with `width` raises ValueError.

  Args:
    width: Static width of the placeholder image handed to
      `cyclegan.cyclegan_generator_resnet`; the height is fixed at 32.
  """
  bad_input = tf.compat.v1.placeholder(
      tf.float32, shape=[None, 32, width, 3])
  with self.assertRaisesRegexp(
      ValueError, 'The input width must be a multiple of 4.'):
    cyclegan.cyclegan_generator_resnet(bad_input)
|
||||
|
||||
def test_error_if_width_not_multiple_of_four_width29(self):
  """An input width of 29 must be rejected."""
  self._error_if_width_not_multiple_of_four_helper(width=29)
|
||||
|
||||
def test_error_if_width_not_multiple_of_four_width30(self):
  """An input width of 30 must be rejected."""
  self._error_if_width_not_multiple_of_four_helper(width=30)
|
||||
|
||||
def test_error_if_width_not_multiple_of_four_width31(self):
  """An input width of 31 must be rejected."""
  self._error_if_width_not_multiple_of_four_helper(width=31)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
||||
+205
@@ -0,0 +1,205 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""DCGAN generator and discriminator from https://arxiv.org/abs/1511.06434."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from math import log
|
||||
|
||||
from six.moves import xrange # pylint: disable=redefined-builtin
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
|
||||
def _validate_image_inputs(inputs):
|
||||
inputs.get_shape().assert_has_rank(4)
|
||||
inputs.get_shape()[1:3].assert_is_fully_defined()
|
||||
if inputs.get_shape()[1] != inputs.get_shape()[2]:
|
||||
raise ValueError('Input tensor does not have equal width and height: ',
|
||||
inputs.get_shape()[1:3])
|
||||
width = inputs.get_shape().as_list()[1]
|
||||
if log(width, 2) != int(log(width, 2)):
|
||||
raise ValueError('Input tensor `width` is not a power of 2: ', width)
|
||||
|
||||
|
||||
# TODO(joelshor): Use fused batch norm by default. Investigate why some GAN
|
||||
# setups need the gradient of gradient FusedBatchNormGrad.
|
||||
def discriminator(inputs,
                  depth=64,
                  is_training=True,
                  reuse=None,
                  scope='Discriminator',
                  fused_batch_norm=False):
  """Discriminator network for DCGAN.

  Applies log2(spatial size) stride-2 4x4 convolutions with leaky ReLU
  (`conv1` ... `convN`, doubling the channel count each layer; batch norm is
  skipped on the first layer), then a 1x1 convolution producing the logits.

  Args:
    inputs: A tensor of size [batch_size, height, width, channels]. Must be
      floating point.
    depth: Number of channels in first convolution layer.
    is_training: Whether the network is for training or not.
    reuse: Whether or not the network variables should be reused. `scope`
      must be given to be reused.
    scope: Optional variable_scope.
    fused_batch_norm: If `True`, use a faster, fused implementation of
      batch norm.

  Returns:
    logits: The pre-softmax activations, a tensor of size [batch_size, 1]
    end_points: a dictionary from components of the network to their activation.

  Raises:
    ValueError: If the input image shape is not 4-dimensional, if the spatial
      dimensions aren't defined at graph construction time, if the spatial
      dimensions aren't square, or if the spatial dimensions aren't a power of
      two.
  """
  normalizer_fn = slim.batch_norm
  normalizer_fn_args = {
      'is_training': is_training,
      'zero_debias_moving_mean': True,
      'fused': fused_batch_norm,
  }

  _validate_image_inputs(inputs)
  inp_shape = inputs.get_shape().as_list()[1]
  # `inp_shape` was validated as a power of two, so `bit_length() - 1` is its
  # exact base-2 logarithm; truncating a floating-point log could undercount.
  num_layers = inp_shape.bit_length() - 1

  end_points = {}
  with tf.compat.v1.variable_scope(scope, values=[inputs], reuse=reuse):
    with slim.arg_scope([normalizer_fn], **normalizer_fn_args):
      with slim.arg_scope([slim.conv2d],
                          stride=2,
                          kernel_size=4,
                          activation_fn=tf.nn.leaky_relu):
        net = inputs
        for i in range(num_layers):
          layer_scope = 'conv%i' % (i + 1)
          current_depth = depth * 2**i
          # The first layer is left un-normalized.
          normalizer_fn_ = None if i == 0 else normalizer_fn
          net = slim.conv2d(
              net,
              current_depth,
              normalizer_fn=normalizer_fn_,
              scope=layer_scope)
          end_points[layer_scope] = net

        logits = slim.conv2d(net, 1, kernel_size=1, stride=1, padding='VALID',
                             normalizer_fn=None, activation_fn=None)
        logits = tf.reshape(logits, [-1, 1])
        end_points['logits'] = logits

        return logits, end_points
|
||||
|
||||
|
||||
# TODO(joelshor): Use fused batch norm by default. Investigate why some GAN
|
||||
# setups need the gradient of gradient FusedBatchNormGrad.
|
||||
def generator(inputs,
              depth=64,
              final_size=32,
              num_outputs=3,
              is_training=True,
              reuse=None,
              scope='Generator',
              fused_batch_norm=False):
  """Generator network for DCGAN.

  Expands the input vector to a 1x1 feature map, upsamples it with transposed
  convolutions (`deconv1` ... `deconvN`) until it is `final_size` wide, then
  maps it to `num_outputs` channels with a 1x1 convolution (`logits`).

  Args:
    inputs: A tensor with any size N. [batch_size, N]
    depth: Number of channels in last deconvolution layer.
    final_size: The height and width of the final output. Must be a power of
      2 and at least 8.
    num_outputs: Number of output features. For images, this is the number of
      channels.
    is_training: whether is training or not.
    reuse: Whether or not the network and its variables should be reused.
      `scope` must be given to be reused.
    scope: Optional variable_scope.
    fused_batch_norm: If `True`, use a faster, fused implementation of
      batch norm.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, final_size, final_size, num_outputs]
    end_points: a dictionary from components of the network to their activation.

  Raises:
    ValueError: If `inputs` is not 2-dimensional.
    ValueError: If `final_size` isn't a power of 2 or is less than 8.
  """
  normalizer_fn = slim.batch_norm
  normalizer_fn_args = {
      'is_training': is_training,
      'zero_debias_moving_mean': True,
      'fused': fused_batch_norm,
  }

  inputs.get_shape().assert_has_rank(2)
  # Integer bit test rather than comparing floating-point logarithms, which
  # is inexact for some exact powers of two. Integral floats such as 32.0
  # remain accepted.
  size = int(final_size)
  if size != final_size or size <= 0 or size & (size - 1):
    raise ValueError('`final_size` (%i) must be a power of 2.' % final_size)
  if final_size < 8:
    # NOTE: 8 itself is accepted; the message text is kept as-is because
    # callers/tests match on it.
    raise ValueError('`final_size` (%i) must be greater than 8.' % final_size)

  end_points = {}
  # Exact log2(size) - 1 for a power of two.
  num_layers = size.bit_length() - 2
  with tf.compat.v1.variable_scope(scope, values=[inputs], reuse=reuse):
    with slim.arg_scope([normalizer_fn], **normalizer_fn_args):
      with slim.arg_scope([slim.conv2d_transpose],
                          normalizer_fn=normalizer_fn,
                          stride=2,
                          kernel_size=4):
        net = tf.expand_dims(tf.expand_dims(inputs, 1), 1)

        # First upscaling is different because it takes the input vector.
        current_depth = depth * 2 ** (num_layers - 1)
        layer_scope = 'deconv1'
        net = slim.conv2d_transpose(
            net, current_depth, stride=1, padding='VALID', scope=layer_scope)
        end_points[layer_scope] = net

        for i in range(2, num_layers):
          layer_scope = 'deconv%i' % (i)
          current_depth = depth * 2 ** (num_layers - i)
          net = slim.conv2d_transpose(net, current_depth, scope=layer_scope)
          end_points[layer_scope] = net

        # Last layer has different normalizer and activation.
        layer_scope = 'deconv%i' % (num_layers)
        net = slim.conv2d_transpose(
            net, depth, normalizer_fn=None, activation_fn=None,
            scope=layer_scope)
        end_points[layer_scope] = net

        # Convert to proper channels.
        layer_scope = 'logits'
        logits = slim.conv2d(
            net,
            num_outputs,
            normalizer_fn=None,
            activation_fn=None,
            kernel_size=1,
            stride=1,
            padding='VALID',
            scope=layer_scope)
        end_points[layer_scope] = logits

        logits.get_shape().assert_has_rank(4)
        logits.get_shape().assert_is_compatible_with(
            [None, final_size, final_size, num_outputs])

        return logits, end_points
|
||||
+121
@@ -0,0 +1,121 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for dcgan."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from six.moves import xrange # pylint: disable=redefined-builtin
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import dcgan
|
||||
|
||||
|
||||
class DCGANTest(tf.test.TestCase):
  """Smoke and graph-construction tests for `dcgan`."""

  def test_generator_run(self):
    """The default generator can be built and evaluated in a session."""
    tf.compat.v1.set_random_seed(1234)
    latent = tf.random.normal([100, 64])
    generated, _ = dcgan.generator(latent)
    with self.test_session() as sess:
      sess.run(tf.compat.v1.global_variables_initializer())
      generated.eval()

  def test_generator_graph(self):
    """Output shape, endpoint names and layer depths for several sizes."""
    tf.compat.v1.set_random_seed(1234)
    # Check graph construction for a number of image size/depths and batch
    # sizes.
    for log_size, batch_size in zip(xrange(3, 7), xrange(3, 8)):
      tf.compat.v1.reset_default_graph()
      final_size = 2 ** log_size
      latent = tf.random.normal([batch_size, 64])
      generated, end_points = dcgan.generator(
          latent, depth=32, final_size=final_size)

      expected_shape = [batch_size, final_size, final_size, 3]
      self.assertAllEqual(expected_shape, generated.shape.as_list())

      deconv_names = ['deconv%i' % k for k in xrange(1, log_size)]
      self.assertSetEqual(
          set(deconv_names + ['logits']), set(end_points.keys()))

      # Check layer depths.
      for k in range(1, log_size):
        channels = end_points['deconv%i' % k].get_shape().as_list()[-1]
        self.assertEqual(32 * 2**(log_size-k-1), channels)

  def test_generator_invalid_input(self):
    """Rank-3 inputs and bad `final_size` values raise ValueError."""
    rank3_input = tf.zeros([5, 32, 32])
    with self.assertRaises(ValueError):
      dcgan.generator(rank3_input)

    rank2_input = tf.zeros([3, 2])
    with self.assertRaisesRegexp(ValueError, 'must be a power of 2'):
      dcgan.generator(rank2_input, final_size=30)

    with self.assertRaisesRegexp(ValueError, 'must be greater than 8'):
      dcgan.generator(rank2_input, final_size=4)

  def test_discriminator_run(self):
    """The default discriminator can be built and evaluated in a session."""
    images = tf.random.uniform([5, 32, 32, 3], -1, 1)
    scores, _ = dcgan.discriminator(images)
    with self.test_session() as sess:
      sess.run(tf.compat.v1.global_variables_initializer())
      scores.eval()

  def test_discriminator_graph(self):
    """Output shape, endpoint names and layer depths for several sizes."""
    # Check graph construction for a number of image size/depths and batch
    # sizes.
    for log_size, batch_size in zip(xrange(1, 6), xrange(3, 8)):
      tf.compat.v1.reset_default_graph()
      img_w = 2 ** log_size
      images = tf.random.uniform([batch_size, img_w, img_w, 3], -1, 1)
      scores, end_points = dcgan.discriminator(images, depth=32)

      self.assertAllEqual([batch_size, 1], scores.get_shape().as_list())

      conv_names = ['conv%i' % k for k in xrange(1, log_size+1)]
      self.assertSetEqual(
          set(conv_names + ['logits']), set(end_points.keys()))

      # Check layer depths.
      for k in range(1, log_size+1):
        channels = end_points['conv%i' % k].get_shape().as_list()[-1]
        self.assertEqual(32 * 2**(k-1), channels)

  def test_discriminator_invalid_input(self):
    """Each kind of malformed image input raises ValueError."""
    rank3_img = tf.zeros([5, 32, 32])
    with self.assertRaises(ValueError):
      dcgan.discriminator(rank3_img)

    undefined_width = tf.compat.v1.placeholder(tf.float32, [5, 32, None, 3])
    with self.assertRaises(ValueError):
      dcgan.discriminator(undefined_width)

    rectangular = tf.zeros([5, 32, 16, 3])
    with self.assertRaisesRegexp(ValueError, 'not have equal width and height'):
      dcgan.discriminator(rectangular)

    size_30 = tf.zeros([5, 30, 30, 3])
    with self.assertRaisesRegexp(ValueError, 'not a power of 2'):
      dcgan.discriminator(size_30)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
||||
+181
@@ -0,0 +1,181 @@
|
||||
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains the definition for Inflated 3D Inception V1 (I3D).
|
||||
|
||||
The network architecture is proposed by:
|
||||
Joao Carreira and Andrew Zisserman,
|
||||
Quo Vadis, Action Recognition? A New Model and the Kinetics Dataset.
|
||||
https://arxiv.org/abs/1705.07750
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
from nets import i3d_utils
|
||||
from nets import s3dg
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
# pylint: disable=g-long-lambda
|
||||
trunc_normal = lambda stddev: tf.compat.v1.truncated_normal_initializer(
|
||||
0.0, stddev)
|
||||
conv3d_spatiotemporal = i3d_utils.conv3d_spatiotemporal
|
||||
|
||||
|
||||
def i3d_arg_scope(weight_decay=1e-7,
                  batch_norm_decay=0.999,
                  batch_norm_epsilon=0.001,
                  use_renorm=False,
                  separable_conv3d=False):
  """Builds the default arg_scope used by the I3D model.

  Args:
    weight_decay: L2 regularization strength applied to conv weights.
    batch_norm_decay: Decay for the batch norm moving averages.
    batch_norm_epsilon: Small float added to the variance in batch norm to
      avoid dividing by zero.
    use_renorm: Whether to use batch renormalization.
    separable_conv3d: Whether `conv3d_spatiotemporal` uses separable 3D convs.

  Returns:
    sc: An arg_scope to use for the models.
  """
  # Collections that receive the batch norm variables; only the moving
  # statistics are collected.
  variables_collections = {
      'beta': None,
      'gamma': None,
      'moving_mean': ['moving_vars'],
      'moving_variance': ['moving_vars'],
  }
  batch_norm_params = {
      'decay': batch_norm_decay,
      'epsilon': batch_norm_epsilon,
      # Fused batch norm is turned off.
      'fused': False,
      'renorm': use_renorm,
      'variables_collections': variables_collections,
  }
  conv_defaults = dict(
      weights_regularizer=slim.l2_regularizer(weight_decay),
      activation_fn=tf.nn.relu,
      normalizer_fn=slim.batch_norm,
      normalizer_params=batch_norm_params)

  with slim.arg_scope([slim.conv3d, conv3d_spatiotemporal], **conv_defaults):
    with slim.arg_scope(
        [conv3d_spatiotemporal], separable=separable_conv3d) as sc:
      return sc
|
||||
|
||||
|
||||
def i3d_base(inputs, final_endpoint='Mixed_5c',
             scope='InceptionV1'):
  """Defines the I3D base architecture by delegating to `s3dg.s3dg_base`.

  Endpoint names follow Inception V1 so that an image-trained Inception V1
  checkpoint can be converted to an I3D checkpoint.

  Args:
    inputs: A 5-D float tensor of size [batch_size, num_frames, height, width,
      channels].
    final_endpoint: Specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
      'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
      'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
      'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c']
    scope: Optional variable_scope.

  Returns:
    The result of `s3dg.s3dg_base`; callers unpack it as
    `(net, end_points)`, where `end_points` maps network components to their
    activations.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values.
  """
  # I3D is S3D-G configured with a 7-frame first temporal kernel, temporal
  # convolutions starting at Conv2d_2c_3x3, and no feature gating.
  s3dg_settings = dict(
      first_temporal_kernel_size=7,
      temporal_conv_startat='Conv2d_2c_3x3',
      gating_startat=None,
      final_endpoint=final_endpoint,
      min_depth=16,
      depth_multiplier=1.0,
      data_format='NDHWC',
      scope=scope)
  return s3dg.s3dg_base(inputs, **s3dg_settings)
|
||||
|
||||
|
||||
def i3d(inputs,
        num_classes=1000,
        dropout_keep_prob=0.8,
        is_training=True,
        prediction_fn=slim.softmax,
        spatial_squeeze=True,
        reuse=None,
        scope='InceptionV1'):
  """Defines the I3D architecture.

  Builds the base network with `i3d_base`, then average-pools, applies
  dropout, and classifies with a 1x1x1 convolution averaged over time.

  The default image size used to train this network is 224x224.

  Args:
    inputs: A 5-D float tensor of size [batch_size, num_frames, height, width,
      channels].
    num_classes: number of predicted classes.
    dropout_keep_prob: the percentage of activation values that are retained.
    is_training: whether is training or not.
    prediction_fn: a function to get predictions out of logits.
    spatial_squeeze: if True, logits is of shape is [B, C], if false logits is
      of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, num_classes]
    end_points: a dictionary from components of the network to the corresponding
      activation.
  """
  norm_and_dropout = [slim.batch_norm, slim.dropout]
  with tf.compat.v1.variable_scope(
      scope, 'InceptionV1', [inputs, num_classes], reuse=reuse) as scope:
    with slim.arg_scope(norm_and_dropout, is_training=is_training):
      net, end_points = i3d_base(inputs, scope=scope)

      # Final pooling and prediction.
      with tf.compat.v1.variable_scope('Logits'):
        pool_kernel = i3d_utils.reduced_kernel_size_3d(net, [2, 7, 7])
        pooled = slim.avg_pool3d(
            net, pool_kernel, stride=1, scope='AvgPool_0a_7x7')
        dropped = slim.dropout(pooled, dropout_keep_prob, scope='Dropout_0b')
        per_frame_logits = slim.conv3d(
            dropped,
            num_classes, [1, 1, 1],
            activation_fn=None,
            normalizer_fn=None,
            scope='Conv2d_0c_1x1')
        # Temporal average pooling.
        logits = tf.reduce_mean(input_tensor=per_frame_logits, axis=1)
        if spatial_squeeze:
          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')

      end_points['Logits'] = logits
      end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
  return logits, end_points
|
||||
|
||||
|
||||
i3d.default_image_size = 224
|
||||
+149
@@ -0,0 +1,149 @@
|
||||
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for networks.i3d."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from nets import i3d
|
||||
|
||||
|
||||
class I3DTest(tf.test.TestCase):
  """Graph-construction and shape tests for the I3D network."""

  def testBuildClassificationNetwork(self):
    """Full network yields [batch, classes] logits and predictions."""
    batch_size = 5
    num_frames = 64
    height, width = 224, 224
    num_classes = 1000

    inputs = tf.random.uniform((batch_size, num_frames, height, width, 3))
    logits, end_points = i3d.i3d(inputs, num_classes)
    self.assertTrue(logits.op.name.startswith('InceptionV1/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertIn('Predictions', end_points)
    self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
                         [batch_size, num_classes])

  def testBuildBaseNetwork(self):
    """Base network ends at Mixed_5c and exposes every endpoint."""
    batch_size = 5
    num_frames = 64
    height, width = 224, 224

    inputs = tf.random.uniform((batch_size, num_frames, height, width, 3))
    mixed_5c, end_points = i3d.i3d_base(inputs)
    self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c'))
    self.assertListEqual(mixed_5c.get_shape().as_list(),
                         [batch_size, 8, 7, 7, 1024])
    expected_endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
                          'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b',
                          'Mixed_3c', 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c',
                          'Mixed_4d', 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2',
                          'Mixed_5b', 'Mixed_5c']
    self.assertItemsEqual(end_points.keys(), expected_endpoints)

  def testBuildOnlyUptoFinalEndpoint(self):
    """Building up to each endpoint yields exactly the preceding endpoints."""
    batch_size = 5
    num_frames = 64
    height, width = 224, 224
    endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
                 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
                 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d',
                 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b',
                 'Mixed_5c']
    for index, endpoint in enumerate(endpoints):
      # A fresh graph per endpoint avoids variable-name collisions.
      with tf.Graph().as_default():
        inputs = tf.random.uniform((batch_size, num_frames, height, width, 3))
        out_tensor, end_points = i3d.i3d_base(
            inputs, final_endpoint=endpoint)
        self.assertTrue(out_tensor.op.name.startswith(
            'InceptionV1/' + endpoint))
        self.assertItemsEqual(endpoints[:index+1], end_points)

  def testBuildAndCheckAllEndPointsUptoMixed5c(self):
    """Every endpoint up to Mixed_5c has the expected static shape."""
    batch_size = 5
    num_frames = 64
    height, width = 224, 224

    inputs = tf.random.uniform((batch_size, num_frames, height, width, 3))
    _, end_points = i3d.i3d_base(inputs,
                                 final_endpoint='Mixed_5c')
    endpoints_shapes = {'Conv2d_1a_7x7': [5, 32, 112, 112, 64],
                        'MaxPool_2a_3x3': [5, 32, 56, 56, 64],
                        'Conv2d_2b_1x1': [5, 32, 56, 56, 64],
                        'Conv2d_2c_3x3': [5, 32, 56, 56, 192],
                        'MaxPool_3a_3x3': [5, 32, 28, 28, 192],
                        'Mixed_3b': [5, 32, 28, 28, 256],
                        'Mixed_3c': [5, 32, 28, 28, 480],
                        'MaxPool_4a_3x3': [5, 16, 14, 14, 480],
                        'Mixed_4b': [5, 16, 14, 14, 512],
                        'Mixed_4c': [5, 16, 14, 14, 512],
                        'Mixed_4d': [5, 16, 14, 14, 512],
                        'Mixed_4e': [5, 16, 14, 14, 528],
                        'Mixed_4f': [5, 16, 14, 14, 832],
                        'MaxPool_5a_2x2': [5, 8, 7, 7, 832],
                        'Mixed_5b': [5, 8, 7, 7, 832],
                        'Mixed_5c': [5, 8, 7, 7, 1024]}

    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
    # `items()` works on both Python 2 and 3; `iteritems()` does not exist on
    # Python 3 dicts.
    for endpoint_name, expected_shape in endpoints_shapes.items():
      self.assertIn(endpoint_name, end_points)
      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                           expected_shape)

  def testHalfSizeImages(self):
    """112x112 inputs produce a 4x4 spatial map at Mixed_5c."""
    batch_size = 5
    num_frames = 64
    height, width = 112, 112

    inputs = tf.random.uniform((batch_size, num_frames, height, width, 3))
    mixed_5c, _ = i3d.i3d_base(inputs)
    self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c'))
    self.assertListEqual(mixed_5c.get_shape().as_list(),
                         [batch_size, 8, 4, 4, 1024])

  def testTenFrames(self):
    """A 10-frame clip is reduced to 2 temporal steps at Mixed_5c."""
    batch_size = 5
    num_frames = 10
    height, width = 224, 224

    inputs = tf.random.uniform((batch_size, num_frames, height, width, 3))
    mixed_5c, _ = i3d.i3d_base(inputs)
    self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c'))
    self.assertListEqual(mixed_5c.get_shape().as_list(),
                         [batch_size, 2, 7, 7, 1024])

  def testEvaluation(self):
    """Inference-mode network runs and yields one prediction per example."""
    batch_size = 2
    num_frames = 64
    height, width = 224, 224
    num_classes = 1000

    eval_inputs = tf.random.uniform((batch_size, num_frames, height, width, 3))
    logits, _ = i3d.i3d(eval_inputs, num_classes,
                        is_training=False)
    predictions = tf.argmax(input=logits, axis=1)

    with self.test_session() as sess:
      sess.run(tf.compat.v1.global_variables_initializer())
      output = sess.run(predictions)
      self.assertEqual(output.shape, (batch_size,))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
||||
+289
@@ -0,0 +1,289 @@
|
||||
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Utilities for building I3D network models."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import framework as contrib_framework
|
||||
from tensorflow.contrib import layers as contrib_layers
|
||||
|
||||
|
||||
# Originally, add_arg_scope = slim.add_arg_scope and layers = slim, now switch to
|
||||
# more up-to-date tf.contrib.* API.
|
||||
add_arg_scope = contrib_framework.add_arg_scope
|
||||
layers = contrib_layers
|
||||
|
||||
|
||||
def center_initializer():
  """Centering Initializer for I3D.

  The returned initializer produces a kernel that is the identity matrix at
  the temporal center position and zero elsewhere, so a temporal convolution
  starts out as an identity mapping. The source notes this is critical for
  the desired convergence behavior when training a separable I3D model.

  The centering behavior requires an odd-sized temporal kernel, typically 3.

  Returns:
    A weight initializer op used in temporal convolutional layers.

  Raises:
    ValueError: If the requested dtype is neither tf.float32 nor tf.bfloat16.
    ValueError: If the requested shape is not 5-D.
    ValueError: If input and output channel dimensions are different.
    ValueError: If spatial kernel sizes are not 1.
    ValueError: If temporal kernel size is even.
  """

  def _initializer(shape, dtype=tf.float32, partition_info=None):  # pylint: disable=unused-argument
    """Initializer op."""
    if dtype not in (tf.float32, tf.bfloat16):
      raise ValueError(
          'Input tensor data type has to be tf.float32 or tf.bfloat16.')
    if len(shape) != 5:
      raise ValueError('Input tensor has to be 5-D.')

    temporal, kernel_h, kernel_w, in_channels, out_channels = shape
    if in_channels != out_channels:
      raise ValueError('Input and output channel dimensions must be the same.')
    if not (kernel_h == 1 and kernel_w == 1):
      raise ValueError('Spatial kernel sizes must be 1 (pointwise conv).')
    if temporal % 2 == 0:
      raise ValueError('Temporal kernel size has to be odd.')

    # Identity across channels at the middle temporal tap, zeros elsewhere.
    kernel = np.zeros(
        [temporal, kernel_h, kernel_w, in_channels, out_channels],
        dtype=np.float32)
    kernel[temporal // 2, 0, 0] = np.eye(in_channels, dtype=np.float32)

    return tf.constant(kernel, dtype=dtype)

  return _initializer
|
||||
|
||||
|
||||
@add_arg_scope
def conv3d_spatiotemporal(inputs,
                          num_outputs,
                          kernel_size,
                          stride=1,
                          padding='SAME',
                          activation_fn=None,
                          normalizer_fn=None,
                          normalizer_params=None,
                          weights_regularizer=None,
                          separable=False,
                          data_format='NDHWC',
                          scope=''):
  """A wrapper for conv3d to model spatiotemporal representations.

  This allows switching between original 3D convolution and separable 3D
  convolutions for spatial and temporal features respectively. On Kinetics,
  separable 3D convolutions yield better classification performance.

  When `separable` is true and the temporal kernel extent is not 1, the
  convolution is factored into a `[1, k_h, k_w]` spatial convolution followed
  by a `[k_d, 1, 1]` temporal convolution created under `scope + '/temporal'`.
  The temporal convolution is built with `normalizer_fn=None` and takes its
  weights from `center_initializer()`. In every other case a single
  `layers.conv3d` is created.

  Args:
    inputs: a 5-D tensor `[batch_size, depth, height, width, channels]`.
    num_outputs: integer, the number of output filters.
    kernel_size: a list or tuple of length 3
      `[kernel_depth, kernel_height, kernel_width]` of the filters. Can be an
      int if all values are the same.
    stride: a list or tuple of length 3
      `[stride_depth, stride_height, stride_width]`. Can be an int if all
      strides are the same.
    padding: one of `VALID` or `SAME`.
    activation_fn: activation function.
    normalizer_fn: normalization function to use instead of `biases`.
    normalizer_params: dictionary of normalization function parameters.
    weights_regularizer: Optional regularizer for the weights.
    separable: If `True`, use separable spatiotemporal convolutions.
    data_format: An optional string from: "NDHWC", "NCDHW". Defaults to "NDHWC".
      The data format of the input and output data. With the default format
      "NDHWC", the data is stored in the order of: [batch, in_depth, in_height,
      in_width, in_channels]. Alternatively, the format could be "NCDHW", the
      data storage order is:
      [batch, in_channels, in_depth, in_height, in_width].
    scope: scope for `variable_scope`.

  Returns:
    A tensor representing the output of the (separable) conv3d operation.

  """
  # The documented contract allows a scalar kernel size; expand it here so the
  # indexing below works instead of failing on `len()` of an int.
  if isinstance(kernel_size, int):
    kernel_size = [kernel_size] * 3
  assert len(kernel_size) == 3, 'kernel_size must be an int or have length 3.'

  if separable and kernel_size[0] != 1:
    spatial_kernel_size = [1, kernel_size[1], kernel_size[2]]
    temporal_kernel_size = [kernel_size[0], 1, 1]
    # Accept tuples as well as lists; a 3-tuple used to fall through to the
    # scalar branch and produce a nested, invalid stride specification.
    if isinstance(stride, (list, tuple)) and len(stride) == 3:
      spatial_stride = [1, stride[1], stride[2]]
      temporal_stride = [stride[0], 1, 1]
    else:
      spatial_stride = [1, stride, stride]
      temporal_stride = [stride, 1, 1]
    # Spatial part: carries the normalizer and the weights regularizer.
    net = layers.conv3d(
        inputs,
        num_outputs,
        spatial_kernel_size,
        stride=spatial_stride,
        padding=padding,
        activation_fn=activation_fn,
        normalizer_fn=normalizer_fn,
        normalizer_params=normalizer_params,
        weights_regularizer=weights_regularizer,
        data_format=data_format,
        scope=scope)
    # Temporal part: no normalizer, weights come from center_initializer().
    net = layers.conv3d(
        net,
        num_outputs,
        temporal_kernel_size,
        stride=temporal_stride,
        padding=padding,
        scope=scope + '/temporal',
        activation_fn=activation_fn,
        normalizer_fn=None,
        data_format=data_format,
        weights_initializer=center_initializer())
    return net
  else:
    return layers.conv3d(
        inputs,
        num_outputs,
        kernel_size,
        stride=stride,
        padding=padding,
        activation_fn=activation_fn,
        normalizer_fn=normalizer_fn,
        normalizer_params=normalizer_params,
        weights_regularizer=weights_regularizer,
        data_format=data_format,
        scope=scope)
|
||||
|
||||
|
||||
@add_arg_scope
def inception_block_v1_3d(inputs,
                          num_outputs_0_0a,
                          num_outputs_1_0a,
                          num_outputs_1_0b,
                          num_outputs_2_0a,
                          num_outputs_2_0b,
                          num_outputs_3_0b,
                          temporal_kernel_size=3,
                          self_gating_fn=None,
                          data_format='NDHWC',
                          scope=''):
  """Builds one 3D Inception v1 block with four parallel branches.

  Supports separable 3D convolutions and optional self-gating, as described in:
  Saining Xie, Chen Sun, Jonathan Huang, Zhuowen Tu and Kevin Murphy,
    Rethinking Spatiotemporal Feature Learning For Video Understanding.
    https://arxiv.org/abs/1712.04851.

  Args:
    inputs: a 5-D tensor `[batch_size, depth, height, width, channels]`.
    num_outputs_0_0a: integer, the number of output filters for Branch 0,
      operation Conv2d_0a_1x1.
    num_outputs_1_0a: integer, the number of output filters for Branch 1,
      operation Conv2d_0a_1x1.
    num_outputs_1_0b: integer, the number of output filters for Branch 1,
      operation Conv2d_0b_3x3.
    num_outputs_2_0a: integer, the number of output filters for Branch 2,
      operation Conv2d_0a_1x1.
    num_outputs_2_0b: integer, the number of output filters for Branch 2,
      operation Conv2d_0b_3x3.
    num_outputs_3_0b: integer, the number of output filters for Branch 3,
      operation Conv2d_0b_1x1.
    temporal_kernel_size: integer, the size of the temporal convolutional
      filters in the conv3d_spatiotemporal blocks.
    self_gating_fn: function which optionally performs self-gating.
      Must have two arguments, `inputs` and `scope`, and return one output
      tensor the same size as `inputs`. If `None`, no self-gating is
      applied.
    data_format: An optional string from: "NDHWC", "NCDHW". Defaults to "NDHWC".
      Within this function it is only used to locate the channel axis for the
      final concatenation.
    scope: scope for `variable_scope`.

  Returns:
    A 5-D tensor `[batch_size, depth, height, width, out_channels]`, where
    `out_channels = num_outputs_0_0a + num_outputs_1_0b + num_outputs_2_0b
    + num_outputs_3_0b`.

  """

  def _gated(tensor, gate_scope):
    """Applies `self_gating_fn` under `gate_scope` when gating is enabled."""
    if self_gating_fn is None:
      return tensor
    return self_gating_fn(tensor, scope=gate_scope)

  towers = []
  with tf.compat.v1.variable_scope(scope):
    # Branch 0: pointwise convolution.
    with tf.compat.v1.variable_scope('Branch_0'):
      tower = layers.conv3d(
          inputs, num_outputs_0_0a, [1, 1, 1], scope='Conv2d_0a_1x1')
      towers.append(_gated(tower, 'Conv2d_0a_1x1'))

    # Branch 1: pointwise reduction, then spatiotemporal convolution.
    with tf.compat.v1.variable_scope('Branch_1'):
      tower = layers.conv3d(
          inputs, num_outputs_1_0a, [1, 1, 1], scope='Conv2d_0a_1x1')
      tower = conv3d_spatiotemporal(
          tower, num_outputs_1_0b, [temporal_kernel_size, 3, 3],
          scope='Conv2d_0b_3x3')
      towers.append(_gated(tower, 'Conv2d_0b_3x3'))

    # Branch 2: same layout as branch 1 with its own filter counts.
    with tf.compat.v1.variable_scope('Branch_2'):
      tower = layers.conv3d(
          inputs, num_outputs_2_0a, [1, 1, 1], scope='Conv2d_0a_1x1')
      tower = conv3d_spatiotemporal(
          tower, num_outputs_2_0b, [temporal_kernel_size, 3, 3],
          scope='Conv2d_0b_3x3')
      towers.append(_gated(tower, 'Conv2d_0b_3x3'))

    # Branch 3: 3x3x3 max pooling followed by a pointwise convolution.
    with tf.compat.v1.variable_scope('Branch_3'):
      tower = layers.max_pool3d(inputs, [3, 3, 3], scope='MaxPool_0a_3x3')
      tower = layers.conv3d(
          tower, num_outputs_3_0b, [1, 1, 1], scope='Conv2d_0b_1x1')
      towers.append(_gated(tower, 'Conv2d_0b_1x1'))

    # Concatenate along whichever axis the data format marks as channels.
    channel_axis = data_format.index('C')
    assert 1 <= channel_axis <= 4, 'Cannot identify channel dimension.'
    merged = tf.concat(towers, channel_axis)
  return merged
|
||||
|
||||
|
||||
def reduced_kernel_size_3d(input_tensor, kernel_size):
  """Clamps a 3-D kernel size so it never exceeds the input extents.

  If any of the time, height or width dimensions of the input is unknown at
  graph construction time, the input is assumed to be large enough and the
  requested kernel size is returned unchanged.

  Args:
    input_tensor: input tensor of size
      [batch_size, time, height, width, channels].
    kernel_size: desired kernel size of length 3, corresponding to time,
      height and width.

  Returns:
    The `kernel_size` argument itself when a relevant input dimension is
    unknown; otherwise a new list of three values, each the minimum of the
    input extent and the requested kernel extent.
  """
  assert len(kernel_size) == 3
  dims = input_tensor.get_shape().as_list()
  assert len(dims) == 5
  extents = dims[1:4]  # time, height, width
  if any(extent is None for extent in extents):
    return kernel_size
  return [min(extent, wanted) for extent, wanted in zip(extents, kernel_size)]
|
||||
+37
@@ -0,0 +1,37 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Brings all inception models under one namespace."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
# pylint: disable=unused-import
|
||||
from nets.inception_resnet_v2 import inception_resnet_v2
|
||||
from nets.inception_resnet_v2 import inception_resnet_v2_arg_scope
|
||||
from nets.inception_resnet_v2 import inception_resnet_v2_base
|
||||
from nets.inception_v1 import inception_v1
|
||||
from nets.inception_v1 import inception_v1_arg_scope
|
||||
from nets.inception_v1 import inception_v1_base
|
||||
from nets.inception_v2 import inception_v2
|
||||
from nets.inception_v2 import inception_v2_arg_scope
|
||||
from nets.inception_v2 import inception_v2_base
|
||||
from nets.inception_v3 import inception_v3
|
||||
from nets.inception_v3 import inception_v3_arg_scope
|
||||
from nets.inception_v3 import inception_v3_base
|
||||
from nets.inception_v4 import inception_v4
|
||||
from nets.inception_v4 import inception_v4_arg_scope
|
||||
from nets.inception_v4 import inception_v4_base
|
||||
# pylint: enable=unused-import
|
||||
+408
@@ -0,0 +1,408 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains the definition of the Inception Resnet V2 architecture.
|
||||
|
||||
As described in http://arxiv.org/abs/1602.07261.
|
||||
|
||||
Inception-v4, Inception-ResNet and the Impact of Residual Connections
|
||||
on Learning
|
||||
Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
|
||||
def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
  """Builds the 35x35 resnet block.

  Three convolutional branches are concatenated along axis 3, projected back
  to the channel count of `net` by a 1x1 convolution without normalizer or
  activation, scaled by `scale`, and added to `net`.

  Args:
    net: input tensor; axis 3 is used as the channel axis.
    scale: multiplier applied to the residual before the addition.
    activation_fn: activation applied to the sum; skipped when falsy. When it
      is `tf.nn.relu6` the scaled residual is first clipped to [-6, 6].
    scope: optional variable scope name (defaults to 'Block35').
    reuse: forwarded to the variable scope.

  Returns:
    The output tensor of the block.
  """
  with tf.compat.v1.variable_scope(scope, 'Block35', [net], reuse=reuse):
    with tf.compat.v1.variable_scope('Branch_0'):
      branch_0 = slim.conv2d(net, 32, 1, scope='Conv2d_1x1')
    with tf.compat.v1.variable_scope('Branch_1'):
      branch_1 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
      branch_1 = slim.conv2d(branch_1, 32, 3, scope='Conv2d_0b_3x3')
    with tf.compat.v1.variable_scope('Branch_2'):
      branch_2 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
      branch_2 = slim.conv2d(branch_2, 48, 3, scope='Conv2d_0b_3x3')
      branch_2 = slim.conv2d(branch_2, 64, 3, scope='Conv2d_0c_3x3')
    merged = tf.concat([branch_0, branch_1, branch_2], axis=3)
    # Linear 1x1 projection back to the input depth.
    residual = slim.conv2d(merged, net.get_shape()[3], 1,
                           activation_fn=None, normalizer_fn=None,
                           scope='Conv2d_1x1')
    residual = residual * scale
    if activation_fn == tf.nn.relu6:
      # Use clip_by_value to simulate bandpass activation.
      residual = tf.clip_by_value(residual, -6.0, 6.0)

    net += residual
    if activation_fn:
      net = activation_fn(net)
  return net
|
||||
|
||||
|
||||
def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
  """Builds the 17x17 resnet block.

  A 1x1 branch and a 1x1 -> 1x7 -> 7x1 branch are concatenated along axis 3,
  projected back to the channel count of `net` by a 1x1 convolution without
  normalizer or activation, scaled by `scale`, and added to `net`.

  Args:
    net: input tensor; axis 3 is used as the channel axis.
    scale: multiplier applied to the residual before the addition.
    activation_fn: activation applied to the sum; skipped when falsy. When it
      is `tf.nn.relu6` the scaled residual is first clipped to [-6, 6].
    scope: optional variable scope name (defaults to 'Block17').
    reuse: forwarded to the variable scope.

  Returns:
    The output tensor of the block.
  """
  with tf.compat.v1.variable_scope(scope, 'Block17', [net], reuse=reuse):
    with tf.compat.v1.variable_scope('Branch_0'):
      branch_0 = slim.conv2d(net, 192, 1, scope='Conv2d_1x1')
    with tf.compat.v1.variable_scope('Branch_1'):
      branch_1 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1')
      branch_1 = slim.conv2d(branch_1, 160, [1, 7], scope='Conv2d_0b_1x7')
      branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='Conv2d_0c_7x1')
    merged = tf.concat([branch_0, branch_1], axis=3)
    # Linear 1x1 projection back to the input depth.
    residual = slim.conv2d(merged, net.get_shape()[3], 1,
                           activation_fn=None, normalizer_fn=None,
                           scope='Conv2d_1x1')

    residual = residual * scale
    if activation_fn == tf.nn.relu6:
      # Use clip_by_value to simulate bandpass activation.
      residual = tf.clip_by_value(residual, -6.0, 6.0)

    net += residual
    if activation_fn:
      net = activation_fn(net)
  return net
|
||||
|
||||
|
||||
def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
  """Builds the 8x8 resnet block.

  A 1x1 branch and a 1x1 -> 1x3 -> 3x1 branch are concatenated along axis 3,
  projected back to the channel count of `net` by a 1x1 convolution without
  normalizer or activation, scaled by `scale`, and added to `net`.

  Args:
    net: input tensor; axis 3 is used as the channel axis.
    scale: multiplier applied to the residual before the addition.
    activation_fn: activation applied to the sum; skipped when falsy. When it
      is `tf.nn.relu6` the scaled residual is first clipped to [-6, 6].
    scope: optional variable scope name (defaults to 'Block8').
    reuse: forwarded to the variable scope.

  Returns:
    The output tensor of the block.
  """
  with tf.compat.v1.variable_scope(scope, 'Block8', [net], reuse=reuse):
    with tf.compat.v1.variable_scope('Branch_0'):
      branch_0 = slim.conv2d(net, 192, 1, scope='Conv2d_1x1')
    with tf.compat.v1.variable_scope('Branch_1'):
      branch_1 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1')
      branch_1 = slim.conv2d(branch_1, 224, [1, 3], scope='Conv2d_0b_1x3')
      branch_1 = slim.conv2d(branch_1, 256, [3, 1], scope='Conv2d_0c_3x1')
    merged = tf.concat([branch_0, branch_1], axis=3)
    # Linear 1x1 projection back to the input depth.
    residual = slim.conv2d(merged, net.get_shape()[3], 1,
                           activation_fn=None, normalizer_fn=None,
                           scope='Conv2d_1x1')

    residual = residual * scale
    if activation_fn == tf.nn.relu6:
      # Use clip_by_value to simulate bandpass activation.
      residual = tf.clip_by_value(residual, -6.0, 6.0)

    net += residual
    if activation_fn:
      net = activation_fn(net)
  return net
|
||||
|
||||
|
||||
def inception_resnet_v2_base(inputs,
                             final_endpoint='Conv2d_7b_1x1',
                             output_stride=16,
                             align_feature_maps=False,
                             scope=None,
                             activation_fn=tf.nn.relu):
  """Inception model from http://arxiv.org/abs/1602.07261.

  Constructs an Inception Resnet v2 network from inputs to the given final
  endpoint. This method can construct the network up to the final inception
  block Conv2d_7b_1x1.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',
      'Mixed_5b', 'Mixed_6a', 'PreAuxLogits', 'Mixed_7a', 'Conv2d_7b_1x1']
    output_stride: A scalar that specifies the requested ratio of input to
      output spatial resolution. Only supports 8 and 16.
    align_feature_maps: When true, changes all the VALID paddings in the network
      to SAME padding so that the feature maps are aligned.
    scope: Optional variable_scope.
    activation_fn: Activation function for block scopes.

  Returns:
    tensor_out: output tensor corresponding to the final_endpoint.
    end_points: a set of activations for external use, for example summaries or
                losses.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values,
      or if the output_stride is not 8 or 16, or if the output_stride is 8 and
      we request an end point after 'PreAuxLogits'.
  """
  if output_stride != 8 and output_stride != 16:
    raise ValueError('output_stride must be 8 or 16.')

  padding = 'SAME' if align_feature_maps else 'VALID'

  end_points = {}

  def add_and_check_final(name, net):
    """Records `net` under `name`; returns True if it is the last endpoint."""
    end_points[name] = net
    return name == final_endpoint

  with tf.compat.v1.variable_scope(scope, 'InceptionResnetV2', [inputs]):
    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                        stride=1, padding='SAME'):
      # 149 x 149 x 32
      net = slim.conv2d(inputs, 32, 3, stride=2, padding=padding,
                        scope='Conv2d_1a_3x3')
      if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points

      # 147 x 147 x 32
      net = slim.conv2d(net, 32, 3, padding=padding,
                        scope='Conv2d_2a_3x3')
      if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points
      # 147 x 147 x 64
      net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
      if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points
      # 73 x 73 x 64
      net = slim.max_pool2d(net, 3, stride=2, padding=padding,
                            scope='MaxPool_3a_3x3')
      if add_and_check_final('MaxPool_3a_3x3', net): return net, end_points
      # 73 x 73 x 80
      net = slim.conv2d(net, 80, 1, padding=padding,
                        scope='Conv2d_3b_1x1')
      if add_and_check_final('Conv2d_3b_1x1', net): return net, end_points
      # 71 x 71 x 192
      net = slim.conv2d(net, 192, 3, padding=padding,
                        scope='Conv2d_4a_3x3')
      if add_and_check_final('Conv2d_4a_3x3', net): return net, end_points
      # 35 x 35 x 192
      net = slim.max_pool2d(net, 3, stride=2, padding=padding,
                            scope='MaxPool_5a_3x3')
      if add_and_check_final('MaxPool_5a_3x3', net): return net, end_points

      # 35 x 35 x 320
      with tf.compat.v1.variable_scope('Mixed_5b'):
        with tf.compat.v1.variable_scope('Branch_0'):
          tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1')
          tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5,
                                      scope='Conv2d_0b_5x5')
        with tf.compat.v1.variable_scope('Branch_2'):
          tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1')
          tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3,
                                      scope='Conv2d_0b_3x3')
          tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3,
                                      scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME',
                                       scope='AvgPool_0a_3x3')
          tower_pool_1 = slim.conv2d(tower_pool, 64, 1,
                                     scope='Conv2d_0b_1x1')
        net = tf.concat(
            [tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1], 3)

      if add_and_check_final('Mixed_5b', net): return net, end_points
      # TODO(alemi): Register intermediate endpoints
      net = slim.repeat(net, 10, block35, scale=0.17,
                        activation_fn=activation_fn)

      # With the default VALID padding and a 35 x 35 input to this stage:
      # 33 x 33 x 1088 if output_stride == 8 (stride-1 reduction),
      # 17 x 17 x 1088 if output_stride == 16 (stride-2 reduction).
      use_atrous = output_stride == 8

      with tf.compat.v1.variable_scope('Mixed_6a'):
        with tf.compat.v1.variable_scope('Branch_0'):
          tower_conv = slim.conv2d(net, 384, 3, stride=1 if use_atrous else 2,
                                   padding=padding,
                                   scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_1'):
          tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
          tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3,
                                      scope='Conv2d_0b_3x3')
          tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3,
                                      stride=1 if use_atrous else 2,
                                      padding=padding,
                                      scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          tower_pool = slim.max_pool2d(net, 3, stride=1 if use_atrous else 2,
                                       padding=padding,
                                       scope='MaxPool_1a_3x3')
        net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)

      if add_and_check_final('Mixed_6a', net): return net, end_points

      # TODO(alemi): register intermediate endpoints
      # When the stride-2 reduction above was skipped, dilate the following
      # convolutions (rate=2) instead.
      with slim.arg_scope([slim.conv2d], rate=2 if use_atrous else 1):
        net = slim.repeat(net, 20, block17, scale=0.10,
                          activation_fn=activation_fn)
      if add_and_check_final('PreAuxLogits', net): return net, end_points

      if output_stride == 8:
        # TODO(gpapan): Properly support output_stride for the rest of the net.
        raise ValueError('output_stride==8 is only supported up to the '
                         'PreAuxlogits end_point for now.')

      # 8 x 8 x 2080
      with tf.compat.v1.variable_scope('Mixed_7a'):
        with tf.compat.v1.variable_scope('Branch_0'):
          tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
          tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
                                     padding=padding,
                                     scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_1'):
          tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
          tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2,
                                      padding=padding,
                                      scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
          tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3,
                                      scope='Conv2d_0b_3x3')
          tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2,
                                      padding=padding,
                                      scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          tower_pool = slim.max_pool2d(net, 3, stride=2,
                                       padding=padding,
                                       scope='MaxPool_1a_3x3')
        net = tf.concat(
            [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3)

      if add_and_check_final('Mixed_7a', net): return net, end_points

      # TODO(alemi): register intermediate endpoints
      net = slim.repeat(net, 9, block8, scale=0.20, activation_fn=activation_fn)
      # Final block8 uses the default scale and no activation.
      net = block8(net, activation_fn=None)

      # 8 x 8 x 1536
      net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
      if add_and_check_final('Conv2d_7b_1x1', net): return net, end_points

    # Format the message: passing final_endpoint as a second argument to
    # ValueError would leave the '%s' placeholder unsubstituted.
    raise ValueError('final_endpoint (%s) not recognized' % final_endpoint)
|
||||
|
||||
|
||||
def inception_resnet_v2(inputs, num_classes=1001, is_training=True,
                        dropout_keep_prob=0.8,
                        reuse=None,
                        scope='InceptionResnetV2',
                        create_aux_logits=True,
                        activation_fn=tf.nn.relu):
  """Creates the Inception Resnet V2 model.

  Args:
    inputs: a 4-D tensor of size [batch_size, height, width, 3].
      Dimension batch_size may be undefined. If create_aux_logits is false,
      also height and width may be undefined.
    num_classes: number of predicted classes. If 0 or None, the logits layer
      is omitted and the input features to the logits layer (before dropout)
      are returned instead.
    is_training: whether is training or not.
    dropout_keep_prob: float, the fraction to keep before final layer.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.
    create_aux_logits: Whether to include the auxiliary logits.
    activation_fn: Activation function for conv2d.

  Returns:
    net: the output of the logits layer (if num_classes is a non-zero integer),
      or the non-dropped-out input to the logits layer (if num_classes is 0 or
      None).
    end_points: the set of end_points from the inception model.
  """
  with tf.compat.v1.variable_scope(
      scope, 'InceptionResnetV2', [inputs], reuse=reuse) as scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):

      net, end_points = inception_resnet_v2_base(
          inputs, scope=scope, activation_fn=activation_fn)

      # Auxiliary classifier head, fed from the 'PreAuxLogits' endpoint.
      if num_classes and create_aux_logits:
        with tf.compat.v1.variable_scope('AuxLogits'):
          aux_pooled = slim.avg_pool2d(end_points['PreAuxLogits'], 5,
                                       stride=3, padding='VALID',
                                       scope='Conv2d_1a_3x3')
          aux_reduced = slim.conv2d(aux_pooled, 128, 1,
                                    scope='Conv2d_1b_1x1')
          # Kernel spans the whole remaining spatial extent.
          aux_features = slim.conv2d(aux_reduced, 768,
                                     aux_reduced.get_shape()[1:3],
                                     padding='VALID', scope='Conv2d_2a_5x5')
          aux_flat = slim.flatten(aux_features)
          end_points['AuxLogits'] = slim.fully_connected(
              aux_flat, num_classes, activation_fn=None, scope='Logits')

      with tf.compat.v1.variable_scope('Logits'):
        # TODO(sguada,arnoegw): Consider adding a parameter global_pool which
        # can be set to False to disable pooling here (as in resnet_*()).
        spatial_dims = net.get_shape()[1:3]
        if spatial_dims.is_fully_defined():
          pooled = slim.avg_pool2d(net, spatial_dims, padding='VALID',
                                   scope='AvgPool_1a_8x8')
        else:
          # Static height/width unknown: average over axes 1 and 2 instead.
          pooled = tf.reduce_mean(
              input_tensor=net, axis=[1, 2], keepdims=True, name='global_pool')
        end_points['global_pool'] = pooled
        if not num_classes:
          return pooled, end_points
        dropped = slim.dropout(slim.flatten(pooled), dropout_keep_prob,
                               is_training=is_training, scope='Dropout')
        end_points['PreLogitsFlatten'] = dropped
        logits = slim.fully_connected(dropped, num_classes, activation_fn=None,
                                      scope='Logits')
        end_points['Logits'] = logits
        end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions')

    return logits, end_points


inception_resnet_v2.default_image_size = 299
|
||||
|
||||
|
||||
def inception_resnet_v2_arg_scope(
    weight_decay=0.00004,
    batch_norm_decay=0.9997,
    batch_norm_epsilon=0.001,
    activation_fn=tf.nn.relu,
    batch_norm_updates_collections=tf.compat.v1.GraphKeys.UPDATE_OPS,
    batch_norm_scale=False):
  """Returns the scope with the default parameters for inception_resnet_v2.

  Args:
    weight_decay: the weight decay for weights variables.
    batch_norm_decay: decay for the moving average of batch_norm momentums.
    batch_norm_epsilon: small float added to variance to avoid dividing by zero.
    activation_fn: Activation function for conv2d.
    batch_norm_updates_collections: Collection for the update ops for
      batch norm.
    batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
      activations in the batch normalization layer.

  Returns:
    An arg_scope with the parameters needed for inception_resnet_v2.
  """
  bn_settings = {
      'decay': batch_norm_decay,
      'epsilon': batch_norm_epsilon,
      'updates_collections': batch_norm_updates_collections,
      'fused': None,  # Use fused batch norm if possible.
      'scale': batch_norm_scale,
  }
  # Outer scope: L2 weight decay on weights and biases of conv2d and
  # fully_connected layers.
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      weights_regularizer=slim.l2_regularizer(weight_decay),
                      biases_regularizer=slim.l2_regularizer(weight_decay)):
    # Inner scope: activation and batch norm for conv2d only; this is the
    # scope handed back to the caller.
    with slim.arg_scope([slim.conv2d],
                        activation_fn=activation_fn,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=bn_settings) as conv_scope:
      return conv_scope
|
||||
+338
@@ -0,0 +1,338 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for slim.inception_resnet_v2."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
from nets import inception
|
||||
|
||||
|
||||
class InceptionTest(tf.test.TestCase):
|
||||
|
||||
def testBuildLogits(self):
  """Checks op names and shapes of the main and auxiliary logits."""
  num_images, num_classes = 5, 1000
  side = 299
  expected_shape = [num_images, num_classes]
  with self.test_session():
    images = tf.random.uniform((num_images, side, side, 3))
    logits, endpoints = inception.inception_resnet_v2(images, num_classes)
    self.assertTrue('AuxLogits' in endpoints)
    aux = endpoints['AuxLogits']
    self.assertTrue(aux.op.name.startswith('InceptionResnetV2/AuxLogits'))
    self.assertListEqual(aux.get_shape().as_list(), expected_shape)
    self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))
    self.assertListEqual(logits.get_shape().as_list(), expected_shape)
|
||||
|
||||
def testBuildWithoutAuxLogits(self):
  """Checks that create_aux_logits=False leaves out the 'AuxLogits' endpoint."""
  num_images, num_classes = 5, 1000
  side = 299
  with self.test_session():
    images = tf.random.uniform((num_images, side, side, 3))
    logits, endpoints = inception.inception_resnet_v2(
        images, num_classes, create_aux_logits=False)
    self.assertTrue('AuxLogits' not in endpoints)
    self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [num_images, num_classes])
|
||||
|
||||
def testBuildNoClasses(self):
  """Checks that num_classes=None returns the pooled features, not logits."""
  num_images = 5
  side = 299
  with self.test_session():
    images = tf.random.uniform((num_images, side, side, 3))
    features, endpoints = inception.inception_resnet_v2(images, None)
    self.assertTrue('AuxLogits' not in endpoints)
    self.assertTrue('Logits' not in endpoints)
    self.assertTrue(
        features.op.name.startswith('InceptionResnetV2/Logits/AvgPool'))
    self.assertListEqual(features.get_shape().as_list(),
                         [num_images, 1, 1, 1536])
|
||||
|
||||
def testBuildEndPoints(self):
  """Checks shapes of the Logits, AuxLogits and Conv2d_7b_1x1 endpoints."""
  num_images, num_classes = 5, 1000
  side = 299
  logits_shape = [num_images, num_classes]
  with self.test_session():
    images = tf.random.uniform((num_images, side, side, 3))
    _, end_points = inception.inception_resnet_v2(images, num_classes)
    self.assertTrue('Logits' in end_points)
    self.assertListEqual(end_points['Logits'].get_shape().as_list(),
                         logits_shape)
    self.assertTrue('AuxLogits' in end_points)
    self.assertListEqual(end_points['AuxLogits'].get_shape().as_list(),
                         logits_shape)
    last_conv = end_points['Conv2d_7b_1x1']
    self.assertListEqual(last_conv.get_shape().as_list(),
                         [num_images, 8, 8, 1536])
|
||||
|
||||
def testBuildBaseNetwork(self):
  """Checks the default output and end-point names of the base network.

  With no final_endpoint given, inception_resnet_v2_base must return the
  'Conv2d_7b_1x1' tensor of static shape [batch_size, 8, 8, 1536] and expose
  exactly the twelve end points listed below.
  """
  batch_size, height, width = 5, 299, 299

  images = tf.random.uniform((batch_size, height, width, 3))
  net, end_points = inception.inception_resnet_v2_base(images)

  output_op_name = net.op.name
  self.assertTrue(
      output_op_name.startswith('InceptionResnetV2/Conv2d_7b_1x1'))
  output_shape = net.get_shape().as_list()
  self.assertListEqual(output_shape, [batch_size, 8, 8, 1536])

  expected_endpoints = [
      'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3',
      'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_6a',
      'PreAuxLogits', 'Mixed_7a', 'Conv2d_7b_1x1',
  ]
  self.assertItemsEqual(end_points.keys(), expected_endpoints)
|
||||
|
||||
def testBuildOnlyUptoFinalEndpoint(self):
  """Checks that final_endpoint truncates the base network correctly.

  For every end point, the network is rebuilt in a fresh graph; the returned
  end points must be exactly the prefix of the list up to and including the
  requested one.
  """
  input_shape = (5, 299, 299, 3)
  endpoints = [
      'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3',
      'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_6a',
      'PreAuxLogits', 'Mixed_7a', 'Conv2d_7b_1x1',
  ]
  # `count` is the number of end points expected to exist so far.
  for count, endpoint in enumerate(endpoints, 1):
    with tf.Graph().as_default():
      inputs = tf.random.uniform(input_shape)
      out_tensor, end_points = inception.inception_resnet_v2_base(
          inputs, final_endpoint=endpoint)
      # The op-name check is skipped for 'PreAuxLogits' only.
      if endpoint != 'PreAuxLogits':
        expected_prefix = 'InceptionResnetV2/' + endpoint
        self.assertTrue(out_tensor.op.name.startswith(expected_prefix))
      self.assertItemsEqual(endpoints[:count], end_points.keys())
|
||||
|
||||
def testBuildAndCheckAllEndPointsUptoPreAuxLogits(self):
  """Checks names and static shapes of all end points up to 'PreAuxLogits'.

  Uses the default base-network settings on a 5x299x299x3 input.
  """
  batch_size = 5
  height, width = 299, 299

  inputs = tf.random.uniform((batch_size, height, width, 3))
  _, end_points = inception.inception_resnet_v2_base(
      inputs, final_endpoint='PreAuxLogits')
  endpoints_shapes = {
      'Conv2d_1a_3x3': [5, 149, 149, 32],
      'Conv2d_2a_3x3': [5, 147, 147, 32],
      'Conv2d_2b_3x3': [5, 147, 147, 64],
      'MaxPool_3a_3x3': [5, 73, 73, 64],
      'Conv2d_3b_1x1': [5, 73, 73, 80],
      'Conv2d_4a_3x3': [5, 71, 71, 192],
      'MaxPool_5a_3x3': [5, 35, 35, 192],
      'Mixed_5b': [5, 35, 35, 320],
      'Mixed_6a': [5, 17, 17, 1088],
      'PreAuxLogits': [5, 17, 17, 1088],
  }

  self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
  # Iterate key/value pairs directly instead of re-indexing the dictionary.
  for endpoint_name, expected_shape in endpoints_shapes.items():
    # assertIn names the missing end point on failure.
    self.assertIn(endpoint_name, end_points)
    self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                         expected_shape)
|
||||
|
||||
def testBuildAndCheckAllEndPointsUptoPreAuxLogitsWithAlignedFeatureMaps(self):
  """Checks end-point shapes up to 'PreAuxLogits' with align_feature_maps.

  Same as the default-settings test, but the base network is built with
  align_feature_maps=True, which yields the spatial sizes listed below.
  """
  batch_size = 5
  height, width = 299, 299

  inputs = tf.random.uniform((batch_size, height, width, 3))
  _, end_points = inception.inception_resnet_v2_base(
      inputs, final_endpoint='PreAuxLogits', align_feature_maps=True)
  endpoints_shapes = {
      'Conv2d_1a_3x3': [5, 150, 150, 32],
      'Conv2d_2a_3x3': [5, 150, 150, 32],
      'Conv2d_2b_3x3': [5, 150, 150, 64],
      'MaxPool_3a_3x3': [5, 75, 75, 64],
      'Conv2d_3b_1x1': [5, 75, 75, 80],
      'Conv2d_4a_3x3': [5, 75, 75, 192],
      'MaxPool_5a_3x3': [5, 38, 38, 192],
      'Mixed_5b': [5, 38, 38, 320],
      'Mixed_6a': [5, 19, 19, 1088],
      'PreAuxLogits': [5, 19, 19, 1088],
  }

  self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
  # Iterate key/value pairs directly instead of re-indexing the dictionary.
  for endpoint_name, expected_shape in endpoints_shapes.items():
    # assertIn names the missing end point on failure.
    self.assertIn(endpoint_name, end_points)
    self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                         expected_shape)
|
||||
|
||||
def testBuildAndCheckAllEndPointsUptoPreAuxLogitsWithOutputStrideEight(self):
  """Checks end-point shapes up to 'PreAuxLogits' with output_stride=8.

  Same as the default-settings test, but the base network is built with
  output_stride=8, so 'Mixed_6a' and 'PreAuxLogits' are 33x33 instead of
  17x17.
  """
  batch_size = 5
  height, width = 299, 299

  inputs = tf.random.uniform((batch_size, height, width, 3))
  _, end_points = inception.inception_resnet_v2_base(
      inputs, final_endpoint='PreAuxLogits', output_stride=8)
  endpoints_shapes = {
      'Conv2d_1a_3x3': [5, 149, 149, 32],
      'Conv2d_2a_3x3': [5, 147, 147, 32],
      'Conv2d_2b_3x3': [5, 147, 147, 64],
      'MaxPool_3a_3x3': [5, 73, 73, 64],
      'Conv2d_3b_1x1': [5, 73, 73, 80],
      'Conv2d_4a_3x3': [5, 71, 71, 192],
      'MaxPool_5a_3x3': [5, 35, 35, 192],
      'Mixed_5b': [5, 35, 35, 320],
      'Mixed_6a': [5, 33, 33, 1088],
      'PreAuxLogits': [5, 33, 33, 1088],
  }

  self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
  # Iterate key/value pairs directly instead of re-indexing the dictionary.
  for endpoint_name, expected_shape in endpoints_shapes.items():
    # assertIn names the missing end point on failure.
    self.assertIn(endpoint_name, end_points)
    self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                         expected_shape)
|
||||
|
||||
def testVariablesSetDevice(self):
  """Checks that variables are placed on the device active at build time.

  The network is built once under scope 'on_cpu' pinned to '/cpu:0' and once
  under scope 'on_gpu' pinned to '/gpu:0'; every global variable of each
  scope must then report the matching device.
  """
  batch_size, num_classes = 5, 1000
  height, width = 299, 299
  placements = (('on_cpu', '/cpu:0'), ('on_gpu', '/gpu:0'))
  with self.test_session():
    inputs = tf.random.uniform((batch_size, height, width, 3))
    # Force all Variables to reside on the device.
    for scope_name, device_name in placements:
      with tf.compat.v1.variable_scope(scope_name), tf.device(device_name):
        inception.inception_resnet_v2(inputs, num_classes)
    # Both copies are built before any placement is verified.
    for scope_name, device_name in placements:
      scoped_variables = tf.compat.v1.get_collection(
          tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope=scope_name)
      for v in scoped_variables:
        self.assertDeviceEqual(v.device, device_name)
|
||||
|
||||
def testHalfSizeImages(self):
  """Checks the network on 150x150 inputs.

  The logits keep static shape [batch_size, num_classes] and the
  'Conv2d_7b_1x1' feature map shrinks to [batch_size, 3, 3, 1536].
  """
  batch_size, num_classes = 5, 1000
  image_shape = (batch_size, 150, 150, 3)
  with self.test_session():
    images = tf.random.uniform(image_shape)
    logits, end_points = inception.inception_resnet_v2(images, num_classes)

    logits_op_name = logits.op.name
    self.assertTrue(logits_op_name.startswith('InceptionResnetV2/Logits'))
    logits_shape = logits.get_shape().as_list()
    self.assertListEqual(logits_shape, [batch_size, num_classes])

    pre_pool_shape = end_points['Conv2d_7b_1x1'].get_shape().as_list()
    self.assertListEqual(pre_pool_shape, [batch_size, 3, 3, 1536])
|
||||
|
||||
def testGlobalPool(self):
  """Checks the network on a non-square 330x400 input.

  The logits keep static shape [batch_size, num_classes] while the
  'Conv2d_7b_1x1' feature map has static shape [batch_size, 8, 11, 1536].
  """
  batch_size, num_classes = 1, 1000
  image_shape = (batch_size, 330, 400, 3)
  with self.test_session():
    images = tf.random.uniform(image_shape)
    logits, end_points = inception.inception_resnet_v2(images, num_classes)

    logits_op_name = logits.op.name
    self.assertTrue(logits_op_name.startswith('InceptionResnetV2/Logits'))
    logits_shape = logits.get_shape().as_list()
    self.assertListEqual(logits_shape, [batch_size, num_classes])

    pre_pool_shape = end_points['Conv2d_7b_1x1'].get_shape().as_list()
    self.assertListEqual(pre_pool_shape, [batch_size, 8, 11, 1536])
|
||||
|
||||
def testGlobalPoolUnknownImageShape(self):
  """Checks the network when height and width are unknown at build time.

  The graph is built on a placeholder of shape (batch_size, None, None, 3)
  without auxiliary logits, then run on a concrete 330x400 image; the
  evaluated logits and 'Conv2d_7b_1x1' outputs must have the expected shapes.
  """
  batch_size, num_classes = 1, 1000
  height, width = 330, 400
  with self.test_session() as sess:
    placeholder_shape = (batch_size, None, None, 3)
    inputs = tf.compat.v1.placeholder(tf.float32, placeholder_shape)
    logits, end_points = inception.inception_resnet_v2(
        inputs, num_classes, create_aux_logits=False)
    self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))
    static_logits_shape = logits.get_shape().as_list()
    self.assertListEqual(static_logits_shape, [batch_size, num_classes])
    pre_pool = end_points['Conv2d_7b_1x1']

    images = tf.random.uniform((batch_size, height, width, 3))
    sess.run(tf.compat.v1.global_variables_initializer())
    # Feed one evaluated random image through the placeholder.
    feed = {inputs: images.eval()}
    logits_out, pre_pool_out = sess.run([logits, pre_pool], feed)
    self.assertTupleEqual(logits_out.shape, (batch_size, num_classes))
    self.assertTupleEqual(pre_pool_out.shape, (batch_size, 8, 11, 1536))
|
||||
|
||||
def testUnknownBatchSize(self):
  """Checks the network when the batch dimension is unknown at build time.

  The static logits shape must be [None, num_classes]; after feeding a
  batch of one image the evaluated logits must have shape
  (batch_size, num_classes).
  """
  batch_size = 1
  height, width = 299, 299
  num_classes = 1000
  with self.test_session() as sess:
    inputs = tf.compat.v1.placeholder(tf.float32, (None, height, width, 3))
    logits, _ = inception.inception_resnet_v2(inputs, num_classes)
    self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [None, num_classes])
    images = tf.random.uniform((batch_size, height, width, 3))
    sess.run(tf.compat.v1.global_variables_initializer())
    output = sess.run(logits, {inputs: images.eval()})
    # assertEquals is a deprecated unittest alias (removed in Python 3.12).
    self.assertEqual(output.shape, (batch_size, num_classes))
|
||||
|
||||
def testEvaluation(self):
  """Runs the network with is_training=False and checks the argmax shape.

  The per-example predictions (argmax over axis 1 of the logits) must have
  shape (batch_size,).
  """
  batch_size = 2
  height, width = 299, 299
  num_classes = 1000
  with self.test_session() as sess:
    eval_inputs = tf.random.uniform((batch_size, height, width, 3))
    logits, _ = inception.inception_resnet_v2(eval_inputs,
                                              num_classes,
                                              is_training=False)
    predictions = tf.argmax(input=logits, axis=1)
    sess.run(tf.compat.v1.global_variables_initializer())
    output = sess.run(predictions)
    # assertEquals is a deprecated unittest alias (removed in Python 3.12).
    self.assertEqual(output.shape, (batch_size,))
|
||||
|
||||
def testTrainEvalWithReuse(self):
  """Builds a training graph, then an eval graph that reuses its variables.

  The second call passes is_training=False and reuse=True with a different
  batch size; the eval predictions must have shape (eval_batch_size,).
  """
  train_batch_size = 5
  eval_batch_size = 2
  height, width = 150, 150
  num_classes = 1000
  with self.test_session() as sess:
    train_inputs = tf.random.uniform((train_batch_size, height, width, 3))
    # First call creates the variables; its outputs are not needed.
    inception.inception_resnet_v2(train_inputs, num_classes)
    eval_inputs = tf.random.uniform((eval_batch_size, height, width, 3))
    logits, _ = inception.inception_resnet_v2(eval_inputs,
                                              num_classes,
                                              is_training=False,
                                              reuse=True)
    predictions = tf.argmax(input=logits, axis=1)
    sess.run(tf.compat.v1.global_variables_initializer())
    output = sess.run(predictions)
    # assertEquals is a deprecated unittest alias (removed in Python 3.12).
    self.assertEqual(output.shape, (eval_batch_size,))
|
||||
|
||||
def testNoBatchNormScaleByDefault(self):
  """Checks that the default arg scope creates no BatchNorm gamma variables."""
  num_classes = 1000
  input_shape = (1, 299, 299, 3)
  inputs = tf.compat.v1.placeholder(tf.float32, input_shape)
  default_scope = inception.inception_resnet_v2_arg_scope()
  with contrib_slim.arg_scope(default_scope):
    inception.inception_resnet_v2(inputs, num_classes, is_training=False)

  gamma_variables = tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$')
  self.assertEqual(gamma_variables, [])
|
||||
|
||||
def testBatchNormScale(self):
  """Checks that batch_norm_scale=True adds a gamma to every BatchNorm.

  At least one gamma variable must exist, and each moving_mean variable must
  have a sibling gamma variable under the same name prefix.
  """
  num_classes = 1000
  input_shape = (1, 299, 299, 3)
  inputs = tf.compat.v1.placeholder(tf.float32, input_shape)
  scaled_scope = inception.inception_resnet_v2_arg_scope(
      batch_norm_scale=True)
  with contrib_slim.arg_scope(scaled_scope):
    inception.inception_resnet_v2(inputs, num_classes, is_training=False)

  gamma_variables = tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$')
  gamma_names = {v.op.name for v in gamma_variables}
  self.assertGreater(len(gamma_names), 0)

  suffix_length = len('moving_mean')
  for v in tf.compat.v1.global_variables('.*/BatchNorm/moving_mean:0$'):
    # Swap the trailing 'moving_mean' for 'gamma' to get the sibling name.
    sibling_gamma = v.op.name[:-suffix_length] + 'gamma'
    self.assertIn(sibling_gamma, gamma_names)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tf.test.main()
|
||||
+84
@@ -0,0 +1,84 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains common code shared by all inception models.
|
||||
|
||||
Usage of arg scope:
|
||||
with slim.arg_scope(inception_arg_scope()):
|
||||
logits, end_points = inception.inception_v3(images, num_classes,
|
||||
is_training=is_training)
|
||||
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
|
||||
def inception_arg_scope(
    weight_decay=0.00004,
    use_batch_norm=True,
    batch_norm_decay=0.9997,
    batch_norm_epsilon=0.001,
    activation_fn=tf.nn.relu,
    batch_norm_updates_collections=tf.compat.v1.GraphKeys.UPDATE_OPS,
    batch_norm_scale=False):
  """Builds the default slim arg scope shared by the inception models.

  Args:
    weight_decay: L2 regularization weight applied to conv2d and
      fully_connected weights.
    use_batch_norm: If `True`, slim.batch_norm is installed as the conv2d
      normalizer; otherwise conv2d gets no normalizer.
    batch_norm_decay: Decay for the batch norm moving averages.
    batch_norm_epsilon: Small float added to the variance to avoid dividing
      by zero in batch norm.
    activation_fn: Activation function for conv2d.
    batch_norm_updates_collections: Collection for the batch norm update ops.
    batch_norm_scale: If True, batch norm uses an explicit `gamma` multiplier
      to scale the activations.

  Returns:
    An `arg_scope` to use for the inception models.
  """
  # The parameter dictionary is always built, as before; it is only handed
  # to conv2d when batch norm is enabled.
  batch_norm_params = dict(
      decay=batch_norm_decay,  # Decay for the moving averages.
      epsilon=batch_norm_epsilon,  # Prevents 0s in the variance.
      updates_collections=batch_norm_updates_collections,
      fused=None,  # Use fused batch norm if possible.
      scale=batch_norm_scale,
  )
  normalizer_fn = slim.batch_norm if use_batch_norm else None
  normalizer_params = batch_norm_params if use_batch_norm else {}

  # Weight decay applies to both Conv and FC layers; the remaining defaults
  # apply to Conv layers only.
  regularized_ops = [slim.conv2d, slim.fully_connected]
  with slim.arg_scope(regularized_ops,
                      weights_regularizer=slim.l2_regularizer(weight_decay)):
    with slim.arg_scope(
        [slim.conv2d],
        weights_initializer=slim.variance_scaling_initializer(),
        activation_fn=activation_fn,
        normalizer_fn=normalizer_fn,
        normalizer_params=normalizer_params) as conv_scope:
      return conv_scope
|
||||
+347
@@ -0,0 +1,347 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains the definition for inception v1 classification network."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
from nets import inception_utils
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
def trunc_normal(stddev):
  """Returns a truncated-normal initializer with mean 0.0 and `stddev`."""
  return tf.compat.v1.truncated_normal_initializer(0.0, stddev)
|
||||
|
||||
|
||||
def inception_v1_base(inputs,
                      final_endpoint='Mixed_5c',
                      include_root_block=True,
                      scope='InceptionV1'):
  """Defines the Inception V1 base architecture.

  This architecture is defined in:
    Going deeper with convolutions
    Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
    Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
    http://arxiv.org/pdf/1409.4842v1.pdf.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
      'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
      'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
      'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c']. If
      include_root_block is False, ['Conv2d_1a_7x7', 'MaxPool_2a_3x3',
      'Conv2d_2b_1x1', 'Conv2d_2c_3x3', 'MaxPool_3a_3x3'] will not be available.
    include_root_block: If True, include the convolution and max-pooling layers
      before the inception modules. If False, excludes those layers.
    scope: Optional variable_scope.

  Returns:
    net: the output tensor of the layer named by `final_endpoint`.
    end_points: a dictionary from the name of every layer built so far to the
      corresponding activation.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values.
  """
  end_points = {}
  with tf.compat.v1.variable_scope(scope, 'InceptionV1', [inputs]):
    with slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
        weights_initializer=trunc_normal(0.01)):
      with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                          stride=1, padding='SAME'):
        net = inputs
        if include_root_block:
          end_point = 'Conv2d_1a_7x7'
          net = slim.conv2d(inputs, 64, [7, 7], stride=2, scope=end_point)
          end_points[end_point] = net
          if final_endpoint == end_point:
            return net, end_points

          end_point = 'MaxPool_2a_3x3'
          net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
          end_points[end_point] = net
          if final_endpoint == end_point:
            return net, end_points

          end_point = 'Conv2d_2b_1x1'
          net = slim.conv2d(net, 64, [1, 1], scope=end_point)
          end_points[end_point] = net
          if final_endpoint == end_point:
            return net, end_points

          end_point = 'Conv2d_2c_3x3'
          net = slim.conv2d(net, 192, [3, 3], scope=end_point)
          end_points[end_point] = net
          if final_endpoint == end_point:
            return net, end_points

          end_point = 'MaxPool_3a_3x3'
          net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
          end_points[end_point] = net
          if final_endpoint == end_point:
            return net, end_points

        end_point = 'Mixed_3b'
        net = _inception_v1_module(net, end_point, 64, (96, 128), (16, 32), 32)
        end_points[end_point] = net
        if final_endpoint == end_point:
          return net, end_points

        end_point = 'Mixed_3c'
        net = _inception_v1_module(
            net, end_point, 128, (128, 192), (32, 96), 64)
        end_points[end_point] = net
        if final_endpoint == end_point:
          return net, end_points

        end_point = 'MaxPool_4a_3x3'
        net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
        end_points[end_point] = net
        if final_endpoint == end_point:
          return net, end_points

        end_point = 'Mixed_4b'
        net = _inception_v1_module(
            net, end_point, 192, (96, 208), (16, 48), 64)
        end_points[end_point] = net
        if final_endpoint == end_point:
          return net, end_points

        end_point = 'Mixed_4c'
        net = _inception_v1_module(
            net, end_point, 160, (112, 224), (24, 64), 64)
        end_points[end_point] = net
        if final_endpoint == end_point:
          return net, end_points

        end_point = 'Mixed_4d'
        net = _inception_v1_module(
            net, end_point, 128, (128, 256), (24, 64), 64)
        end_points[end_point] = net
        if final_endpoint == end_point:
          return net, end_points

        end_point = 'Mixed_4e'
        net = _inception_v1_module(
            net, end_point, 112, (144, 288), (32, 64), 64)
        end_points[end_point] = net
        if final_endpoint == end_point:
          return net, end_points

        end_point = 'Mixed_4f'
        net = _inception_v1_module(
            net, end_point, 256, (160, 320), (32, 128), 128)
        end_points[end_point] = net
        if final_endpoint == end_point:
          return net, end_points

        end_point = 'MaxPool_5a_2x2'
        net = slim.max_pool2d(net, [2, 2], stride=2, scope=end_point)
        end_points[end_point] = net
        if final_endpoint == end_point:
          return net, end_points

        end_point = 'Mixed_5b'
        # NOTE(review): the second Branch_2 convolution of this module is
        # scoped 'Conv2d_0a_3x3' (not '0b') in the original code. The name
        # determines variable names, so it is reproduced exactly; presumably
        # existing checkpoints rely on it -- confirm before renaming.
        net = _inception_v1_module(
            net, end_point, 256, (160, 320), (32, 128), 128,
            branch_2_conv_scope='Conv2d_0a_3x3')
        end_points[end_point] = net
        if final_endpoint == end_point:
          return net, end_points

        end_point = 'Mixed_5c'
        net = _inception_v1_module(
            net, end_point, 384, (192, 384), (48, 128), 128)
        end_points[end_point] = net
        if final_endpoint == end_point:
          return net, end_points
    raise ValueError('Unknown final endpoint %s' % final_endpoint)


def _inception_v1_module(net, end_point, branch_0_depth, branch_1_depths,
                         branch_2_depths, branch_3_depth,
                         branch_2_conv_scope='Conv2d_0b_3x3'):
  """Builds one four-branch inception module under scope `end_point`.

  Stride, padding and initializer come from the arg scopes that are active
  in the caller.

  Args:
    net: input tensor of the module.
    end_point: name of the variable scope wrapping the module.
    branch_0_depth: number of outputs of the 1x1 convolution in Branch_0.
    branch_1_depths: pair (1x1 depth, 3x3 depth) for the two stacked
      convolutions in Branch_1.
    branch_2_depths: pair (1x1 depth, 3x3 depth) for the two stacked
      convolutions in Branch_2.
    branch_3_depth: number of outputs of the 1x1 convolution that follows
      the 3x3 max-pool in Branch_3.
    branch_2_conv_scope: scope name of the second convolution in Branch_2.

  Returns:
    The four branch outputs concatenated along axis 3.
  """
  with tf.compat.v1.variable_scope(end_point):
    with tf.compat.v1.variable_scope('Branch_0'):
      branch_0 = slim.conv2d(
          net, branch_0_depth, [1, 1], scope='Conv2d_0a_1x1')
    with tf.compat.v1.variable_scope('Branch_1'):
      branch_1 = slim.conv2d(
          net, branch_1_depths[0], [1, 1], scope='Conv2d_0a_1x1')
      branch_1 = slim.conv2d(
          branch_1, branch_1_depths[1], [3, 3], scope='Conv2d_0b_3x3')
    with tf.compat.v1.variable_scope('Branch_2'):
      branch_2 = slim.conv2d(
          net, branch_2_depths[0], [1, 1], scope='Conv2d_0a_1x1')
      branch_2 = slim.conv2d(
          branch_2, branch_2_depths[1], [3, 3], scope=branch_2_conv_scope)
    with tf.compat.v1.variable_scope('Branch_3'):
      branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
      branch_3 = slim.conv2d(
          branch_3, branch_3_depth, [1, 1], scope='Conv2d_0b_1x1')
    return tf.concat(
        axis=3, values=[branch_0, branch_1, branch_2, branch_3])
|
||||
|
||||
|
||||
def inception_v1(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV1',
                 global_pool=False):
  """Defines the Inception V1 architecture.

  This architecture is defined in:

    Going deeper with convolutions
    Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
    Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
    http://arxiv.org/pdf/1409.4842v1.pdf.

  The default image size used to train this network is 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes. If 0 or None, the logits layer
      is omitted and the input features to the logits layer (before dropout)
      are returned instead.
    is_training: whether is training or not.
    dropout_keep_prob: the percentage of activation values that are retained.
    prediction_fn: a function to get predictions out of logits.
    spatial_squeeze: if True, logits is of shape [B, C], if false logits is of
      shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.
    global_pool: Optional boolean flag to control the avgpooling before the
      logits layer. If false or unset, pooling is done with a fixed window
      that reduces default-sized inputs to 1x1, while larger inputs lead to
      larger outputs. If true, any input size is pooled down to 1x1.

  Returns:
    net: a Tensor with the logits (pre-softmax activations) if num_classes
      is a non-zero integer, or the non-dropped-out input to the logits layer
      if num_classes is 0 or None.
    end_points: a dictionary from components of the network to the corresponding
      activation.
  """
  with tf.compat.v1.variable_scope(
      scope, 'InceptionV1', [inputs], reuse=reuse) as scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      net, end_points = inception_v1_base(inputs, scope=scope)
      # Final pooling and prediction.
      with tf.compat.v1.variable_scope('Logits'):
        if global_pool:
          # Global average pooling.
          pool_name = 'global_pool'
          net = tf.reduce_mean(
              input_tensor=net, axis=[1, 2], keepdims=True, name=pool_name)
        else:
          # Pooling with a fixed kernel size.
          pool_name = 'AvgPool_0a_7x7'
          net = slim.avg_pool2d(net, [7, 7], stride=1, scope=pool_name)
        end_points[pool_name] = net

        # Without classes, the pooled features are the final output.
        if not num_classes:
          return net, end_points

        net = slim.dropout(net, dropout_keep_prob, scope='Dropout_0b')
        logits = slim.conv2d(
            net, num_classes, [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            scope='Conv2d_0c_1x1')
        if spatial_squeeze:
          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')

        end_points['Logits'] = logits
        end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
  return logits, end_points
|
||||
inception_v1.default_image_size = 224
|
||||
|
||||
inception_v1_arg_scope = inception_utils.inception_arg_scope
|
||||
+300
@@ -0,0 +1,300 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for nets.inception_v1."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
from nets import inception
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
|
||||
class InceptionV1Test(tf.test.TestCase):
  """Graph-construction and smoke tests for `inception.inception_v1`."""

  def testBuildClassificationNetwork(self):
    """Classifier returns squeezed logits plus a 'Predictions' end point."""
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000

    inputs = tf.random.uniform((batch_size, height, width, 3))
    logits, end_points = inception.inception_v1(inputs, num_classes)
    self.assertTrue(logits.op.name.startswith(
        'InceptionV1/Logits/SpatialSqueeze'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertIn('Predictions', end_points)
    self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
                         [batch_size, num_classes])

  def testBuildPreLogitsNetwork(self):
    """With num_classes=None the pooled features are returned, no logits."""
    batch_size = 5
    height, width = 224, 224
    num_classes = None

    inputs = tf.random.uniform((batch_size, height, width, 3))
    net, end_points = inception.inception_v1(inputs, num_classes)
    self.assertTrue(net.op.name.startswith('InceptionV1/Logits/AvgPool'))
    self.assertListEqual(net.get_shape().as_list(), [batch_size, 1, 1, 1024])
    self.assertNotIn('Logits', end_points)
    self.assertNotIn('Predictions', end_points)

  def testBuildBaseNetwork(self):
    """Base network ends at Mixed_5c and exposes every expected end point."""
    batch_size = 5
    height, width = 224, 224

    inputs = tf.random.uniform((batch_size, height, width, 3))
    mixed_5c, end_points = inception.inception_v1_base(inputs)
    self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c'))
    self.assertListEqual(mixed_5c.get_shape().as_list(),
                         [batch_size, 7, 7, 1024])
    expected_endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
                          'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b',
                          'Mixed_3c', 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c',
                          'Mixed_4d', 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2',
                          'Mixed_5b', 'Mixed_5c']
    self.assertItemsEqual(end_points.keys(), expected_endpoints)

  def testBuildOnlyUptoFinalEndpoint(self):
    """Each final_endpoint stops construction at exactly that end point."""
    batch_size = 5
    height, width = 224, 224
    endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
                 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
                 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d',
                 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b',
                 'Mixed_5c']
    for index, endpoint in enumerate(endpoints):
      # A fresh graph per endpoint keeps op names free of uniquifying suffixes.
      with tf.Graph().as_default():
        inputs = tf.random.uniform((batch_size, height, width, 3))
        out_tensor, end_points = inception.inception_v1_base(
            inputs, final_endpoint=endpoint)
        self.assertTrue(out_tensor.op.name.startswith(
            'InceptionV1/' + endpoint))
        self.assertItemsEqual(endpoints[:index+1], end_points.keys())

  def testBuildAndCheckAllEndPointsUptoMixed5c(self):
    """Every end point up to Mixed_5c has the expected static shape."""
    batch_size = 5
    height, width = 224, 224

    inputs = tf.random.uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v1_base(inputs,
                                                final_endpoint='Mixed_5c')
    endpoints_shapes = {
        'Conv2d_1a_7x7': [5, 112, 112, 64],
        'MaxPool_2a_3x3': [5, 56, 56, 64],
        'Conv2d_2b_1x1': [5, 56, 56, 64],
        'Conv2d_2c_3x3': [5, 56, 56, 192],
        'MaxPool_3a_3x3': [5, 28, 28, 192],
        'Mixed_3b': [5, 28, 28, 256],
        'Mixed_3c': [5, 28, 28, 480],
        'MaxPool_4a_3x3': [5, 14, 14, 480],
        'Mixed_4b': [5, 14, 14, 512],
        'Mixed_4c': [5, 14, 14, 512],
        'Mixed_4d': [5, 14, 14, 512],
        'Mixed_4e': [5, 14, 14, 528],
        'Mixed_4f': [5, 14, 14, 832],
        'MaxPool_5a_2x2': [5, 7, 7, 832],
        'Mixed_5b': [5, 7, 7, 832],
        'Mixed_5c': [5, 7, 7, 1024]
    }

    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name in endpoints_shapes:
      expected_shape = endpoints_shapes[endpoint_name]
      self.assertIn(endpoint_name, end_points)
      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                           expected_shape)

  def testModelHasExpectedNumberOfParameters(self):
    """Base network built under the V1 arg scope has 5607184 parameters."""
    batch_size = 5
    height, width = 224, 224
    inputs = tf.random.uniform((batch_size, height, width, 3))
    with slim.arg_scope(inception.inception_v1_arg_scope()):
      inception.inception_v1_base(inputs)
    total_params, _ = slim.model_analyzer.analyze_vars(
        slim.get_model_variables())
    self.assertAlmostEqual(5607184, total_params)

  def testHalfSizeImages(self):
    """112x112 inputs produce a 4x4x1024 Mixed_5c output."""
    batch_size = 5
    height, width = 112, 112

    inputs = tf.random.uniform((batch_size, height, width, 3))
    mixed_5c, _ = inception.inception_v1_base(inputs)
    self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c'))
    self.assertListEqual(mixed_5c.get_shape().as_list(),
                         [batch_size, 4, 4, 1024])

  def testBuildBaseNetworkWithoutRootBlock(self):
    """include_root_block=False starts at Mixed_3b on 28x28x192 inputs."""
    batch_size = 5
    height, width = 28, 28
    channels = 192

    inputs = tf.random.uniform((batch_size, height, width, channels))
    _, end_points = inception.inception_v1_base(
        inputs, include_root_block=False)
    endpoints_shapes = {
        'Mixed_3b': [5, 28, 28, 256],
        'Mixed_3c': [5, 28, 28, 480],
        'MaxPool_4a_3x3': [5, 14, 14, 480],
        'Mixed_4b': [5, 14, 14, 512],
        'Mixed_4c': [5, 14, 14, 512],
        'Mixed_4d': [5, 14, 14, 512],
        'Mixed_4e': [5, 14, 14, 528],
        'Mixed_4f': [5, 14, 14, 832],
        'MaxPool_5a_2x2': [5, 7, 7, 832],
        'Mixed_5b': [5, 7, 7, 832],
        'Mixed_5c': [5, 7, 7, 1024]
    }

    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name in endpoints_shapes:
      expected_shape = endpoints_shapes[endpoint_name]
      self.assertIn(endpoint_name, end_points)
      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                           expected_shape)

  def testUnknownImageShape(self):
    """Network builds and runs when height/width are unknown at graph time."""
    tf.compat.v1.reset_default_graph()
    batch_size = 2
    height, width = 224, 224
    num_classes = 1000
    input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
    with self.test_session() as sess:
      inputs = tf.compat.v1.placeholder(
          tf.float32, shape=(batch_size, None, None, 3))
      logits, end_points = inception.inception_v1(inputs, num_classes)
      self.assertTrue(logits.op.name.startswith('InceptionV1/Logits'))
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])
      pre_pool = end_points['Mixed_5c']
      feed_dict = {inputs: input_np}
      tf.compat.v1.global_variables_initializer().run()
      pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
      self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024])

  def testGlobalPoolUnknownImageShape(self):
    """global_pool=True handles a 250x300 input fed to an unknown-size graph."""
    tf.compat.v1.reset_default_graph()
    batch_size = 1
    height, width = 250, 300
    num_classes = 1000
    input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
    with self.test_session() as sess:
      inputs = tf.compat.v1.placeholder(
          tf.float32, shape=(batch_size, None, None, 3))
      logits, end_points = inception.inception_v1(inputs, num_classes,
                                                  global_pool=True)
      self.assertTrue(logits.op.name.startswith('InceptionV1/Logits'))
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])
      pre_pool = end_points['Mixed_5c']
      feed_dict = {inputs: input_np}
      tf.compat.v1.global_variables_initializer().run()
      pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
      self.assertListEqual(list(pre_pool_out.shape), [batch_size, 8, 10, 1024])

  def testUnknowBatchSize(self):
    """A placeholder with unknown batch size yields [None, C] static logits."""
    batch_size = 1
    height, width = 224, 224
    num_classes = 1000

    inputs = tf.compat.v1.placeholder(tf.float32, (None, height, width, 3))
    logits, _ = inception.inception_v1(inputs, num_classes)
    self.assertTrue(logits.op.name.startswith('InceptionV1/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [None, num_classes])
    images = tf.random.uniform((batch_size, height, width, 3))

    with self.test_session() as sess:
      sess.run(tf.compat.v1.global_variables_initializer())
      output = sess.run(logits, {inputs: images.eval()})
      self.assertEqual(output.shape, (batch_size, num_classes))

  def testEvaluation(self):
    """Inference-mode graph produces one argmax prediction per example."""
    batch_size = 2
    height, width = 224, 224
    num_classes = 1000

    eval_inputs = tf.random.uniform((batch_size, height, width, 3))
    logits, _ = inception.inception_v1(eval_inputs, num_classes,
                                       is_training=False)
    predictions = tf.argmax(input=logits, axis=1)

    with self.test_session() as sess:
      sess.run(tf.compat.v1.global_variables_initializer())
      output = sess.run(predictions)
      self.assertEqual(output.shape, (batch_size,))

  def testTrainEvalWithReuse(self):
    """A second network built with reuse=True runs on a different batch size."""
    train_batch_size = 5
    eval_batch_size = 2
    height, width = 224, 224
    num_classes = 1000

    train_inputs = tf.random.uniform((train_batch_size, height, width, 3))
    inception.inception_v1(train_inputs, num_classes)
    eval_inputs = tf.random.uniform((eval_batch_size, height, width, 3))
    logits, _ = inception.inception_v1(eval_inputs, num_classes, reuse=True)
    predictions = tf.argmax(input=logits, axis=1)

    with self.test_session() as sess:
      sess.run(tf.compat.v1.global_variables_initializer())
      output = sess.run(predictions)
      self.assertEqual(output.shape, (eval_batch_size,))

  def testLogitsNotSqueezed(self):
    """spatial_squeeze=False keeps logits with shape [1, 1, 1, num_classes]."""
    num_classes = 25
    images = tf.random.uniform([1, 224, 224, 3])
    logits, _ = inception.inception_v1(images,
                                       num_classes=num_classes,
                                       spatial_squeeze=False)

    with self.test_session() as sess:
      tf.compat.v1.global_variables_initializer().run()
      logits_out = sess.run(logits)
      self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])

  def testNoBatchNormScaleByDefault(self):
    """The default arg scope creates no BatchNorm gamma variables."""
    height, width = 224, 224
    num_classes = 1000
    inputs = tf.compat.v1.placeholder(tf.float32, (1, height, width, 3))
    with slim.arg_scope(inception.inception_v1_arg_scope()):
      inception.inception_v1(inputs, num_classes, is_training=False)

    self.assertEqual(tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$'), [])

  def testBatchNormScale(self):
    """batch_norm_scale=True adds a gamma for every BatchNorm moving_mean."""
    height, width = 224, 224
    num_classes = 1000
    inputs = tf.compat.v1.placeholder(tf.float32, (1, height, width, 3))
    with slim.arg_scope(
        inception.inception_v1_arg_scope(batch_norm_scale=True)):
      inception.inception_v1(inputs, num_classes, is_training=False)

    gamma_names = {
        v.op.name
        for v in tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$')}
    self.assertGreater(len(gamma_names), 0)
    for v in tf.compat.v1.global_variables('.*/BatchNorm/moving_mean:0$'):
      # Swap the trailing 'moving_mean' for 'gamma' to get the sibling name.
      self.assertIn(v.op.name[:-len('moving_mean')] + 'gamma', gamma_names)
|
||||
|
||||
|
||||
# Hand control to the TensorFlow test runner when executed as a script.
if __name__ == '__main__':
  tf.test.main()
|
||||
+596
@@ -0,0 +1,596 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains the definition for inception v2 classification network."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
from nets import inception_utils
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
# pylint: disable=g-long-lambda
|
||||
def trunc_normal(stddev):
  """Returns a truncated-normal initializer with mean 0.0.

  Args:
    stddev: standard deviation passed through to the initializer.

  Returns:
    The object produced by `tf.compat.v1.truncated_normal_initializer`.
  """
  return tf.compat.v1.truncated_normal_initializer(0.0, stddev)
|
||||
|
||||
|
||||
def inception_v2_base(inputs,
                      final_endpoint='Mixed_5c',
                      min_depth=16,
                      depth_multiplier=1.0,
                      use_separable_conv=True,
                      data_format='NHWC',
                      include_root_block=True,
                      scope=None):
  """Inception v2 (6a2).

  Constructs an Inception v2 network from inputs to the given final endpoint.
  This method can construct the network up to the layer inception(5b) as
  described in http://arxiv.org/abs/1502.03167.

  Args:
    inputs: a tensor of shape [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
      'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'Mixed_4a',
      'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a', 'Mixed_5b',
      'Mixed_5c']. If include_root_block is False, ['Conv2d_1a_7x7',
      'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3', 'MaxPool_3a_3x3'] will
      not be available.
    min_depth: Minimum depth value (number of channels) for all convolution ops.
      Enforced when depth_multiplier < 1, and not an active constraint when
      depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    use_separable_conv: Use a separable convolution for the first layer
      Conv2d_1a_7x7. If this is False, use a normal convolution instead.
    data_format: Data format of the activations ('NHWC' or 'NCHW').
    include_root_block: If True, include the convolution and max-pooling layers
      before the inception modules. If False, excludes those layers.
    scope: Optional variable_scope.

  Returns:
    tensor_out: output tensor corresponding to the final_endpoint.
    end_points: a dictionary mapping each endpoint name built so far to its
      activation tensor, for external use, for example summaries or losses.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values,
      if depth_multiplier <= 0, if data_format is neither 'NHWC' nor 'NCHW',
      or if data_format is 'NCHW' while use_separable_conv is True.
  """

  # end_points will collect relevant activations for external use, for example
  # summaries or losses.
  end_points = {}

  # Used to find thinned depths for each layer.
  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')
  # Scale a nominal channel count by depth_multiplier, never below min_depth.
  depth = lambda d: max(int(d * depth_multiplier), min_depth)

  if data_format != 'NHWC' and data_format != 'NCHW':
    raise ValueError('data_format must be either NHWC or NCHW.')
  if data_format == 'NCHW' and use_separable_conv:
    raise ValueError(
        'separable convolution only supports NHWC layout. NCHW data format can'
        ' only be used when use_separable_conv is False.'
    )

  # Axis along which branch outputs are concatenated: 3 for NHWC, 1 for NCHW.
  concat_dim = 3 if data_format == 'NHWC' else 1
  with tf.compat.v1.variable_scope(scope, 'InceptionV2', [inputs]):
    # Defaults for every conv/pool below; individual calls override stride.
    with slim.arg_scope(
        [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
        stride=1,
        padding='SAME',
        data_format=data_format):

      net = inputs
      if include_root_block:
        # Note that sizes in the comments below assume an input spatial size of
        # 224x224, however, the inputs can be of any size greater than 32x32.

        # 224 x 224 x 3
        end_point = 'Conv2d_1a_7x7'

        if use_separable_conv:
          # depthwise_multiplier here is different from depth_multiplier.
          # depthwise_multiplier determines the output channels of the initial
          # depthwise conv (see docs for tf.nn.separable_conv2d), while
          # depth_multiplier controls the # channels of the subsequent 1x1
          # convolution. Must have
          #   in_channels * depthwise_multiplier <= out_channels
          # so that the separable convolution is not overparameterized.
          depthwise_multiplier = min(int(depth(64) / 3), 8)
          net = slim.separable_conv2d(
              inputs,
              depth(64), [7, 7],
              depth_multiplier=depthwise_multiplier,
              stride=2,
              padding='SAME',
              weights_initializer=trunc_normal(1.0),
              scope=end_point)
        else:
          # Use a normal convolution instead of a separable convolution.
          net = slim.conv2d(
              inputs,
              depth(64), [7, 7],
              stride=2,
              weights_initializer=trunc_normal(1.0),
              scope=end_point)
        end_points[end_point] = net
        if end_point == final_endpoint:
          return net, end_points
        # 112 x 112 x 64
        end_point = 'MaxPool_2a_3x3'
        net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2)
        end_points[end_point] = net
        if end_point == final_endpoint:
          return net, end_points
        # 56 x 56 x 64
        end_point = 'Conv2d_2b_1x1'
        net = slim.conv2d(
            net,
            depth(64), [1, 1],
            scope=end_point,
            weights_initializer=trunc_normal(0.1))
        end_points[end_point] = net
        if end_point == final_endpoint:
          return net, end_points
        # 56 x 56 x 64
        end_point = 'Conv2d_2c_3x3'
        net = slim.conv2d(net, depth(192), [3, 3], scope=end_point)
        end_points[end_point] = net
        if end_point == final_endpoint:
          return net, end_points
        # 56 x 56 x 192
        end_point = 'MaxPool_3a_3x3'
        net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2)
        end_points[end_point] = net
        if end_point == final_endpoint:
          return net, end_points

      # 28 x 28 x 192
      # Inception module.
      end_point = 'Mixed_3b'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(64), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(32), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 28 x 28 x 256
      end_point = 'Mixed_3c'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 28 x 28 x 320
      # Reduction module: the stride-2 convs and max pool halve the spatial
      # size.
      end_point = 'Mixed_4a'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_0 = slim.conv2d(branch_0, depth(160), [3, 3], stride=2,
                                 scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(
              branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
          branch_1 = slim.conv2d(
              branch_1, depth(96), [3, 3], stride=2, scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.max_pool2d(
              net, [3, 3], stride=2, scope='MaxPool_1a_3x3')
        net = tf.concat(axis=concat_dim, values=[branch_0, branch_1, branch_2])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_4b'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(224), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(
              branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_4c'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(128), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_4d'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(160), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(160), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_4e'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(96), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(192), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(160), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(192), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
      # Reduction module: the stride-2 convs and max pool halve the spatial
      # size.
      end_point = 'Mixed_5a'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_0 = slim.conv2d(branch_0, depth(192), [3, 3], stride=2,
                                 scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(256), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], stride=2,
                                 scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.max_pool2d(net, [3, 3], stride=2,
                                     scope='MaxPool_1a_3x3')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 7 x 7 x 1024
      end_point = 'Mixed_5b'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(160), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 7 x 7 x 1024
      end_point = 'Mixed_5c'
      with tf.compat.v1.variable_scope(end_point):
        with tf.compat.v1.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1')
        with tf.compat.v1.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.compat.v1.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.compat.v1.variable_scope('Branch_3'):
          # Unlike the other four-branch modules, this one pools with max
          # rather than avg.
          branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(
            axis=concat_dim, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
    # Reached only when final_endpoint matched none of the endpoints built
    # above.
    raise ValueError('Unknown final endpoint %s' % final_endpoint)
|
||||
|
||||
|
||||
def inception_v2(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 min_depth=16,
                 depth_multiplier=1.0,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV2',
                 global_pool=False):
  """Builds the Inception v2 classifier (http://arxiv.org/abs/1502.03167).

  The feature extractor is produced by `inception_v2_base`; this function adds
  the pooling, dropout and 1x1-convolution logits head on top of it. The
  network is normally trained on 224x224 images.

  Args:
    inputs: a tensor of shape [batch_size, height, width, channels].
    num_classes: number of predicted classes. When 0 or None the logits layer
      is left out and the pooled features (before dropout) are returned.
    is_training: whether the graph is built for training; forwarded to
      batch norm and dropout.
    dropout_keep_prob: fraction of activations kept by the dropout layer.
    min_depth: lower bound on the number of channels of every convolution.
      Only matters when depth_multiplier < 1.
    depth_multiplier: float scale applied to the number of channels of every
      convolution. Must be greater than zero; values in (0, 1) shrink the
      model.
    prediction_fn: callable turning logits into predictions.
    spatial_squeeze: if True the logits have shape [B, C]; otherwise
      [B, 1, 1, C], with B the batch size and C the number of classes.
    reuse: whether the network variables should be reused. Reuse requires
      'scope' to be given.
    scope: optional variable_scope.
    global_pool: if True, features are averaged over the whole spatial extent
      so any input size is reduced to 1x1. If False, a fixed window is used
      that reduces default-sized inputs to 1x1 and leaves larger inputs with
      larger outputs.

  Returns:
    net: the logits (pre-softmax activations) when num_classes is a non-zero
      integer, otherwise the non-dropped-out input to the logits layer.
    end_points: dictionary mapping network component names to activations.

  Raises:
    ValueError: if depth_multiplier <= 0.
  """
  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')

  with tf.compat.v1.variable_scope(
      scope, 'InceptionV2', [inputs], reuse=reuse) as net_scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      features, end_points = inception_v2_base(
          inputs,
          scope=net_scope,
          min_depth=min_depth,
          depth_multiplier=depth_multiplier)
      with tf.compat.v1.variable_scope('Logits'):
        if not global_pool:
          # Fixed window, shrunk when the feature map is smaller than 7x7.
          window = _reduced_kernel_size_for_small_input(features, [7, 7])
          pool_scope = 'AvgPool_1a_{}x{}'.format(*window)
          features = slim.avg_pool2d(
              features, window, padding='VALID', scope=pool_scope)
          end_points['AvgPool_1a'] = features
        else:
          # Average over both spatial axes, keeping them as size-1 dims.
          features = tf.reduce_mean(
              input_tensor=features,
              axis=[1, 2],
              keepdims=True,
              name='global_pool')
          end_points['global_pool'] = features

        # Feature-extractor mode: stop before dropout and the logits layer.
        if not num_classes:
          return features, end_points

        # 1 x 1 x 1024
        features = slim.dropout(
            features, keep_prob=dropout_keep_prob, scope='Dropout_1b')
        end_points['PreLogits'] = features
        logits = slim.conv2d(
            features,
            num_classes,
            [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            scope='Conv2d_1c_1x1')
        if spatial_squeeze:
          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
      end_points['Logits'] = logits
      end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
  return logits, end_points


inception_v2.default_image_size = 224
|
||||
|
||||
|
||||
def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
  """Clamps a pooling kernel so it never exceeds the input's spatial size.

  When the height or width of the input is not known at graph construction
  time, the input is assumed to be large enough and the requested kernel size
  is returned unchanged.

  Args:
    input_tensor: input tensor of size [batch_size, height, width, channels].
    kernel_size: desired kernel size of length 2: [kernel_height, kernel_width]

  Returns:
    The kernel size to use: `kernel_size` itself when a spatial dimension is
    unknown, otherwise a new two-element list clamped to the input size.

  TODO(jrru): Make this function work with unknown shapes. Theoretically, this
  can be done with the code below. Problems are two-fold: (1) If the shape was
  known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot
  handle tensors that define the kernel size.
      shape = tf.shape(input_tensor)
      return = tf.stack([tf.minimum(shape[1], kernel_size[0]),
                         tf.minimum(shape[2], kernel_size[1])])
  """
  dims = input_tensor.get_shape().as_list()
  # Both spatial dimensions are statically known: clamp each side.
  if dims[1] is not None and dims[2] is not None:
    return [min(dims[1], kernel_size[0]), min(dims[2], kernel_size[1])]
  return kernel_size
|
||||
|
||||
|
||||
# Alias: Inception v2 exposes the arg scope defined in inception_utils under
# its own name, so callers can write `inception_v2_arg_scope()`.
inception_v2_arg_scope = inception_utils.inception_arg_scope
|
||||
+412
@@ -0,0 +1,412 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for nets.inception_v2."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
from nets import inception
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
|
||||
class InceptionV2Test(tf.test.TestCase):
  """Graph-construction and smoke tests for the Inception v2 builders."""

  def testBuildClassificationNetwork(self):
    """Logits and predictions have shape [batch_size, num_classes]."""
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000

    inputs = tf.random.uniform((batch_size, height, width, 3))
    logits, end_points = inception.inception_v2(inputs, num_classes)
    self.assertTrue(logits.op.name.startswith(
        'InceptionV2/Logits/SpatialSqueeze'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertIn('Predictions', end_points)
    self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
                         [batch_size, num_classes])

  def testBuildPreLogitsNetwork(self):
    """With num_classes=None the pooled features are returned, no logits."""
    batch_size = 5
    height, width = 224, 224
    num_classes = None

    inputs = tf.random.uniform((batch_size, height, width, 3))
    net, end_points = inception.inception_v2(inputs, num_classes)
    self.assertTrue(net.op.name.startswith('InceptionV2/Logits/AvgPool'))
    self.assertListEqual(net.get_shape().as_list(), [batch_size, 1, 1, 1024])
    self.assertNotIn('Logits', end_points)
    self.assertNotIn('Predictions', end_points)

  def testBuildBaseNetwork(self):
    """The base network ends at Mixed_5c and registers every endpoint."""
    batch_size = 5
    height, width = 224, 224

    inputs = tf.random.uniform((batch_size, height, width, 3))
    mixed_5c, end_points = inception.inception_v2_base(inputs)
    self.assertTrue(mixed_5c.op.name.startswith('InceptionV2/Mixed_5c'))
    self.assertListEqual(mixed_5c.get_shape().as_list(),
                         [batch_size, 7, 7, 1024])
    expected_endpoints = ['Mixed_3b', 'Mixed_3c', 'Mixed_4a', 'Mixed_4b',
                          'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a',
                          'Mixed_5b', 'Mixed_5c', 'Conv2d_1a_7x7',
                          'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3',
                          'MaxPool_3a_3x3']
    self.assertCountEqual(list(end_points.keys()), expected_endpoints)

  def testBuildOnlyUptoFinalEndpoint(self):
    """Building up to endpoint i registers exactly endpoints 0..i."""
    batch_size = 5
    height, width = 224, 224
    endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
                 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
                 'Mixed_4a', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
                 'Mixed_5a', 'Mixed_5b', 'Mixed_5c']
    for index, endpoint in enumerate(endpoints):
      # A fresh graph per endpoint keeps the op names free of suffixes.
      with tf.Graph().as_default():
        inputs = tf.random.uniform((batch_size, height, width, 3))
        out_tensor, end_points = inception.inception_v2_base(
            inputs, final_endpoint=endpoint)
        self.assertTrue(out_tensor.op.name.startswith(
            'InceptionV2/' + endpoint))
        self.assertCountEqual(endpoints[:index + 1], list(end_points.keys()))

  def testBuildAndCheckAllEndPointsUptoMixed5c(self):
    """Every endpoint up to Mixed_5c has the expected static shape."""
    batch_size = 5
    height, width = 224, 224

    inputs = tf.random.uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v2_base(inputs,
                                                final_endpoint='Mixed_5c')
    endpoints_shapes = {'Mixed_3b': [batch_size, 28, 28, 256],
                        'Mixed_3c': [batch_size, 28, 28, 320],
                        'Mixed_4a': [batch_size, 14, 14, 576],
                        'Mixed_4b': [batch_size, 14, 14, 576],
                        'Mixed_4c': [batch_size, 14, 14, 576],
                        'Mixed_4d': [batch_size, 14, 14, 576],
                        'Mixed_4e': [batch_size, 14, 14, 576],
                        'Mixed_5a': [batch_size, 7, 7, 1024],
                        'Mixed_5b': [batch_size, 7, 7, 1024],
                        'Mixed_5c': [batch_size, 7, 7, 1024],
                        'Conv2d_1a_7x7': [batch_size, 112, 112, 64],
                        'MaxPool_2a_3x3': [batch_size, 56, 56, 64],
                        'Conv2d_2b_1x1': [batch_size, 56, 56, 64],
                        'Conv2d_2c_3x3': [batch_size, 56, 56, 192],
                        'MaxPool_3a_3x3': [batch_size, 28, 28, 192]}
    self.assertCountEqual(
        list(endpoints_shapes.keys()), list(end_points.keys()))
    for endpoint_name in endpoints_shapes:
      expected_shape = endpoints_shapes[endpoint_name]
      self.assertIn(endpoint_name, end_points)
      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                           expected_shape)

  def testModelHasExpectedNumberOfParameters(self):
    """The base network has the expected total parameter count."""
    batch_size = 5
    height, width = 224, 224
    inputs = tf.random.uniform((batch_size, height, width, 3))
    with slim.arg_scope(inception.inception_v2_arg_scope()):
      inception.inception_v2_base(inputs)
    total_params, _ = slim.model_analyzer.analyze_vars(
        slim.get_model_variables())
    self.assertAlmostEqual(10173112, total_params)

  def testBuildEndPointsWithDepthMultiplierLessThanOne(self):
    """depth_multiplier=0.5 halves the depth of Mixed/Conv endpoints."""
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000

    inputs = tf.random.uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v2(inputs, num_classes)

    endpoint_keys = [key for key in end_points
                     if key.startswith(('Mixed', 'Conv'))]

    _, end_points_with_multiplier = inception.inception_v2(
        inputs, num_classes, scope='depth_multiplied_net',
        depth_multiplier=0.5)

    for key in endpoint_keys:
      original_depth = end_points[key].get_shape().as_list()[3]
      new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]
      self.assertEqual(0.5 * original_depth, new_depth)

  def testBuildEndPointsWithDepthMultiplierGreaterThanOne(self):
    """depth_multiplier=2.0 doubles the depth of Mixed/Conv endpoints."""
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000

    inputs = tf.random.uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v2(inputs, num_classes)

    endpoint_keys = [key for key in end_points
                     if key.startswith(('Mixed', 'Conv'))]

    _, end_points_with_multiplier = inception.inception_v2(
        inputs, num_classes, scope='depth_multiplied_net',
        depth_multiplier=2.0)

    for key in endpoint_keys:
      original_depth = end_points[key].get_shape().as_list()[3]
      new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]
      self.assertEqual(2.0 * original_depth, new_depth)

  def testRaiseValueErrorWithInvalidDepthMultiplier(self):
    """Negative and zero depth multipliers are rejected."""
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000

    inputs = tf.random.uniform((batch_size, height, width, 3))
    with self.assertRaises(ValueError):
      _ = inception.inception_v2(inputs, num_classes, depth_multiplier=-0.1)
    with self.assertRaises(ValueError):
      _ = inception.inception_v2(inputs, num_classes, depth_multiplier=0.0)

  def testBuildEndPointsWithUseSeparableConvolutionFalse(self):
    """Endpoint shapes are unchanged when use_separable_conv=False."""
    batch_size = 5
    height, width = 224, 224

    inputs = tf.random.uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v2_base(inputs)

    endpoint_keys = [key for key in end_points
                     if key.startswith(('Mixed', 'Conv'))]

    _, end_points_with_replacement = inception.inception_v2_base(
        inputs, use_separable_conv=False)

    # The endpoint shapes must be equal to the original shape even when the
    # separable convolution is replaced with a normal convolution.
    for key in endpoint_keys:
      original_shape = end_points[key].get_shape().as_list()
      self.assertIn(key, end_points_with_replacement)
      new_shape = end_points_with_replacement[key].get_shape().as_list()
      self.assertListEqual(original_shape, new_shape)

  def testBuildEndPointsNCHWDataFormat(self):
    """NCHW endpoints are the NHWC endpoints with channels moved to axis 1."""
    batch_size = 5
    height, width = 224, 224

    inputs = tf.random.uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v2_base(inputs)

    endpoint_keys = [key for key in end_points
                     if key.startswith(('Mixed', 'Conv'))]

    inputs_in_nchw = tf.random.uniform((batch_size, 3, height, width))
    _, end_points_with_replacement = inception.inception_v2_base(
        inputs_in_nchw, use_separable_conv=False, data_format='NCHW')

    # With the 'NCHW' data format, all endpoint activations have a transposed
    # shape from the original shape with the 'NHWC' layout.
    for key in endpoint_keys:
      transposed_original_shape = tf.transpose(
          a=end_points[key], perm=[0, 3, 1, 2]).get_shape().as_list()
      self.assertIn(key, end_points_with_replacement)
      new_shape = end_points_with_replacement[key].get_shape().as_list()
      self.assertListEqual(transposed_original_shape, new_shape)

  def testBuildErrorsForDataFormats(self):
    """Unsupported data_format combinations raise ValueError."""
    batch_size = 5
    height, width = 224, 224

    inputs = tf.random.uniform((batch_size, height, width, 3))

    # 'NCWH' data format is not supported.
    with self.assertRaises(ValueError):
      _ = inception.inception_v2_base(inputs, data_format='NCWH')

    # 'NCHW' data format is not supported for separable convolution.
    with self.assertRaises(ValueError):
      _ = inception.inception_v2_base(inputs, data_format='NCHW')

  def testHalfSizeImages(self):
    """112x112 inputs still classify; Mixed_5c is 4x4 spatially."""
    batch_size = 5
    height, width = 112, 112
    num_classes = 1000

    inputs = tf.random.uniform((batch_size, height, width, 3))
    logits, end_points = inception.inception_v2(inputs, num_classes)
    self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    pre_pool = end_points['Mixed_5c']
    self.assertListEqual(pre_pool.get_shape().as_list(),
                         [batch_size, 4, 4, 1024])

  def testBuildBaseNetworkWithoutRootBlock(self):
    """With include_root_block=False only the Mixed_* endpoints are built."""
    batch_size = 5
    height, width = 28, 28
    channels = 192

    inputs = tf.random.uniform((batch_size, height, width, channels))
    _, end_points = inception.inception_v2_base(
        inputs, include_root_block=False)
    endpoints_shapes = {
        'Mixed_3b': [batch_size, 28, 28, 256],
        'Mixed_3c': [batch_size, 28, 28, 320],
        'Mixed_4a': [batch_size, 14, 14, 576],
        'Mixed_4b': [batch_size, 14, 14, 576],
        'Mixed_4c': [batch_size, 14, 14, 576],
        'Mixed_4d': [batch_size, 14, 14, 576],
        'Mixed_4e': [batch_size, 14, 14, 576],
        'Mixed_5a': [batch_size, 7, 7, 1024],
        'Mixed_5b': [batch_size, 7, 7, 1024],
        'Mixed_5c': [batch_size, 7, 7, 1024]
    }
    self.assertCountEqual(
        list(endpoints_shapes.keys()), list(end_points.keys()))
    for endpoint_name in endpoints_shapes:
      expected_shape = endpoints_shapes[endpoint_name]
      self.assertIn(endpoint_name, end_points)
      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                           expected_shape)

  def testUnknownImageShape(self):
    """A placeholder with unknown height/width runs on 224x224 data."""
    tf.compat.v1.reset_default_graph()
    batch_size = 2
    height, width = 224, 224
    num_classes = 1000
    input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
    with self.cached_session() as sess:
      inputs = tf.compat.v1.placeholder(
          tf.float32, shape=(batch_size, None, None, 3))
      logits, end_points = inception.inception_v2(inputs, num_classes)
      self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])
      pre_pool = end_points['Mixed_5c']
      feed_dict = {inputs: input_np}
      tf.compat.v1.global_variables_initializer().run()
      pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
      self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024])

  def testGlobalPoolUnknownImageShape(self):
    """global_pool=True yields [batch, classes] logits for 250x300 data."""
    tf.compat.v1.reset_default_graph()
    batch_size = 1
    height, width = 250, 300
    num_classes = 1000
    input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
    with self.cached_session() as sess:
      inputs = tf.compat.v1.placeholder(
          tf.float32, shape=(batch_size, None, None, 3))
      logits, end_points = inception.inception_v2(inputs, num_classes,
                                                  global_pool=True)
      self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])
      pre_pool = end_points['Mixed_5c']
      feed_dict = {inputs: input_np}
      tf.compat.v1.global_variables_initializer().run()
      pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
      self.assertListEqual(list(pre_pool_out.shape), [batch_size, 8, 10, 1024])

  def testUnknowBatchSize(self):
    """A placeholder with unknown batch size accepts a batch of one."""
    batch_size = 1
    height, width = 224, 224
    num_classes = 1000

    inputs = tf.compat.v1.placeholder(tf.float32, (None, height, width, 3))
    logits, _ = inception.inception_v2(inputs, num_classes)
    self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [None, num_classes])
    images = tf.random.uniform((batch_size, height, width, 3))

    with self.cached_session() as sess:
      sess.run(tf.compat.v1.global_variables_initializer())
      output = sess.run(logits, {inputs: images.eval()})
      self.assertEqual(output.shape, (batch_size, num_classes))

  def testEvaluation(self):
    """With is_training=False, argmax gives one prediction per example."""
    batch_size = 2
    height, width = 224, 224
    num_classes = 1000

    eval_inputs = tf.random.uniform((batch_size, height, width, 3))
    logits, _ = inception.inception_v2(eval_inputs, num_classes,
                                       is_training=False)
    predictions = tf.argmax(input=logits, axis=1)

    with self.cached_session() as sess:
      sess.run(tf.compat.v1.global_variables_initializer())
      output = sess.run(predictions)
      self.assertEqual(output.shape, (batch_size,))

  def testTrainEvalWithReuse(self):
    """A second network built with reuse=True runs on a different batch size."""
    train_batch_size = 5
    eval_batch_size = 2
    height, width = 150, 150
    num_classes = 1000

    train_inputs = tf.random.uniform((train_batch_size, height, width, 3))
    inception.inception_v2(train_inputs, num_classes)
    eval_inputs = tf.random.uniform((eval_batch_size, height, width, 3))
    logits, _ = inception.inception_v2(eval_inputs, num_classes, reuse=True)
    predictions = tf.argmax(input=logits, axis=1)

    with self.cached_session() as sess:
      sess.run(tf.compat.v1.global_variables_initializer())
      output = sess.run(predictions)
      self.assertEqual(output.shape, (eval_batch_size,))

  def testLogitsNotSqueezed(self):
    """spatial_squeeze=False keeps the logits as [1, 1, 1, num_classes]."""
    num_classes = 25
    images = tf.random.uniform([1, 224, 224, 3])
    logits, _ = inception.inception_v2(images,
                                       num_classes=num_classes,
                                       spatial_squeeze=False)

    with self.cached_session() as sess:
      tf.compat.v1.global_variables_initializer().run()
      logits_out = sess.run(logits)
      self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])

  def testNoBatchNormScaleByDefault(self):
    """The default arg scope creates no BatchNorm gamma variables."""
    height, width = 224, 224
    num_classes = 1000
    inputs = tf.compat.v1.placeholder(tf.float32, (1, height, width, 3))
    with slim.arg_scope(inception.inception_v2_arg_scope()):
      inception.inception_v2(inputs, num_classes, is_training=False)

    self.assertEqual(tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$'), [])

  def testBatchNormScale(self):
    """batch_norm_scale=True gives every moving_mean a sibling gamma."""
    height, width = 224, 224
    num_classes = 1000
    inputs = tf.compat.v1.placeholder(tf.float32, (1, height, width, 3))
    with slim.arg_scope(
        inception.inception_v2_arg_scope(batch_norm_scale=True)):
      inception.inception_v2(inputs, num_classes, is_training=False)

    gamma_names = {
        v.op.name
        for v in tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$')}
    self.assertGreater(len(gamma_names), 0)
    for v in tf.compat.v1.global_variables('.*/BatchNorm/moving_mean:0$'):
      # Swap the trailing 'moving_mean' for 'gamma' to get the sibling's name.
      self.assertIn(v.op.name[:-len('moving_mean')] + 'gamma', gamma_names)


if __name__ == '__main__':
  tf.test.main()
|
||||
+585
@@ -0,0 +1,585 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains the definition for inception v3 classification network."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
from nets import inception_utils
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
# pylint: disable=g-long-lambda
|
||||
def trunc_normal(stddev):
  """Returns a truncated-normal initializer with mean 0.0 and `stddev`."""
  return tf.compat.v1.truncated_normal_initializer(0.0, stddev)
|
||||
|
||||
|
||||
def inception_v3_base(inputs,
|
||||
final_endpoint='Mixed_7c',
|
||||
min_depth=16,
|
||||
depth_multiplier=1.0,
|
||||
scope=None):
|
||||
"""Inception model from http://arxiv.org/abs/1512.00567.
|
||||
|
||||
Constructs an Inception v3 network from inputs to the given final endpoint.
|
||||
This method can construct the network up to the final inception block
|
||||
Mixed_7c.
|
||||
|
||||
Note that the names of the layers in the paper do not correspond to the names
|
||||
of the endpoints registered by this function although they build the same
|
||||
network.
|
||||
|
||||
Here is a mapping from the old_names to the new names:
|
||||
Old name | New name
|
||||
=======================================
|
||||
conv0 | Conv2d_1a_3x3
|
||||
conv1 | Conv2d_2a_3x3
|
||||
conv2 | Conv2d_2b_3x3
|
||||
pool1 | MaxPool_3a_3x3
|
||||
conv3 | Conv2d_3b_1x1
|
||||
conv4 | Conv2d_4a_3x3
|
||||
pool2 | MaxPool_5a_3x3
|
||||
mixed_35x35x256a | Mixed_5b
|
||||
mixed_35x35x288a | Mixed_5c
|
||||
mixed_35x35x288b | Mixed_5d
|
||||
mixed_17x17x768a | Mixed_6a
|
||||
mixed_17x17x768b | Mixed_6b
|
||||
mixed_17x17x768c | Mixed_6c
|
||||
mixed_17x17x768d | Mixed_6d
|
||||
mixed_17x17x768e | Mixed_6e
|
||||
mixed_8x8x1280a | Mixed_7a
|
||||
mixed_8x8x2048a | Mixed_7b
|
||||
mixed_8x8x2048b | Mixed_7c
|
||||
|
||||
Args:
|
||||
inputs: a tensor of size [batch_size, height, width, channels].
|
||||
final_endpoint: specifies the endpoint to construct the network up to. It
|
||||
can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
|
||||
'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',
|
||||
'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c',
|
||||
'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c'].
|
||||
min_depth: Minimum depth value (number of channels) for all convolution ops.
|
||||
Enforced when depth_multiplier < 1, and not an active constraint when
|
||||
depth_multiplier >= 1.
|
||||
depth_multiplier: Float multiplier for the depth (number of channels)
|
||||
for all convolution ops. The value must be greater than zero. Typical
|
||||
usage will be to set this value in (0, 1) to reduce the number of
|
||||
parameters or computation cost of the model.
|
||||
scope: Optional variable_scope.
|
||||
|
||||
Returns:
|
||||
tensor_out: output tensor corresponding to the final_endpoint.
|
||||
end_points: a set of activations for external use, for example summaries or
|
||||
losses.
|
||||
|
||||
Raises:
|
||||
ValueError: if final_endpoint is not set to one of the predefined values,
|
||||
or depth_multiplier <= 0
|
||||
"""
|
||||
# end_points will collect relevant activations for external use, for example
|
||||
# summaries or losses.
|
||||
end_points = {}
|
||||
|
||||
if depth_multiplier <= 0:
|
||||
raise ValueError('depth_multiplier is not greater than zero.')
|
||||
depth = lambda d: max(int(d * depth_multiplier), min_depth)
|
||||
|
||||
with tf.compat.v1.variable_scope(scope, 'InceptionV3', [inputs]):
|
||||
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
|
||||
stride=1, padding='VALID'):
|
||||
# 299 x 299 x 3
|
||||
end_point = 'Conv2d_1a_3x3'
|
||||
net = slim.conv2d(inputs, depth(32), [3, 3], stride=2, scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 149 x 149 x 32
|
||||
end_point = 'Conv2d_2a_3x3'
|
||||
net = slim.conv2d(net, depth(32), [3, 3], scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 147 x 147 x 32
|
||||
end_point = 'Conv2d_2b_3x3'
|
||||
net = slim.conv2d(net, depth(64), [3, 3], padding='SAME', scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 147 x 147 x 64
|
||||
end_point = 'MaxPool_3a_3x3'
|
||||
net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 73 x 73 x 64
|
||||
end_point = 'Conv2d_3b_1x1'
|
||||
net = slim.conv2d(net, depth(80), [1, 1], scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 73 x 73 x 80.
|
||||
end_point = 'Conv2d_4a_3x3'
|
||||
net = slim.conv2d(net, depth(192), [3, 3], scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 71 x 71 x 192.
|
||||
end_point = 'MaxPool_5a_3x3'
|
||||
net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# 35 x 35 x 192.
|
||||
|
||||
# Inception blocks
|
||||
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
|
||||
stride=1, padding='SAME'):
|
||||
# mixed: 35 x 35 x 256.
|
||||
end_point = 'Mixed_5b'
|
||||
with tf.compat.v1.variable_scope(end_point):
|
||||
with tf.compat.v1.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.compat.v1.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
|
||||
scope='Conv2d_0b_5x5')
|
||||
with tf.compat.v1.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
|
||||
scope='Conv2d_0c_3x3')
|
||||
with tf.compat.v1.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, depth(32), [1, 1],
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
|
||||
# mixed_1: 35 x 35 x 288.
|
||||
end_point = 'Mixed_5c'
|
||||
with tf.compat.v1.variable_scope(end_point):
|
||||
with tf.compat.v1.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.compat.v1.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0b_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
|
||||
scope='Conv_1_0c_5x5')
|
||||
with tf.compat.v1.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, depth(64), [1, 1],
|
||||
scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
|
||||
scope='Conv2d_0c_3x3')
|
||||
with tf.compat.v1.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, depth(64), [1, 1],
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
|
||||
# mixed_2: 35 x 35 x 288.
|
||||
end_point = 'Mixed_5d'
|
||||
with tf.compat.v1.variable_scope(end_point):
|
||||
with tf.compat.v1.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.compat.v1.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
|
||||
scope='Conv2d_0b_5x5')
|
||||
with tf.compat.v1.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
|
||||
scope='Conv2d_0c_3x3')
|
||||
with tf.compat.v1.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, depth(64), [1, 1],
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
|
||||
# mixed_3: 17 x 17 x 768.
|
||||
end_point = 'Mixed_6a'
|
||||
with tf.compat.v1.variable_scope(end_point):
|
||||
with tf.compat.v1.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(384), [3, 3], stride=2,
|
||||
padding='VALID', scope='Conv2d_1a_1x1')
|
||||
with tf.compat.v1.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(96), [3, 3],
|
||||
scope='Conv2d_0b_3x3')
|
||||
branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], stride=2,
|
||||
padding='VALID', scope='Conv2d_1a_1x1')
|
||||
with tf.compat.v1.variable_scope('Branch_2'):
|
||||
branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
|
||||
scope='MaxPool_1a_3x3')
|
||||
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
|
||||
# mixed4: 17 x 17 x 768.
|
||||
end_point = 'Mixed_6b'
|
||||
with tf.compat.v1.variable_scope(end_point):
|
||||
with tf.compat.v1.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.compat.v1.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(128), [1, 7],
|
||||
scope='Conv2d_0b_1x7')
|
||||
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
|
||||
scope='Conv2d_0c_7x1')
|
||||
with tf.compat.v1.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(128), [7, 1],
|
||||
scope='Conv2d_0b_7x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(128), [1, 7],
|
||||
scope='Conv2d_0c_1x7')
|
||||
branch_2 = slim.conv2d(branch_2, depth(128), [7, 1],
|
||||
scope='Conv2d_0d_7x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
|
||||
scope='Conv2d_0e_1x7')
|
||||
with tf.compat.v1.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
|
||||
# mixed_5: 17 x 17 x 768.
|
||||
end_point = 'Mixed_6c'
|
||||
with tf.compat.v1.variable_scope(end_point):
|
||||
with tf.compat.v1.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.compat.v1.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(160), [1, 7],
|
||||
scope='Conv2d_0b_1x7')
|
||||
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
|
||||
scope='Conv2d_0c_7x1')
|
||||
with tf.compat.v1.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
|
||||
scope='Conv2d_0b_7x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(160), [1, 7],
|
||||
scope='Conv2d_0c_1x7')
|
||||
branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
|
||||
scope='Conv2d_0d_7x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
|
||||
scope='Conv2d_0e_1x7')
|
||||
with tf.compat.v1.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# mixed_6: 17 x 17 x 768.
|
||||
end_point = 'Mixed_6d'
|
||||
with tf.compat.v1.variable_scope(end_point):
|
||||
with tf.compat.v1.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.compat.v1.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(160), [1, 7],
|
||||
scope='Conv2d_0b_1x7')
|
||||
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
|
||||
scope='Conv2d_0c_7x1')
|
||||
with tf.compat.v1.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
|
||||
scope='Conv2d_0b_7x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(160), [1, 7],
|
||||
scope='Conv2d_0c_1x7')
|
||||
branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
|
||||
scope='Conv2d_0d_7x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
|
||||
scope='Conv2d_0e_1x7')
|
||||
with tf.compat.v1.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
|
||||
# mixed_7: 17 x 17 x 768.
|
||||
end_point = 'Mixed_6e'
|
||||
with tf.compat.v1.variable_scope(end_point):
|
||||
with tf.compat.v1.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.compat.v1.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(192), [1, 7],
|
||||
scope='Conv2d_0b_1x7')
|
||||
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
|
||||
scope='Conv2d_0c_7x1')
|
||||
with tf.compat.v1.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(192), [7, 1],
|
||||
scope='Conv2d_0b_7x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
|
||||
scope='Conv2d_0c_1x7')
|
||||
branch_2 = slim.conv2d(branch_2, depth(192), [7, 1],
|
||||
scope='Conv2d_0d_7x1')
|
||||
branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
|
||||
scope='Conv2d_0e_1x7')
|
||||
with tf.compat.v1.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
|
||||
scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
|
||||
# mixed_8: 8 x 8 x 1280.
|
||||
end_point = 'Mixed_7a'
|
||||
with tf.compat.v1.variable_scope(end_point):
|
||||
with tf.compat.v1.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_0 = slim.conv2d(branch_0, depth(320), [3, 3], stride=2,
|
||||
padding='VALID', scope='Conv2d_1a_3x3')
|
||||
with tf.compat.v1.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(192), [1, 7],
|
||||
scope='Conv2d_0b_1x7')
|
||||
branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
|
||||
scope='Conv2d_0c_7x1')
|
||||
branch_1 = slim.conv2d(branch_1, depth(192), [3, 3], stride=2,
|
||||
padding='VALID', scope='Conv2d_1a_3x3')
|
||||
with tf.compat.v1.variable_scope('Branch_2'):
|
||||
branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
|
||||
scope='MaxPool_1a_3x3')
|
||||
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
# mixed_9: 8 x 8 x 2048.
|
||||
end_point = 'Mixed_7b'
|
||||
with tf.compat.v1.variable_scope(end_point):
|
||||
with tf.compat.v1.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.compat.v1.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = tf.concat(axis=3, values=[
|
||||
slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
|
||||
slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0b_3x1')])
|
||||
with tf.compat.v1.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(
|
||||
branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
|
||||
branch_2 = tf.concat(axis=3, values=[
|
||||
slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
|
||||
slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')])
|
||||
with tf.compat.v1.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(
|
||||
branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
|
||||
# mixed_10: 8 x 8 x 2048.
|
||||
end_point = 'Mixed_7c'
|
||||
with tf.compat.v1.variable_scope(end_point):
|
||||
with tf.compat.v1.variable_scope('Branch_0'):
|
||||
branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
|
||||
with tf.compat.v1.variable_scope('Branch_1'):
|
||||
branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_1 = tf.concat(axis=3, values=[
|
||||
slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
|
||||
slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0c_3x1')])
|
||||
with tf.compat.v1.variable_scope('Branch_2'):
|
||||
branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
|
||||
branch_2 = slim.conv2d(
|
||||
branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
|
||||
branch_2 = tf.concat(axis=3, values=[
|
||||
slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
|
||||
slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')])
|
||||
with tf.compat.v1.variable_scope('Branch_3'):
|
||||
branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
|
||||
branch_3 = slim.conv2d(
|
||||
branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
|
||||
net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
|
||||
end_points[end_point] = net
|
||||
if end_point == final_endpoint: return net, end_points
|
||||
raise ValueError('Unknown final endpoint %s' % final_endpoint)
|
||||
|
||||
|
||||
def inception_v3(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 min_depth=16,
                 depth_multiplier=1.0,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 create_aux_logits=True,
                 scope='InceptionV3',
                 global_pool=False):
  """Builds the Inception V3 classifier (http://arxiv.org/abs/1512.00567).

  Reference: "Rethinking the Inception Architecture for Computer Vision",
  Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
  Zbigniew Wojna.

  Called with its default arguments, this builds the model described in the
  paper. Variations of the network can be explored through dropout_keep_prob,
  min_depth and depth_multiplier. The default training image size is 299x299.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes. When 0 or None, no logits layer
      is built and the features that would feed it (taken before dropout) are
      returned instead.
    is_training: whether the network is being built for training.
    dropout_keep_prob: fraction of activation values kept by dropout.
    min_depth: lower bound on the number of channels of every convolution.
      It only takes effect when depth_multiplier < 1.
    depth_multiplier: float factor applied to the number of channels of every
      convolution. Must be greater than zero; values in (0, 1) are the usual
      way to shrink the parameter count or the computation cost.
    prediction_fn: function that turns logits into predictions.
    spatial_squeeze: if True the logits have shape [B, C]; otherwise they
      have shape [B, 1, 1, C] (B = batch size, C = number of classes).
    reuse: whether the network and its variables should be reused. Reuse
      requires 'scope' to be given.
    create_aux_logits: whether to build the auxiliary logits head.
    scope: optional variable_scope.
    global_pool: optional flag selecting the average pooling applied before
      the logits layer. If false or unset, a fixed window is used that
      reduces default-sized inputs to 1x1 (larger inputs give larger
      outputs). If true, inputs of any size are pooled down to 1x1.

  Returns:
    net: a Tensor holding the logits (pre-softmax activations) when
      num_classes is a non-zero integer, or the non-dropped-out input to the
      logits layer when num_classes is 0 or None.
    end_points: a dictionary mapping network component names to the
      corresponding activations.

  Raises:
    ValueError: if 'depth_multiplier' is less than or equal to zero.
  """
  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')

  def depth(d):
    # Scale the nominal channel count, but never go below min_depth.
    return max(int(d * depth_multiplier), min_depth)

  with tf.compat.v1.variable_scope(
      scope, 'InceptionV3', [inputs], reuse=reuse) as scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      net, end_points = inception_v3_base(
          inputs,
          scope=scope,
          min_depth=min_depth,
          depth_multiplier=depth_multiplier)

      # Auxiliary classifier head, fed from the Mixed_6e activations.
      if create_aux_logits and num_classes:
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1, padding='SAME'):
          aux = end_points['Mixed_6e']
          with tf.compat.v1.variable_scope('AuxLogits'):
            aux = slim.avg_pool2d(aux, [5, 5], stride=3, padding='VALID',
                                  scope='AvgPool_1a_5x5')
            aux = slim.conv2d(aux, depth(128), [1, 1], scope='Conv2d_1b_1x1')

            # The kernel is clipped to the feature map that precedes the
            # final auxiliary layers, and the scope name follows its size.
            aux_kernel = _reduced_kernel_size_for_small_input(aux, [5, 5])
            aux = slim.conv2d(
                aux, depth(768), aux_kernel,
                weights_initializer=trunc_normal(0.01),
                padding='VALID',
                scope='Conv2d_2a_{}x{}'.format(*aux_kernel))
            aux = slim.conv2d(
                aux, num_classes, [1, 1],
                activation_fn=None,
                normalizer_fn=None,
                weights_initializer=trunc_normal(0.001),
                scope='Conv2d_2b_1x1')
            if spatial_squeeze:
              aux = tf.squeeze(aux, [1, 2], name='SpatialSqueeze')
            end_points['AuxLogits'] = aux

      # Final pooling and prediction.
      with tf.compat.v1.variable_scope('Logits'):
        if not global_pool:
          # Average pooling with a fixed (possibly clipped) window.
          pool_kernel = _reduced_kernel_size_for_small_input(net, [8, 8])
          net = slim.avg_pool2d(
              net, pool_kernel, padding='VALID',
              scope='AvgPool_1a_{}x{}'.format(*pool_kernel))
          end_points['AvgPool_1a'] = net
        else:
          # Global average pooling over both spatial axes.
          net = tf.reduce_mean(
              input_tensor=net, axis=[1, 2], keepdims=True, name='GlobalPool')
          end_points['global_pool'] = net

        # Without a class count there is no logits layer: hand back the
        # pooled features.
        if not num_classes:
          return net, end_points

        # 1 x 1 x 2048
        net = slim.dropout(net, keep_prob=dropout_keep_prob,
                           scope='Dropout_1b')
        end_points['PreLogits'] = net
        # 2048
        logits = slim.conv2d(net, num_classes, [1, 1],
                             activation_fn=None,
                             normalizer_fn=None,
                             scope='Conv2d_1c_1x1')
        if spatial_squeeze:
          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
        # 1000
      end_points['Logits'] = logits
      end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
  return logits, end_points


# Default input resolution, stored as an attribute on the function itself.
inception_v3.default_image_size = 299
|
||||
|
||||
|
||||
def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
  """Clips a requested kernel size to the spatial size of the input.

  When the height or width of the input is not known at graph construction
  time, the input is assumed to be large enough and the requested kernel
  size is returned unchanged.

  Args:
    input_tensor: input tensor of size [batch_size, height, width, channels].
    kernel_size: desired kernel size of length 2: [kernel_height, kernel_width]

  Returns:
    The kernel size to use: `kernel_size` itself when a spatial dimension is
    unknown, otherwise a new two-element list in which each entry is the
    smaller of the requested size and the matching input dimension.

  TODO(jrru): Make this function work with unknown shapes. In theory this
  could be done by taking tf.shape(input_tensor) and stacking
  tf.minimum(shape[1], kernel_size[0]) with
  tf.minimum(shape[2], kernel_size[1]). Two problems remain: (1) a shape that
  was statically known would be lost, and (2)
  inception.slim.ops._two_element_tuple cannot handle tensors that define the
  kernel size.
  """
  dims = input_tensor.get_shape().as_list()
  spatial_dims_known = dims[1] is not None and dims[2] is not None
  if not spatial_dims_known:
    return kernel_size
  return [min(dims[1], kernel_size[0]), min(dims[2], kernel_size[1])]
|
||||
|
||||
|
||||
# Expose the arg scope defined in inception_utils under an Inception V3
# specific name.
inception_v3_arg_scope = inception_utils.inception_arg_scope
|
||||
+350
@@ -0,0 +1,350 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for nets.inception_v3."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
from nets import inception
|
||||
|
||||
# Short alias: the tests below refer to the contrib slim module as `slim`.
slim = contrib_slim
|
||||
|
||||
|
||||
class InceptionV3Test(tf.test.TestCase):
|
||||
|
||||
def testBuildClassificationNetwork(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.random.uniform((batch_size, height, width, 3))
|
||||
logits, end_points = inception.inception_v3(inputs, num_classes)
|
||||
self.assertTrue(logits.op.name.startswith(
|
||||
'InceptionV3/Logits/SpatialSqueeze'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
self.assertTrue('Predictions' in end_points)
|
||||
self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
|
||||
def testBuildPreLogitsNetwork(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
num_classes = None
|
||||
|
||||
inputs = tf.random.uniform((batch_size, height, width, 3))
|
||||
net, end_points = inception.inception_v3(inputs, num_classes)
|
||||
self.assertTrue(net.op.name.startswith('InceptionV3/Logits/AvgPool'))
|
||||
self.assertListEqual(net.get_shape().as_list(), [batch_size, 1, 1, 2048])
|
||||
self.assertFalse('Logits' in end_points)
|
||||
self.assertFalse('Predictions' in end_points)
|
||||
|
||||
def testBuildBaseNetwork(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
|
||||
inputs = tf.random.uniform((batch_size, height, width, 3))
|
||||
final_endpoint, end_points = inception.inception_v3_base(inputs)
|
||||
self.assertTrue(final_endpoint.op.name.startswith(
|
||||
'InceptionV3/Mixed_7c'))
|
||||
self.assertListEqual(final_endpoint.get_shape().as_list(),
|
||||
[batch_size, 8, 8, 2048])
|
||||
expected_endpoints = ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
|
||||
'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3',
|
||||
'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
|
||||
'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',
|
||||
'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c']
|
||||
self.assertItemsEqual(end_points.keys(), expected_endpoints)
|
||||
|
||||
def testBuildOnlyUptoFinalEndpoint(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
endpoints = ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
|
||||
'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3',
|
||||
'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
|
||||
'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',
|
||||
'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c']
|
||||
|
||||
for index, endpoint in enumerate(endpoints):
|
||||
with tf.Graph().as_default():
|
||||
inputs = tf.random.uniform((batch_size, height, width, 3))
|
||||
out_tensor, end_points = inception.inception_v3_base(
|
||||
inputs, final_endpoint=endpoint)
|
||||
self.assertTrue(out_tensor.op.name.startswith(
|
||||
'InceptionV3/' + endpoint))
|
||||
self.assertItemsEqual(endpoints[:index+1], end_points.keys())
|
||||
|
||||
def testBuildAndCheckAllEndPointsUptoMixed7c(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
|
||||
inputs = tf.random.uniform((batch_size, height, width, 3))
|
||||
_, end_points = inception.inception_v3_base(
|
||||
inputs, final_endpoint='Mixed_7c')
|
||||
endpoints_shapes = {'Conv2d_1a_3x3': [batch_size, 149, 149, 32],
|
||||
'Conv2d_2a_3x3': [batch_size, 147, 147, 32],
|
||||
'Conv2d_2b_3x3': [batch_size, 147, 147, 64],
|
||||
'MaxPool_3a_3x3': [batch_size, 73, 73, 64],
|
||||
'Conv2d_3b_1x1': [batch_size, 73, 73, 80],
|
||||
'Conv2d_4a_3x3': [batch_size, 71, 71, 192],
|
||||
'MaxPool_5a_3x3': [batch_size, 35, 35, 192],
|
||||
'Mixed_5b': [batch_size, 35, 35, 256],
|
||||
'Mixed_5c': [batch_size, 35, 35, 288],
|
||||
'Mixed_5d': [batch_size, 35, 35, 288],
|
||||
'Mixed_6a': [batch_size, 17, 17, 768],
|
||||
'Mixed_6b': [batch_size, 17, 17, 768],
|
||||
'Mixed_6c': [batch_size, 17, 17, 768],
|
||||
'Mixed_6d': [batch_size, 17, 17, 768],
|
||||
'Mixed_6e': [batch_size, 17, 17, 768],
|
||||
'Mixed_7a': [batch_size, 8, 8, 1280],
|
||||
'Mixed_7b': [batch_size, 8, 8, 2048],
|
||||
'Mixed_7c': [batch_size, 8, 8, 2048]}
|
||||
self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
|
||||
for endpoint_name in endpoints_shapes:
|
||||
expected_shape = endpoints_shapes[endpoint_name]
|
||||
self.assertTrue(endpoint_name in end_points)
|
||||
self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
|
||||
expected_shape)
|
||||
|
||||
def testModelHasExpectedNumberOfParameters(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
inputs = tf.random.uniform((batch_size, height, width, 3))
|
||||
with slim.arg_scope(inception.inception_v3_arg_scope()):
|
||||
inception.inception_v3_base(inputs)
|
||||
total_params, _ = slim.model_analyzer.analyze_vars(
|
||||
slim.get_model_variables())
|
||||
self.assertAlmostEqual(21802784, total_params)
|
||||
|
||||
def testBuildEndPoints(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.random.uniform((batch_size, height, width, 3))
|
||||
_, end_points = inception.inception_v3(inputs, num_classes)
|
||||
self.assertTrue('Logits' in end_points)
|
||||
logits = end_points['Logits']
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
self.assertTrue('AuxLogits' in end_points)
|
||||
aux_logits = end_points['AuxLogits']
|
||||
self.assertListEqual(aux_logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
self.assertTrue('Mixed_7c' in end_points)
|
||||
pre_pool = end_points['Mixed_7c']
|
||||
self.assertListEqual(pre_pool.get_shape().as_list(),
|
||||
[batch_size, 8, 8, 2048])
|
||||
self.assertTrue('PreLogits' in end_points)
|
||||
pre_logits = end_points['PreLogits']
|
||||
self.assertListEqual(pre_logits.get_shape().as_list(),
|
||||
[batch_size, 1, 1, 2048])
|
||||
|
||||
def testBuildEndPointsWithDepthMultiplierLessThanOne(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.random.uniform((batch_size, height, width, 3))
|
||||
_, end_points = inception.inception_v3(inputs, num_classes)
|
||||
|
||||
endpoint_keys = [key for key in end_points.keys()
|
||||
if key.startswith('Mixed') or key.startswith('Conv')]
|
||||
|
||||
_, end_points_with_multiplier = inception.inception_v3(
|
||||
inputs, num_classes, scope='depth_multiplied_net',
|
||||
depth_multiplier=0.5)
|
||||
|
||||
for key in endpoint_keys:
|
||||
original_depth = end_points[key].get_shape().as_list()[3]
|
||||
new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]
|
||||
self.assertEqual(0.5 * original_depth, new_depth)
|
||||
|
||||
def testBuildEndPointsWithDepthMultiplierGreaterThanOne(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.random.uniform((batch_size, height, width, 3))
|
||||
_, end_points = inception.inception_v3(inputs, num_classes)
|
||||
|
||||
endpoint_keys = [key for key in end_points.keys()
|
||||
if key.startswith('Mixed') or key.startswith('Conv')]
|
||||
|
||||
_, end_points_with_multiplier = inception.inception_v3(
|
||||
inputs, num_classes, scope='depth_multiplied_net',
|
||||
depth_multiplier=2.0)
|
||||
|
||||
for key in endpoint_keys:
|
||||
original_depth = end_points[key].get_shape().as_list()[3]
|
||||
new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]
|
||||
self.assertEqual(2.0 * original_depth, new_depth)
|
||||
|
||||
def testRaiseValueErrorWithInvalidDepthMultiplier(self):
|
||||
batch_size = 5
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.random.uniform((batch_size, height, width, 3))
|
||||
with self.assertRaises(ValueError):
|
||||
_ = inception.inception_v3(inputs, num_classes, depth_multiplier=-0.1)
|
||||
with self.assertRaises(ValueError):
|
||||
_ = inception.inception_v3(inputs, num_classes, depth_multiplier=0.0)
|
||||
|
||||
def testHalfSizeImages(self):
|
||||
batch_size = 5
|
||||
height, width = 150, 150
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.random.uniform((batch_size, height, width, 3))
|
||||
logits, end_points = inception.inception_v3(inputs, num_classes)
|
||||
self.assertTrue(logits.op.name.startswith('InceptionV3/Logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
pre_pool = end_points['Mixed_7c']
|
||||
self.assertListEqual(pre_pool.get_shape().as_list(),
|
||||
[batch_size, 3, 3, 2048])
|
||||
|
||||
def testUnknownImageShape(self):
|
||||
tf.compat.v1.reset_default_graph()
|
||||
batch_size = 2
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
|
||||
with self.test_session() as sess:
|
||||
inputs = tf.compat.v1.placeholder(
|
||||
tf.float32, shape=(batch_size, None, None, 3))
|
||||
logits, end_points = inception.inception_v3(inputs, num_classes)
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
pre_pool = end_points['Mixed_7c']
|
||||
feed_dict = {inputs: input_np}
|
||||
tf.compat.v1.global_variables_initializer().run()
|
||||
pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
|
||||
self.assertListEqual(list(pre_pool_out.shape), [batch_size, 8, 8, 2048])
|
||||
|
||||
def testGlobalPoolUnknownImageShape(self):
|
||||
tf.compat.v1.reset_default_graph()
|
||||
batch_size = 1
|
||||
height, width = 330, 400
|
||||
num_classes = 1000
|
||||
input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
|
||||
with self.test_session() as sess:
|
||||
inputs = tf.compat.v1.placeholder(
|
||||
tf.float32, shape=(batch_size, None, None, 3))
|
||||
logits, end_points = inception.inception_v3(inputs, num_classes,
|
||||
global_pool=True)
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[batch_size, num_classes])
|
||||
pre_pool = end_points['Mixed_7c']
|
||||
feed_dict = {inputs: input_np}
|
||||
tf.compat.v1.global_variables_initializer().run()
|
||||
pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
|
||||
self.assertListEqual(list(pre_pool_out.shape), [batch_size, 8, 11, 2048])
|
||||
|
||||
def testUnknowBatchSize(self):
|
||||
batch_size = 1
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
|
||||
inputs = tf.compat.v1.placeholder(tf.float32, (None, height, width, 3))
|
||||
logits, _ = inception.inception_v3(inputs, num_classes)
|
||||
self.assertTrue(logits.op.name.startswith('InceptionV3/Logits'))
|
||||
self.assertListEqual(logits.get_shape().as_list(),
|
||||
[None, num_classes])
|
||||
images = tf.random.uniform((batch_size, height, width, 3))
|
||||
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.compat.v1.global_variables_initializer())
|
||||
output = sess.run(logits, {inputs: images.eval()})
|
||||
self.assertEquals(output.shape, (batch_size, num_classes))
|
||||
|
||||
def testEvaluation(self):
|
||||
batch_size = 2
|
||||
height, width = 299, 299
|
||||
num_classes = 1000
|
||||
|
||||
eval_inputs = tf.random.uniform((batch_size, height, width, 3))
|
||||
logits, _ = inception.inception_v3(eval_inputs, num_classes,
|
||||
is_training=False)
|
||||
predictions = tf.argmax(input=logits, axis=1)
|
||||
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.compat.v1.global_variables_initializer())
|
||||
output = sess.run(predictions)
|
||||
self.assertEquals(output.shape, (batch_size,))
|
||||
|
||||
def testTrainEvalWithReuse(self):
|
||||
train_batch_size = 5
|
||||
eval_batch_size = 2
|
||||
height, width = 150, 150
|
||||
num_classes = 1000
|
||||
|
||||
train_inputs = tf.random.uniform((train_batch_size, height, width, 3))
|
||||
inception.inception_v3(train_inputs, num_classes)
|
||||
eval_inputs = tf.random.uniform((eval_batch_size, height, width, 3))
|
||||
logits, _ = inception.inception_v3(eval_inputs, num_classes,
|
||||
is_training=False, reuse=True)
|
||||
predictions = tf.argmax(input=logits, axis=1)
|
||||
|
||||
with self.test_session() as sess:
|
||||
sess.run(tf.compat.v1.global_variables_initializer())
|
||||
output = sess.run(predictions)
|
||||
self.assertEquals(output.shape, (eval_batch_size,))
|
||||
|
||||
def testLogitsNotSqueezed(self):
  """Checks that spatial_squeeze=False leaves the logits 4-D.

  With a single 299x299 image and 25 classes the evaluated logits are
  expected to have shape [1, 1, 1, 25].
  """
  num_classes = 25
  image_batch = tf.random.uniform([1, 299, 299, 3])
  logits, _ = inception.inception_v3(
      image_batch, num_classes=num_classes, spatial_squeeze=False)

  with self.test_session() as sess:
    init_op = tf.compat.v1.global_variables_initializer()
    # Runs in the default session, which is `sess` inside this block.
    init_op.run()
    evaluated = sess.run(logits)
    expected_shape = [1, 1, 1, num_classes]
    self.assertListEqual(list(evaluated.shape), expected_shape)
|
||||
|
||||
def testNoBatchNormScaleByDefault(self):
  """Checks that the default arg scope creates no BatchNorm gamma variables."""
  height, width = 299, 299
  num_classes = 1000
  input_shape = (1, height, width, 3)
  inputs = tf.compat.v1.placeholder(tf.float32, input_shape)
  default_scope = inception.inception_v3_arg_scope()
  with slim.arg_scope(default_scope):
    inception.inception_v3(inputs, num_classes, is_training=False)

  gamma_vars = tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$')
  self.assertEqual(gamma_vars, [])
|
||||
|
||||
def testBatchNormScale(self):
  """Checks that batch_norm_scale=True adds a gamma per BatchNorm layer.

  At least one gamma variable must exist, and every moving_mean variable
  must have a gamma variable under the same name prefix.
  """
  height, width = 299, 299
  num_classes = 1000
  input_shape = (1, height, width, 3)
  inputs = tf.compat.v1.placeholder(tf.float32, input_shape)
  scaled_scope = inception.inception_v3_arg_scope(batch_norm_scale=True)
  with slim.arg_scope(scaled_scope):
    inception.inception_v3(inputs, num_classes, is_training=False)

  gamma_vars = tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$')
  gamma_names = {var.op.name for var in gamma_vars}
  self.assertGreater(len(gamma_names), 0)

  mean_vars = tf.compat.v1.global_variables('.*/BatchNorm/moving_mean:0$')
  suffix_len = len('moving_mean')
  for mean_var in mean_vars:
    # Swap the trailing 'moving_mean' for 'gamma' to get the sibling name.
    sibling_gamma = mean_var.op.name[:-suffix_len] + 'gamma'
    self.assertIn(sibling_gamma, gamma_names)
|
||||
|
||||
|
||||
if __name__ == '__main__':
  # Hand control to the TensorFlow test runner when executed as a script.
  tf.test.main()
|
||||
+347
@@ -0,0 +1,347 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains the definition of the Inception V4 architecture.
|
||||
|
||||
As described in http://arxiv.org/abs/1602.07261.
|
||||
|
||||
Inception-v4, Inception-ResNet and the Impact of Residual Connections
|
||||
on Learning
|
||||
Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
from nets import inception_utils
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
|
||||
def block_inception_a(inputs, scope=None, reuse=None):
  """Builds Inception-A block for Inception v4 network.

  Four parallel branches are built from `inputs` and concatenated on axis 3:
  a 1x1 conv; a 1x1 conv followed by a 3x3 conv; a 1x1 conv followed by two
  3x3 convs; and a 3x3 average pool followed by a 1x1 conv. Every branch
  ends with 96 filters.

  Args:
    inputs: tensor fed to every branch.
    scope: optional variable scope; 'BlockInceptionA' is passed as the
      fallback name.
    reuse: forwarded to the block's variable scope.

  Returns:
    The concatenation of the four branch outputs along axis 3.
  """
  # By default use stride=1 and SAME padding
  layer_fns = [slim.conv2d, slim.avg_pool2d, slim.max_pool2d]
  with slim.arg_scope(layer_fns, stride=1, padding='SAME'):
    with tf.compat.v1.variable_scope(
        scope, 'BlockInceptionA', [inputs], reuse=reuse):
      with tf.compat.v1.variable_scope('Branch_0'):
        conv_1x1 = slim.conv2d(inputs, 96, [1, 1], scope='Conv2d_0a_1x1')
      with tf.compat.v1.variable_scope('Branch_1'):
        conv_3x3 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1')
        conv_3x3 = slim.conv2d(conv_3x3, 96, [3, 3], scope='Conv2d_0b_3x3')
      with tf.compat.v1.variable_scope('Branch_2'):
        double_3x3 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1')
        double_3x3 = slim.conv2d(
            double_3x3, 96, [3, 3], scope='Conv2d_0b_3x3')
        double_3x3 = slim.conv2d(
            double_3x3, 96, [3, 3], scope='Conv2d_0c_3x3')
      with tf.compat.v1.variable_scope('Branch_3'):
        pooled = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
        pooled = slim.conv2d(pooled, 96, [1, 1], scope='Conv2d_0b_1x1')
      branches = [conv_1x1, conv_3x3, double_3x3, pooled]
      return tf.concat(values=branches, axis=3)
|
||||
|
||||
|
||||
def block_reduction_a(inputs, scope=None, reuse=None):
  """Builds Reduction-A block for Inception v4 network.

  Three parallel branches are built from `inputs`, each finishing with a
  stride-2 VALID 3x3 operation, and concatenated on axis 3: a 384-filter
  conv; a 1x1 (192) -> 3x3 (224) -> strided 3x3 (256) conv chain; and a max
  pool.

  Args:
    inputs: tensor fed to every branch.
    scope: optional variable scope; 'BlockReductionA' is passed as the
      fallback name.
    reuse: forwarded to the block's variable scope.

  Returns:
    The concatenation of the three branch outputs along axis 3.
  """
  # By default use stride=1 and SAME padding
  layer_fns = [slim.conv2d, slim.avg_pool2d, slim.max_pool2d]
  with slim.arg_scope(layer_fns, stride=1, padding='SAME'):
    with tf.compat.v1.variable_scope(
        scope, 'BlockReductionA', [inputs], reuse=reuse):
      with tf.compat.v1.variable_scope('Branch_0'):
        strided_conv = slim.conv2d(
            inputs, 384, [3, 3], stride=2, padding='VALID',
            scope='Conv2d_1a_3x3')
      with tf.compat.v1.variable_scope('Branch_1'):
        conv_chain = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        conv_chain = slim.conv2d(
            conv_chain, 224, [3, 3], scope='Conv2d_0b_3x3')
        conv_chain = slim.conv2d(
            conv_chain, 256, [3, 3], stride=2, padding='VALID',
            scope='Conv2d_1a_3x3')
      with tf.compat.v1.variable_scope('Branch_2'):
        pooled = slim.max_pool2d(
            inputs, [3, 3], stride=2, padding='VALID',
            scope='MaxPool_1a_3x3')
      branches = [strided_conv, conv_chain, pooled]
      return tf.concat(values=branches, axis=3)
|
||||
|
||||
|
||||
def block_inception_b(inputs, scope=None, reuse=None):
  """Builds Inception-B block for Inception v4 network.

  Four parallel branches are built from `inputs` and concatenated on axis 3:
  a 1x1 conv (384); a 1x1 -> 1x7 -> 7x1 conv chain (192, 224, 256); a
  1x1 -> 7x1 -> 1x7 -> 7x1 -> 1x7 conv chain (192, 192, 224, 224, 256); and
  a 3x3 average pool followed by a 1x1 conv (128).

  Args:
    inputs: tensor fed to every branch.
    scope: optional variable scope; 'BlockInceptionB' is passed as the
      fallback name.
    reuse: forwarded to the block's variable scope.

  Returns:
    The concatenation of the four branch outputs along axis 3.
  """
  # By default use stride=1 and SAME padding
  layer_fns = [slim.conv2d, slim.avg_pool2d, slim.max_pool2d]
  with slim.arg_scope(layer_fns, stride=1, padding='SAME'):
    with tf.compat.v1.variable_scope(
        scope, 'BlockInceptionB', [inputs], reuse=reuse):
      with tf.compat.v1.variable_scope('Branch_0'):
        conv_1x1 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
      with tf.compat.v1.variable_scope('Branch_1'):
        short_7 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        short_7 = slim.conv2d(short_7, 224, [1, 7], scope='Conv2d_0b_1x7')
        short_7 = slim.conv2d(short_7, 256, [7, 1], scope='Conv2d_0c_7x1')
      with tf.compat.v1.variable_scope('Branch_2'):
        long_7 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        long_7 = slim.conv2d(long_7, 192, [7, 1], scope='Conv2d_0b_7x1')
        long_7 = slim.conv2d(long_7, 224, [1, 7], scope='Conv2d_0c_1x7')
        long_7 = slim.conv2d(long_7, 224, [7, 1], scope='Conv2d_0d_7x1')
        long_7 = slim.conv2d(long_7, 256, [1, 7], scope='Conv2d_0e_1x7')
      with tf.compat.v1.variable_scope('Branch_3'):
        pooled = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
        pooled = slim.conv2d(pooled, 128, [1, 1], scope='Conv2d_0b_1x1')
      branches = [conv_1x1, short_7, long_7, pooled]
      return tf.concat(values=branches, axis=3)
|
||||
|
||||
|
||||
def block_reduction_b(inputs, scope=None, reuse=None):
  """Builds Reduction-B block for Inception v4 network.

  Three parallel branches are built from `inputs`, each finishing with a
  stride-2 VALID 3x3 operation, and concatenated on axis 3: a 1x1 (192) ->
  strided 3x3 (192) conv chain; a 1x1 (256) -> 1x7 (256) -> 7x1 (320) ->
  strided 3x3 (320) conv chain; and a max pool.

  Args:
    inputs: tensor fed to every branch.
    scope: optional variable scope; 'BlockReductionB' is passed as the
      fallback name.
    reuse: forwarded to the block's variable scope.

  Returns:
    The concatenation of the three branch outputs along axis 3.
  """
  # By default use stride=1 and SAME padding
  layer_fns = [slim.conv2d, slim.avg_pool2d, slim.max_pool2d]
  with slim.arg_scope(layer_fns, stride=1, padding='SAME'):
    with tf.compat.v1.variable_scope(
        scope, 'BlockReductionB', [inputs], reuse=reuse):
      with tf.compat.v1.variable_scope('Branch_0'):
        short_chain = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        short_chain = slim.conv2d(
            short_chain, 192, [3, 3], stride=2, padding='VALID',
            scope='Conv2d_1a_3x3')
      with tf.compat.v1.variable_scope('Branch_1'):
        long_chain = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')
        long_chain = slim.conv2d(
            long_chain, 256, [1, 7], scope='Conv2d_0b_1x7')
        long_chain = slim.conv2d(
            long_chain, 320, [7, 1], scope='Conv2d_0c_7x1')
        long_chain = slim.conv2d(
            long_chain, 320, [3, 3], stride=2, padding='VALID',
            scope='Conv2d_1a_3x3')
      with tf.compat.v1.variable_scope('Branch_2'):
        pooled = slim.max_pool2d(
            inputs, [3, 3], stride=2, padding='VALID',
            scope='MaxPool_1a_3x3')
      branches = [short_chain, long_chain, pooled]
      return tf.concat(values=branches, axis=3)
|
||||
|
||||
|
||||
def block_inception_c(inputs, scope=None, reuse=None):
  """Builds Inception-C block for Inception v4 network.

  Four parallel branches are built from `inputs` and concatenated on axis 3:
  a 1x1 conv (256); a 1x1 conv (384) that fans out into a 1x3 and a 3x1 conv
  (256 each); a 1x1 -> 3x1 -> 1x3 chain (384, 448, 512) that fans out the
  same way; and a 3x3 average pool followed by a 1x1 conv (256).

  Args:
    inputs: tensor fed to every branch.
    scope: optional variable scope; 'BlockInceptionC' is passed as the
      fallback name.
    reuse: forwarded to the block's variable scope.

  Returns:
    The concatenation of the four branch outputs along axis 3.
  """
  # By default use stride=1 and SAME padding
  layer_fns = [slim.conv2d, slim.avg_pool2d, slim.max_pool2d]
  with slim.arg_scope(layer_fns, stride=1, padding='SAME'):
    with tf.compat.v1.variable_scope(
        scope, 'BlockInceptionC', [inputs], reuse=reuse):
      with tf.compat.v1.variable_scope('Branch_0'):
        conv_1x1 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')
      with tf.compat.v1.variable_scope('Branch_1'):
        stem = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
        # The two fan-out convs are created in the original order (1x3
        # first, then 3x1) before being concatenated.
        fan_1x3 = slim.conv2d(stem, 256, [1, 3], scope='Conv2d_0b_1x3')
        fan_3x1 = slim.conv2d(stem, 256, [3, 1], scope='Conv2d_0c_3x1')
        split_branch = tf.concat(values=[fan_1x3, fan_3x1], axis=3)
      with tf.compat.v1.variable_scope('Branch_2'):
        deep = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
        deep = slim.conv2d(deep, 448, [3, 1], scope='Conv2d_0b_3x1')
        deep = slim.conv2d(deep, 512, [1, 3], scope='Conv2d_0c_1x3')
        deep_1x3 = slim.conv2d(deep, 256, [1, 3], scope='Conv2d_0d_1x3')
        deep_3x1 = slim.conv2d(deep, 256, [3, 1], scope='Conv2d_0e_3x1')
        deep_branch = tf.concat(values=[deep_1x3, deep_3x1], axis=3)
      with tf.compat.v1.variable_scope('Branch_3'):
        pooled = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
        pooled = slim.conv2d(pooled, 256, [1, 1], scope='Conv2d_0b_1x1')
      branches = [conv_1x1, split_branch, deep_branch, pooled]
      return tf.concat(values=branches, axis=3)
|
||||
|
||||
|
||||
def inception_v4_base(inputs, final_endpoint='Mixed_7d', scope=None):
  """Creates the Inception V4 network up to the given final endpoint.

  Layers are built in order and each one is recorded in `end_points`;
  construction stops and the function returns as soon as the layer named by
  `final_endpoint` has been built.

  Args:
    inputs: a 4-D tensor of size [batch_size, height, width, 3].
    final_endpoint: specifies the endpoint to construct the network up to.
      It can be one of [ 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'Mixed_3a', 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
      'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e',
      'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c',
      'Mixed_7d']
    scope: Optional variable_scope.

  Returns:
    net: the output tensor of the layer named by `final_endpoint`.
    end_points: dict mapping the name of every layer built so far to its
      output tensor.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values.
  """
  end_points = {}

  def _record_and_check(name, tensor):
    # Stores the endpoint and reports whether it is the requested last one.
    end_points[name] = tensor
    return name == final_endpoint

  layer_fns = [slim.conv2d, slim.max_pool2d, slim.avg_pool2d]
  with tf.compat.v1.variable_scope(scope, 'InceptionV4', [inputs]):
    with slim.arg_scope(layer_fns, stride=1, padding='SAME'):
      # 299 x 299 x 3
      net = slim.conv2d(
          inputs, 32, [3, 3], stride=2, padding='VALID',
          scope='Conv2d_1a_3x3')
      if _record_and_check('Conv2d_1a_3x3', net):
        return net, end_points
      # 149 x 149 x 32
      net = slim.conv2d(
          net, 32, [3, 3], padding='VALID', scope='Conv2d_2a_3x3')
      if _record_and_check('Conv2d_2a_3x3', net):
        return net, end_points
      # 147 x 147 x 32
      net = slim.conv2d(net, 64, [3, 3], scope='Conv2d_2b_3x3')
      if _record_and_check('Conv2d_2b_3x3', net):
        return net, end_points
      # 147 x 147 x 64
      with tf.compat.v1.variable_scope('Mixed_3a'):
        with tf.compat.v1.variable_scope('Branch_0'):
          pool_branch = slim.max_pool2d(
              net, [3, 3], stride=2, padding='VALID',
              scope='MaxPool_0a_3x3')
        with tf.compat.v1.variable_scope('Branch_1'):
          conv_branch = slim.conv2d(
              net, 96, [3, 3], stride=2, padding='VALID',
              scope='Conv2d_0a_3x3')
        net = tf.concat(values=[pool_branch, conv_branch], axis=3)
        if _record_and_check('Mixed_3a', net):
          return net, end_points

      # 73 x 73 x 160
      with tf.compat.v1.variable_scope('Mixed_4a'):
        with tf.compat.v1.variable_scope('Branch_0'):
          short_branch = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
          short_branch = slim.conv2d(
              short_branch, 96, [3, 3], padding='VALID',
              scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_1'):
          long_branch = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
          long_branch = slim.conv2d(
              long_branch, 64, [1, 7], scope='Conv2d_0b_1x7')
          long_branch = slim.conv2d(
              long_branch, 64, [7, 1], scope='Conv2d_0c_7x1')
          long_branch = slim.conv2d(
              long_branch, 96, [3, 3], padding='VALID',
              scope='Conv2d_1a_3x3')
        net = tf.concat(values=[short_branch, long_branch], axis=3)
        if _record_and_check('Mixed_4a', net):
          return net, end_points

      # 71 x 71 x 192
      with tf.compat.v1.variable_scope('Mixed_5a'):
        with tf.compat.v1.variable_scope('Branch_0'):
          conv_branch = slim.conv2d(
              net, 192, [3, 3], stride=2, padding='VALID',
              scope='Conv2d_1a_3x3')
        with tf.compat.v1.variable_scope('Branch_1'):
          pool_branch = slim.max_pool2d(
              net, [3, 3], stride=2, padding='VALID',
              scope='MaxPool_1a_3x3')
        net = tf.concat(values=[conv_branch, pool_branch], axis=3)
        if _record_and_check('Mixed_5a', net):
          return net, end_points

      # 35 x 35 x 384
      # 4 x Inception-A blocks
      for suffix in 'bcde':
        block_scope = 'Mixed_5' + suffix
        net = block_inception_a(net, block_scope)
        if _record_and_check(block_scope, net):
          return net, end_points

      # 35 x 35 x 384
      # Reduction-A block
      net = block_reduction_a(net, 'Mixed_6a')
      if _record_and_check('Mixed_6a', net):
        return net, end_points

      # 17 x 17 x 1024
      # 7 x Inception-B blocks
      for suffix in 'bcdefgh':
        block_scope = 'Mixed_6' + suffix
        net = block_inception_b(net, block_scope)
        if _record_and_check(block_scope, net):
          return net, end_points

      # 17 x 17 x 1024
      # Reduction-B block
      net = block_reduction_b(net, 'Mixed_7a')
      if _record_and_check('Mixed_7a', net):
        return net, end_points

      # 8 x 8 x 1536
      # 3 x Inception-C blocks
      for suffix in 'bcd':
        block_scope = 'Mixed_7' + suffix
        net = block_inception_c(net, block_scope)
        if _record_and_check(block_scope, net):
          return net, end_points
  # Only reached when no layer name matched final_endpoint.
  raise ValueError('Unknown final endpoint %s' % final_endpoint)
|
||||
|
||||
|
||||
def inception_v4(inputs, num_classes=1001, is_training=True,
                 dropout_keep_prob=0.8,
                 reuse=None,
                 scope='InceptionV4',
                 create_aux_logits=True):
  """Creates the Inception V4 model.

  Args:
    inputs: a 4-D tensor of size [batch_size, height, width, 3].
    num_classes: number of predicted classes. If 0 or None, the logits layer
      is omitted and the input features to the logits layer (before dropout)
      are returned instead.
    is_training: whether is training or not.
    dropout_keep_prob: float, the fraction to keep before final layer.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.
    create_aux_logits: Whether to include the auxiliary logits. They are only
      built when num_classes is also non-zero.

  Returns:
    net: a Tensor with the logits (pre-softmax activations) if num_classes
      is a non-zero integer, or the non-dropped input to the logits layer
      if num_classes is 0 or None.
    end_points: the set of end_points from the inception model.
  """
  # The scope object is bound to its own name so the `scope` argument is not
  # rebound, and end_points is no longer pre-initialised because
  # inception_v4_base always supplies it.
  with tf.compat.v1.variable_scope(
      scope, 'InceptionV4', [inputs], reuse=reuse) as net_scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      net, end_points = inception_v4_base(inputs, scope=net_scope)

      with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                          stride=1, padding='SAME'):
        # Auxiliary Head logits
        if create_aux_logits and num_classes:
          with tf.compat.v1.variable_scope('AuxLogits'):
            # 17 x 17 x 1024
            aux_logits = end_points['Mixed_6h']
            aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3,
                                         padding='VALID',
                                         scope='AvgPool_1a_5x5')
            aux_logits = slim.conv2d(aux_logits, 128, [1, 1],
                                     scope='Conv2d_1b_1x1')
            # The kernel spans the remaining spatial extent, so this VALID
            # conv collapses it before flattening.
            aux_logits = slim.conv2d(aux_logits, 768,
                                     aux_logits.get_shape()[1:3],
                                     padding='VALID', scope='Conv2d_2a')
            aux_logits = slim.flatten(aux_logits)
            aux_logits = slim.fully_connected(aux_logits, num_classes,
                                              activation_fn=None,
                                              scope='Aux_logits')
            end_points['AuxLogits'] = aux_logits

        # Final pooling and prediction
        # TODO(sguada,arnoegw): Consider adding a parameter global_pool which
        # can be set to False to disable pooling here (as in resnet_*()).
        with tf.compat.v1.variable_scope('Logits'):
          # 8 x 8 x 1536
          kernel_size = net.get_shape()[1:3]
          if kernel_size.is_fully_defined():
            net = slim.avg_pool2d(net, kernel_size, padding='VALID',
                                  scope='AvgPool_1a')
          else:
            # Spatial size is unknown statically, so average over axes 1
            # and 2 instead of using a fixed-size pooling kernel.
            net = tf.reduce_mean(
                input_tensor=net,
                axis=[1, 2],
                keepdims=True,
                name='global_pool')
          end_points['global_pool'] = net
          if not num_classes:
            return net, end_points
          # 1 x 1 x 1536
          net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b')
          net = slim.flatten(net, scope='PreLogitsFlatten')
          end_points['PreLogitsFlatten'] = net
          # 1536
          logits = slim.fully_connected(net, num_classes, activation_fn=None,
                                        scope='Logits')
          end_points['Logits'] = logits
          end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions')
    return logits, end_points
inception_v4.default_image_size = 299


inception_v4_arg_scope = inception_utils.inception_arg_scope
|
||||
+287
@@ -0,0 +1,287 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for slim.inception_v4."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
from nets import inception
|
||||
|
||||
|
||||
class InceptionTest(tf.test.TestCase):
  """Graph-construction and smoke tests for the Inception V4 model."""

  def testBuildLogits(self):
    """Checks names and static shapes of logits, aux logits and predictions."""
    batch_size = 5
    height, width = 299, 299
    num_classes = 1000
    inputs = tf.random.uniform((batch_size, height, width, 3))
    logits, end_points = inception.inception_v4(inputs, num_classes)
    auxlogits = end_points['AuxLogits']
    predictions = end_points['Predictions']
    self.assertTrue(auxlogits.op.name.startswith('InceptionV4/AuxLogits'))
    self.assertListEqual(auxlogits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertTrue(predictions.op.name.startswith(
        'InceptionV4/Logits/Predictions'))
    self.assertListEqual(predictions.get_shape().as_list(),
                         [batch_size, num_classes])

  def testBuildPreLogitsNetwork(self):
    """Checks that num_classes=None returns the pooled features only."""
    batch_size = 5
    height, width = 299, 299
    num_classes = None
    inputs = tf.random.uniform((batch_size, height, width, 3))
    net, end_points = inception.inception_v4(inputs, num_classes)
    self.assertTrue(net.op.name.startswith('InceptionV4/Logits/AvgPool'))
    self.assertListEqual(net.get_shape().as_list(), [batch_size, 1, 1, 1536])
    self.assertNotIn('Logits', end_points)
    self.assertNotIn('Predictions', end_points)

  def testBuildWithoutAuxLogits(self):
    """Checks that create_aux_logits=False omits the AuxLogits endpoint."""
    batch_size = 5
    height, width = 299, 299
    num_classes = 1000
    inputs = tf.random.uniform((batch_size, height, width, 3))
    logits, endpoints = inception.inception_v4(inputs, num_classes,
                                               create_aux_logits=False)
    self.assertNotIn('AuxLogits', endpoints)
    self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])

  def testAllEndPointsShapes(self):
    """Checks the full set of endpoint names and each endpoint's shape."""
    batch_size = 5
    height, width = 299, 299
    num_classes = 1000
    inputs = tf.random.uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v4(inputs, num_classes)
    endpoints_shapes = {'Conv2d_1a_3x3': [batch_size, 149, 149, 32],
                        'Conv2d_2a_3x3': [batch_size, 147, 147, 32],
                        'Conv2d_2b_3x3': [batch_size, 147, 147, 64],
                        'Mixed_3a': [batch_size, 73, 73, 160],
                        'Mixed_4a': [batch_size, 71, 71, 192],
                        'Mixed_5a': [batch_size, 35, 35, 384],
                        # 4 x Inception-A blocks
                        'Mixed_5b': [batch_size, 35, 35, 384],
                        'Mixed_5c': [batch_size, 35, 35, 384],
                        'Mixed_5d': [batch_size, 35, 35, 384],
                        'Mixed_5e': [batch_size, 35, 35, 384],
                        # Reduction-A block
                        'Mixed_6a': [batch_size, 17, 17, 1024],
                        # 7 x Inception-B blocks
                        'Mixed_6b': [batch_size, 17, 17, 1024],
                        'Mixed_6c': [batch_size, 17, 17, 1024],
                        'Mixed_6d': [batch_size, 17, 17, 1024],
                        'Mixed_6e': [batch_size, 17, 17, 1024],
                        'Mixed_6f': [batch_size, 17, 17, 1024],
                        'Mixed_6g': [batch_size, 17, 17, 1024],
                        'Mixed_6h': [batch_size, 17, 17, 1024],
                        # Reduction-B block
                        'Mixed_7a': [batch_size, 8, 8, 1536],
                        # 3 x Inception-C blocks
                        'Mixed_7b': [batch_size, 8, 8, 1536],
                        'Mixed_7c': [batch_size, 8, 8, 1536],
                        'Mixed_7d': [batch_size, 8, 8, 1536],
                        # Logits and predictions
                        'AuxLogits': [batch_size, num_classes],
                        'global_pool': [batch_size, 1, 1, 1536],
                        'PreLogitsFlatten': [batch_size, 1536],
                        'Logits': [batch_size, num_classes],
                        'Predictions': [batch_size, num_classes]}
    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name, expected_shape in endpoints_shapes.items():
      self.assertIn(endpoint_name, end_points)
      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                           expected_shape)

  def testBuildBaseNetwork(self):
    """Checks the default base network output, endpoint names and op names."""
    batch_size = 5
    height, width = 299, 299
    inputs = tf.random.uniform((batch_size, height, width, 3))
    net, end_points = inception.inception_v4_base(inputs)
    self.assertTrue(net.op.name.startswith(
        'InceptionV4/Mixed_7d'))
    self.assertListEqual(net.get_shape().as_list(), [batch_size, 8, 8, 1536])
    expected_endpoints = [
        'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a',
        'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
        'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',
        'Mixed_6e', 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a',
        'Mixed_7b', 'Mixed_7c', 'Mixed_7d']
    self.assertItemsEqual(end_points.keys(), expected_endpoints)
    for name, op in end_points.items():
      self.assertTrue(op.name.startswith('InceptionV4/' + name))

  def testBuildOnlyUpToFinalEndpoint(self):
    """Checks that each final_endpoint stops construction at that layer."""
    batch_size = 5
    height, width = 299, 299
    all_endpoints = [
        'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a',
        'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
        'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',
        'Mixed_6e', 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a',
        'Mixed_7b', 'Mixed_7c', 'Mixed_7d']
    for index, endpoint in enumerate(all_endpoints):
      # A fresh graph per endpoint keeps variable names from colliding.
      with tf.Graph().as_default():
        inputs = tf.random.uniform((batch_size, height, width, 3))
        out_tensor, end_points = inception.inception_v4_base(
            inputs, final_endpoint=endpoint)
        self.assertTrue(out_tensor.op.name.startswith(
            'InceptionV4/' + endpoint))
        self.assertItemsEqual(all_endpoints[:index+1], end_points.keys())

  def testVariablesSetDevice(self):
    """Checks that variables land on the device active when they are built."""
    batch_size = 5
    height, width = 299, 299
    num_classes = 1000
    inputs = tf.random.uniform((batch_size, height, width, 3))
    # Force all Variables to reside on the device.
    with tf.compat.v1.variable_scope('on_cpu'), tf.device('/cpu:0'):
      inception.inception_v4(inputs, num_classes)
    with tf.compat.v1.variable_scope('on_gpu'), tf.device('/gpu:0'):
      inception.inception_v4(inputs, num_classes)
    for v in tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope='on_cpu'):
      self.assertDeviceEqual(v.device, '/cpu:0')
    for v in tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope='on_gpu'):
      self.assertDeviceEqual(v.device, '/gpu:0')

  def testHalfSizeImages(self):
    """Checks logits and Mixed_7d shapes for 150x150 inputs."""
    batch_size = 5
    height, width = 150, 150
    num_classes = 1000
    inputs = tf.random.uniform((batch_size, height, width, 3))
    logits, end_points = inception.inception_v4(inputs, num_classes)
    self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    pre_pool = end_points['Mixed_7d']
    self.assertListEqual(pre_pool.get_shape().as_list(),
                         [batch_size, 3, 3, 1536])

  def testGlobalPool(self):
    """Checks logits and Mixed_7d shapes for non-square 350x400 inputs."""
    batch_size = 1
    height, width = 350, 400
    num_classes = 1000
    inputs = tf.random.uniform((batch_size, height, width, 3))
    logits, end_points = inception.inception_v4(inputs, num_classes)
    self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    pre_pool = end_points['Mixed_7d']
    self.assertListEqual(pre_pool.get_shape().as_list(),
                         [batch_size, 9, 11, 1536])

  def testGlobalPoolUnknownImageShape(self):
    """Runs the model with height and width left unknown in the placeholder."""
    batch_size = 1
    height, width = 350, 400
    num_classes = 1000
    with self.test_session() as sess:
      inputs = tf.compat.v1.placeholder(tf.float32, (batch_size, None, None, 3))
      logits, end_points = inception.inception_v4(
          inputs, num_classes, create_aux_logits=False)
      self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])
      pre_pool = end_points['Mixed_7d']
      images = tf.random.uniform((batch_size, height, width, 3))
      sess.run(tf.compat.v1.global_variables_initializer())
      logits_out, pre_pool_out = sess.run([logits, pre_pool],
                                          {inputs: images.eval()})
      self.assertTupleEqual(logits_out.shape, (batch_size, num_classes))
      self.assertTupleEqual(pre_pool_out.shape, (batch_size, 9, 11, 1536))

  def testUnknownBatchSize(self):
    """Runs the model with the batch dimension left unknown."""
    batch_size = 1
    height, width = 299, 299
    num_classes = 1000
    with self.test_session() as sess:
      inputs = tf.compat.v1.placeholder(tf.float32, (None, height, width, 3))
      logits, _ = inception.inception_v4(inputs, num_classes)
      self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
      self.assertListEqual(logits.get_shape().as_list(),
                           [None, num_classes])
      images = tf.random.uniform((batch_size, height, width, 3))
      sess.run(tf.compat.v1.global_variables_initializer())
      output = sess.run(logits, {inputs: images.eval()})
      # assertEqual replaces the deprecated assertEquals alias.
      self.assertEqual(output.shape, (batch_size, num_classes))

  def testEvaluation(self):
    """Runs the model with is_training=False and checks prediction shape."""
    batch_size = 2
    height, width = 299, 299
    num_classes = 1000
    with self.test_session() as sess:
      eval_inputs = tf.random.uniform((batch_size, height, width, 3))
      logits, _ = inception.inception_v4(eval_inputs,
                                         num_classes,
                                         is_training=False)
      predictions = tf.argmax(input=logits, axis=1)
      sess.run(tf.compat.v1.global_variables_initializer())
      output = sess.run(predictions)
      # assertEqual replaces the deprecated assertEquals alias.
      self.assertEqual(output.shape, (batch_size,))

  def testTrainEvalWithReuse(self):
    """Builds a training graph, then an eval graph with reuse=True."""
    train_batch_size = 5
    eval_batch_size = 2
    height, width = 150, 150
    num_classes = 1000
    with self.test_session() as sess:
      train_inputs = tf.random.uniform((train_batch_size, height, width, 3))
      # Built only for its variables; its outputs are not inspected.
      inception.inception_v4(train_inputs, num_classes)
      eval_inputs = tf.random.uniform((eval_batch_size, height, width, 3))
      logits, _ = inception.inception_v4(eval_inputs,
                                         num_classes,
                                         is_training=False,
                                         reuse=True)
      predictions = tf.argmax(input=logits, axis=1)
      sess.run(tf.compat.v1.global_variables_initializer())
      output = sess.run(predictions)
      # assertEqual replaces the deprecated assertEquals alias.
      self.assertEqual(output.shape, (eval_batch_size,))

  def testNoBatchNormScaleByDefault(self):
    """Checks that the default arg scope creates no BatchNorm gamma variables."""
    height, width = 299, 299
    num_classes = 1000
    inputs = tf.compat.v1.placeholder(tf.float32, (1, height, width, 3))
    with contrib_slim.arg_scope(inception.inception_v4_arg_scope()):
      inception.inception_v4(inputs, num_classes, is_training=False)

    self.assertEqual(tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$'), [])

  def testBatchNormScale(self):
    """Checks that batch_norm_scale=True adds a gamma per BatchNorm layer."""
    height, width = 299, 299
    num_classes = 1000
    inputs = tf.compat.v1.placeholder(tf.float32, (1, height, width, 3))
    with contrib_slim.arg_scope(
        inception.inception_v4_arg_scope(batch_norm_scale=True)):
      inception.inception_v4(inputs, num_classes, is_training=False)

    gamma_names = set(
        v.op.name
        for v in tf.compat.v1.global_variables('.*/BatchNorm/gamma:0$'))
    self.assertGreater(len(gamma_names), 0)
    for v in tf.compat.v1.global_variables('.*/BatchNorm/moving_mean:0$'):
      # Swap the trailing 'moving_mean' for 'gamma' to get the sibling name.
      self.assertIn(v.op.name[:-len('moving_mean')] + 'gamma', gamma_names)
|
||||
|
||||
|
||||
if __name__ == '__main__':
  # Hand control to the TensorFlow test runner when executed as a script.
  tf.test.main()
|
||||
+98
@@ -0,0 +1,98 @@
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Contains a variant of the LeNet model definition."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
|
||||
def lenet(images, num_classes=10, is_training=False,
          dropout_keep_prob=0.5,
          prediction_fn=slim.softmax,
          scope='LeNet'):
  """Builds a variant of the LeNet classifier.

  Layer order: conv1 -> pool1 -> conv2 -> pool2 -> flatten -> fc3 ->
  dropout3 -> fc4 (logits). The returned logits are unnormalized, so apply a
  softmax to obtain a probability distribution over the classes:

        logits, _ = lenet.lenet(images, is_training=False)
        probabilities = tf.nn.softmax(logits)
        predictions = tf.argmax(logits, 1)

  Args:
    images: A batch of `Tensors` of size [batch_size, height, width, channels].
    num_classes: the number of classes in the dataset. If 0 or None, the
      dropout and logits layers are omitted and the output of `fc3` is
      returned instead.
    is_training: whether the model is being trained; forwarded to the dropout
      layer.
    dropout_keep_prob: keep probability forwarded to the dropout layer.
    prediction_fn: a function called as `prediction_fn(logits, scope=...)` to
      turn logits into predictions.
    scope: Optional variable_scope.

  Returns:
    net: the logits (pre-softmax activations) if num_classes is a non-zero
      integer, or the non-dropped-out input to the logits layer if
      num_classes is 0 or None.
    end_points: a dictionary from components of the network to the
      corresponding activation.
  """
  end_points = {}

  with tf.compat.v1.variable_scope(scope, 'LeNet', [images]):
    net = slim.conv2d(images, 32, [5, 5], scope='conv1')
    end_points['conv1'] = net
    net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
    end_points['pool1'] = net
    net = slim.conv2d(net, 64, [5, 5], scope='conv2')
    end_points['conv2'] = net
    net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
    end_points['pool2'] = net
    net = slim.flatten(net)
    end_points['Flatten'] = net

    net = slim.fully_connected(net, 1024, scope='fc3')
    end_points['fc3'] = net
    if not num_classes:
      # Feature-extractor mode: stop before dropout and the logits layer.
      return net, end_points

    net = slim.dropout(
        net, dropout_keep_prob, is_training=is_training, scope='dropout3')
    end_points['dropout3'] = net
    logits = slim.fully_connected(
        net, num_classes, activation_fn=None, scope='fc4')
    end_points['Logits'] = logits

  # Predictions are built outside of the model's variable scope.
  end_points['Predictions'] = prediction_fn(logits, scope='Predictions')

  return logits, end_points


lenet.default_image_size = 28
|
||||
|
||||
|
||||
def lenet_arg_scope(weight_decay=0.0):
  """Builds the default argument scope for the LeNet model.

  The scope applies to `slim.conv2d` and `slim.fully_connected` and sets an
  L2 weight regularizer, a truncated-normal weight initializer (stddev 0.1)
  and a ReLU activation.

  Args:
    weight_decay: The weight decay to use for regularizing the model.

  Returns:
    An `arg_scope` to use for the LeNet model.
  """
  regularizer = slim.l2_regularizer(weight_decay)
  initializer = tf.compat.v1.truncated_normal_initializer(stddev=0.1)
  scoped_ops = [slim.conv2d, slim.fully_connected]
  with slim.arg_scope(
      scoped_ops,
      weights_regularizer=regularizer,
      weights_initializer=initializer,
      activation_fn=tf.nn.relu) as lenet_scope:
    return lenet_scope
|
||||
+4
@@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Encoding" addBOMForNewFiles="with NO BOM" />
|
||||
</project>
|
||||
+4
@@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6" project-jdk-type="Python SDK" />
|
||||
</project>
|
||||
+8
@@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
+8
@@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/mobilenet.iml" filepath="$PROJECT_DIR$/.idea/mobilenet.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
||||
+166
@@ -0,0 +1,166 @@
|
||||
# MobileNet
|
||||
|
||||
This folder contains building code for
|
||||
[MobileNetV2](https://arxiv.org/abs/1801.04381) and
|
||||
[MobilenetV3](https://arxiv.org/abs/1905.02244) networks. The architectural
|
||||
definition for each model is located in [mobilenet_v2.py](mobilenet_v2.py) and
|
||||
[mobilenet_v3.py](mobilenet_v3.py) respectively.
|
||||
|
||||
For MobilenetV1 please refer to this [page](../mobilenet_v1.md)
|
||||
|
||||
We have also introduced a family of MobileNets customized for the Edge TPU
|
||||
accelerator found in
|
||||
[Google Pixel4](https://blog.google/products/pixel/pixel-4/) devices. The
|
||||
architectural definition for MobileNetEdgeTPU is located in
|
||||
[mobilenet_v3.py](mobilenet_v3.py)
|
||||
|
||||
## Performance
|
||||
|
||||
### Mobilenet V3 latency
|
||||
|
||||
This is the timing of [MobileNetV2] vs [MobileNetV3] using TF-Lite on the large
|
||||
core of Pixel 1 phone.
|
||||
|
||||

|
||||
|
||||
### MACs
|
||||
|
||||
MACs, also sometimes known as MADDs - the number of multiply-accumulates needed
|
||||
to compute an inference on a single image is a common metric to measure the
|
||||
efficiency of the model. Full size Mobilenet V3 on image size 224 uses ~215
|
||||
Million MADDs (MMadds) while achieving accuracy 75.1%, while Mobilenet V2 uses
|
||||
~300 MMAdds and achieves accuracy 72%. By comparison, ResNet-50 uses
|
||||
approximately 3500 MMAdds while achieving 76% accuracy.
|
||||
|
||||
Below is the graph comparing Mobilenets and a few selected networks. The size of
|
||||
each blob represents the number of parameters. Note for
|
||||
[ShuffleNet](https://arxiv.org/abs/1707.01083) there are no published size
|
||||
numbers. We estimate it to be comparable to MobileNetV2 numbers.
|
||||
|
||||

|
||||
|
||||
### Mobilenet EdgeTPU latency
|
||||
|
||||
The figure below shows the Pixel 4 Edge TPU latency of int8-quantized Mobilenet
|
||||
EdgeTPU compared with MobilenetV2 and the minimalistic variants of MobilenetV3
|
||||
(see below).
|
||||
|
||||

|
||||
|
||||
## Pretrained models
|
||||
|
||||
### Mobilenet V3 Imagenet Checkpoints
|
||||
|
||||
All mobilenet V3 checkpoints were trained with image resolution 224x224. All
|
||||
phone latencies are in milliseconds, measured on large core. In addition to
|
||||
large and small models this page also contains so-called minimalistic models,
|
||||
these models have the same per-layer dimension characteristics as MobilenetV3;
|
||||
however, they don't utilize any of the advanced blocks (squeeze-and-excite
|
||||
units, hard-swish, and 5x5 convolutions). While these models are less efficient
|
||||
on CPU, we find that they are much more performant on GPU/DSP.
|
||||
|
||||
| Imagenet Checkpoint | MACs (M) | Params (M) | Top1 | Pixel 1 | Pixel 2 | Pixel 3 |
|
||||
| ------------------ | -------- | ---------- | ---- | ------- | ------- | ------- |
|
||||
| [Large dm=1 (float)] | 217 | 5.4 | 75.2 | 51.2 | 61 | 44 |
|
||||
| [Large dm=1 (8-bit)] | 217 | 5.4 | 73.9 | 44 | 42.5 | 32 |
|
||||
| [Large dm=0.75 (float)] | 155 | 4.0 | 73.3 | 39.8 | 48 | 34 |
|
||||
| [Small dm=1 (float)] | 66 | 2.9 | 67.5 | 15.8 | 19.4 | 14.4 |
|
||||
| [Small dm=1 (8-bit)] | 66 | 2.9 | 64.9 | 15.5 | 15 | 10.7 |
|
||||
| [Small dm=0.75 (float)] | 44 | 2.4 | 65.4 | 12.8 | 15.9 | 11.6 |
|
||||
|
||||
#### Minimalistic checkpoints:
|
||||
|
||||
| Imagenet Checkpoint | MACs (M) | Params (M) | Top1 | Pixel 1 | Pixel 2 | Pixel 3 |
|
||||
| -------------- | -------- | ---------- | ---- | ------- | ------- | ------- |
|
||||
| [Large minimalistic (float)] | 209 | 3.9 | 72.3 | 44.1 | 51 | 35 |
|
||||
| [Large minimalistic (8-bit)][lm8] | 209 | 3.9 | 71.3 | 37 | 35 | 27 |
|
||||
| [Small minimalistic (float)] | 65 | 2.0 | 61.9 | 12.2 | 15.1 | 11 |
|
||||
|
||||
#### Edge TPU checkpoints:
|
||||
|
||||
| Imagenet Checkpoint | MACs (M) | Params (M) | Top1 | Pixel 4 Edge TPU | Pixel 4 CPU |
|
||||
| ----------------- | -------- | ---------- | ---- | ------- | ----------- |
|
||||
| [MobilenetEdgeTPU dm=0.75 (8-bit)]| 624 | 2.9 | 73.5 | 3.1 | 13.8 |
|
||||
| [MobilenetEdgeTPU dm=1 (8-bit)] | 990 | 4.0 | 75.6 | 3.6 | 20.6 |
|
||||
|
||||
|
||||
Note: 8-bit quantized versions of the MobilenetEdgeTPU models were obtained
|
||||
using Tensorflow Lite's
|
||||
[post training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization)
|
||||
tool.
|
||||
|
||||
[Small minimalistic (float)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-small-minimalistic_224_1.0_float.tgz
|
||||
[Large minimalistic (float)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large-minimalistic_224_1.0_float.tgz
|
||||
[lm8]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large-minimalistic_224_1.0_uint8.tgz
|
||||
[Large dm=1 (float)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_1.0_float.tgz
|
||||
[Small dm=1 (float)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-small_224_1.0_float.tgz
|
||||
[Large dm=1 (8-bit)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_1.0_uint8.tgz
|
||||
[Small dm=1 (8-bit)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-small_224_1.0_uint8.tgz
|
||||
[Large dm=0.75 (float)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-large_224_0.75_float.tgz
|
||||
[Small dm=0.75 (float)]: https://storage.googleapis.com/mobilenet_v3/checkpoints/v3-small_224_0.75_float.tgz
|
||||
[MobilenetEdgeTPU dm=0.75 (8-bit)]: https://storage.cloud.google.com/mobilenet_edgetpu/checkpoints/mobilenet_edgetpu_224_0.75.tgz
|
||||
[MobilenetEdgeTPU dm=1 (8-bit)]: https://storage.cloud.google.com/mobilenet_edgetpu/checkpoints/mobilenet_edgetpu_224_1.0.tgz
|
||||
|
||||
### Mobilenet V2 Imagenet Checkpoints
|
||||
|
||||
Classification Checkpoint | MACs (M) | Parameters (M) | Top 1 Accuracy | Top 5 Accuracy | Mobile CPU (ms) Pixel 1
|
||||
---------------------------------------------------------------------------------------------------------- | -------- | -------------- | -------------- | -------------- | -----------------------
|
||||
[mobilenet_v2_1.4_224](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz) | 582 | 6.06 | 75.0 | 92.5 | 138.0
|
||||
[mobilenet_v2_1.3_224](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.3_224.tgz) | 509 | 5.34 | 74.4 | 92.1 | 123.0
|
||||
[mobilenet_v2_1.0_224](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz) | 300 | 3.47 | 71.8 | 91.0 | 73.8
|
||||
[mobilenet_v2_1.0_192](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_192.tgz) | 221 | 3.47 | 70.7 | 90.1 | 55.1
|
||||
[mobilenet_v2_1.0_160](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_160.tgz) | 154 | 3.47 | 68.8 | 89.0 | 40.2
|
||||
[mobilenet_v2_1.0_128](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_128.tgz) | 99 | 3.47 | 65.3 | 86.9 | 27.6
|
||||
[mobilenet_v2_1.0_96](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_96.tgz) | 56 | 3.47 | 60.3 | 83.2 | 17.6
|
||||
[mobilenet_v2_0.75_224](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.75_224.tgz) | 209 | 2.61 | 69.8 | 89.6 | 55.8
|
||||
[mobilenet_v2_0.75_192](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.75_192.tgz) | 153 | 2.61 | 68.7 | 88.9 | 41.6
|
||||
[mobilenet_v2_0.75_160](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.75_160.tgz) | 107 | 2.61 | 66.4 | 87.3 | 30.4
|
||||
[mobilenet_v2_0.75_128](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.75_128.tgz) | 69 | 2.61 | 63.2 | 85.3 | 21.9
|
||||
[mobilenet_v2_0.75_96](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.75_96.tgz) | 39 | 2.61 | 58.8 | 81.6 | 14.2
|
||||
[mobilenet_v2_0.5_224](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.5_224.tgz) | 97 | 1.95 | 65.4 | 86.4 | 28.7
|
||||
[mobilenet_v2_0.5_192](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.5_192.tgz) | 71 | 1.95 | 63.9 | 85.4 | 21.1
|
||||
[mobilenet_v2_0.5_160](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.5_160.tgz) | 50 | 1.95 | 61.0 | 83.2 | 14.9
|
||||
[mobilenet_v2_0.5_128](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.5_128.tgz) | 32 | 1.95 | 57.7 | 80.8 | 9.9
|
||||
[mobilenet_v2_0.5_96](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.5_96.tgz) | 18 | 1.95 | 51.2 | 75.8 | 6.4
|
||||
[mobilenet_v2_0.35_224](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.35_224.tgz) | 59 | 1.66 | 60.3 | 82.9 | 19.7
|
||||
[mobilenet_v2_0.35_192](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.35_192.tgz) | 43 | 1.66 | 58.2 | 81.2 | 14.6
|
||||
[mobilenet_v2_0.35_160](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.35_160.tgz) | 30 | 1.66 | 55.7 | 79.1 | 10.5
|
||||
[mobilenet_v2_0.35_128](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.35_128.tgz) | 20 | 1.66 | 50.8 | 75.0 | 6.9
|
||||
[mobilenet_v2_0.35_96](https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_0.35_96.tgz) | 11 | 1.66 | 45.5 | 70.4 | 4.5
|
||||
|
||||
## Training
|
||||
|
||||
### V3
|
||||
|
||||
TODO: Add V3 hyperparameters
|
||||
|
||||
### V2
|
||||
|
||||
The numbers above can be reproduced using slim's
|
||||
[`train_image_classifier`](https://github.com/tensorflow/models/blob/master/research/slim/README.md#training-a-model-from-scratch).
|
||||
Below is the set of parameters that achieves 72.0% for full size MobileNetV2,
|
||||
after about 700K steps when trained on 8 GPUs. If trained on a single GPU the full
|
||||
convergence is after 5.5M steps. Also note that learning rate and
|
||||
num_epochs_per_decay both need to be adjusted depending on how many GPUs are
|
||||
being used due to slim's internal averaging.
|
||||
|
||||
```bash
|
||||
--model_name="mobilenet_v2"
|
||||
--learning_rate=0.045 * NUM_GPUS #slim internally averages clones so we compensate
|
||||
--preprocessing_name="inception_v2"
|
||||
--label_smoothing=0.1
|
||||
--moving_average_decay=0.9999
|
||||
--batch_size= 96
|
||||
--num_clones = NUM_GPUS # you can use any number here between 1 and 8 depending on your hardware setup.
|
||||
--learning_rate_decay_factor=0.98
|
||||
--num_epochs_per_decay = 2.5 / NUM_GPUS # train_image_classifier does per clone epochs
|
||||
```
|
||||
|
||||
# Example
|
||||
|
||||
See this [ipython notebook](mobilenet_example.ipynb) or open and run the network
|
||||
directly in
|
||||
[Colaboratory](https://colab.research.google.com/github/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet_example.ipynb).
|
||||
|
||||
[MobilenetV2]: https://arxiv.org/abs/1801.04381
|
||||
[MobilenetV3]: https://arxiv.org/abs/1905.02244
|
||||
+475
@@ -0,0 +1,475 @@
|
||||
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Convolution blocks for mobilenet."""
|
||||
import contextlib
|
||||
import functools
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
|
||||
def _fixed_padding(inputs, kernel_size, rate=1):
  """Pads the input along the spatial dimensions independently of input size.

  Pads the input such that if it was used in a convolution with 'VALID' padding,
  the output would have the same dimensions as if the unpadded input was used
  in a convolution with 'SAME' padding.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    kernel_size: A two-element sequence [kernel_height, kernel_width] with the
      kernel to be used in the conv2d or max_pool2d operation.
    rate: An integer, rate for atrous convolution.

  Returns:
    output: A tensor of size [batch, height_out, width_out, channels] with the
      input, either intact (if kernel_size == 1) or padded (if kernel_size > 1).
  """
  # Effective extent of a dilated kernel along one axis is
  # k + (k - 1) * (rate - 1). Height and width are computed from their own
  # kernel dimension; the width previously reused kernel_size[0], which
  # produced wrong padding for non-square kernels.
  kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1),
                           kernel_size[1] + (kernel_size[1] - 1) * (rate - 1)]
  pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1]
  # When the total padding is odd, the extra element goes at the end.
  pad_beg = [pad_total[0] // 2, pad_total[1] // 2]
  pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]]
  # Only the two middle (spatial) axes are padded; batch and channels are not.
  padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]],
                                  [pad_beg[1], pad_end[1]], [0, 0]])
  return padded_inputs
|
||||
|
||||
|
||||
def _make_divisible(v, divisor, min_value=None):
  """Rounds `v` to a nearby multiple of `divisor`, bounded below.

  Args:
    v: the value to round.
    divisor: the result is built from multiples of this number.
    min_value: lower bound for the rounded value; defaults to `divisor`.

  Returns:
    max(min_value, v rounded to the nearest multiple of divisor), bumped up by
    one `divisor` when that value is below 90% of `v`.
  """
  lower_bound = divisor if min_value is None else min_value
  nearest_multiple = int(v + divisor / 2) // divisor * divisor
  rounded = max(lower_bound, nearest_multiple)
  # Make sure that round down does not go down by more than 10%.
  if rounded < 0.9 * v:
    rounded += divisor
  return rounded
|
||||
|
||||
|
||||
def _split_divisible(num, num_ways, divisible_by=8):
  """Splits `num` into `num_ways` pieces that are multiples of `divisible_by`.

  Args:
    num: total to split; must itself be a multiple of `divisible_by`.
    num_ways: number of pieces to produce.
    divisible_by: every piece is a multiple of this number.

  Returns:
    A list of `num_ways` integers that sum to `num`.
  """
  assert num % divisible_by == 0
  assert num / num_ways >= divisible_by
  # Note: want to round down, we adjust each split to match the total.
  smallest_piece = num // num_ways // divisible_by * divisible_by
  pieces = []
  running_total = 0
  for position in range(1, num_ways + 1):
    piece = smallest_piece
    # Grow the piece until the running total reaches this piece's
    # proportional share of `num`.
    target = num * position / num_ways
    while running_total + piece < target:
      piece += divisible_by
    pieces.append(piece)
    running_total += piece
  assert running_total == num
  return pieces
|
||||
|
||||
|
||||
@contextlib.contextmanager
def _v1_compatible_scope_naming(scope):
  """Context manager yielding the name prefix for depthwise/pointwise scopes.

  Args:
    scope: explicit scope name, or None.

  Yields:
    `scope + '_'` when a scope is given. Otherwise an empty string, yielded
    from inside a freshly opened 'separable' variable scope and its matching
    name scope.
  """
  if scope is not None:
    # We use scope_depthwise, scope_pointwise for compatibility with V1 ckpts.
    # which provide numbered scopes.
    yield scope + '_'
    return

  # Create uniqified separable blocks.
  with tf.compat.v1.variable_scope(
      None, default_name='separable') as var_scope:
    with tf.compat.v1.name_scope(var_scope.original_name_scope):
      yield ''
|
||||
|
||||
|
||||
@slim.add_arg_scope
def split_separable_conv2d(input_tensor,
                           num_outputs,
                           scope=None,
                           normalizer_fn=None,
                           stride=1,
                           rate=1,
                           endpoints=None,
                           use_explicit_padding=False):
  """Separable mobilenet V1 style convolution.

  A 3x3 depthwise convolution followed by a 1x1 pointwise convolution. Unlike
  a single slim.separable_conv2d call, `normalizer_fn` is passed to both the
  depthwise and the pointwise layer. This matches the basic building block of
  the Mobilenet paper (https://arxiv.org/abs/1704.04861).

  Args:
    input_tensor: input
    num_outputs: number of outputs of the pointwise convolution
    scope: optional name of the scope. Note if provided it will use
      scope_depthwise for depthwise, and scope_pointwise for pointwise.
    normalizer_fn: which normalizer function to use for depthwise/pointwise
    stride: stride of the depthwise convolution
    rate: output rate (also known as dilation rate)
    endpoints: optional, if provided, will export additional tensors to it.
    use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
      inputs so that the output dimensions are the same as if 'SAME' padding
      were used.

  Returns:
    output tensor
  """
  if endpoints is None:
    endpoints = {}
  kernel_size = [3, 3]

  with _v1_compatible_scope_naming(scope) as prefix:
    depthwise_scope = prefix + 'depthwise'
    pointwise_scope = prefix + 'pointwise'

    if use_explicit_padding:
      padding = 'VALID'
      input_tensor = _fixed_padding(input_tensor, kernel_size, rate)
    else:
      padding = 'SAME'

    # num_outputs=None makes separable_conv2d run the depthwise stage only.
    depthwise = slim.separable_conv2d(
        input_tensor,
        None,
        kernel_size,
        depth_multiplier=1,
        stride=stride,
        rate=rate,
        normalizer_fn=normalizer_fn,
        padding=padding,
        scope=depthwise_scope)
    endpoints[depthwise_scope] = depthwise

    pointwise = slim.conv2d(
        depthwise,
        num_outputs, [1, 1],
        stride=1,
        normalizer_fn=normalizer_fn,
        scope=pointwise_scope)
    endpoints[pointwise_scope] = pointwise
  return pointwise
|
||||
|
||||
|
||||
def expand_input_by_factor(n, divisible_by=8):
  """Returns a callable computing an expansion size from `num_inputs`.

  Args:
    n: factor the number of inputs is multiplied by.
    divisible_by: divisor passed to `_make_divisible` for rounding.

  Returns:
    A function taking `num_inputs` (extra keyword arguments are ignored) and
    returning `_make_divisible(num_inputs * n, divisible_by)`.
  """
  def _expanded_size(num_inputs, **_):
    return _make_divisible(num_inputs * n, divisible_by)

  return _expanded_size
|
||||
|
||||
|
||||
def split_conv(input_tensor,
               num_outputs,
               num_ways,
               scope,
               divisible_by=8,
               **kwargs):
  """Creates a split convolution.

  Split convolution splits the input and output channels into `num_ways`
  groups of approximately the same size each, and only connects the i-th
  input group to the i-th output group through its own 1x1 convolution.

  Args:
    input_tensor: input tensor
    num_outputs: number of output filters
    num_ways: number of groups to split by.
    scope: scope for all the operators.
    divisible_by: make sure that every part is divisible by this.
    **kwargs: will be passed directly into conv2d operator

  Returns:
    tensor
  """
  in_depth = input_tensor.get_shape().as_list()[3]

  # Don't do any splitting if we end up with less than `divisible_by` filters
  # on either side.
  skip_split = (
      num_ways == 1 or
      min(in_depth // num_ways, num_outputs // num_ways) < divisible_by)
  if skip_split:
    return slim.conv2d(input_tensor, num_outputs, [1, 1], scope=scope, **kwargs)

  in_sizes = _split_divisible(in_depth, num_ways, divisible_by=divisible_by)
  out_sizes = _split_divisible(
      num_outputs, num_ways, divisible_by=divisible_by)
  chunks = tf.split(input_tensor, in_sizes, axis=3, name='split_' + scope)

  part_outputs = []
  part_scope = scope
  for index, (chunk, out_size) in enumerate(zip(chunks, out_sizes)):
    part_scope = '%s_part_%d' % (scope, index)
    part = slim.conv2d(chunk, out_size, [1, 1], scope=part_scope, **kwargs)
    part_outputs.append(tf.identity(part, part_scope + '_output'))
  # The concat op is named after the scope of the last part, not the base
  # scope; kept as-is so op names do not change.
  return tf.concat(part_outputs, 3, name=part_scope + '_concat')
|
||||
|
||||
|
||||
@slim.add_arg_scope
def expanded_conv(input_tensor,
                  num_outputs,
                  expansion_size=expand_input_by_factor(6),
                  stride=1,
                  rate=1,
                  kernel_size=(3, 3),
                  residual=True,
                  normalizer_fn=None,
                  split_projection=1,
                  split_expansion=1,
                  split_divisible_by=8,
                  expansion_transform=None,
                  depthwise_location='expansion',
                  depthwise_channel_multiplier=1,
                  endpoints=None,
                  use_explicit_padding=False,
                  padding='SAME',
                  inner_activation_fn=None,
                  depthwise_activation_fn=None,
                  project_activation_fn=tf.identity,
                  depthwise_fn=slim.separable_conv2d,
                  expansion_fn=split_conv,
                  projection_fn=split_conv,
                  scope=None):
  """Depthwise Convolution Block with expansion.

  Builds a composite convolution that has the following structure
  expansion (1x1) -> depthwise (kernel_size) -> projection (1x1)

  Args:
    input_tensor: input
    num_outputs: number of outputs in the final layer.
    expansion_size: the size of expansion, could be a constant or a callable.
      If latter it will be provided 'num_inputs' as an input. For forward
      compatibility it should accept arbitrary keyword arguments.
      Default will expand the input by factor of 6.
    stride: depthwise stride
    rate: depthwise rate
    kernel_size: depthwise kernel
    residual: whether to include residual connection between input
      and output. A callable is invoked as
      `residual(input_tensor=..., output_tensor=...)` and its result is used
      instead.
    normalizer_fn: batchnorm or otherwise
    split_projection: how many ways to split projection operator
      (that is conv expansion->bottleneck)
    split_expansion: how many ways to split expansion op
      (that is conv bottleneck->expansion)
    split_divisible_by: make sure every split group is divisible by this number.
    expansion_transform: Optional function called as
      `expansion_transform(expansion_tensor=..., input_tensor=...)` right
      before the projection; its result replaces the current tensor.
    depthwise_location: where to put depthwise convolutions; supported
      values None, 'input', 'output', 'expansion'
    depthwise_channel_multiplier: depthwise channel multiplier:
      each input will replicated (with different filters)
      that many times. So if input had c channels,
      output will have c x depthwise_channel_multiplier.
    endpoints: An optional dictionary into which intermediate endpoints are
      placed. The keys "expansion_output", "depthwise_output" and
      "projection_output" are written only when the corresponding step
      actually runs.
    use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
      inputs so that the output dimensions are the same as if 'SAME' padding
      were used.
    padding: Padding type to use if `use_explicit_padding` is not set.
    inner_activation_fn: activation function to use in all inner convolutions.
      If none, will rely on slim default scopes.
    depthwise_activation_fn: activation function to use for depthwise only.
      If not provided will rely on slim default scopes. If both
      inner_activation_fn and depthwise_activation_fn are provided,
      depthwise_activation_fn takes precedence over inner_activation_fn.
    project_activation_fn: activation function for the project layer.
      (note this layer is not affected by inner_activation_fn)
    depthwise_fn: Depthwise convolution function.
    expansion_fn: Expansion convolution function. If use custom function then
      "split_expansion" and "split_divisible_by" will be ignored.
    projection_fn: Projection convolution function. If use custom function then
      "split_projection" and "split_divisible_by" will be ignored.
    scope: optional scope.

  Returns:
    Tensor of depth num_outputs

  Raises:
    TypeError: if `depthwise_location` is not one of the supported values, or
      if `use_explicit_padding` is set while `padding` is not 'SAME'.
  """
  conv_overrides = {}
  if inner_activation_fn is not None:
    conv_overrides['activation_fn'] = inner_activation_fn
  # Depthwise layers start from the inner activation; an explicit depthwise
  # activation wins over it.
  dw_overrides = dict(conv_overrides)
  if depthwise_activation_fn is not None:
    dw_overrides['activation_fn'] = depthwise_activation_fn

  def _export(key, tensor):
    """Stores `tensor` under `key` when the caller supplied `endpoints`."""
    if endpoints is not None:
      endpoints[key] = tensor

  # pylint: disable=g-backslash-continuation
  with tf.compat.v1.variable_scope(scope, default_name='expanded_conv') as vs, \
       tf.compat.v1.name_scope(vs.original_name_scope), \
       slim.arg_scope((slim.conv2d,), **conv_overrides), \
       slim.arg_scope((slim.separable_conv2d,), **dw_overrides):
    prev_depth = input_tensor.get_shape().as_list()[3]
    if depthwise_location not in [None, 'input', 'output', 'expansion']:
      raise TypeError('%r is unknown value for depthwise_location' %
                      depthwise_location)
    if use_explicit_padding:
      if padding != 'SAME':
        raise TypeError('`use_explicit_padding` should only be used with '
                        '"SAME" padding.')
      padding = 'VALID'
    depthwise_func = functools.partial(
        depthwise_fn,
        num_outputs=None,
        kernel_size=kernel_size,
        depth_multiplier=depthwise_channel_multiplier,
        stride=stride,
        rate=rate,
        normalizer_fn=normalizer_fn,
        padding=padding,
        scope='depthwise')

    def _depthwise(tensor, **call_kwargs):
      """Runs the (optionally pre-padded) depthwise step and exports it."""
      if use_explicit_padding:
        tensor = _fixed_padding(tensor, kernel_size, rate)
      tensor = depthwise_func(tensor, **call_kwargs)
      tensor = tf.identity(tensor, name='depthwise_output')
      _export('depthwise_output', tensor)
      return tensor

    # b1 -> b2 * r -> b2
    #   i -> (o * r) (bottleneck) -> o
    input_tensor = tf.identity(input_tensor, 'input')
    net = input_tensor

    if depthwise_location == 'input':
      net = _depthwise(net, activation_fn=None)

    if callable(expansion_size):
      inner_size = expansion_size(num_inputs=prev_depth)
    else:
      inner_size = expansion_size

    # The expansion is skipped when it would not increase the depth.
    if inner_size > net.shape[3]:
      expand = expansion_fn
      if expand == split_conv:
        expand = functools.partial(
            expand,
            num_ways=split_expansion,
            divisible_by=split_divisible_by,
            stride=1)
      net = expand(
          net,
          inner_size,
          scope='expand',
          normalizer_fn=normalizer_fn)
      net = tf.identity(net, 'expansion_output')
      _export('expansion_output', net)

    if depthwise_location == 'expansion':
      net = _depthwise(net)

    if expansion_transform:
      net = expansion_transform(expansion_tensor=net, input_tensor=input_tensor)

    # Note in contrast with expansion, we always have
    # projection to produce the desired output size.
    project = projection_fn
    if project == split_conv:
      project = functools.partial(
          project,
          num_ways=split_projection,
          divisible_by=split_divisible_by,
          stride=1)
    net = project(
        net,
        num_outputs,
        scope='project',
        normalizer_fn=normalizer_fn,
        activation_fn=project_activation_fn)
    _export('projection_output', net)

    if depthwise_location == 'output':
      net = _depthwise(net, activation_fn=None)

    if callable(residual):  # custom residual
      net = residual(input_tensor=input_tensor, output_tensor=net)
    elif (residual and
          # stride check enforces that we don't add residuals when spatial
          # dimensions are None
          stride == 1 and
          # Depth matches
          net.get_shape().as_list()[3] ==
          input_tensor.get_shape().as_list()[3]):
      net += input_tensor
    return tf.identity(net, name='output')
|
||||
|
||||
|
||||
@slim.add_arg_scope
def squeeze_excite(input_tensor,
                   divisible_by=8,
                   squeeze_factor=3,
                   inner_activation_fn=tf.nn.relu,
                   gating_fn=tf.sigmoid,
                   squeeze_input_tensor=None,
                   pool=None):
  """Squeeze-and-excite gating block (Mobilenet V3 style).

  The gate is computed from `squeeze_input_tensor` (or from `input_tensor`
  when no separate tensor is given) and multiplied into `input_tensor`.

  When `pool` is None the squeeze step is a `reduce_mean` over axes 1 and 2.
  Otherwise an average pool with a `pool` x `pool` window (or the full
  static height/width when the static height is smaller than `pool`) is used
  and the gate is resized back to the spatial size of `input_tensor`.

  Args:
    input_tensor: tensor the gate is applied to; indexed as rank 4 with
      channels last.
    divisible_by: the inner (squeezed) channel count is rounded to a multiple
      of this value.
    squeeze_factor: divisor applied to the channel count of the squeeze input
      to obtain the inner channel count.
    inner_activation_fn: non-linearity of the inner 1x1 convolution.
    gating_fn: non-linearity of the final 1x1 convolution (the gate).
    squeeze_input_tensor: optional tensor used to compute the gate. If given,
      the result is input_tensor * SE(squeeze_input_tensor).
    pool: optional pooling kernel size. NOTE(review): when set, the static
      height of the squeeze input is compared against it, so that dimension
      presumably has to be statically known.

  Returns:
    The gated input tensor, e.g. X * SE(X).
  """
  with tf.compat.v1.variable_scope('squeeze_excite'):
    gate_source = (
        input_tensor if squeeze_input_tensor is None else squeeze_input_tensor)
    source_shape = gate_source.shape.as_list()
    target_shape = input_tensor.shape.as_list()

    target_size = target_shape[1:3]
    window_h, window_w = source_shape[1:3]
    window_stride = 1
    if pool is not None and window_h >= pool:
      window_h = window_w = window_stride = pool

    inner_channels = _make_divisible(
        source_shape[3] / squeeze_factor, divisor=divisible_by)

    if pool is None:
      pooled = tf.reduce_mean(gate_source, axis=[1, 2], keepdims=True)
    else:
      pooled = tf.nn.avg_pool(
          gate_source, (1, window_h, window_w, 1),
          strides=(1, window_stride, window_stride, 1),
          padding='VALID')

    squeezed = slim.conv2d(
        pooled,
        kernel_size=(1, 1),
        num_outputs=inner_channels,
        normalizer_fn=None,
        activation_fn=inner_activation_fn)
    gate = slim.conv2d(
        squeezed,
        num_outputs=target_shape[3],
        kernel_size=[1, 1],
        normalizer_fn=None,
        activation_fn=gating_fn)

    if pool is not None:
      # Note: As of 03/20/2019 only BILINEAR (the default) with
      # align_corners=True has gradients implemented in TPU.
      gate = tf.image.resize_images(gate, target_size, align_corners=True)

    gated = input_tensor * gate
  return gated
|
||||
BIN
Binary file not shown.
|
After Width: | Height: | Size: 47 KiB |
BIN
Binary file not shown.
|
After Width: | Height: | Size: 73 KiB |
BIN
Binary file not shown.
|
After Width: | Height: | Size: 80 KiB |
+501
@@ -0,0 +1,501 @@
|
||||
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Mobilenet Base Class."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
import collections
|
||||
import contextlib
|
||||
import copy
|
||||
import os
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib import slim as contrib_slim
|
||||
|
||||
slim = contrib_slim
|
||||
|
||||
|
||||
@slim.add_arg_scope
def apply_activation(x, name=None, activation_fn=None):
  """Applies `activation_fn` to `x`, or returns `x` unchanged if it is falsy.

  Args:
    x: value to activate.
    name: forwarded to `activation_fn` as the `name` keyword.
    activation_fn: callable taking `(x, name=...)`, or None for identity.

  Returns:
    `activation_fn(x, name=name)` when an activation is given, otherwise `x`.
  """
  if not activation_fn:
    return x
  return activation_fn(x, name=name)
|
||||
|
||||
|
||||
def _fixed_padding(inputs, kernel_size, rate=1):
  """Pads the input along the spatial dimensions independently of input size.

  Pads the input such that if it was used in a convolution with 'VALID'
  padding, the output would have the same dimensions as if the unpadded input
  was used in a convolution with 'SAME' padding.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    kernel_size: Sequence giving the kernel [height, width] to be used in the
      conv2d or max_pool2d operation. A one-element sequence is treated as a
      square kernel.
    rate: An integer, rate for atrous convolution.

  Returns:
    output: A tensor of size [batch, height_out, width_out, channels] with the
      input, either intact (if kernel_size == 1) or padded (if kernel_size > 1).
  """
  kernel_height = kernel_size[0]
  # The width previously reused kernel_size[0], which mis-padded non-square
  # kernels. Fall back to the height only when no width entry exists.
  kernel_width = kernel_size[1] if len(kernel_size) > 1 else kernel_size[0]

  paddings = [[0, 0]]
  for extent in (kernel_height, kernel_width):
    # Effective extent of a dilated kernel: k + (k - 1) * (rate - 1).
    effective = extent + (extent - 1) * (rate - 1)
    pad_total = effective - 1
    pad_beg = pad_total // 2
    paddings.append([pad_beg, pad_total - pad_beg])
  paddings.append([0, 0])

  padded_inputs = tf.pad(tensor=inputs, paddings=paddings)
  return padded_inputs
|
||||
|
||||
|
||||
def _make_divisible(v, divisor, min_value=None):
|
||||
if min_value is None:
|
||||
min_value = divisor
|
||||
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
|
||||
# Make sure that round down does not go down by more than 10%.
|
||||
if new_v < 0.9 * v:
|
||||
new_v += divisor
|
||||
return int(new_v)
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def _set_arg_scope_defaults(defaults):
|
||||
"""Sets arg scope defaults for all items present in defaults.
|
||||
|
||||
Args:
|
||||
defaults: dictionary/list of pairs, containing a mapping from
|
||||
function to a dictionary of default args.
|
||||
|
||||
Yields:
|
||||
context manager where all defaults are set.
|
||||
"""
|
||||
if hasattr(defaults, 'items'):
|
||||
items = list(defaults.items())
|
||||
else:
|
||||
items = defaults
|
||||
if not items:
|
||||
yield
|
||||
else:
|
||||
func, default_arg = items[0]
|
||||
with slim.arg_scope(func, **default_arg):
|
||||
with _set_arg_scope_defaults(items[1:]):
|
||||
yield
|
||||
|
||||
|
||||
@slim.add_arg_scope
def depth_multiplier(output_params,
                     multiplier,
                     divisible_by=8,
                     min_depth=8,
                     **unused_kwargs):
  """Scales `output_params['num_outputs']` in place by `multiplier`.

  Args:
    output_params: dict of op parameters; left untouched when it has no
      'num_outputs' key.
    multiplier: factor applied to the current 'num_outputs' value.
    divisible_by: the scaled depth is rounded to a multiple of this value.
    min_depth: lower bound for the scaled depth.
    **unused_kwargs: accepted and ignored.

  Returns:
    None; the dict is modified in place.
  """
  if 'num_outputs' in output_params:
    scaled_depth = output_params['num_outputs'] * multiplier
    output_params['num_outputs'] = _make_divisible(
        scaled_depth, divisible_by, min_depth)
|
||||
|
||||
|
||||
# Layer description: the callable to build, its keyword parameters and the
# function used to apply the depth multiplier to those parameters.
_Op = collections.namedtuple('Op', ['op', 'params', 'multiplier_func'])


def op(opfunc, multiplier_func=depth_multiplier, **params):
  """Builds an `_Op` layer description.

  Args:
    opfunc: callable that creates the layer.
    multiplier_func: function stored as the multiplier transform unless
      `params` contains a 'multiplier_transform' entry.
    **params: keyword parameters for `opfunc`. A 'multiplier_transform' entry,
      if present, is removed from them and used as the multiplier function.

  Returns:
    An `_Op` namedtuple.
  """
  transform = params.pop('multiplier_transform', multiplier_func)
  return _Op(op=opfunc, params=params, multiplier_func=transform)
|
||||
|
||||
|
||||
class NoOpScope(object):
  """Context manager that does nothing on entry or exit."""

  def __enter__(self):
    """Enters the scope; the `as` target is bound to None."""
    return

  def __exit__(self, exc_type, exc_value, traceback):
    """Leaves the scope; returns False so exceptions are not suppressed."""
    del exc_type, exc_value, traceback  # Unused.
    return False
|
||||
|
||||
|
||||
def safe_arg_scope(funcs, **kwargs):
  """Returns `slim.arg_scope` with all None arguments removed.

  Arguments:
    funcs: Functions to pass to `arg_scope`.
    **kwargs: Arguments to pass to `arg_scope`.

  Returns:
    arg_scope over the non-None arguments, or a No-op context manager when
    every argument is None (or none were given).

  Note: can be useful if None value should be interpreted as "do not overwrite
    this parameter value".
  """
  present_args = dict(
      (name, value) for name, value in kwargs.items() if value is not None)
  if not present_args:
    return NoOpScope()
  return slim.arg_scope(funcs, **present_args)
|
||||
|
||||
|
||||
@slim.add_arg_scope
def mobilenet_base(  # pylint: disable=invalid-name
    inputs,
    conv_defs,
    multiplier=1.0,
    final_endpoint=None,
    output_stride=None,
    use_explicit_padding=False,
    scope=None,
    is_training=False):
  """Mobilenet base network.

  Constructs a network from inputs to the given final endpoint. By default
  the network is constructed in inference mode. To create network
  in training mode use:

  with slim.arg_scope(mobilenet.training_scope()):
     logits, endpoints = mobilenet_base(...)

  Args:
    inputs: a tensor of shape [batch_size, height, width, channels].
    conv_defs: mapping with a 'spec' list of op(...) layers specifying the net
      architecture, plus optional 'defaults' and 'overrides' arg-scope
      settings.
    multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    final_endpoint: The name of the last layer, for early termination. For
      V1-based networks the last layer is "layer_14", for V2: "layer_20".
    output_stride: An integer that specifies the requested ratio of input to
      output spatial resolution. If not None, then we invoke atrous convolution
      if necessary to prevent the network from reducing the spatial resolution
      of the activation maps. Allowed values are 1 or any even number,
      excluding zero. Typical values are 8 (accurate fully convolutional
      mode), 16 (fast fully convolutional mode), and 32 (classification mode).

      NOTE- output_stride relies on all consequent operators to support dilated
      operators via "rate" parameter. This might require wrapping non-conv
      operators to operate properly.

    use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
      inputs so that the output dimensions are the same as if 'SAME' padding
      were used.
    scope: optional variable scope.
    is_training: How to setup batch_norm and other ops. Note: most of the time
      this does not need be set directly. Use mobilenet.training_scope() to set
      up training instead. This parameter is here for backward compatibility
      only. It is safe to set it to the value matching
      training_scope(is_training=...). It is also safe to explicitly set
      it to False, even if there is an outer training_scope set to training.
      (The network will be built in inference mode). If this is set to None,
      no arg_scope is added for slim.batch_norm's is_training parameter.

  Returns:
    tensor_out: output tensor.
    end_points: a set of activations for external use, for example summaries or
                losses.

  Raises:
    ValueError: depth_multiplier <= 0, or the target output_stride is not
                allowed.
  """
  if multiplier <= 0:
    raise ValueError('multiplier is not greater than zero.')

  # Set conv defs defaults and overrides.
  scope_defaults = conv_defs.get('defaults', {})
  scope_overrides = conv_defs.get('overrides', {})
  if use_explicit_padding:
    # Copy first so the caller's conv_defs are not modified.
    scope_overrides = copy.deepcopy(scope_overrides)
    scope_overrides[
        (slim.conv2d, slim.separable_conv2d)] = {'padding': 'VALID'}

  if output_stride is not None and (
      output_stride == 0 or (output_stride > 1 and output_stride % 2)):
    raise ValueError('Output stride must be None, 1 or a multiple of 2.')

  # a) Set the tensorflow scope
  # b) set padding to default: note we might consider removing this
  # since it is also set by mobilenet_scope
  # c) set all defaults
  # d) set all extra overrides.
  # pylint: disable=g-backslash-continuation
  with _scope_all(scope, default_scope='Mobilenet'), \
      safe_arg_scope([slim.batch_norm], is_training=is_training), \
      _set_arg_scope_defaults(scope_defaults), \
      _set_arg_scope_defaults(scope_overrides):
    # current_stride is the running product of convolution strides up to the
    # current layer. Once it reaches output_stride, further strides are
    # replaced by atrous convolution so the resolution stops shrinking.
    current_stride = 1

    # The atrous convolution rate parameter.
    rate = 1

    net = inputs
    end_points = {}
    # Maps the name scope of each layer's output to its endpoint name.
    scope_to_endpoint = {}
    for index, opdef in enumerate(conv_defs['spec']):
      layer_params = dict(opdef.params)
      opdef.multiplier_func(layer_params, multiplier)
      requested_stride = layer_params.get('stride', 1)

      reached_target_stride = (
          output_stride is not None and current_stride == output_stride)
      if reached_target_stride:
        # Use stride 1 with the current atrous rate, and fold this unit's
        # stride into the rate used by subsequent layers.
        layer_stride, layer_rate = 1, rate
        rate *= requested_stride
      else:
        layer_stride, layer_rate = requested_stride, 1
        current_stride *= requested_stride

      layer_params['stride'] = layer_stride
      # Only insert rate when it is > 1 and the kernel is not 1x1. A missing
      # kernel_size counts as "not 1x1" (the op then uses its own default).
      if (layer_rate > 1 and
          tuple(layer_params.get('kernel_size', [])) != (1, 1)):
        layer_params['rate'] = layer_rate

      if use_explicit_padding:
        if 'kernel_size' in layer_params:
          net = _fixed_padding(net, layer_params['kernel_size'], layer_rate)
        else:
          # Let the op handle explicit padding itself.
          layer_params['use_explicit_padding'] = True

      end_point = 'layer_%d' % (index + 1)
      try:
        net = opdef.op(net, **layer_params)
      except Exception:
        print('Failed to create op %i: %r params: %r' %
              (index, opdef, layer_params))
        raise
      end_points[end_point] = net
      scope_to_endpoint[os.path.dirname(net.name)] = end_point
      if final_endpoint is not None and end_point == final_endpoint:
        break

    # Add all tensors that end with 'output' to
    # endpoints
    for graph_op in net.graph.get_operations():
      op_scope, op_basename = os.path.split(graph_op.name)
      if op_scope in scope_to_endpoint and graph_op.name.endswith('output'):
        key = scope_to_endpoint[op_scope] + '/' + op_basename
        end_points[key] = graph_op.outputs[0]
    return net, end_points
|
||||
|
||||
|
||||
@contextlib.contextmanager
def _scope_all(scope, default_scope=None):
  """Opens a variable scope together with its original name scope.

  Args:
    scope: variable scope (or name) to open; may be None.
    default_scope: default name used when `scope` is None.

  Yields:
    The opened variable scope.
  """
  with tf.compat.v1.variable_scope(
      scope, default_name=default_scope) as var_scope:
    with tf.compat.v1.name_scope(var_scope.original_name_scope):
      yield var_scope
|
||||
|
||||
|
||||
@slim.add_arg_scope
def mobilenet(inputs,
              num_classes=1001,
              prediction_fn=slim.softmax,
              reuse=None,
              scope='Mobilenet',
              base_only=False,
              **mobilenet_args):
  """Mobilenet model for classification, supports both V1 and V2.

  Note: default mode is inference, use mobilenet.training_scope to create
  training network.

  Args:
    inputs: a tensor of shape [batch_size, height, width, channels].
    num_classes: number of predicted classes. If 0 or None, the logits layer
      is omitted and the input features to the logits layer (before dropout)
      are returned instead.
    prediction_fn: a function to get predictions out of logits
      (default softmax).
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.
    base_only: if True will only create the base of the network (no pooling
      and no logits).
    **mobilenet_args: passed to mobilenet_base verbatim.
      - conv_defs: list of conv defs
      - multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
      - output_stride: will ensure that the last layer has at most total stride.
      If the architecture calls for more stride than that provided
      (e.g. output_stride=16, but the architecture has 5 stride=2 operators),
      it will replace output_stride with fractional convolutions using Atrous
      Convolutions.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, num_classes]
    end_points: a dictionary from components of the network to the corresponding
      activation tensor.

  Raises:
    ValueError: Input rank is invalid.
  """
  is_training = mobilenet_args.get('is_training', False)
  input_shape = inputs.get_shape().as_list()
  if len(input_shape) != 4:
    raise ValueError('Expected rank 4 input, was: %d' % len(input_shape))

  with tf.compat.v1.variable_scope(scope, 'Mobilenet', reuse=reuse) as scope:
    inputs = tf.identity(inputs, 'input')
    net, end_points = mobilenet_base(inputs, scope=scope, **mobilenet_args)
    if base_only:
      return net, end_points

    net = tf.identity(net, name='embedding')

    with tf.compat.v1.variable_scope('Logits'):
      net = global_pool(net)
      end_points['global_pool'] = net
      if not num_classes:
        return net, end_points

      # The classifier is a fully connected layer (scope 'FC') rather than the
      # legacy 1x1 convolution (scope 'Conv2d_1c_1x1') followed by a squeeze.
      #
      # Squeeze only the two pooled spatial axes. Squeezing every size-1 axis
      # would also drop the batch axis for batch size 1 and would discard the
      # static shape when the batch size is unknown.
      net = tf.squeeze(net, axis=[1, 2])
      net = slim.dropout(
          net, keep_prob=0.8, scope='Dropout', is_training=is_training)
      logits = slim.fully_connected(
          net,
          num_classes,
          activation_fn=None,
          normalizer_fn=None,
          scope='FC')

      logits = tf.identity(logits, name='output')
    end_points['Logits'] = logits
    if prediction_fn:
      end_points['Predictions'] = prediction_fn(logits, 'Predictions')
  return logits, end_points
|
||||
|
||||
|
||||
def global_pool(input_tensor, pool_op=tf.compat.v2.nn.avg_pool2d):
  """Applies avg pool to produce 1x1 output.

  NOTE: This function is functionally equivalent to reduce_mean, but it has
  baked in average pool which has better support across hardware.

  Args:
    input_tensor: input tensor
    pool_op: pooling op (avg pool is default)
  Returns:
    a tensor batch_size x 1 x 1 x depth.
  """
  static_shape = input_tensor.get_shape().as_list()
  height, width = static_shape[1], static_shape[2]
  if height is not None and width is not None:
    window = [1, height, width, 1]
  else:
    # Spatial size is only known at run time: build the window dynamically.
    window = tf.convert_to_tensor(value=[
        1,
        tf.shape(input=input_tensor)[1],
        tf.shape(input=input_tensor)[2], 1
    ])
  pooled = pool_op(
      input_tensor, ksize=window, strides=[1, 1, 1, 1], padding='VALID')
  # Recover output shape, for unknown shape.
  pooled.set_shape([None, 1, 1, None])
  return pooled
|
||||
|
||||
|
||||
def training_scope(is_training=True,
                   weight_decay=0.00004,
                   stddev=0.09,
                   dropout_keep_prob=0.8,
                   bn_decay=0.997):
  """Defines Mobilenet training scope.

  Usage:
     with tf.contrib.slim.arg_scope(mobilenet.training_scope()):
       logits, endpoints = mobilenet_v2.mobilenet(input_tensor)

     # the network created will be trainable with dropout/batch norm
     # initialized appropriately.

  Args:
    is_training: if set to False this will ensure that all customizations are
      set to non-training mode. This might be helpful for code that is reused
      across both training/evaluation, but most of the time training_scope with
      value False is not needed. If this is set to None, the parameter is not
      added to the batch_norm arg_scope.
    weight_decay: The weight decay used for the L2 regularizer on slim.conv2d
      weights. slim.separable_conv2d weights get no regularizer.
    stddev: Not used by this implementation; the weight initializers are the
      fixed variance-scaling / random-normal initializers created below.
    dropout_keep_prob: dropout keep probability (not set if equals to None).
    bn_decay: decay for the batch norm moving averages (not set if equals to
      None).

  Returns:
    An argument scope to use via arg_scope.
  """
  # Note: do not introduce parameters that would change the inference
  # model here (for example whether to use bias), modify conv_def instead.
  batch_norm_params = dict(decay=bn_decay, is_training=is_training)

  # modified for NPU
  conv_init = tf.initializers.variance_scaling(
      scale=2., mode="fan_out", distribution="untruncated_normal")
  depthwise_init = tf.initializers.variance_scaling(
      scale=2., mode="fan_in", distribution="untruncated_normal")
  pointwise_init = tf.initializers.variance_scaling(
      scale=2., mode="fan_out", distribution="untruncated_normal")
  fc_init = tf.initializers.random_normal(stddev=0.01)

  conv_regularizer = slim.l2_regularizer(weight_decay)

  # pylint: disable=g-backslash-continuation
  with slim.arg_scope(
      [slim.conv2d],
      weights_initializer=conv_init,
      normalizer_fn=slim.batch_norm), \
      slim.arg_scope(
          [slim.fully_connected],
          weights_initializer=fc_init,
          normalizer_fn=slim.batch_norm), \
      slim.arg_scope(
          [slim.separable_conv2d],
          weights_initializer=depthwise_init,
          pointwise_initializer=pointwise_init,
          normalizer_fn=slim.batch_norm), \
      slim.arg_scope([mobilenet_base, mobilenet], is_training=is_training), \
      safe_arg_scope([slim.batch_norm], **batch_norm_params), \
      safe_arg_scope(
          [slim.dropout],
          is_training=is_training,
          keep_prob=dropout_keep_prob), \
      slim.arg_scope([slim.conv2d], weights_regularizer=conv_regularizer), \
      slim.arg_scope(
          [slim.separable_conv2d], weights_regularizer=None) as arg_sc:
    return arg_sc
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user