[add]上传训练benchmark by z00560161
This commit is contained in:
+20
@@ -0,0 +1,20 @@
|
||||
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the BSD 3-Clause License (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://opensource.org/licenses/BSD-3-Clause
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#from . import logger
|
||||
#from . import dataloaders
|
||||
#from . import training
|
||||
#from . import utils
|
||||
#from . import mixup
|
||||
#from . import resnet
|
||||
#from . import smoothing
|
||||
+369
@@ -0,0 +1,369 @@
|
||||
# Copyright (c) 2018-2019, NVIDIA CORPORATION
|
||||
# Copyright (c) 2017- Facebook, Inc
|
||||
#
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# * Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
import os
|
||||
import torch
|
||||
import numpy as np
|
||||
import torchvision.datasets as datasets
|
||||
import torchvision.transforms as transforms
|
||||
from PIL import Image
|
||||
|
||||
# Names of the data backends this module offers. The DALI backends
# ('dali-gpu' / 'dali-cpu') would be appended by the import block below,
# which is currently commented out.
DATA_BACKEND_CHOICES = ['pytorch', 'syntetic']
|
||||
# try:
|
||||
# from nvidia.dali.plugin.pytorch import DALIClassificationIterator
|
||||
# from nvidia.dali.pipeline import Pipeline
|
||||
# import nvidia.dali.ops as ops
|
||||
# import nvidia.dali.types as types
|
||||
# DATA_BACKEND_CHOICES.append('dali-gpu')
|
||||
# DATA_BACKEND_CHOICES.append('dali-cpu')
|
||||
# except ImportError:
|
||||
# print("Please install DALI from https://www.github.com/NVIDIA/DALI to run this example.")
|
||||
|
||||
|
||||
def load_jpeg_from_file(path, cuda=True, fp16=False):
    """Load one image file and return it as a normalized single-image batch.

    The image is resized to 256, center-cropped to 224, converted to a float
    tensor in the [0, 1] range and normalized per channel with the mean/std
    constants below.

    Args:
        path: Path of the image file to open with PIL.
        cuda: When true, the image and the statistics are moved to the GPU.
        fp16: When true, the result is half precision, otherwise float32.

    Returns:
        A tensor of shape ``(1, 3, 224, 224)``.
    """
    img_transforms = transforms.Compose(
        [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor()]
    )

    # Open through a context manager so the file handle is released as soon
    # as the pixels are decoded, and force three channels: the in-place
    # normalization below broadcasts against a (1, 3, 1, 1) mean/std and
    # fails for grayscale or RGBA inputs.
    with Image.open(path) as image_file:
        img = img_transforms(image_file.convert('RGB'))

    with torch.no_grad():
        # mean and std are not multiplied by 255 as they are in training script
        # torch dataloader reads data into bytes whereas loading directly
        # through PIL creates a tensor with floats in [0,1] range
        mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
        std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)

        if cuda:
            mean = mean.cuda()
            std = std.cuda()
            img = img.cuda()
        if fp16:
            mean = mean.half()
            std = std.half()
            img = img.half()
        else:
            img = img.float()

        # Add the batch dimension, then normalize in place.
        batch = img.unsqueeze(0).sub_(mean).div_(std)

    return batch
|
||||
|
||||
|
||||
# class HybridTrainPipe(Pipeline):
|
||||
# def __init__(self, batch_size, num_threads, device_id, data_dir, crop, dali_cpu=False):
|
||||
# super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed = 12 + device_id)
|
||||
# if torch.distributed.is_initialized():
|
||||
# rank = torch.distributed.get_rank()
|
||||
# world_size = torch.distributed.get_world_size()
|
||||
# else:
|
||||
# rank = 0
|
||||
# world_size = 1
|
||||
|
||||
# self.input = ops.FileReader(
|
||||
# file_root = data_dir,
|
||||
# shard_id = rank,
|
||||
# num_shards = world_size,
|
||||
# random_shuffle = True)
|
||||
|
||||
# if dali_cpu:
|
||||
# dali_device = "cpu"
|
||||
# self.decode = ops.ImageDecoder(device=dali_device, output_type=types.RGB)
|
||||
# else:
|
||||
# dali_device = "gpu"
|
||||
# # This padding sets the size of the internal nvJPEG buffers to be able to handle all images from full-sized ImageNet
|
||||
# # without additional reallocations
|
||||
# self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB, device_memory_padding=211025920, host_memory_padding=140544512)
|
||||
|
||||
# self.res = ops.RandomResizedCrop(
|
||||
# device=dali_device,
|
||||
# size=[crop, crop],
|
||||
# interp_type=types.INTERP_LINEAR,
|
||||
# random_aspect_ratio=[0.75, 4./3.],
|
||||
# random_area=[0.08, 1.0],
|
||||
# num_attempts=100)
|
||||
|
||||
# self.cmnp = ops.CropMirrorNormalize(device = "gpu",
|
||||
# output_dtype = types.FLOAT,
|
||||
# output_layout = types.NCHW,
|
||||
# crop = (crop, crop),
|
||||
# image_type = types.RGB,
|
||||
# mean = [0.485 * 255,0.456 * 255,0.406 * 255],
|
||||
# std = [0.229 * 255,0.224 * 255,0.225 * 255])
|
||||
# self.coin = ops.CoinFlip(probability = 0.5)
|
||||
|
||||
# def define_graph(self):
|
||||
# rng = self.coin()
|
||||
# self.jpegs, self.labels = self.input(name = "Reader")
|
||||
# images = self.decode(self.jpegs)
|
||||
# images = self.res(images)
|
||||
# output = self.cmnp(images.gpu(), mirror = rng)
|
||||
# return [output, self.labels]
|
||||
|
||||
|
||||
# class HybridValPipe(Pipeline):
|
||||
# def __init__(self, batch_size, num_threads, device_id, data_dir, crop, size):
|
||||
# super(HybridValPipe, self).__init__(batch_size, num_threads, device_id, seed = 12 + device_id)
|
||||
# if torch.distributed.is_initialized():
|
||||
# rank = torch.distributed.get_rank()
|
||||
# world_size = torch.distributed.get_world_size()
|
||||
# else:
|
||||
# rank = 0
|
||||
# world_size = 1
|
||||
|
||||
# self.input = ops.FileReader(
|
||||
# file_root = data_dir,
|
||||
# shard_id = rank,
|
||||
# num_shards = world_size,
|
||||
# random_shuffle = False)
|
||||
|
||||
# self.decode = ops.ImageDecoder(device = "mixed", output_type = types.RGB)
|
||||
# self.res = ops.Resize(device = "gpu", resize_shorter = size)
|
||||
# self.cmnp = ops.CropMirrorNormalize(device = "gpu",
|
||||
# output_dtype = types.FLOAT,
|
||||
# output_layout = types.NCHW,
|
||||
# crop = (crop, crop),
|
||||
# image_type = types.RGB,
|
||||
# mean = [0.485 * 255,0.456 * 255,0.406 * 255],
|
||||
# std = [0.229 * 255,0.224 * 255,0.225 * 255])
|
||||
|
||||
# def define_graph(self):
|
||||
# self.jpegs, self.labels = self.input(name = "Reader")
|
||||
# images = self.decode(self.jpegs)
|
||||
# images = self.res(images)
|
||||
# output = self.cmnp(images)
|
||||
# return [output, self.labels]
|
||||
|
||||
|
||||
class DALIWrapper(object):
    """Iterable adapter that turns a DALI iterator into (input, target) pairs."""

    @staticmethod
    def gen_wrapper(dalipipeline, num_classes, one_hot):
        """Yield ``(input, target)`` for every batch of ``dalipipeline``.

        Declared as a static method because it takes no ``self``; without the
        decorator it only worked when called through the class.

        Args:
            dalipipeline: Iterable whose items expose ``item[0]["data"]`` and
                ``item[0]["label"]``, and which provides ``reset()``.
            num_classes: Number of classes, used only for one-hot expansion.
            one_hot: When true, targets are expanded with ``expand``.
        """
        for data in dalipipeline:
            batch_input = data[0]["data"]
            # Flatten the labels to 1-D and move them to the GPU as int64.
            target = torch.reshape(data[0]["label"], [-1]).cuda().long()
            if one_hot:
                target = expand(num_classes, torch.float, target)
            yield batch_input, target
        # Rewind the pipeline once it is exhausted so it can be iterated again.
        dalipipeline.reset()

    def __init__(self, dalipipeline, num_classes, one_hot):
        """Store the wrapped pipeline and the target-encoding options."""
        self.dalipipeline = dalipipeline
        self.num_classes = num_classes
        self.one_hot = one_hot

    def __iter__(self):
        """Return a fresh generator over the wrapped pipeline."""
        return DALIWrapper.gen_wrapper(self.dalipipeline, self.num_classes, self.one_hot)
|
||||
|
||||
def get_dali_train_loader(dali_cpu=False):
    """Return the training-loader factory for the DALI backend.

    The DALI implementation is disabled in this file (kept below as a
    comment for reference), so the returned factory is a stub that always
    returns ``False``.

    Args:
        dali_cpu: Unused by the stub; the disabled implementation forwarded
            it to ``HybridTrainPipe``.

    Returns:
        The stub callable ``gdtl``.
    """
    # def gdtl(data_path, batch_size, num_classes, one_hot, workers=5, _worker_init_fn=None, fp16=False):
    #     if torch.distributed.is_initialized():
    #         rank = torch.distributed.get_rank()
    #         world_size = torch.distributed.get_world_size()
    #     else:
    #         rank = 0
    #         world_size = 1
    #
    #     traindir = os.path.join(data_path, 'train')
    #
    #     pipe = HybridTrainPipe(batch_size=batch_size, num_threads=workers,
    #             device_id = rank % torch.cuda.device_count(),
    #             data_dir = traindir, crop = 224, dali_cpu=dali_cpu)
    #
    #     pipe.build()
    #     train_loader = DALIClassificationIterator(pipe, size = int(pipe.epoch_size("Reader") / world_size))
    #
    #     return DALIWrapper(train_loader, num_classes, one_hot), int(pipe.epoch_size("Reader") / (world_size * batch_size))
    #
    # return gdtl
    def gdtl(data_path, batch_size, num_classes, one_hot, workers=5, _worker_init_fn=None, fp16=False):
        return False

    # The original returned ``gdvl``, a name that does not exist in this
    # scope, so calling get_dali_train_loader() raised NameError.
    return gdtl
|
||||
|
||||
def get_dali_val_loader():
    """Return the validation-loader factory for the DALI backend.

    The DALI implementation is disabled in this file (kept below as a
    comment for reference); the factory handed back is a stub that always
    returns ``False``.
    """
    # def gdvl(data_path, batch_size, num_classes, one_hot, workers=5, _worker_init_fn=None, fp16=False):
    #     if torch.distributed.is_initialized():
    #         rank = torch.distributed.get_rank()
    #         world_size = torch.distributed.get_world_size()
    #     else:
    #         rank = 0
    #         world_size = 1
    #
    #     valdir = os.path.join(data_path, 'val')
    #
    #     pipe = HybridValPipe(batch_size=batch_size, num_threads=workers,
    #             device_id = rank % torch.cuda.device_count(),
    #             data_dir = valdir,
    #             crop = 224, size = 256)
    #
    #     pipe.build()
    #     val_loader = DALIClassificationIterator(pipe, size = int(pipe.epoch_size("Reader") / world_size))
    #
    #     return DALIWrapper(val_loader, num_classes, one_hot), int(pipe.epoch_size("Reader") / (world_size * batch_size))
    # return gdvl
    def gdvl(data_path, batch_size, num_classes, one_hot,
             workers=5, _worker_init_fn=None, fp16=False):
        # Stub: no loader is built while DALI support is disabled.
        return False

    return gdvl
|
||||
|
||||
def fast_collate(batch):
    """Collate ``(image, label)`` samples into a uint8 image batch and labels.

    The output size is taken from the first image (``image.size`` is read as
    ``(width, height)``); every image is converted to a uint8 array, moved to
    channel-first layout and written into a ``(N, 3, H, W)`` tensor.
    Two-dimensional (single-channel) images get a trailing channel axis and
    are broadcast across the three output channels.

    Args:
        batch: Sequence of ``(image, label)`` pairs.

    Returns:
        ``(tensor, targets)`` where ``tensor`` is uint8 with shape
        ``(N, 3, H, W)`` and ``targets`` is an int64 tensor of the labels.
    """
    imgs = [sample[0] for sample in batch]
    targets = torch.tensor([sample[1] for sample in batch], dtype=torch.int64)
    w = imgs[0].size[0]
    h = imgs[0].size[1]
    tensor = torch.zeros((len(imgs), 3, h, w), dtype=torch.uint8)
    for i, img in enumerate(imgs):
        nump_array = np.asarray(img, dtype=np.uint8)
        # The original also built an unused tensor from this array here; that
        # dead conversion has been removed.
        if nump_array.ndim < 3:
            nump_array = np.expand_dims(nump_array, axis=-1)
        # HWC -> CHW
        nump_array = np.rollaxis(nump_array, 2)

        # The slot is zero-initialised, so adding is equivalent to copying.
        tensor[i] += torch.from_numpy(nump_array)

    return tensor, targets
|
||||
|
||||
|
||||
def expand(num_classes, dtype, tensor):
    """One-hot encode a 1-D tensor of class indices.

    The result is allocated on the same device as ``tensor``. The original
    hard-coded the ``cuda`` device, which only worked for indices already on
    the current GPU; following the input's device gives the same result for
    those callers and also works elsewhere.

    Args:
        num_classes: Width of the one-hot encoding.
        dtype: dtype of the returned tensor.
        tensor: 1-D integer tensor of class indices (used as the scatter
            index, so it must be int64).

    Returns:
        A ``(tensor.size(0), num_classes)`` tensor with 1.0 at each index.
    """
    e = torch.zeros(tensor.size(0), num_classes, dtype=dtype, device=tensor.device)
    e = e.scatter(1, tensor.unsqueeze(1), 1.0)
    return e
|
||||
|
||||
class PrefetchedWrapper(object):
    """Wrap a DataLoader so batches are moved to the GPU and normalized one step ahead."""

    @staticmethod
    def prefetched_loader(loader, num_classes, fp16, one_hot):
        """Yield normalized ``(input, target)`` CUDA batches from ``loader``.

        Each batch is copied to the GPU, cast and normalized on a side CUDA
        stream while the previously prepared batch is handed to the caller.

        Args:
            loader: Iterable of ``(input, target)`` batches.
            num_classes: Number of classes, used only for one-hot expansion.
            fp16: Cast inputs (and one-hot targets) to half precision.
            one_hot: Expand targets with ``expand``.
        """
        # Statistics are scaled by 255; presumably because the batches built
        # by fast_collate hold raw 0-255 uint8 pixel values.
        mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).cuda().view(1,3,1,1)
        std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).cuda().view(1,3,1,1)
        if fp16:
            mean = mean.half()
            std = std.half()

        stream = torch.cuda.Stream()
        first = True

        for next_input, next_target in loader:
            with torch.cuda.stream(stream):
                next_input = next_input.cuda(non_blocking=True)
                next_target = next_target.cuda(non_blocking=True)
                if fp16:
                    next_input = next_input.half()
                    if one_hot:
                        next_target = expand(num_classes, torch.half, next_target)
                else:
                    next_input = next_input.float()
                    if one_hot:
                        next_target = expand(num_classes, torch.float, next_target)

                next_input = next_input.sub_(mean).div_(std)

            # Hand out the batch prepared on the previous pass; on the very
            # first pass there is nothing to hand out yet.
            if not first:
                yield input, target
            else:
                first = False

            # Make the consumer stream wait for the side-stream work before
            # the freshly prepared batch becomes the current one.
            torch.cuda.current_stream().wait_stream(stream)
            input = next_input
            target = next_target

        # Emit the last prepared batch. An empty loader never prepared one,
        # and the original raised UnboundLocalError here in that case.
        if not first:
            yield input, target

    def __init__(self, dataloader, num_classes, fp16, one_hot):
        """Store the wrapped loader and options; the epoch counter starts at 0."""
        self.dataloader = dataloader
        self.fp16 = fp16
        self.epoch = 0
        self.one_hot = one_hot
        self.num_classes = num_classes

    def __iter__(self):
        """Start a new pass: reseed a distributed sampler and bump the epoch."""
        if (self.dataloader.sampler is not None and
            isinstance(self.dataloader.sampler,
                       torch.utils.data.distributed.DistributedSampler)):

            self.dataloader.sampler.set_epoch(self.epoch)
        self.epoch += 1
        return PrefetchedWrapper.prefetched_loader(self.dataloader, self.num_classes, self.fp16, self.one_hot)
|
||||
|
||||
def get_pytorch_train_loader(data_path, batch_size, num_classes, one_hot, workers=5, _worker_init_fn=None, fp16=False):
    """Build the training loader over ``<data_path>/train``.

    Images get a random resized crop to 224 and a random horizontal flip,
    are collated with ``fast_collate`` and wrapped in ``PrefetchedWrapper``.
    A ``DistributedSampler`` is used when torch.distributed is initialized;
    otherwise the DataLoader shuffles by itself. Incomplete final batches are
    dropped.

    Returns:
        ``(loader, steps)`` where ``steps`` is ``len()`` of the underlying
        DataLoader.
    """
    augmentations = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
    ])
    dataset = datasets.ImageFolder(os.path.join(data_path, 'train'), augmentations)

    sampler = None
    if torch.distributed.is_initialized():
        sampler = torch.utils.data.distributed.DistributedSampler(dataset)

    loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        # Shuffling is delegated to the sampler whenever one is present.
        shuffle=(sampler is None),
        sampler=sampler,
        num_workers=workers,
        worker_init_fn=_worker_init_fn,
        pin_memory=True,
        collate_fn=fast_collate,
        drop_last=True,
    )

    return PrefetchedWrapper(loader, num_classes, fp16, one_hot), len(loader)
|
||||
|
||||
def get_pytorch_val_loader(data_path, batch_size, num_classes, one_hot, workers=5, _worker_init_fn=None, fp16=False):
    """Build the validation loader over ``<data_path>/val``.

    Images are resized to 256 and center-cropped to 224, collated with
    ``fast_collate`` and wrapped in ``PrefetchedWrapper``. No shuffling is
    done; a ``DistributedSampler`` is used when torch.distributed is
    initialized.

    Returns:
        ``(loader, steps)`` where ``steps`` is ``len()`` of the underlying
        DataLoader.
    """
    preprocessing = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
    ])
    dataset = datasets.ImageFolder(os.path.join(data_path, 'val'), preprocessing)

    sampler = None
    if torch.distributed.is_initialized():
        sampler = torch.utils.data.distributed.DistributedSampler(dataset)

    loader = torch.utils.data.DataLoader(
        dataset,
        sampler=sampler,
        batch_size=batch_size,
        shuffle=False,
        num_workers=workers,
        worker_init_fn=_worker_init_fn,
        pin_memory=True,
        collate_fn=fast_collate,
    )

    return PrefetchedWrapper(loader, num_classes, fp16, one_hot), len(loader)
|
||||
|
||||
class SynteticDataLoader(object):
    """Endless loader that repeats one fixed synthetic batch held on the GPU."""

    def __init__(self, fp16, batch_size, num_classes, num_channels, height, width, one_hot):
        """Create the single batch that every iteration will yield.

        Args:
            fp16: Cast the input batch to half precision.
            batch_size: Number of samples in the batch.
            num_classes: Number of classes for the targets.
            num_channels: Channel count of the input batch.
            height: Height of the input batch.
            width: Width of the input batch.
            one_hot: When true, targets are a ``(batch_size, num_classes)``
                one-hot matrix with class 0 set for every sample; otherwise
                they are random class indices.
        """
        input_data = torch.empty(batch_size, num_channels, height, width).cuda().normal_(0, 1.0)
        if one_hot:
            # Start from zeros: the original used torch.empty, which left
            # every column except 0 holding uninitialised memory.
            input_target = torch.zeros(batch_size, num_classes).cuda()
            input_target[:, 0] = 1.0
        else:
            input_target = torch.randint(0, num_classes, (batch_size,))
        input_target = input_target.cuda()
        if fp16:
            input_data = input_data.half()

        self.input_data = input_data
        self.input_target = input_target

    def __iter__(self):
        """Yield the same ``(input, target)`` pair forever."""
        while True:
            yield self.input_data, self.input_target
|
||||
|
||||
def get_syntetic_loader(data_path, batch_size, num_classes, one_hot, workers=None, _worker_init_fn=None, fp16=False):
    """Build the synthetic-data loader (3x224x224 inputs).

    ``data_path``, ``workers`` and ``_worker_init_fn`` are accepted only so
    the signature matches the other loader factories; they are not used.

    Returns:
        ``(loader, -1)``; the step count is -1 because the loader never ends.
    """
    # Honour the requested class count; the original ignored ``num_classes``
    # and always generated targets for 1000 classes.
    return SynteticDataLoader(fp16, batch_size, num_classes, 3, 224, 224, one_hot), -1
|
||||
+310
@@ -0,0 +1,310 @@
|
||||
# Copyright (c) 2018-2019, NVIDIA CORPORATION
|
||||
# Copyright (c) 2017- Facebook, Inc
|
||||
#
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# * Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
from collections import OrderedDict
|
||||
import dllogger
|
||||
import numpy as np
|
||||
|
||||
|
||||
def format_step(step):
    """Render a logging step as a human-readable prefix.

    A string step is returned untouched. An empty sequence becomes
    ``"Summary:"``. Otherwise the first three elements are labelled, in
    order, as epoch, iteration and validation iteration.
    """
    if isinstance(step, str):
        return step
    if len(step) == 0:
        return "Summary:"

    templates = (
        "Epoch: {} ",
        "Iteration: {} ",
        "Validation Iteration: {} ",
    )
    # zip() stops at the shorter input, so extra step elements are ignored.
    return "".join(template.format(value) for template, value in zip(templates, step))
|
||||
|
||||
|
||||
# Meter factories. Each call builds a fresh Meter whose three aggregators
# serve, in order, the iteration, epoch and run levels.


def PERF_METER():
    """Meter that averages at every level."""
    return Meter(AverageMeter(), AverageMeter(), AverageMeter())


def LOSS_METER():
    """Meter that averages per iteration/epoch and uses MinMeter for the run."""
    return Meter(AverageMeter(), AverageMeter(), MinMeter())


def ACC_METER():
    """Meter that averages per iteration/epoch and uses MaxMeter for the run."""
    return Meter(AverageMeter(), AverageMeter(), MaxMeter())


def LR_METER():
    """Meter that keeps only the last recorded value at every level."""
    return Meter(LastMeter(), LastMeter(), LastMeter())


def LAT_100():
    """Meter reporting the quantile q=1 at every level."""
    return Meter(QuantileMeter(1), QuantileMeter(1), QuantileMeter(1))


def LAT_99():
    """Meter reporting the quantile q=0.99 at every level."""
    return Meter(QuantileMeter(0.99), QuantileMeter(0.99), QuantileMeter(0.99))


def LAT_95():
    """Meter reporting the quantile q=0.95 at every level."""
    return Meter(QuantileMeter(0.95), QuantileMeter(0.95), QuantileMeter(0.95))
|
||||
|
||||
class Meter(object):
    """Three-level metric accumulator: iteration, epoch and run.

    Values are recorded into the iteration aggregator. Resetting a level
    forwards its ``get_data()`` result to the next level up, unless that
    value is ``None``.
    """

    def __init__(self, iteration_aggregator, epoch_aggregator, run_aggregator):
        self.iteration_aggregator = iteration_aggregator
        self.epoch_aggregator = epoch_aggregator
        self.run_aggregator = run_aggregator

    @staticmethod
    def _roll_up(source, sink):
        """Reset ``source`` and pass what it held on to ``sink``."""
        value, count = source.get_data()
        source.reset()
        if value is not None:
            sink.record(value, n=count)

    def record(self, val, n=1):
        """Record ``val`` (weighted by ``n``) at the iteration level."""
        self.iteration_aggregator.record(val, n=n)

    def get_iteration(self):
        """Return the current iteration-level value."""
        value, _ = self.iteration_aggregator.get_val()
        return value

    def reset_iteration(self):
        """Fold the iteration level into the epoch level."""
        self._roll_up(self.iteration_aggregator, self.epoch_aggregator)

    def get_epoch(self):
        """Return the current epoch-level value."""
        value, _ = self.epoch_aggregator.get_val()
        return value

    def reset_epoch(self):
        """Fold the epoch level into the run level."""
        self._roll_up(self.epoch_aggregator, self.run_aggregator)

    def get_run(self):
        """Return the current run-level value."""
        value, _ = self.run_aggregator.get_val()
        return value

    def reset_run(self):
        """Clear the run level."""
        self.run_aggregator.reset()
|
||||
|
||||
|
||||
class QuantileMeter(object):
    """Aggregator that keeps every recorded value and reports a quantile."""

    def __init__(self, q):
        """Remember the quantile ``q`` (passed straight to ``np.quantile``)."""
        self.q = q
        self.reset()

    def reset(self):
        """Forget all recorded values."""
        self.vals = []
        self.n = 0

    def record(self, val, n=1):
        """Record a list of values, or one value repeated ``n`` times."""
        if isinstance(val, list):
            self.vals += val
            self.n += len(val)
        else:
            self.vals += [val] * n
            self.n += n

    def get_val(self):
        """Return ``(quantile, count)``; the quantile is ``None`` when empty."""
        if not self.vals:
            return None, self.n
        # ``interpolation=`` is the deprecated spelling of ``method=``; prefer
        # the new keyword and fall back for NumPy releases that predate it.
        try:
            quantile = np.quantile(self.vals, self.q, method='nearest')
        except TypeError:
            quantile = np.quantile(self.vals, self.q, interpolation='nearest')
        return quantile, self.n

    def get_data(self):
        """Return the raw recorded values and their count."""
        return self.vals, self.n
|
||||
|
||||
|
||||
class MaxMeter(object):
    """Aggregator that keeps the largest value recorded since the last reset."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the stored maximum and count."""
        self.max = None
        self.n = 0

    def record(self, val, n=1):
        """Update the maximum; ``n`` replaces (does not add to) the count."""
        self.max = val if self.max is None else max(self.max, val)
        self.n = n

    def get_val(self):
        """Return ``(maximum, count)``."""
        return self.max, self.n

    def get_data(self):
        """Return the same ``(maximum, count)`` pair as ``get_val``."""
        return self.max, self.n
|
||||
|
||||
|
||||
class MinMeter(object):
    """Aggregator that keeps the smallest value recorded since the last reset."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the stored minimum and count."""
        self.min = None
        self.n = 0

    def record(self, val, n=1):
        """Update the minimum; ``n`` replaces (does not add to) the count."""
        if self.min is None:
            self.min = val
        else:
            # The original called max() here, so the meter actually tracked
            # the largest value.
            self.min = min(self.min, val)
        self.n = n

    def get_val(self):
        """Return ``(minimum, count)``."""
        return self.min, self.n

    def get_data(self):
        """Return the same ``(minimum, count)`` pair as ``get_val``."""
        return self.min, self.n
|
||||
|
||||
|
||||
class LastMeter(object):
    """Aggregator that remembers only the most recently recorded value."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the stored value and count."""
        self.last, self.n = None, 0

    def record(self, val, n=1):
        """Overwrite the stored value and count with the new ones."""
        self.last, self.n = val, n

    def get_val(self):
        """Return ``(last value, count)``."""
        return self.last, self.n

    def get_data(self):
        """Return the same ``(last value, count)`` pair as ``get_val``."""
        return self.last, self.n
|
||||
|
||||
|
||||
class AverageMeter(object):
    """Aggregator that reports the weighted mean of the recorded values."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero the running weighted sum and the total weight."""
        self.n = 0
        self.val = 0

    def record(self, val, n=1):
        """Add ``val`` with weight ``n``."""
        self.val += val * n
        self.n += n

    def _mean_and_count(self):
        """Return ``(mean, weight)``, or ``(None, 0)`` when nothing was recorded."""
        if self.n == 0:
            return None, 0
        return self.val / self.n, self.n

    def get_val(self):
        """Return ``(weighted mean, total weight)``."""
        return self._mean_and_count()

    def get_data(self):
        """Return the same ``(weighted mean, total weight)`` pair as ``get_val``."""
        return self._mean_and_count()
|
||||
|
||||
|
||||
class Logger(object):
    """Tracks registered metrics and reports them through ``dllogger``.

    The logger keeps epoch / iteration / validation-iteration counters and a
    name -> ``{'meter', 'level'}`` mapping of registered metrics.
    """

    def __init__(self, print_interval, backends, verbose=False):
        """Initialise counters and ``dllogger``.

        Args:
            print_interval: Iteration metrics are logged whenever the
                iteration counter is a multiple of this value.
            backends: Passed to ``dllogger.init``.
            verbose: Print a line for every registered metric.
        """
        self.epoch = -1
        self.iteration = -1
        self.val_iteration = -1
        self.metrics = OrderedDict()
        self.backends = backends
        self.print_interval = print_interval
        self.verbose = verbose
        dllogger.init(backends)

    def log_parameter(self, data, verbosity=0):
        """Log ``data`` under the fixed step name ``"PARAMETER"``."""
        dllogger.log(step="PARAMETER", data=data, verbosity=verbosity)

    def register_metric(self, metric_name, meter, verbosity=0, metadata=None):
        """Register ``meter`` under ``metric_name`` and publish its metadata.

        ``metadata`` defaults to a fresh empty dict per call; the original
        used a shared mutable ``{}`` default.
        """
        if metadata is None:
            metadata = {}
        if self.verbose:
            print("Registering metric: {}".format(metric_name))
        self.metrics[metric_name] = {'meter': meter, 'level': verbosity}
        dllogger.metadata(metric_name, metadata)

    def log_metric(self, metric_name, val, n=1):
        """Record ``val`` (weighted by ``n``) into a registered metric."""
        self.metrics[metric_name]['meter'].record(val, n=n)

    def start_iteration(self, val=False):
        """Advance the validation or the training iteration counter."""
        if val:
            self.val_iteration += 1
        else:
            self.iteration += 1

    def end_iteration(self, val=False):
        """Log and reset iteration-level metrics on print-interval boundaries.

        Metrics whose name starts with ``'val'`` are handled when ``val`` is
        true, all other metrics when it is false.
        """
        it = self.val_iteration if val else self.iteration
        if it % self.print_interval == 0:
            metrics = {
                n: m
                for n, m in self.metrics.items() if n.startswith('val') == val
            }
            if not val:
                step = (self.epoch, self.iteration)
            else:
                step = (self.epoch, self.iteration, self.val_iteration)

            # One log call per verbosity level found among the metrics.
            verbositys = {m['level'] for m in metrics.values()}
            for ll in verbositys:
                llm = {n: m for n, m in metrics.items() if m['level'] == ll}

                dllogger.log(step=step,
                             data={
                                 n: m['meter'].get_iteration()
                                 for n, m in llm.items()
                             },
                             verbosity=ll)

            for m in metrics.values():
                m['meter'].reset_iteration()

            dllogger.flush()

    def start_epoch(self):
        """Advance the epoch, zero both iteration counters, roll meters up."""
        self.epoch += 1
        self.iteration = 0
        self.val_iteration = 0

        for m in self.metrics.values():
            m['meter'].reset_epoch()

    def end_epoch(self):
        """Fold pending iteration data into the epoch level and log it."""
        for m in self.metrics.values():
            m['meter'].reset_iteration()

        verbositys = {m['level'] for m in self.metrics.values()}
        for ll in verbositys:
            llm = {n: m for n, m in self.metrics.items() if m['level'] == ll}
            # NOTE(review): unlike end_iteration, the level is not forwarded
            # as ``verbosity`` here - confirm whether that is intended.
            dllogger.log(step=(self.epoch, ),
                         data={n: m['meter'].get_epoch()
                               for n, m in llm.items()})

    def end(self):
        """Fold the epoch level into the run level, log the summary, flush."""
        for m in self.metrics.values():
            m['meter'].reset_epoch()

        verbositys = {m['level'] for m in self.metrics.values()}
        for ll in verbositys:
            llm = {n: m for n, m in self.metrics.items() if m['level'] == ll}
            # NOTE(review): the level is not forwarded as ``verbosity`` here
            # either - confirm whether that is intended.
            dllogger.log(step=tuple(),
                         data={n: m['meter'].get_run()
                               for n, m in llm.items()})

        for m in self.metrics.values():
            m['meter'].reset_epoch()

        dllogger.flush()

    def iteration_generator_wrapper(self, gen, val=False):
        """Yield from ``gen``, bracketing each item with start/end_iteration."""
        for g in gen:
            self.start_iteration(val=val)
            yield g
            self.end_iteration(val=val)

    def epoch_generator_wrapper(self, gen):
        """Yield from ``gen``, bracketing each item with start/end_epoch."""
        for g in gen:
            self.start_epoch()
            yield g
            self.end_epoch()
|
||||
+67
@@ -0,0 +1,67 @@
|
||||
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the BSD 3-Clause License (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://opensource.org/licenses/BSD-3-Clause
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import numpy as np
|
||||
|
||||
|
||||
def mixup(alpha, num_classes, data, target):
    """Blend every sample of a batch with another sample of the same batch.

    A single coefficient ``c`` is drawn from ``Beta(alpha, alpha)`` and each
    sample ``i`` is combined with sample ``perm[i]`` of a random permutation:
    ``c * x[i] + (1 - c) * x[perm[i]]``. Inputs and targets use the same
    coefficient and permutation.

    Args:
        alpha: Parameter of the symmetric Beta distribution.
        num_classes: Unused; kept for interface compatibility.
        data: Input batch, indexed along dimension 0.
        target: Target batch with at least two dimensions (it is indexed as
            ``target[perm, :]``), e.g. one-hot labels.

    Returns:
        ``(mixed_data, mixed_target)``.
    """
    with torch.no_grad():
        bs = data.size(0)
        c = np.random.beta(alpha, alpha)

        # Generate on the CPU exactly as before, then follow the batch's
        # device rather than the hard-coded ``.cuda()`` of the original.
        perm = torch.randperm(bs).to(data.device)

        md = c * data + (1 - c) * data[perm, :]
        mt = c * target + (1 - c) * target[perm, :]
        return md, mt
|
||||
|
||||
|
||||
class MixUpWrapper(object):
    """Iterable that applies ``mixup`` to every batch of a wrapped loader."""

    def __init__(self, alpha, num_classes, dataloader):
        """Store the mixup parameters and the loader to wrap."""
        self.alpha = alpha
        self.num_classes = num_classes
        self.dataloader = dataloader

    def mixup_loader(self, loader):
        """Yield the mixed ``(input, target)`` pair for each batch of ``loader``."""
        for batch_input, batch_target in loader:
            mixed_input, mixed_target = mixup(
                self.alpha, self.num_classes, batch_input, batch_target)
            yield mixed_input, mixed_target

    def __iter__(self):
        """Return a fresh generator over the wrapped loader."""
        return self.mixup_loader(self.dataloader)
|
||||
|
||||
|
||||
class NLLMultiLabelSmooth(nn.Module):
|
||||
def __init__(self, smoothing = 0.0):
|
||||
super(NLLMultiLabelSmooth, self).__init__()
|
||||
self.confidence = 1.0 - smoothing
|
||||
self.smoothing = smoothing
|
||||
|
||||
def forward(self, x, target):
|
||||
if self.training:
|
||||
x = x.float()
|
||||
target = target.float()
|
||||
logprobs = torch.nn.functional.log_softmax(x, dim = -1)
|
||||
|
||||
nll_loss = -logprobs * target
|
||||
nll_loss = nll_loss.sum(-1)
|
||||
|
||||
smooth_loss = -logprobs.mean(dim=-1)
|
||||
|
||||
loss = self.confidence * nll_loss + self.smoothing * smooth_loss
|
||||
|
||||
return loss.mean()
|
||||
else:
|
||||
return torch.nn.functional.cross_entropy(x, target)
|
||||
+370
@@ -0,0 +1,370 @@
|
||||
# Copyright (c) 2018-2019, NVIDIA CORPORATION
|
||||
# Copyright (c) 2017- Facebook, Inc
|
||||
#
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# * Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
import math
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import numpy as np
|
||||
|
||||
# Names exported by ``from <module> import *``.
__all__ = ['ResNet', 'build_resnet', 'resnet_versions', 'resnet_configs']
|
||||
|
||||
# ResNetBuilder {{{
|
||||
|
||||
class ResNetBuilder(object):
    """Factory for the layers used to assemble a ResNet.

    ``version`` may carry a ``'cardinality'`` entry (group count of the 3x3
    convolutions, default 1). ``config`` supplies ``'nonlinearity'``,
    ``'conv_init'``, ``'last_bn_0_init'`` and ``'activation'``.
    """

    def __init__(self, version, config):
        if 'cardinality' in version.keys():
            self.conv3x3_cardinality = version['cardinality']
        else:
            self.conv3x3_cardinality = 1
        self.config = config

    def conv(self, kernel_size, in_planes, out_planes, groups=1, stride=1):
        """Create a bias-free ``Conv2d`` padded by ``(kernel_size - 1) / 2``."""
        layer = nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            groups=groups,
            stride=stride,
            padding=int((kernel_size - 1) / 2),
            bias=False,
        )

        if self.config['nonlinearity'] == 'relu':
            # Kaiming ("He") normal initialisation, from "Delving deep into
            # rectifiers: Surpassing human-level performance on ImageNet
            # classification" (He et al., 2015).
            #   mode: 'fan_in' preserves the variance of the weights in the
            #         forward pass, 'fan_out' in the backward pass.
            #   nonlinearity: name of the non-linear function; 'relu' or
            #         'leaky_relu' are the recommended values.
            # Example:
            #   >>> w = torch.empty(3, 5)
            #   >>> nn.init.kaiming_normal_(w, mode='fan_out', nonlinearity='relu')
            nn.init.kaiming_normal_(
                layer.weight,
                mode=self.config['conv_init'],
                nonlinearity=self.config['nonlinearity'],
            )

        return layer

    def conv3x3(self, in_planes, out_planes, stride=1):
        """3x3 convolution with padding"""
        return self.conv(3, in_planes, out_planes,
                         groups=self.conv3x3_cardinality, stride=stride)

    def conv1x1(self, in_planes, out_planes, stride=1):
        """1x1 convolution with padding"""
        return self.conv(1, in_planes, out_planes, stride=stride)

    def conv7x7(self, in_planes, out_planes, stride=1):
        """7x7 convolution with padding"""
        return self.conv(7, in_planes, out_planes, stride=stride)

    def conv5x5(self, in_planes, out_planes, stride=1):
        """5x5 convolution with padding"""
        return self.conv(5, in_planes, out_planes, stride=stride)

    def batchnorm(self, planes, last_bn=False):
        """Create a ``BatchNorm2d`` with constant-initialised weight and bias.

        The weight (gamma) is 0 when ``last_bn`` is set and the config enables
        ``'last_bn_0_init'``, otherwise 1. The bias is always 0.
        """
        layer = nn.BatchNorm2d(planes)
        zero_gamma = last_bn and self.config['last_bn_0_init']
        nn.init.constant_(layer.weight, 0 if zero_gamma else 1)
        nn.init.constant_(layer.bias, 0)
        return layer

    def activation(self):
        """Instantiate the activation configured under ``'activation'``."""
        return self.config['activation']()
|
||||
|
||||
# ResNetBuilder }}}
|
||||
|
||||
# BasicBlock {{{
|
||||
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    :param builder: layer factory providing ``conv3x3``, ``batchnorm`` and
        ``activation``
    :param inplanes: number of input channels
    :param planes: channels produced by the first convolution
    :param expansion: the block outputs ``planes * expansion`` channels
    :param stride: stride of the first convolution
    :param downsample: optional module applied to the input before it is
        added back as the shortcut
    """

    def __init__(self, builder, inplanes, planes, expansion, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        out_planes = planes * expansion

        self.conv1 = builder.conv3x3(inplanes, planes, stride)
        self.bn1 = builder.batchnorm(planes)
        self.relu = builder.activation()
        self.conv2 = builder.conv3x3(planes, out_planes)
        self.bn2 = builder.batchnorm(out_planes, last_bn=True)

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``activation(main_path(x) + shortcut(x))``."""
        out = self.conv1(x)
        if self.bn1 is not None:
            out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        if self.bn2 is not None:
            out = self.bn2(out)

        # The shortcut is the raw input unless a projection was supplied.
        residual = x if self.downsample is None else self.downsample(x)

        out += residual
        return self.relu(out)
|
||||
# BasicBlock }}}
|
||||
|
||||
# SqueezeAndExcitation {{{
|
||||
class SqueezeAndExcitation(nn.Module):
    """Channel gate: global average pool -> Linear -> ReLU -> Linear -> Sigmoid.

    :param planes: number of channels of the gated feature map
    :param squeeze: width of the hidden (bottleneck) linear layer
    """

    def __init__(self, planes, squeeze):
        super(SqueezeAndExcitation, self).__init__()
        self.squeeze = nn.Linear(planes, squeeze)
        self.expand = nn.Linear(squeeze, planes)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return per-channel gates with two trailing singleton dimensions."""
        batch, channels = x.size(0), x.size(1)
        # Flatten everything after the channel axis and average over it.
        pooled = torch.mean(x.view(batch, channels, -1), 2)

        hidden = self.relu(self.squeeze(pooled))
        gate = self.sigmoid(self.expand(hidden))

        # Append two size-1 axes so the gate broadcasts against the input.
        return gate.unsqueeze(2).unsqueeze(3)
|
||||
|
||||
# }}}
|
||||
|
||||
# Bottleneck {{{
|
||||
class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions.

    :param builder: layer factory providing ``conv1x1``, ``conv3x3``,
        ``batchnorm`` and ``activation``
    :param inplanes: number of input channels
    :param planes: channels of the two inner convolutions
    :param expansion: the block outputs ``planes * expansion`` channels
    :param stride: stride of the 3x3 convolution
    :param se: when true, gate the main path with a ``SqueezeAndExcitation``
        module before adding the shortcut
    :param se_squeeze: hidden width of that squeeze-and-excitation module
    :param downsample: optional module applied to the input before it is
        added back as the shortcut
    """

    def __init__(self, builder, inplanes, planes, expansion, stride=1, se=False, se_squeeze=16, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = builder.conv1x1(inplanes, planes)
        self.bn1 = builder.batchnorm(planes)
        self.conv2 = builder.conv3x3(planes, planes, stride=stride)
        self.bn2 = builder.batchnorm(planes)
        self.conv3 = builder.conv1x1(planes, planes * expansion)
        self.bn3 = builder.batchnorm(planes * expansion, last_bn=True)
        self.relu = builder.activation()
        self.downsample = downsample
        self.stride = stride
        self.squeeze = SqueezeAndExcitation(planes * expansion, se_squeeze) if se else None

    def forward(self, x):
        """Return ``activation(shortcut(x) + main_path(x) [* gate])``."""
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        if self.squeeze is None:
            out += residual
        else:
            # residual + 1.0 * out * gate.  The scalar is passed by keyword:
            # the positional form addcmul(input, value, t1, t2) is a
            # deprecated overload.
            out = torch.addcmul(residual, out, self.squeeze(out), value=1.0)

        out = self.relu(out)

        return out
|
||||
|
||||
def SEBottleneck(builder, inplanes, planes, expansion, stride=1, downsample=None):
    """Build a ``Bottleneck`` with squeeze-and-excitation enabled (squeeze width 16)."""
    se_options = dict(se=True, se_squeeze=16)
    return Bottleneck(builder,
                      inplanes,
                      planes,
                      expansion,
                      stride=stride,
                      downsample=downsample,
                      **se_options)
|
||||
# Bottleneck }}}
|
||||
|
||||
# ResNet {{{
|
||||
class ResNet(nn.Module):
    """ResNet backbone with a linear classification head.

    The stem is a stride-2 7x7 convolution from 3 to 64 channels followed by
    batch norm, activation and a stride-2 3x3 max pool.  Four stages of
    residual blocks follow (the first with stride 1, the others with stride
    2), then global average pooling and a fully connected layer.

    :param builder: layer factory providing ``conv7x7``, ``conv1x1``,
        ``batchnorm`` and ``activation``
    :param block: callable creating one residual block
    :param expansion: channel multiplier applied to each stage width
    :param layers: number of blocks in each of the four stages
    :param widths: base channel width of each of the four stages
    :param num_classes: size of the classifier output
    """

    def __init__(self, builder, block, expansion, layers, widths, num_classes=1000):
        super(ResNet, self).__init__()
        # Channel count feeding the next block; updated by _make_layer.
        self.inplanes = 64

        self.conv1 = builder.conv7x7(3, 64, stride=2)
        self.bn1 = builder.batchnorm(64)
        self.relu = builder.activation()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        for index, stage_stride in enumerate((1, 2, 2, 2)):
            stage = self._make_layer(builder, block, expansion,
                                     widths[index], layers[index],
                                     stride=stage_stride)
            setattr(self, 'layer{}'.format(index + 1), stage)

        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(widths[3] * expansion, num_classes)

    def _make_layer(self, builder, block, expansion, planes, blocks, stride=1):
        """Assemble one stage of ``blocks`` residual blocks.

        Only the first block receives ``stride`` and, when the stride or the
        channel count changes, a 1x1-convolution (+ batch norm) projection
        for its shortcut.
        """
        out_planes = planes * expansion

        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            dconv = builder.conv1x1(self.inplanes, out_planes, stride=stride)
            dbn = builder.batchnorm(out_planes)
            downsample = dconv if dbn is None else nn.Sequential(dconv, dbn)

        stage = [block(builder, self.inplanes, planes, expansion,
                       stride=stride, downsample=downsample)]
        self.inplanes = out_planes
        stage.extend(block(builder, self.inplanes, planes, expansion)
                     for _ in range(1, blocks))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Run stem, four stages, pooling and the classifier on ``x``."""
        x = self.conv1(x)
        if self.bn1 is not None:
            x = self.bn1(x)
        x = self.maxpool(self.relu(x))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        x = self.avgpool(x)
        # Flatten to (batch, features) for the linear head.
        x = x.view(x.size(0), -1)
        return self.fc(x)
|
||||
# ResNet }}}
|
||||
|
||||
# Named layer-construction presets consumed by ResNetBuilder.  The presets
# differ only in 'conv_init', the `mode` handed to Kaiming initialisation.
resnet_configs = {
    'classic': dict(
        conv=nn.Conv2d,
        conv_init='fan_out',
        nonlinearity='relu',
        last_bn_0_init=False,
        activation=lambda: nn.ReLU(inplace=True),
    ),
    'fanin': dict(
        conv=nn.Conv2d,
        conv_init='fan_in',
        nonlinearity='relu',
        last_bn_0_init=False,
        activation=lambda: nn.ReLU(inplace=True),
    ),
    'grp-fanin': dict(
        conv=nn.Conv2d,
        conv_init='fan_in',
        nonlinearity='relu',
        last_bn_0_init=False,
        activation=lambda: nn.ReLU(inplace=True),
    ),
    'grp-fanout': dict(
        conv=nn.Conv2d,
        conv_init='fan_out',
        nonlinearity='relu',
        last_bn_0_init=False,
        activation=lambda: nn.ReLU(inplace=True),
    ),
}
|
||||
|
||||
# Architecture table: for every supported model name, the network class, the
# residual block factory, the number of blocks and base width of each stage,
# the channel expansion factor and the classifier size.  ResNeXt entries
# additionally carry 'cardinality', which ResNetBuilder reads as the group
# count of its 3x3 convolutions.
resnet_versions = {
    'resnet18': dict(
        net=ResNet,
        block=BasicBlock,
        layers=[2, 2, 2, 2],
        widths=[64, 128, 256, 512],
        expansion=1,
        num_classes=1000,
    ),
    'resnet34': dict(
        net=ResNet,
        block=BasicBlock,
        layers=[3, 4, 6, 3],
        widths=[64, 128, 256, 512],
        expansion=1,
        num_classes=1000,
    ),
    'resnet50': dict(
        net=ResNet,
        block=Bottleneck,
        layers=[3, 4, 6, 3],
        widths=[64, 128, 256, 512],
        expansion=4,
        num_classes=1000,
    ),
    'resnet101': dict(
        net=ResNet,
        block=Bottleneck,
        layers=[3, 4, 23, 3],
        widths=[64, 128, 256, 512],
        expansion=4,
        num_classes=1000,
    ),
    'resnet152': dict(
        net=ResNet,
        block=Bottleneck,
        layers=[3, 8, 36, 3],
        widths=[64, 128, 256, 512],
        expansion=4,
        num_classes=1000,
    ),
    'resnext101-32x4d': dict(
        net=ResNet,
        block=Bottleneck,
        cardinality=32,
        layers=[3, 4, 23, 3],
        widths=[128, 256, 512, 1024],
        expansion=2,
        num_classes=1000,
    ),
    'se-resnext101-32x4d': dict(
        net=ResNet,
        block=SEBottleneck,
        cardinality=32,
        layers=[3, 4, 23, 3],
        widths=[128, 256, 512, 1024],
        expansion=2,
        num_classes=1000,
    ),
}
|
||||
|
||||
|
||||
def build_resnet(version, config, verbose=True):
    """Instantiate the network registered under ``version`` and ``config``.

    :param version: key into ``resnet_versions`` (architecture)
    :param config: key into ``resnet_configs`` (layer-construction preset)
    :param verbose: when true, print both resolved dictionaries
    :return: the constructed model
    :raises KeyError: if either key is not registered
    """
    spec = resnet_versions[version]
    preset = resnet_configs[config]

    builder = ResNetBuilder(spec, preset)
    if verbose:
        print("Version: {}".format(spec))
        print("Config: {}".format(preset))

    net_cls = spec['net']
    return net_cls(builder,
                   spec['block'],
                   spec['expansion'],
                   spec['layers'],
                   spec['widths'],
                   spec['num_classes'])
|
||||
+91
@@ -0,0 +1,91 @@
|
||||
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the BSD 3-Clause License (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://opensource.org/licenses/BSD-3-Clause
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
|
||||
class CrossEntropy(nn.CrossEntropyLoss):
    """Label-smoothed cross entropy built on the ``torch.npu_*`` operators.

    :param smooth_factor: label smoothing factor; the true class is encoded
        as ``1 - smooth_factor``
    :param num_classes: used only to spread ``smooth_factor`` evenly over the
        remaining ``num_classes - 1`` classes
    """

    def __init__(self, smooth_factor=0., num_classes=1000):
        super(CrossEntropy, self).__init__()
        other_classes = num_classes - 1
        self.on_value = 1.0 - smooth_factor
        self.off_value = 1.0 * smooth_factor / other_classes

    def forward(self, input, target):
        """Return the batch-mean loss as a float32 tensor.

        The one-hot depth is taken from ``input.size(1)``; logits and labels
        are cast to float16 before the NPU loss operator is called.
        """
        depth = input.size(1)
        soft_labels = torch.npu_one_hot(target, -1, depth, self.on_value, self.off_value)
        soft_labels = soft_labels.to(torch.float16)

        half_logits = input.to(torch.float16)
        per_sample = torch.npu_softmax_cross_entropy_with_logits(half_logits, soft_labels)

        # Average over the batch axis, accumulating in float32.
        return torch.mean(per_sample, [0], keepdim=False, dtype=torch.float32)
|
||||
|
||||
class LabelSmoothingNpu(nn.Module):
    """
    NLL loss with label smoothing, with the loss arithmetic done on the CPU.
    """
    def __init__(self, smoothing=0.0):
        """
        Constructor for the LabelSmoothing module.

        :param smoothing: label smoothing factor
        """
        super(LabelSmoothingNpu, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing

    def forward(self, x, target):
        """
        Compute the smoothed loss for a batch.

        The soft target of a sample is ``confidence`` times the one-hot
        encoding of its label plus ``smoothing / num_classes`` on every class,
        where ``num_classes`` is the size of the last dimension of ``x``.

        :param x: logits; the scatter below indexes dim 1, so ``x`` is
            expected to be 2-D with classes last
        :param target: 1-D tensor of integer class indices
        :return: scalar loss, moved back to the device of ``x``
        """
        calculate_device = x.device
        logprobs = torch.nn.functional.log_softmax(x, dim=-1).to("cpu")
        # scatter_ needs the index tensor on the same device as the tensor it
        # writes into, so the labels follow the log-probabilities to the CPU.
        target = target.to("cpu")

        # Use the configured smoothing factor and the real class count rather
        # than hard-coded constants, so the constructor argument takes effect.
        num_classes = logprobs.size(-1)
        targets = torch.zeros_like(logprobs).scatter_(1, target.unsqueeze(1), 1)
        targets = self.confidence * targets + self.smoothing / num_classes
        loss = (-targets * logprobs).mean(0).sum()

        return loss.to(calculate_device)
|
||||
|
||||
|
||||
class LabelSmoothingGpu(nn.Module):
    """
    NLL loss with label smoothing, computed on the device of the inputs.
    """
    def __init__(self, smoothing=0.0):
        """
        Constructor for the LabelSmoothing module.

        :param smoothing: label smoothing factor
        """
        super(LabelSmoothingGpu, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing

    def forward(self, x, target):
        """
        Return the batch mean of
        ``confidence * nll + smoothing * mean(-log_softmax(x))``.

        :param x: logits with classes on the last dimension
        :param target: integer class index for every sample
        """
        log_probs = torch.nn.functional.log_softmax(x, dim=-1)

        # Negative log-probability of the labelled class of each sample.
        picked = log_probs.gather(dim=-1, index=target.unsqueeze(1))
        nll_term = (-picked).squeeze(1)

        # Negative log-probability averaged over all classes.
        uniform_term = -log_probs.mean(dim=-1)

        per_sample = self.confidence * nll_term + self.smoothing * uniform_term
        return per_sample.mean()
|
||||
+51
@@ -0,0 +1,51 @@
|
||||
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the BSD 3-Clause License (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://opensource.org/licenses/BSD-3-Clause
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
class LabelSmoothing(nn.Module):
    """
    NLL loss with label smoothing, evaluated on the CPU.
    """
    def __init__(self, smoothing=0.0):
        """
        Constructor for the LabelSmoothing module.

        :param smoothing: label smoothing factor
        """
        super(LabelSmoothing, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing

    def forward(self, x, target):
        """
        Return the batch mean of
        ``confidence * nll + smoothing * mean(-log_softmax(x))``.

        Both inputs are copied to the CPU first and the returned scalar stays
        on the CPU.  The callers' tensors are not modified, so nothing is
        copied back to the original devices.

        :param x: logits with classes on the last dimension
        :param target: integer class index for every sample
        """
        x = x.to("cpu")
        target = target.to("cpu")
        logprobs = torch.nn.functional.log_softmax(x, dim=-1)

        nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1))
        nll_loss = nll_loss.squeeze(1)
        smooth_loss = -logprobs.mean(dim=-1)
        loss = self.confidence * nll_loss + self.smoothing * smooth_loss

        return loss.mean()
|
||||
+534
@@ -0,0 +1,534 @@
|
||||
# Copyright (c) 2018-2019, NVIDIA CORPORATION
|
||||
# Copyright (c) 2017- Facebook, Inc
|
||||
#
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# * Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
import os
|
||||
import time
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.autograd import Variable
|
||||
from . import logger as log
|
||||
from . import resnet as nvmodels
|
||||
from . import utils
|
||||
import dllogger
|
||||
try:
|
||||
#from apex.parallel import DistributedDataParallel as DDP  # PyTorch's own torch.distributed can be used instead
|
||||
from apex.fp16_utils import *
|
||||
from apex import amp
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Please install apex from https://www.github.com/nvidia/apex to run this example."
|
||||
)
|
||||
|
||||
ACC_METADATA = {'unit': '%','format': ':.2f'}
|
||||
IPS_METADATA = {'unit': 'img/s', 'format': ':.2f'}
|
||||
TIME_METADATA = {'unit': 's', 'format': ':.5f'}
|
||||
LOSS_METADATA = {'format': ':.5f'}
|
||||
|
||||
|
||||
class ModelAndLoss(nn.Module):
    """Bundle a ResNet and its loss so one call yields ``(loss, output)``.

    :param arch: indexable pair; ``arch[0]`` is the version key and
        ``arch[1]`` the config key passed to ``build_resnet``
    :param loss: zero-argument callable returning the criterion module
    :param pretrained_weights: optional state dict loaded into the model
    :param cuda: when true, move model and criterion with ``.cuda()``
    :param fp16: when true, wrap the model with ``network_to_half``
    """

    def __init__(self,
                 arch,
                 loss,
                 pretrained_weights=None,
                 cuda=True,
                 fp16=False):
        super(ModelAndLoss, self).__init__()
        self.arch = arch

        print("=> creating model '{}'".format(arch))
        net = nvmodels.build_resnet(arch[0], arch[1])
        if pretrained_weights is not None:
            print("=> using pre-trained model from a file '{}'".format(arch))
            net.load_state_dict(pretrained_weights)

        if cuda:
            net = net.cuda()
        if fp16:
            net = network_to_half(net)

        # Build the loss function (criterion) after the model is placed.
        loss_fn = loss()
        if cuda:
            loss_fn = loss_fn.cuda()

        self.model = net
        self.loss = loss_fn

    def forward(self, data, target):
        """Return ``(loss, output)`` for one batch."""
        output = self.model(data)
        return self.loss(output, target), output

    def distributed(self):
        """No-op; the apex DDP wrapping is disabled in this version."""
        #self.model = DDP(self.model)
        return

    def load_model_state(self, state):
        """Load ``state`` into the wrapped model unless it is ``None``."""
        if state is None:
            return
        self.model.load_state_dict(state)
|
||||
|
||||
|
||||
def get_optimizer(parameters,
                  fp16,
                  lr,
                  momentum,
                  weight_decay,
                  nesterov=False,
                  state=None,
                  static_loss_scale=1.,
                  dynamic_loss_scale=False,
                  bn_weight_decay=False):
    """Create the SGD optimizer, optionally exempting BN parameters from decay.

    :param parameters: iterable of ``(name, parameter)`` pairs; a one-shot
        iterator such as ``model.named_parameters()`` is accepted
    :param fp16: when true, wrap the optimizer in ``FP16_Optimizer``
    :param lr: learning rate
    :param momentum: SGD momentum
    :param weight_decay: weight decay for the decayed parameters
    :param nesterov: enable Nesterov momentum
    :param state: optional optimizer state dict to restore
    :param static_loss_scale: forwarded to ``FP16_Optimizer``
    :param dynamic_loss_scale: forwarded to ``FP16_Optimizer``
    :param bn_weight_decay: when false, parameters whose name contains
        ``'bn'`` are placed in a separate group with zero weight decay
    :return: the configured optimizer
    """
    # Materialise once: the pairs are traversed twice below, and a generator
    # would be exhausted after the first pass, leaving the second group empty.
    named_params = list(parameters)

    if bn_weight_decay:
        print(" ! Weight decay applied to BN parameters ")
        optimizer = torch.optim.SGD([v for n, v in named_params],
                                    lr,
                                    momentum=momentum,
                                    weight_decay=weight_decay,
                                    nesterov=nesterov)
    else:
        print(" ! Weight decay NOT applied to BN parameters ")
        bn_params = [v for n, v in named_params if 'bn' in n]
        rest_params = [v for n, v in named_params if 'bn' not in n]
        print(len(bn_params))
        print(len(rest_params))
        optimizer = torch.optim.SGD([{
            'params': bn_params,
            'weight_decay': 0
        }, {
            'params': rest_params,
            'weight_decay': weight_decay
        }],
                                    lr,
                                    momentum=momentum,
                                    weight_decay=weight_decay,
                                    nesterov=nesterov)
    if fp16:
        optimizer = FP16_Optimizer(optimizer,
                                   static_loss_scale=static_loss_scale,
                                   dynamic_loss_scale=dynamic_loss_scale,
                                   verbose=False)

    if state is not None:
        optimizer.load_state_dict(state)

    return optimizer
|
||||
|
||||
|
||||
def lr_policy(lr_fn, logger=None):
    """Turn a learning-rate function into an optimizer-updating callback.

    :param lr_fn: callable ``(iteration, epoch) -> lr``
    :param logger: optional logger; when given, an ``'lr'`` metric is
        registered now and logged on every callback invocation
    :return: callable ``(optimizer, iteration, epoch)`` that writes the
        computed rate into every parameter group of ``optimizer``
    """
    if logger is not None:
        logger.register_metric('lr',
                               log.LR_METER(),
                               verbosity=dllogger.Verbosity.VERBOSE)

    def _alr(optimizer, iteration, epoch):
        new_lr = lr_fn(iteration, epoch)

        if logger is not None:
            logger.log_metric('lr', new_lr)

        for group in optimizer.param_groups:
            group['lr'] = new_lr

    return _alr
|
||||
|
||||
|
||||
def lr_step_policy(base_lr, steps, decay_factor, warmup_length, logger=None):
    """Step schedule with linear warm-up.

    During the first ``warmup_length`` epochs the rate ramps linearly up to
    ``base_lr``; afterwards ``base_lr`` is multiplied by ``decay_factor``
    once for every entry of ``steps`` that the epoch has reached.
    """
    def _lr_fn(iteration, epoch):
        if epoch < warmup_length:
            return base_lr * (epoch + 1) / warmup_length

        lr = base_lr
        for milestone in steps:
            if epoch >= milestone:
                lr *= decay_factor
        return lr

    return lr_policy(_lr_fn, logger=logger)
|
||||
|
||||
|
||||
def lr_linear_policy(base_lr, warmup_length, epochs, logger=None):
    """Linear warm-up followed by linear decay.

    After warm-up the rate is ``base_lr * (1 - e / es)`` where ``e`` counts
    epochs since the end of warm-up and ``es = epochs - warmup_length``.
    """
    def _lr_fn(iteration, epoch):
        if epoch < warmup_length:
            return base_lr * (epoch + 1) / warmup_length

        e = epoch - warmup_length
        es = epochs - warmup_length
        return base_lr * (1 - (e / es))

    return lr_policy(_lr_fn, logger=logger)
|
||||
|
||||
|
||||
def lr_cosine_policy(base_lr, warmup_length, epochs, logger=None):
    """Linear warm-up followed by half-cosine decay.

    After warm-up the rate is ``0.5 * (1 + cos(pi * e / es)) * base_lr``
    where ``e`` counts epochs since the end of warm-up and
    ``es = epochs - warmup_length``.
    """
    def _lr_fn(iteration, epoch):
        if epoch < warmup_length:
            return base_lr * (epoch + 1) / warmup_length

        e = epoch - warmup_length
        es = epochs - warmup_length
        return 0.5 * (1 + np.cos(np.pi * e / es)) * base_lr

    return lr_policy(_lr_fn, logger=logger)
|
||||
|
||||
|
||||
def lr_exponential_policy(base_lr,
                          warmup_length,
                          epochs,
                          final_multiplier=0.001,
                          logger=None):
    """Linear warm-up followed by exponential decay.

    The per-epoch decay factor is ``2 ** (log2(final_multiplier) / es)`` with
    ``es = epochs - warmup_length``; after warm-up the rate is ``base_lr``
    times that factor raised to the number of epochs since warm-up ended.
    """
    es = epochs - warmup_length
    epoch_decay = np.power(2, np.log2(final_multiplier) / es)

    def _lr_fn(iteration, epoch):
        if epoch < warmup_length:
            return base_lr * (epoch + 1) / warmup_length

        e = epoch - warmup_length
        return base_lr * (epoch_decay**e)

    return lr_policy(_lr_fn, logger=logger)
|
||||
|
||||
|
||||
def get_train_step(model_and_loss,
                   optimizer,
                   fp16,
                   use_amp=False,
                   batch_size_multiplier=1):
    """Build the closure that runs one training iteration.

    :param model_and_loss: callable returning ``(loss, output)`` for a batch
    :param optimizer: optimizer, possibly wrapped in ``FP16_Optimizer``
    :param fp16: when true, back-propagate through ``optimizer.backward``
    :param use_amp: when true (and ``fp16`` is false), back-propagate through
        ``amp.scale_loss``
    :param batch_size_multiplier: divisor applied to the accumulated
        gradients right before the optimizer step
    :return: ``_step(input, target, optimizer_step=True)`` returning the loss
        (reduced via ``utils.reduce_tensor`` when torch.distributed is
        initialised)
    """
    def _step(input, target, optimizer_step=True):
        input_var = Variable(input)
        target_var = Variable(target)
        loss, output = model_and_loss(input_var, target_var)
        if torch.distributed.is_initialized():
            # NOTE(review): this looks like a leftover debug print; it fires
            # on every step in distributed runs -- confirm it is wanted.
            print('utils.reduce_tensor(loss.data)')
            reduced_loss = utils.reduce_tensor(loss.data)
        else:
            reduced_loss = loss.data

        if fp16:
            optimizer.backward(loss)
        elif use_amp:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        if optimizer_step:
            opt = optimizer.optimizer if isinstance(
                optimizer, FP16_Optimizer) else optimizer
            for param_group in opt.param_groups:
                for param in param_group['params']:
                    # Parameters that received no gradient have grad None;
                    # dividing None would raise TypeError.
                    if param.grad is None:
                        continue
                    param.grad /= batch_size_multiplier

            optimizer.step()
            optimizer.zero_grad()

        torch.cuda.synchronize()

        return reduced_loss

    return _step
|
||||
|
||||
|
||||
def train(train_loader,
          model_and_loss,
          optimizer,
          lr_scheduler,
          fp16,
          logger,
          epoch,
          use_amp=False,
          prof=-1,
          batch_size_multiplier=1,
          register_metrics=True):
    """Run one training epoch.

    :param train_loader: iterable yielding ``(input, target)`` batches
    :param model_and_loss: module returning ``(loss, output)``
    :param optimizer: optimizer stepped every ``batch_size_multiplier``
        iterations
    :param lr_scheduler: callable ``(optimizer, iteration, epoch)`` invoked
        before every iteration
    :param fp16: forwarded to ``get_train_step``
    :param logger: optional metric logger
    :param epoch: current epoch index, forwarded to ``lr_scheduler``
    :param use_amp: forwarded to ``get_train_step``
    :param prof: when positive, limit the epoch via ``utils.first_n``
    :param batch_size_multiplier: number of iterations accumulated per
        optimizer step
    :param register_metrics: register the ``train.*`` metrics on the logger
    """
    if register_metrics and logger is not None:
        # (metric name, meter factory, verbosity, metadata), in
        # registration order.
        metric_specs = (
            ('train.loss', log.LOSS_METER,
             dllogger.Verbosity.DEFAULT, LOSS_METADATA),
            ('train.compute_ips', log.PERF_METER,
             dllogger.Verbosity.VERBOSE, IPS_METADATA),
            ('train.total_ips', log.PERF_METER,
             dllogger.Verbosity.DEFAULT, IPS_METADATA),
            ('train.data_time', log.PERF_METER,
             dllogger.Verbosity.VERBOSE, TIME_METADATA),
            ('train.compute_time', log.PERF_METER,
             dllogger.Verbosity.VERBOSE, TIME_METADATA),
        )
        for metric_name, make_meter, verbosity, metadata in metric_specs:
            logger.register_metric(metric_name,
                                   make_meter(),
                                   verbosity=verbosity,
                                   metadata=metadata)

    step = get_train_step(model_and_loss,
                          optimizer,
                          fp16,
                          use_amp=use_amp,
                          batch_size_multiplier=batch_size_multiplier)

    model_and_loss.train()
    end = time.time()

    optimizer.zero_grad()

    data_iter = enumerate(train_loader)
    if logger is not None:
        data_iter = logger.iteration_generator_wrapper(data_iter)
    if prof > 0:
        data_iter = utils.first_n(prof, data_iter)

    for i, (input, target) in data_iter:
        bs = input.size(0)
        lr_scheduler(optimizer, i, epoch)
        data_time = time.time() - end

        # Gradients accumulate; step only on every
        # batch_size_multiplier-th iteration.
        do_optimizer_step = ((i + 1) % batch_size_multiplier) == 0
        loss = step(input, target, optimizer_step=do_optimizer_step)

        it_time = time.time() - end

        if logger is not None:
            logger.log_metric('train.loss', to_python_float(loss), bs)
            logger.log_metric('train.compute_ips',
                              calc_ips(bs, it_time - data_time))
            logger.log_metric('train.total_ips', calc_ips(bs, it_time))
            logger.log_metric('train.data_time', data_time)
            logger.log_metric('train.compute_time', it_time - data_time)

        end = time.time()
|
||||
|
||||
|
||||
def get_val_step(model_and_loss):
    """Build the closure that evaluates one validation batch.

    :param model_and_loss: callable returning ``(loss, output)`` for a batch
    :return: ``_step(input, target)`` returning ``(loss, prec1, prec5)``;
        all three are passed through ``utils.reduce_tensor`` when
        torch.distributed is initialised
    """
    def _step(input, target):
        input_var = Variable(input)
        target_var = Variable(target)

        with torch.no_grad():
            loss, output = model_and_loss(input_var, target_var)

            prec1, prec5 = utils.accuracy(output.data, target, topk=(1, 5))

            if not torch.distributed.is_initialized():
                reduced_loss = loss.data
            else:
                reduced_loss = utils.reduce_tensor(loss.data)
                prec1 = utils.reduce_tensor(prec1)
                prec5 = utils.reduce_tensor(prec5)

        torch.cuda.synchronize()

        return reduced_loss, prec1, prec5

    return _step
|
||||
|
||||
|
||||
def validate(val_loader,
             model_and_loss,
             fp16,
             logger,
             epoch,
             prof=-1,
             register_metrics=True):
    """Evaluate the model on ``val_loader`` for one epoch.

    :param val_loader: iterable yielding ``(input, target)`` batches
    :param model_and_loss: module returning ``(loss, output)``
    :param fp16: unused in this function
    :param logger: optional metric logger
    :param epoch: unused in this function
    :param prof: when positive, limit the epoch via ``utils.first_n``
    :param register_metrics: register the ``val.*`` metrics on the logger
    :return: ``get_val()`` of the meter accumulating top-1 precision
    """
    if register_metrics and logger is not None:
        # (metric name, meter factory, verbosity, metadata), in
        # registration order.
        metric_specs = (
            ('val.top1', log.ACC_METER,
             dllogger.Verbosity.DEFAULT, ACC_METADATA),
            ('val.top5', log.ACC_METER,
             dllogger.Verbosity.DEFAULT, ACC_METADATA),
            ('val.loss', log.LOSS_METER,
             dllogger.Verbosity.DEFAULT, LOSS_METADATA),
            ('val.compute_ips', log.PERF_METER,
             dllogger.Verbosity.VERBOSE, IPS_METADATA),
            ('val.total_ips', log.PERF_METER,
             dllogger.Verbosity.DEFAULT, IPS_METADATA),
            ('val.data_time', log.PERF_METER,
             dllogger.Verbosity.VERBOSE, TIME_METADATA),
            ('val.compute_latency', log.PERF_METER,
             dllogger.Verbosity.VERBOSE, TIME_METADATA),
            ('val.compute_latency_at100', log.LAT_100,
             dllogger.Verbosity.VERBOSE, TIME_METADATA),
            ('val.compute_latency_at99', log.LAT_99,
             dllogger.Verbosity.VERBOSE, TIME_METADATA),
            ('val.compute_latency_at95', log.LAT_95,
             dllogger.Verbosity.VERBOSE, TIME_METADATA),
        )
        for metric_name, make_meter, verbosity, metadata in metric_specs:
            logger.register_metric(metric_name,
                                   make_meter(),
                                   verbosity=verbosity,
                                   metadata=metadata)

    step = get_val_step(model_and_loss)

    top1 = log.AverageMeter()
    # switch to evaluate mode
    model_and_loss.eval()

    end = time.time()

    data_iter = enumerate(val_loader)
    if logger is not None:
        data_iter = logger.iteration_generator_wrapper(data_iter, val=True)
    if prof > 0:
        data_iter = utils.first_n(prof, data_iter)

    for i, (input, target) in data_iter:
        bs = input.size(0)
        data_time = time.time() - end

        loss, prec1, prec5 = step(input, target)

        it_time = time.time() - end

        top1.record(to_python_float(prec1), bs)
        if logger is not None:
            logger.log_metric('val.top1', to_python_float(prec1), bs)
            logger.log_metric('val.top5', to_python_float(prec5), bs)
            logger.log_metric('val.loss', to_python_float(loss), bs)
            logger.log_metric('val.compute_ips',
                              calc_ips(bs, it_time - data_time))
            logger.log_metric('val.total_ips', calc_ips(bs, it_time))
            logger.log_metric('val.data_time', data_time)
            # The same compute latency feeds the plain meter and the three
            # percentile meters.
            for latency_metric in ('val.compute_latency',
                                   'val.compute_latency_at95',
                                   'val.compute_latency_at99',
                                   'val.compute_latency_at100'):
                logger.log_metric(latency_metric, it_time - data_time)

        end = time.time()

    return top1.get_val()
|
||||
|
||||
|
||||
# Train loop {{{
|
||||
def calc_ips(batch_size, time):
    """Return images per second summed over all distributed workers.

    :param batch_size: number of samples processed by this worker
    :param time: seconds the processing took
    """
    if torch.distributed.is_initialized():
        world_size = torch.distributed.get_world_size()
    else:
        world_size = 1
    total_batch_size = world_size * batch_size
    return total_batch_size / time
|
||||
|
||||
|
||||
def train_loop(model_and_loss,
               optimizer,
               lr_scheduler,
               train_loader,
               val_loader,
               epochs,
               fp16,
               logger,
               should_backup_checkpoint,
               use_amp=False,
               batch_size_multiplier=1,
               best_prec1=0,
               start_epoch=0,
               prof=-1,
               skip_training=False,
               skip_validation=False,
               save_checkpoints=True,
               checkpoint_dir='./'):
    """Run epochs ``start_epoch`` .. ``epochs - 1`` of training and validation.

    :param model_and_loss: module returning ``(loss, output)``
    :param optimizer: optimizer whose state is saved in checkpoints
    :param lr_scheduler: callable ``(optimizer, iteration, epoch)``
    :param train_loader: training batches
    :param val_loader: validation batches
    :param epochs: exclusive upper bound of the epoch range
    :param fp16: forwarded to ``train`` and ``validate``
    :param logger: optional metric logger
    :param should_backup_checkpoint: callable ``epoch -> bool`` deciding
        whether a numbered backup checkpoint is written
    :param use_amp: forwarded to ``train``
    :param batch_size_multiplier: forwarded to ``train``
    :param best_prec1: best top-1 precision seen so far
    :param start_epoch: first epoch to run
    :param prof: forwarded to ``train`` and ``validate``
    :param skip_training: skip the training pass of every epoch
    :param skip_validation: skip the validation pass of every epoch
    :param save_checkpoints: write a checkpoint after every epoch (only on
        rank 0 when torch.distributed is initialised)
    :param checkpoint_dir: directory handed to ``utils.save_checkpoint``
    """
    prec1 = -1

    epoch_iter = range(start_epoch, epochs)
    for epoch in epoch_iter:
        if logger is not None:
            logger.start_epoch()
        if not skip_training:
            train(train_loader,
                  model_and_loss,
                  optimizer,
                  lr_scheduler,
                  fp16,
                  logger,
                  epoch,
                  use_amp=use_amp,
                  prof=prof,
                  register_metrics=epoch == start_epoch,
                  batch_size_multiplier=batch_size_multiplier)

        if not skip_validation:
            prec1, nimg = validate(val_loader,
                                   model_and_loss,
                                   fp16,
                                   logger,
                                   epoch,
                                   prof=prof,
                                   register_metrics=epoch == start_epoch)
        if logger is not None:
            logger.end_epoch()

        if save_checkpoints and (not torch.distributed.is_initialized()
                                 or torch.distributed.get_rank() == 0):
            if not skip_validation:
                if logger is not None:
                    epoch_prec1 = logger.metrics['val.top1']['meter'].get_epoch()
                else:
                    # Every other logger access in this function is guarded;
                    # without a logger fall back to the value validate()
                    # returned instead of dereferencing None.
                    epoch_prec1 = prec1
                is_best = epoch_prec1 > best_prec1
                best_prec1 = max(epoch_prec1, best_prec1)
            else:
                is_best = False
                best_prec1 = 0

            if should_backup_checkpoint(epoch):
                backup_filename = 'checkpoint-{}.pth.tar'.format(epoch + 1)
            else:
                backup_filename = None
            utils.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': model_and_loss.arch,
                    'state_dict': model_and_loss.model.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                checkpoint_dir=checkpoint_dir,
                backup_filename=backup_filename)
|
||||
|
||||
|
||||
# }}}
|
||||
+106
@@ -0,0 +1,106 @@
|
||||
# Copyright (c) 2018-2019, NVIDIA CORPORATION
|
||||
# Copyright (c) 2017- Facebook, Inc
|
||||
#
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# * Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
import os
|
||||
import numpy as np
|
||||
import torch
|
||||
import shutil
|
||||
import torch.distributed as dist
|
||||
|
||||
|
||||
def should_backup_checkpoint(args):
    """Build a predicate telling whether an epoch's checkpoint gets a backup copy.

    The returned callable takes an epoch index and evaluates to a truthy value
    only when ``args.gather_checkpoints`` is truthy and the epoch index is
    either below 10 or a multiple of 10.  ``args`` is consulted on every call,
    not captured by value.
    """

    def _sbc(epoch):
        gather = args.gather_checkpoints
        if not gather:
            # Same result as the short-circuiting ``and``: the falsy flag itself.
            return gather
        return epoch < 10 or epoch % 10 == 0

    return _sbc
|
||||
|
||||
|
||||
def save_checkpoint(state,
                    is_best,
                    filename='checkpoint.pth.tar',
                    checkpoint_dir='./',
                    backup_filename=None):
    """Write ``state`` to ``checkpoint_dir`` and optionally copy it.

    Nothing is written when ``torch.distributed`` is initialized and the
    calling process is not rank 0.

    Args:
        state: object handed to ``torch.save``.
        is_best: when truthy, the saved file is also copied to
            ``model_best.pth.tar`` inside ``checkpoint_dir``.
        filename: name of the checkpoint file inside ``checkpoint_dir``.
        checkpoint_dir: target directory; created if it does not exist yet.
        backup_filename: when not ``None``, the saved file is additionally
            copied to this name inside ``checkpoint_dir``.
    """
    if torch.distributed.is_initialized() and torch.distributed.get_rank() != 0:
        return

    # A missing target directory used to make torch.save fail; create it first.
    # An empty string means "current directory" and needs no creation.
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    filename = os.path.join(checkpoint_dir, filename)
    print("SAVING {}".format(filename))
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename,
                        os.path.join(checkpoint_dir, 'model_best.pth.tar'))
    if backup_filename is not None:
        shutil.copyfile(filename,
                        os.path.join(checkpoint_dir, backup_filename))
|
||||
|
||||
|
||||
def timed_generator(gen):
    """Iterate ``gen`` and yield ``(item, seconds)`` pairs.

    ``seconds`` is the wall-clock time spent waiting for ``gen`` to produce
    the item.  The clock is restarted after the consumer resumes this
    generator, so time spent by the consumer between items is not counted.
    """
    # Local import: the visible module-level imports of this file do not
    # include ``time``, so the bare name raised NameError on first use.
    import time

    start = time.time()
    for g in gen:
        end = time.time()
        t = end - start
        yield g, t
        start = time.time()
|
||||
|
||||
|
||||
def timed_function(f):
    """Wrap ``f`` so that each call returns ``(result, elapsed_seconds)``.

    The wrapper forwards all positional and keyword arguments to ``f`` and
    measures the wall-clock duration of that single call.
    """
    # Local import: the visible module-level imports of this file do not
    # include ``time``, so the bare name raised NameError on first use.
    import time

    def _timed_function(*args, **kwargs):
        start = time.time()
        ret = f(*args, **kwargs)
        return ret, time.time() - start

    return _timed_function
|
||||
|
||||
|
||||
def accuracy(output, target, topk=(1, )):
    """Computes the precision@k for the specified values of k.

    Args:
        output: 2-D score tensor; ``topk`` is taken along dimension 1.
        target: 1-D tensor of class indices, one per row of ``output``.
        topk: iterable of ``k`` values to evaluate.

    Returns:
        A list with one single-element tensor per ``k``, holding the
        percentage of rows whose target is among the ``k`` highest scores.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # ``correct`` inherits the transposed (non-contiguous) layout of
        # ``pred``; ``view(-1)`` rejects such a slice for k > 1, ``reshape``
        # copies when it has to.
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
|
||||
|
||||
|
||||
def reduce_tensor(tensor):
    """Return the mean of ``tensor`` across all distributed processes.

    The input is never modified; a clone is reduced and returned.  When no
    process group has been initialized the clone is returned unchanged.
    """
    rt = tensor.clone()
    # The original divisor already special-cased the non-distributed run, but
    # all_reduce was still called unconditionally and raised there.
    if not torch.distributed.is_initialized():
        return rt
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= torch.distributed.get_world_size()
    return rt
|
||||
|
||||
|
||||
def first_n(n, generator):
    """Yield at most the first ``n`` items of ``generator``.

    Stops early when ``generator`` runs out, and never pulls more than ``n``
    items from it.
    """
    source = iter(generator)
    for _ in range(n):
        try:
            item = next(source)
        except StopIteration:
            return
        yield item
|
||||
+1121
File diff suppressed because it is too large
Load Diff
+609
@@ -0,0 +1,609 @@
|
||||
import argparse
|
||||
import os
|
||||
import random
|
||||
import shutil
|
||||
import time
|
||||
import warnings
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.parallel
|
||||
import torch.backends.cudnn as cudnn
|
||||
import torch.distributed as dist
|
||||
import torch.optim
|
||||
import torch.multiprocessing as mp
|
||||
import torch.utils.data
|
||||
import torch.utils.data.distributed
|
||||
import torchvision.transforms as transforms
|
||||
import torchvision.datasets as datasets
|
||||
import torchvision.models as models
|
||||
import torch.npu
|
||||
|
||||
from apex import amp
|
||||
from benchmark_log import hwlog
|
||||
from benchmark_log.basic_utils import get_environment_info
|
||||
from benchmark_log.basic_utils import get_model_parameter
|
||||
'''
|
||||
python3.7 pytorch-resnet50-apex.py --data /opt/npu/dataset/imagenet --npu 7 -j64 -b512 --lr 0.2 --warmup 5 --epochs 90 --label-smoothing 0.1 --optimizer-batch-size 1024 > batch1024-lr0.2-wd.txt &
|
||||
'''
|
||||
# Default value of the --batch-size option.
BATCH_SIZE = 512
# Default value of the --epochs option.
EPOCHS_SIZE = 100
# train() leaves its batch loop once the batch index equals this value.
TRAIN_STEP = 8000
# train() and validate() display progress every LOG_STEP batches.
LOG_STEP = 1

# Device string used for tensors and the model; main() overwrites it from --npu.
CALCULATE_DEVICE = "npu:7"
# NOTE(review): not referenced anywhere else in the visible code.
PRINT_DEVICE = "cpu"
# Default value of the --data option.
SOURCE_DIR = "/data/imagenet"

# Sorted names of every lower-case, non-dunder callable exposed by
# torchvision.models; used as the allowed choices for --arch.
model_names = sorted(name for name in models.__dict__
                     if name.islower() and not name.startswith("__")
                     and callable(models.__dict__[name]))
|
||||
|
||||
# Command-line interface of the training script.  Help texts that mention a
# default use argparse's ``%(default)s`` placeholder so they cannot drift away
# from the real default (several of them previously stated a wrong value).
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--data', metavar='DIR', default=SOURCE_DIR,
                    help='path to dataset')
parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet50',
                    choices=model_names,
                    help='model architecture: ' +
                         ' | '.join(model_names) +
                         ' (default: %(default)s)')
parser.add_argument('-j', '--workers', default=32, type=int, metavar='N',
                    help='number of data loading workers (default: %(default)s)')
parser.add_argument('--epochs', default=EPOCHS_SIZE, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=BATCH_SIZE, type=int,
                    metavar='N',
                    help='mini-batch size (default: %(default)s), this is the total '
                         'batch size of all GPUs on the current node when '
                         'using Data Parallel or Distributed Data Parallel')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)',
                    dest='weight_decay')
# NOTE(review): the visible train()/validate() code logs every LOG_STEP
# batches and does not read print_freq.
parser.add_argument('-p', '--print-freq', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                    help='evaluate model on validation set')
parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                    help='use pre-trained model')
parser.add_argument('--world-size', default=-1, type=int,
                    help='number of nodes for distributed training')
parser.add_argument('--rank', default=-1, type=int,
                    help='node rank for distributed training')
parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
                    help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str,
                    help='distributed backend')
parser.add_argument('--seed', default=None, type=int,
                    help='seed for initializing training. ')
parser.add_argument('--gpu', default=None, type=int,
                    help='GPU id to use.')
parser.add_argument('--npu', default=None, type=int,
                    help='NPU id to use.')
parser.add_argument('--multiprocessing-distributed', action='store_true',
                    help='Use multi-processing distributed training to launch '
                         'N processes per node, which has N GPUs. This is the '
                         'fastest way to use PyTorch for either single node or '
                         'multi node data parallel training')
parser.add_argument('--warmup',
                    default=0,
                    type=int,
                    metavar='E',
                    help='number of warmup epochs')
parser.add_argument('--label-smoothing',
                    default=0.0,
                    type=float,
                    metavar='S',
                    help='label smoothing')
parser.add_argument('--optimizer-batch-size',
                    default=-1,
                    type=int,
                    metavar='N',
                    help=
                    'size of a total batch size, for simulating bigger batches using gradient accumulation')

# NOTE(review): the visible code passes a fixed loss_scale to amp.initialize
# and does not read static_loss_scale.
parser.add_argument(
    '--static-loss-scale',
    type=float,
    default=1,
    help=
    'Static loss scale, positive power of 2 values can improve fp16 convergence.')

# Best top-1 validation accuracy seen so far; updated by main_worker().
best_acc1 = 0
|
||||
|
||||
|
||||
def main():
    """Parse the command line, select the NPU and start training.

    Sets the module-level ``CALCULATE_DEVICE`` from ``--npu`` (0 when the
    option is absent), optionally seeds the RNGs, derives
    ``args.distributed`` and then either spawns one ``main_worker`` process
    per device reported by ``torch.cuda.device_count()`` or calls
    ``main_worker`` directly in this process.
    """
    global CALCULATE_DEVICE

    args = parser.parse_args()

    # Fall back to device 0 when no --npu was given, then bind to that device.
    if args.npu is None:
        args.npu = 0
    CALCULATE_DEVICE = "npu:{}".format(args.npu)
    torch.npu.set_device(CALCULATE_DEVICE)
    print("use ", CALCULATE_DEVICE)

    seed = args.seed
    if seed is not None:
        random.seed(seed)
        torch.manual_seed(seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    # With the env:// rendezvous the world size comes from the environment
    # unless it was given explicitly.
    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    device_count = torch.cuda.device_count()
    if not args.multiprocessing_distributed:
        # Single-process path: run the worker right here.
        main_worker(args.gpu, device_count, args)
        return

    # One process per device, so the total world size is scaled accordingly
    # before the workers are spawned.
    args.world_size = device_count * args.world_size
    mp.spawn(main_worker, nprocs=device_count, args=(device_count, args))
|
||||
|
||||
|
||||
def main_worker(gpu, ngpus_per_node, args):
    """Build the model, data loaders and optimizer, then train and validate.

    Args:
        gpu: device index stored into ``args.gpu``; ``None`` selects the
            ``CALCULATE_DEVICE`` path for a non-distributed run.
        ngpus_per_node: device count used to derive the global rank and to
            split batch size / workers in the distributed single-GPU branch.
        args: parsed command-line namespace; ``gpu``, ``rank``,
            ``batch_size``, ``workers`` and ``start_epoch`` may be rewritten.

    After every epoch a checkpoint named ``checkpoint_npu<N>`` is written via
    ``save_checkpoint`` and the module-level ``best_acc1`` is updated.
    """
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)
    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch](zero_init_residual=True)
        # NOTE(review): indentation was lost in the reviewed copy; this loop is
        # placed in the non-pretrained branch so that pretrained weights are
        # not re-initialised -- confirm against the original file.
        for layer in model.modules():
            if isinstance(layer, nn.Linear):
                torch.nn.init.kaiming_normal_(layer.weight, a=math.sqrt(5), )
    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            #model = torch.nn.DataParallel(model).cuda()
            # Default single-device path: move the model to the selected NPU.
            model = model.to(CALCULATE_DEVICE)

    # Per-epoch learning-rate setter: linear warmup followed by cosine decay.
    lr_policy = lr_cosine_policy(args.lr,
                                 args.warmup,
                                 args.epochs)


    # define loss function (criterion) and optimizer
    #criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    loss = nn.CrossEntropyLoss
    if args.label_smoothing > 0.0:
        loss = lambda: LabelSmoothing(args.label_smoothing)
    criterion = loss().to(CALCULATE_DEVICE)
    # Two parameter groups: names ending in 'bias' get no weight decay, all
    # other parameters use args.weight_decay.
    optimizer = torch.optim.SGD([
        {'params': [param for name, param in model.named_parameters() if name[-4:] == 'bias'], 'weight_decay': 0.0},
        {'params': [param for name, param in model.named_parameters() if name[-4:] != 'bias'], 'weight_decay': args.weight_decay}],
        args.lr,
        momentum=args.momentum)

    # NOTE(review): the loss scale is fixed at 1024 here; --static-loss-scale
    # is not consulted.
    model, optimizer = amp.initialize(model, optimizer, opt_level="O2", loss_scale=1024, verbosity=1)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.npu is not None:
                checkpoint = torch.load(args.resume)
            elif args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.npu is not None:
                best_acc1 = best_acc1.to("npu:{}".format(args.npu))
            elif args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            # NOTE(review): optimizer state is neither saved below nor
            # restored here, so a resumed run starts with fresh momentum.
            #optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    # Shuffle only when no sampler is supplied (DataLoader rejects both).
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    # NOTE(review): the validation loader shuffles as well; confirm this is
    # intended.
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        # Evaluation-only run: one validation pass, no training.
        validate(val_loader, model, criterion, args)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        #adjust_learning_rate(optimizer, epoch, args)
        # The learning rate is set once per epoch; the iteration argument is 0.
        lr_policy(optimizer, 0, epoch)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        file_name = "checkpoint_npu{}".format(args.npu)
        # The model is moved to the CPU for serialisation and moved back to
        # the compute device after the checkpoint has been written.
        modeltmp = model.cpu()
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': modeltmp.state_dict(),
            # 'state_dict': model,
            'best_acc1': best_acc1.to("cpu"),
            # 'optimizer' : optimizer.state_dict(),
        }, is_best.to("cpu"), file_name)
        modeltmp.to(CALCULATE_DEVICE)
|
||||
|
||||
def train(train_loader, model, criterion, optimizer, epoch, args):
    """Run one training epoch with optional gradient accumulation.

    Args:
        train_loader: iterable of ``(images, target)`` batches.
        model: network being trained; switched to train mode here.
        criterion: loss callable invoked as ``criterion(output, target)``.
        optimizer: optimizer previously passed through ``amp.initialize``.
        epoch: epoch index, used only for the progress prefix.
        args: parsed command-line namespace (``optimizer_batch_size``,
            ``batch_size``, ``gpu`` and ``label_smoothing`` are read).

    The loop stops early once the batch index equals ``TRAIN_STEP``.
    """
    batch_size_multiplier = _resolve_batch_size_multiplier(args)

    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()
    optimizer.zero_grad()
    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            images = images.cuda(args.gpu, non_blocking=True)
        images = images.to(CALCULATE_DEVICE, non_blocking=True)
        # Without label smoothing the loss runs on the device, so the target
        # is converted and moved before the criterion is evaluated.
        if args.label_smoothing == 0.0:
            target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # With label smoothing the criterion received the original host-side
        # target; it is moved to the device only now, for the accuracy metric.
        if args.label_smoothing > 0.0:
            target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()

        # Step only every ``batch_size_multiplier`` batches; gradients
        # accumulate in between and are averaged right before the step.
        optimizer_step = ((i + 1) % batch_size_multiplier) == 0
        if optimizer_step:
            if batch_size_multiplier != 1:
                for param_group in optimizer.param_groups:
                    for param in param_group['params']:
                        # Parameters that took no part in the backward pass
                        # have no gradient to rescale.
                        if param.grad is not None:
                            param.grad /= batch_size_multiplier
            optimizer.step()
            optimizer.zero_grad()

        if i % LOG_STEP == 0:
            progress.display(i)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i == TRAIN_STEP:
            break


def _resolve_batch_size_multiplier(args):
    """Return how many mini-batches are accumulated per optimizer step (>= 1)."""
    if args.optimizer_batch_size < 0:
        return 1
    tbs = 1 * args.batch_size
    if args.optimizer_batch_size % tbs != 0:
        print(
            "Warning: simulated batch size {} is not divisible by actual batch size {}"
            .format(args.optimizer_batch_size, tbs))
    # A simulated batch smaller than the real one used to give a multiplier of
    # 0, and the modulo in the training loop then raised ZeroDivisionError.
    batch_size_multiplier = max(1, args.optimizer_batch_size // tbs)
    print("BSM: {}".format(batch_size_multiplier))
    return batch_size_multiplier
|
||||
|
||||
|
||||
def validate(val_loader, model, criterion, args):
    """Evaluate ``model`` on ``val_loader`` and return the mean top-1 accuracy.

    Switches the model to eval mode, runs every batch under
    ``torch.no_grad()``, tracks loss / top-1 / top-5 / batch time, prints a
    summary line and reports both accuracies through ``hwlog``.
    """
    time_meter = AverageMeter('Time', ':6.3f')
    loss_meter = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [time_meter, loss_meter, top1, top5],
        prefix='Test: ')

    def _labels_on_device(labels):
        # Labels are cast to int32 before being moved to the compute device.
        return labels.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True)

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        tick = time.time()
        for step, (images, target) in enumerate(val_loader):
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
            images = images.to(CALCULATE_DEVICE, non_blocking=True)

            smoothing = args.label_smoothing
            # Without smoothing the criterion gets device-side labels ...
            if smoothing == 0.0:
                target = _labels_on_device(target)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # ... with smoothing it gets the original labels, which are moved
            # afterwards for the accuracy computation only.
            if smoothing > 0.0:
                target = _labels_on_device(target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            batch = images.size(0)
            loss_meter.update(loss.item(), batch)
            top1.update(acc1[0], batch)
            top5.update(acc5[0], batch)

            # measure elapsed time
            time_meter.update(time.time() - tick)
            tick = time.time()

            if step % LOG_STEP == 0:
                progress.display(step)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))
        hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP1, value="{top1.avg:.3f}".format(top1=top1))
        hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP5, value="{top5.avg:.3f}".format(top5=top5))
    return top1.avg
|
||||
|
||||
|
||||
def save_checkpoint(state, is_best, filename='checkpoint'):
    """Save ``state`` to ``<filename>.pth.tar`` and copy it when it is the best.

    When ``is_best`` is truthy the saved file is duplicated as
    ``<filename>model_best.pth.tar`` (the suffix is appended directly to
    ``filename``, without a separator).
    """
    checkpoint_path = filename + ".pth.tar"
    torch.save(state, checkpoint_path)
    if not is_best:
        return
    best_path = filename + 'model_best.pth.tar'
    shutil.copyfile(checkpoint_path, best_path)
|
||||
|
||||
|
||||
class AverageMeter(object):
    """Track the latest value and the weighted running mean of a metric."""

    def __init__(self, name, fmt=':f'):
        # ``fmt`` is a format-spec fragment (e.g. ':6.3f') spliced into the
        # template used by __str__.
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        """Clear the last value, the running sum, the count and the mean."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        template = ''.join(
            ['{name} {val', self.fmt, '} ({avg', self.fmt, '})'])
        return template.format(**vars(self))
|
||||
|
||||
|
||||
class ProgressMeter(object):
    """Print one progress line per call and report training throughput.

    Args:
        num_batches: total number of batches, used to size the batch counter.
        meters: objects whose ``str()`` is appended to the progress line.
        prefix: text placed in front of the batch counter; a prefix
            containing "Epoch" additionally enables FPS reporting.
    """

    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix
        # Filled on first use by display(), so the command line is parsed
        # once instead of on every progress line.
        self._batch_size = None

    def display(self, batch):
        """Print the progress line for ``batch``; log FPS for training runs.

        FPS is ``batch_size / step_time`` where ``step_time`` is the latest
        value of the meter named 'Time'.  It is read from the meter directly
        (unrounded) rather than parsed back out of the printed text, and is
        skipped when there is no such meter or its value is not positive.
        """
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

        if "Epoch" not in self.prefix:
            return
        time_meter = next(
            (m for m in self.meters if getattr(m, 'name', None) == 'Time'),
            None)
        if time_meter is None:
            return
        step_seconds = float(time_meter.val)
        if step_seconds <= 0:
            return
        if self._batch_size is None:
            self._batch_size = int(parser.parse_args().batch_size)
        FPS = self._batch_size / step_seconds
        hwlog.remark_print(key=hwlog.FPS, value=float(FPS))

    def _get_batch_fmtstr(self, num_batches):
        """Return a template like ``[{:3d}/100]`` sized to ``num_batches``."""
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'
|
||||
|
||||
|
||||
def adjust_learning_rate(optimizer, epoch, args):
    """Apply a step schedule: ``args.lr`` scaled by 0.1 once per 30 epochs.

    The resulting rate is written into the ``'lr'`` entry of every parameter
    group of ``optimizer``.
    """
    completed_steps = epoch // 30
    decayed_lr = args.lr * (0.1 ** completed_steps)
    for group in optimizer.param_groups:
        group['lr'] = decayed_lr
|
||||
|
||||
|
||||
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k.

    Args:
        output: 2-D score tensor; ``topk`` is taken along dimension 1.
        target: 1-D tensor of class indices, one per row of ``output``.
        topk: iterable of ``k`` values to evaluate.

    Returns:
        A list with one single-element tensor per ``k``, holding the
        percentage of rows whose target is among the ``k`` highest scores.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # ``correct`` inherits the transposed (non-contiguous) layout of
            # ``pred``; ``view(-1)`` rejects such a slice for k > 1,
            # ``reshape`` copies when it has to.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
|
||||
|
||||
class LabelSmoothing(nn.Module):
    """
    Label-smoothed negative log-likelihood loss.

    The log-probabilities are moved to the CPU before the loss is assembled;
    the final scalar is moved to ``CALCULATE_DEVICE``.
    """

    def __init__(self, smoothing=0.0):
        """
        Set up the loss.

        :param smoothing: label smoothing factor; the target class keeps a
            weight of ``1.0 - smoothing``
        """
        super().__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing

    def forward(self, x, target):
        log_probs = torch.nn.functional.log_softmax(x, dim=-1).to("cpu")
        # Negative log-probability of the target class for every sample.
        target_term = (-log_probs.gather(dim=-1, index=target.unsqueeze(1))).squeeze(1)
        # Negative mean log-probability over all classes for every sample.
        uniform_term = -log_probs.mean(dim=-1)
        per_sample = self.confidence * target_term + self.smoothing * uniform_term
        return per_sample.mean().to(CALCULATE_DEVICE)
|
||||
|
||||
def lr_policy(lr_fn, logger=None):
    """Turn ``lr_fn(iteration, epoch)`` into a callable that sets the LR.

    The returned function takes ``(optimizer, iteration, epoch)``, evaluates
    ``lr_fn`` and stores the result in the ``'lr'`` entry of every parameter
    group.  When ``logger`` is given, an ``'lr'`` metric is registered once
    and every computed rate is logged to it.

    NOTE(review): ``log`` and ``dllogger`` are not among this file's visible
    imports, so the ``logger`` path would need them to be provided elsewhere.
    """
    log_enabled = logger is not None
    if log_enabled:
        logger.register_metric('lr',
                               log.LR_METER(),
                               verbosity=dllogger.Verbosity.VERBOSE)

    def _alr(optimizer, iteration, epoch):
        new_lr = lr_fn(iteration, epoch)

        if log_enabled:
            logger.log_metric('lr', new_lr)
        for group in optimizer.param_groups:
            group['lr'] = new_lr

    return _alr
|
||||
|
||||
def lr_cosine_policy(base_lr, warmup_length, epochs, logger=None):
    """Create an LR setter with linear warmup followed by cosine decay.

    For ``epoch < warmup_length`` the rate is
    ``base_lr * (epoch + 1) / warmup_length``.  Afterwards it follows half a
    cosine period from ``base_lr`` towards zero over the remaining
    ``epochs - warmup_length`` epochs.  The iteration argument is ignored.
    """

    def _lr_fn(iteration, epoch):
        if epoch < warmup_length:
            return base_lr * (epoch + 1) / warmup_length
        elapsed = epoch - warmup_length
        span = epochs - warmup_length
        return 0.5 * (1 + np.cos(np.pi * elapsed / span)) * base_lr

    return lr_policy(_lr_fn, logger=logger)
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: emit the benchmark's header records through hwlog,
    # then start training.
    # ROOT_DIR is set to the directory that contains this script.
    hwlog.ROOT_DIR = os.path.split(os.path.abspath(__file__))[0]
    cpu_info, npu_info, framework_info, os_info, benchmark_version = get_environment_info("pytorch")
    config_info = get_model_parameter("pytorch_config")
    # NOTE(review): these values are literals and are logged as-is; they are
    # not derived from the command-line arguments parsed later in main().
    initinal_data = {"base_lr": 0.1, "dataset": "imagenet", "optimizer": "SGD", "loss_scale": 1024}
    hwlog.remark_print(key=hwlog.CPU_INFO, value=cpu_info)
    hwlog.remark_print(key=hwlog.NPU_INFO, value=npu_info)
    hwlog.remark_print(key=hwlog.OS_INFO, value=os_info)
    hwlog.remark_print(key=hwlog.FRAMEWORK_INFO, value=framework_info)
    hwlog.remark_print(key=hwlog.BENCHMARK_VERSION, value=benchmark_version)
    hwlog.remark_print(key=hwlog.CONFIG_INFO, value=config_info)
    hwlog.remark_print(key=hwlog.BASE_LR, value=initinal_data.get("base_lr"))
    hwlog.remark_print(key=hwlog.DATASET, value=initinal_data.get("dataset"))
    hwlog.remark_print(key=hwlog.OPT_NAME, value=initinal_data.get("optimizer"))
    hwlog.remark_print(key=hwlog.LOSS_SCALE, value=initinal_data.get("loss_scale"))
    main()
|
||||
Reference in New Issue
Block a user