Binary file not shown.
@@ -0,0 +1,50 @@
|
||||
# 训练benchmark
|
||||
|
||||
## 支持的产品
|
||||
Atlas 800 (Model 9000)
|
||||
|
||||
## 操作系统
|
||||
|
||||
CentOS 7.6 & Ubuntu 18.04
|
||||
|
||||
|
||||
## 训练方法
|
||||
|
||||
1. 根据实际情况修改 ./yaml/ 目录下的对应的 yaml 文件,建议备份原文件,且保持 yaml 文件名与模型名称相同。
|
||||
2. 在当前目录(train)下,执行:`./benchmark.sh --help` 查看帮助信息。
|
||||
3. 根据 **帮助信息** 或本文件中的 **运行参数说明** 选择配置运行参数后,执行:`./benchmark.sh`
|
||||
|
||||
## 示例
|
||||
- 示例1,docker 环境下启动 MobileNet 多卡(8p)训练:`./benchmark.sh -e MobileNet -hw 8p -y ./yaml/MobileNet.yaml -docker`
|
||||
- 示例2,host 环境下启动 MobileNet 单卡(1p)训练,yaml 使用默认文件:`./benchmark.sh -e MobileNet`
|
||||
- 示例3,host 环境下启动 ResNet50 集群(cluster)训练,yaml 使用默认文件:`./benchmark.sh -e ResNet50 -hw ct`
|
||||
- 示例4,host 环境下启动 pytorch模型DeepMar单卡(1p)训练,yaml 使用默认文件:`./benchmark.sh -e DeepMar -hw 1p -f pytorch`
|
||||
- 示例5,host 环境下启动 pytorch模型DeepMar多卡(8p)训练,yaml 使用默认文件:`./benchmark.sh -e DeepMar -hw 8p -f pytorch`
|
||||
- 示例6,docker环境下启动 pytorch模型DeepMar多卡(8p)训练,yaml 使用默认文件:`./benchmark.sh -e DeepMar -hw 8p -f pytorch -docker`
|
||||
|
||||
## 运行参数说明
|
||||
|
||||
| 参数 | 是否必填 | 参数说明 | 默认值 |
|
||||
| --------------- | -------- | -------------------- |------------------------ |
|
||||
| --execmodel, -e | 选填 | 需要执行的模型名称 | ResNet50 |
|
||||
| --hardware, -hw | 选填 | 选择 1p, 2p, 4p, 8p, cluster/ct | 1p |
|
||||
| --yamlpath, -y | 选填 | yaml 文件的路径 | ./yaml/{execmodel}.yaml |
|
||||
| --framework, -f | 选填 | 模型训练框架 | tensorflow |
|
||||
| -docker, -host | 选填 | 选择 docker 或 host | host |
|
||||
| --help, -h | 选填 | 显示帮助信息 | NA |
|
||||
| --list, -l | 选填 | 显示当前支持的模型与框架 | NA |
|
||||
|
||||
## 查看日志
|
||||
|
||||
- 可在 train/result/ 目录下查看各个模型最后生成的含性能与精度数据的日志。
|
||||
- 中间结果ckpt或其他文件存放在 *device id* 下。
|
||||
- train_x.log 为模型训练过程日志,内容较为详细;以 hw 开头的日志为打点日志,仅记录数据。
|
||||
|
||||
## 注意事项
|
||||
|
||||
- yaml 文件中的值可以参考注释,根据实际情况自行修改。键不可随意修改,否则可能导致训练失败或训练结果偏离实际。
|
||||
- 集群(cluster)执行时,请保证各节点环境配置相同,且包括**配置文件、数据集、代码**绝对路径相同。
|
||||
|
||||
## Benchmark工具资料参考
|
||||
|
||||
https://support.huawei.com/enterprise/zh/ascend-computing/atlas-data-center-solution-pid-251167910/software/251732401?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251167910
|
||||
@@ -0,0 +1,40 @@
|
||||
# DeepMar_pytorch训练说明
|
||||
|
||||
### 1. 数据集处理
|
||||
|
||||
#### 1.1. 下载并准备数据集:
|
||||
百度云盘:https://pan.baidu.com/s/1q8nsydT7xkDjZJOxvPcoEw
|
||||
passwd: 5vep
|
||||
或者 Google Drive:https://drive.google.com/open?id=1q4cux17K3zNBgIrDV4FtcHJPLzXNKfYG
|
||||
|
||||
存放地址
|
||||
./dataset/peta/images/*.png
|
||||
./dataset/peta/PETA.mat
|
||||
|
||||
#### 1.2 运行以下命令,分割训练集、测试集(路径修改成自己存放数据集路径)
|
||||
python script/dataset/transform_peta.py
|
||||
生成 peta_dataset.pkl,peta_partition.pkl 文件
|
||||
|
||||
### 2. 模型训练参数配置
|
||||
|
||||
在train/yaml/DeepMar.yaml中修改相应配置, 配置项含义:
|
||||
|
||||
```
|
||||
pytorch_config:
|
||||
data_url: 数据集路径
|
||||
epoches: 跑多少个epoch
|
||||
batch_size: 1p 为 256,2p 为 512,4p 为 1024,8p 为 2048
|
||||
seed: 49
|
||||
lr: 默认值 1p 为 0.01,2p、4p、8p 均为 0.016
|
||||
docker_image: docker 镜像名称:版本号
|
||||
```
|
||||
|
||||
------
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
+81
@@ -0,0 +1,81 @@
|
||||
import torch.utils.data as data
|
||||
import os
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
import pickle
|
||||
import copy
|
||||
|
||||
class AttDataset(data.Dataset):
    """Person-attribute dataset assembled from two pickle files.

    The dataset pickle is read for the keys ``'root'``, ``'image'``,
    ``'att'``, ``'att_name'`` and ``'selected_attribute'``; the partition
    pickle maps a split name to a list of index lists.

    Args:
        dataset: path of the pickled dataset description.
        partition: path of the pickled partition description.
        split: key of the partition to use.
        partition_idx: which index list of the chosen split to use.
        transform: optional callable applied to every loaded image.
        target_transform: optional callable applied to every label vector.
        **kwargs: accepted and ignored.

    Raises:
        ValueError: if either file is missing, ``split`` is not a key of the
            partition, or ``partition_idx`` is past the end of the split.
    """

    def __init__(
            self,
            dataset,
            partition,
            split='train',
            partition_idx=0,
            transform=None,
            target_transform=None,
            **kwargs):
        self.dataset = self._load_pickle(dataset)
        self.partition = self._load_pickle(partition)
        if split not in self.partition:
            raise ValueError(split + ' does not exist in dataset.')
        if partition_idx > len(self.partition[split]) - 1:
            raise ValueError('partition_idx is out of range in partition.')

        self.transform = transform
        self.target_transform = target_transform

        # Build the image/label lists for the selected partition and split.
        self.root_path = self.dataset['root']
        selected = self.dataset['selected_attribute']
        self.att_name = [self.dataset['att_name'][i] for i in selected]
        self.image = []
        self.label = []
        for idx in self.partition[split][partition_idx]:
            self.image.append(self.dataset['image'][idx])
            self.label.append(
                np.array(self.dataset['att'][idx])[selected].tolist())

    @staticmethod
    def _load_pickle(path):
        """Unpickle ``path`` and close the file handle afterwards."""
        if not os.path.exists(path):
            raise ValueError(path + ' does not exist in dataset.')
        # NOTE(review): pickle.load can execute arbitrary code stored in the
        # file; only point this at trusted dataset files.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def __getitem__(self, index):
        """Load one sample.

        Args:
            index (int): position in the selected partition.

        Returns:
            tuple: ``(image, target)`` where ``target`` is a float32 vector
            of the selected attributes in which the stored value 0 is
            remapped to -1 and the stored value 2 is remapped to 0.
        """
        imgname, target = self.image[index], self.label[index]
        img = Image.open(os.path.join(self.root_path, imgname))
        if self.transform is not None:
            img = self.transform(img)

        target = np.array(target).astype(np.float32)
        # Order matters: 0 -> -1 must happen before 2 -> 0.
        target[target == 0] = -1
        target[target == 2] = 0
        if self.target_transform is not None:
            # The label goes through target_transform, not the image transform.
            target = self.target_transform(target)

        return img, target

    def __len__(self):
        """Return the number of samples in the selected partition."""
        return len(self.image)
|
||||
|
||||
|
||||
+65
@@ -0,0 +1,65 @@
|
||||
import torch
|
||||
import numpy as np
|
||||
import numbers
|
||||
__all__ = ["AddPad", "AddCrop"]
|
||||
|
||||
class AddCrop(object):
    """Randomly crop a C*H*W image to a fixed ``(height, width)`` size."""

    def __init__(self, size):
        self.size = size  # (crop_height, crop_width)
        assert len(self.size) == 2

    def __repr__(self):
        return self.__class__.__name__ + '(size={0})'.format(self.size)

    def __call__(self, img):
        """Return a random ``size`` window of ``img``.

        Args:
            img: array-like indexed as [channel, row, column].

        Returns:
            The slice ``img[:, h:h+size[0], w:w+size[1]]`` for a random
            top-left corner ``(h, w)``.

        Raises:
            ValueError: (from numpy) if the image is smaller than the crop.
        """
        crop_h, crop_w = self.size[0], self.size[1]
        shape = img.shape  # 3*H*W
        h_high = shape[1] - crop_h
        w_high = shape[2] - crop_w
        # randint's upper bound is exclusive: use high + 1 so that the last
        # valid offset can be drawn and a crop equal to the image size works.
        h_start = np.random.randint(low=0, high=h_high + 1)
        w_start = np.random.randint(low=0, high=w_high + 1)
        return img[:, h_start: h_start + crop_h, w_start: w_start + crop_w]
|
||||
|
||||
class AddPad(object):
    """Pad a C*H*W tensor with constant per-channel values.

    Args:
        padding: a single number applied to all four sides, or a list/tuple
            ``(left, right, up, down)``.
        fill: a single number used for every channel, or a non-empty
            list/tuple of numbers cycled over the channels.

    Raises:
        ValueError: if ``padding`` has the wrong type/length, contains a
            negative value, or ``fill`` is neither a number nor a non-empty
            list/tuple.
    """

    def __init__(self, padding, fill=0):
        self.padding = padding
        self.fill = fill
        if isinstance(padding, numbers.Number):
            self.pad_l = self.pad_r = self.pad_u = self.pad_d = int(padding)
        elif isinstance(padding, (list, tuple)) and len(padding) == 4:
            self.pad_l, self.pad_r, self.pad_u, self.pad_d = (
                int(p) for p in padding)
        else:
            raise ValueError("The type of padding is not right.")
        if min(self.pad_l, self.pad_r, self.pad_u, self.pad_d) < 0:
            raise ValueError("padding must be non-negative.")

        if isinstance(fill, numbers.Number):
            self.fill_value = [fill]
        elif isinstance(fill, (list, tuple)) and len(fill) > 0:
            self.fill_value = list(fill)
        else:
            # Previously this left fill_value undefined and failed at call time.
            raise ValueError("fill must be a number or a non-empty list.")

    def __repr__(self):
        return self.__class__.__name__ + '(padding={0})'.format(self.padding)

    def __call__(self, img):
        """Pad ``img``.

        Args:
            img: a 3-dimensional torch tensor with shape [R,G,B]*H*W

        Returns:
            ``img`` itself when all paddings are zero, otherwise a new
            default-dtype tensor with shape [R,G,B]*H'*W'.
        """
        if not (self.pad_l or self.pad_r or self.pad_u or self.pad_d):
            return img
        channels, height, width = img.shape[0], img.shape[1], img.shape[2]
        padded = torch.empty(channels,
                             height + self.pad_u + self.pad_d,
                             width + self.pad_l + self.pad_r)
        # Fill each whole channel first so every border cell (including the
        # last row and column) gets the fill value, then paste the image.
        for c in range(channels):
            padded[c].fill_(self.fill_value[c % len(self.fill_value)])
        padded[:, self.pad_u:self.pad_u + height,
               self.pad_l:self.pad_l + width] = img
        return padded
|
||||
+81
@@ -0,0 +1,81 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.init as init
|
||||
import torch.nn.functional as F
|
||||
from torch.autograd import Variable
|
||||
import numpy as np
|
||||
from .resnet import resnet50
|
||||
|
||||
|
||||
class DeepMAR_ResNet50(nn.Module):
    """ResNet-50 backbone followed by a linear attribute classifier.

    Keyword Args:
        num_att: number of classifier outputs (default 35).
        last_conv_stride: forwarded to ``resnet50`` (default 2).
        drop_pool5: whether dropout is applied to the pooled feature
            (default True).
        drop_pool5_rate: dropout probability (default 0.5).
        pretrained: forwarded to ``resnet50`` (default True).
    """

    def __init__(self, **kwargs):
        super(DeepMAR_ResNet50, self).__init__()
        # Structural hyper-parameters, each with its default value.
        self.num_att = kwargs.get('num_att', 35)
        self.last_conv_stride = kwargs.get('last_conv_stride', 2)
        self.drop_pool5 = kwargs.get('drop_pool5', True)
        self.drop_pool5_rate = kwargs.get('drop_pool5_rate', 0.5)
        self.pretrained = kwargs.get('pretrained', True)

        self.base = resnet50(pretrained=self.pretrained,
                             last_conv_stride=self.last_conv_stride)

        self.classifier = nn.Linear(2048, self.num_att)
        init.normal_(self.classifier.weight, std=0.001)
        init.constant_(self.classifier.bias, 0)

    def forward(self, x):
        """Return the raw classifier output for a batch of images."""
        feature_map = self.base(x)
        # Average over the full spatial extent, then flatten to (batch, -1).
        pooled = F.avg_pool2d(feature_map, feature_map.shape[2:])
        flat = torch.flatten(pooled, 1)
        if self.drop_pool5:
            flat = F.dropout(flat, p=self.drop_pool5_rate,
                             training=self.training)
        return self.classifier(flat)
|
||||
|
||||
class DeepMAR_ResNet50_ExtractFeature(object):
    """Callable that runs a model in eval mode and returns numpy scores."""

    def __init__(self, model, **kwargs):
        self.model = model

    def __call__(self, imgs):
        """Compute the model output for ``imgs``.

        The model is switched to eval mode for the forward pass and its
        previous train/eval mode is restored afterwards, even when the
        forward pass raises.

        Args:
            imgs: input batch; must pass ``isinstance(imgs, Variable)``.

        Returns:
            The model output copied to the CPU as a numpy array.

        Raises:
            ValueError: if ``imgs`` fails the type check.
        """
        # Validate before touching the model so a bad input cannot leave it
        # stuck in eval mode.
        if not isinstance(imgs, Variable):
            raise ValueError('imgs should be type: Variable')

        was_training = self.model.training
        self.model.eval()
        try:
            score = self.model(imgs)
            return score.data.cpu().numpy()
        finally:
            self.model.train(was_training)
|
||||
+217
@@ -0,0 +1,217 @@
|
||||
import torch.nn as nn
|
||||
import math
|
||||
import torch.utils.model_zoo as model_zoo
|
||||
import ssl
|
||||
|
||||
# NOTE(review): this replaces the ssl module's default HTTPS context factory
# with the unverified one at import time, so certificate verification is off
# for every user of that default in the process, not just this module.
# Presumably added so the pretrained-weight download works behind a proxy;
# confirm, and prefer providing the weights locally instead.
ssl._create_default_https_context = ssl._create_unverified_context
|
||||
|
||||
|
||||
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
|
||||
'resnet152']
|
||||
|
||||
|
||||
model_urls = {
|
||||
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
|
||||
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
|
||||
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
|
||||
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
|
||||
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
|
||||
}
|
||||
|
||||
|
||||
def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` with padding 1."""
    conv = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
    return conv
|
||||
|
||||
|
||||
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    Args:
        inplanes: input channel count.
        planes: output channel count of both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input before it is added
            back to the main path.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # The shortcut is the input itself unless a downsample module is set.
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
|
||||
|
||||
|
||||
class Bottleneck(nn.Module):
    """Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

    Args:
        inplanes: input channel count.
        planes: channel count of the first two convolutions; the block
            outputs ``planes * 4`` channels.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input before it is added
            back to the main path.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        wide = planes * 4
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, wide, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(wide)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # The shortcut is the input itself unless a downsample module is set.
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
|
||||
|
||||
|
||||
class ResNet(nn.Module):
    """ResNet feature extractor (stem plus four stages, no classifier head).

    Args:
        block: block class providing an ``expansion`` attribute and accepting
            ``(inplanes, planes, stride, downsample)``.
        layers: number of blocks in each of the four stages.
        last_conv_stride: stride of the fourth stage.
    """

    def __init__(self, block, layers, last_conv_stride=2):
        super(ResNet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3],
                                       stride=last_conv_stride)

        # Conv weights ~ N(0, sqrt(2 / (k_h * k_w * out_channels)));
        # batch-norm starts as the identity (weight 1, bias 0).
        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
            elif isinstance(module, nn.Conv2d):
                k_h, k_w = module.kernel_size[0], module.kernel_size[1]
                fan_out = k_h * k_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` blocks and update ``self.inplanes``."""
        out_planes = planes * block.expansion
        shortcut = None
        # A projection shortcut is needed whenever the first block changes
        # the spatial size or the channel count.
        if stride != 1 or self.inplanes != out_planes:
            shortcut = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        stage = [block(self.inplanes, planes, stride, shortcut)]
        self.inplanes = out_planes
        stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Run the stem and the four stages; return the last feature map."""
        pipeline = (self.conv1, self.bn1, self.relu, self.maxpool,
                    self.layer1, self.layer2, self.layer3, self.layer4)
        for stage in pipeline:
            x = stage(x)
        return x
|
||||
|
||||
def remove_fc(state_dict):
    """Delete every entry whose key starts with ``'fc.'`` and return the dict.

    The mapping is modified in place; the same object is returned.
    """
    fc_keys = [name for name in state_dict if name.startswith('fc.')]
    for name in fc_keys:
        state_dict.pop(name)
    return state_dict
|
||||
|
||||
|
||||
def resnet18(pretrained=False, **kwargs):
    """Construct a ResNet-18 feature extractor.

    Args:
        pretrained (bool): if True, download the ``model_urls['resnet18']``
            weights (ImageNet pre-trained per the original docstring) and
            load them without the ``fc.*`` entries.
        **kwargs: forwarded to ``ResNet``.
    """
    net = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet18'])
    net.load_state_dict(remove_fc(weights))
    return net
|
||||
|
||||
|
||||
def resnet34(pretrained=False, **kwargs):
    """Construct a ResNet-34 feature extractor.

    Args:
        pretrained (bool): if True, download the ``model_urls['resnet34']``
            weights (ImageNet pre-trained per the original docstring) and
            load them without the ``fc.*`` entries.
        **kwargs: forwarded to ``ResNet``.
    """
    net = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet34'])
    net.load_state_dict(remove_fc(weights))
    return net
|
||||
|
||||
|
||||
def resnet50(pretrained=False, **kwargs):
    """Construct a ResNet-50 feature extractor.

    Args:
        pretrained (bool): if True, download the ``model_urls['resnet50']``
            weights (ImageNet pre-trained per the original docstring) and
            load them without the ``fc.*`` entries.
        **kwargs: forwarded to ``ResNet``.
    """
    net = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet50'])
    net.load_state_dict(remove_fc(weights))
    return net
|
||||
|
||||
|
||||
def resnet101(pretrained=False, **kwargs):
    """Construct a ResNet-101 feature extractor.

    Args:
        pretrained (bool): if True, download the ``model_urls['resnet101']``
            weights (ImageNet pre-trained per the original docstring) and
            load them without the ``fc.*`` entries.
        **kwargs: forwarded to ``ResNet``.
    """
    net = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet101'])
    net.load_state_dict(remove_fc(weights))
    return net
|
||||
|
||||
|
||||
def resnet152(pretrained=False, **kwargs):
    """Construct a ResNet-152 feature extractor.

    Args:
        pretrained (bool): if True, download the ``model_urls['resnet152']``
            weights (ImageNet pre-trained per the original docstring) and
            load them without the ``fc.*`` entries.
        **kwargs: forwarded to ``ResNet``.
    """
    net = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet152'])
    net.load_state_dict(remove_fc(weights))
    return net
|
||||
+105
@@ -0,0 +1,105 @@
|
||||
import os
|
||||
import torch
|
||||
from torch.autograd import Variable
|
||||
import numpy as np
|
||||
import copy
|
||||
import time
|
||||
import sys
|
||||
|
||||
|
||||
def extract_feat(feat_func, dataset, device_id, **kwargs):
    """Run ``feat_func`` over every sample of ``dataset`` and stack the rows.

    Args:
        feat_func: callable mapping a batch tensor to a numpy array whose
            first axis is the batch axis.
        dataset: dataset exposing an ``image`` list; its length fixes the
            number of output rows.
        device_id: device the image batches are moved to.
        **kwargs: optional ``batch_size`` (default 32) and ``num_workers``
            (default 32) for the data loader.

    Returns:
        A float numpy array with one row per sample, each row being the
        flattened ``feat_func`` output for that sample.
    """
    batch_size = kwargs.get('batch_size', 32)
    num_workers = kwargs.get('num_workers', 32)
    # drop_last must be False: a dropped tail batch would leave all-zero rows
    # in the result, which the evaluation then scores as real predictions.
    # NOTE(review): if fixed batch shapes are required on the target device,
    # pad the last batch instead of dropping it.
    test_loader = torch.utils.data.DataLoader(
        dataset=dataset, batch_size=batch_size,
        num_workers=num_workers, pin_memory=True,
        drop_last=False)

    start_time = time.time()
    total_eps = len(test_loader)
    N = len(dataset.image)
    feat = None
    start = 0
    with torch.no_grad():
        for imgs, _labels in test_loader:
            feat_tmp = feat_func(imgs.to(device_id))
            n_rows = feat_tmp.shape[0]
            flat = feat_tmp.reshape((n_rows, -1))
            if feat is None:
                # Allocate lazily: the feature width is only known after the
                # first forward pass.
                feat = np.zeros((N, flat.shape[1]))
            feat[start:start + n_rows, :] = flat
            start += n_rows
    if feat is None:
        # Empty loader: return an empty matrix instead of an unbound name.
        feat = np.zeros((N, 0))
    end_time = time.time()
    print('{} batches done, total {:.2f}s'.format(total_eps, end_time-start_time))
    return feat
|
||||
|
||||
# attribute recognition evaluation
|
||||
def attribute_evaluate(feat_func, dataset, device_id, **kwargs):
    """Evaluate attribute recognition of ``feat_func`` on ``dataset``.

    Scores from ``extract_feat`` are binarised in place (>= 0 becomes 1,
    < 0 becomes 0) and compared with ``dataset.label`` through
    ``attribute_evaluate_lidw``, whose result is returned.
    """
    print ("extracting features for attribute recognition")
    scores = extract_feat(feat_func, dataset, device_id)

    print ("computing attribute recognition result")
    # Ground truth matrix with the same shape as the predictions.
    truth = np.zeros(scores.shape)
    for row, label in enumerate(dataset.label):
        truth[row, :] = label

    # Both masks are taken before writing; the result matches thresholding
    # the two sides one after the other.
    non_negative = scores >= 0
    negative = scores < 0
    scores[non_negative] = 1
    scores[negative] = 0
    return attribute_evaluate_lidw(truth, scores)
|
||||
|
||||
def attribute_evaluate_lidw(gt_result, pt_result):
    """Compute label-based and instance-based attribute metrics.

    Args:
        gt_result: N*L numpy array of ground-truth labels with 0/1 entries.
        pt_result: N*L numpy array of predicted labels with 0/1 entries.

    Returns:
        dict with
        label-based (one value per attribute): ``label_pos_acc``,
        ``label_neg_acc``, ``label_acc``;
        instance-based (scalars): ``instance_acc``, ``instance_precision``,
        ``instance_recall``, ``instance_F1``.

    Raises:
        ValueError: if the two arrays do not have the same shape.

    Note:
        A label-based value is NaN for an attribute without positive (or
        negative) ground-truth samples; the division is left as is.
    """
    if gt_result.shape != pt_result.shape:
        # Previously only printed; broadcast-compatible shapes then produced
        # silently wrong numbers.
        raise ValueError(
            'Shape mismatch between groundtruth {} and predicted results {}'
            .format(gt_result.shape, pt_result.shape))

    gt_is_pos = gt_result == 1
    gt_is_neg = gt_result == 0
    pt_is_pos = pt_result == 1
    pt_is_neg = pt_result == 0

    result = {}

    # Label-based accuracy: per attribute (axis 0 sums over samples).
    gt_pos = gt_is_pos.sum(axis=0).astype(float)
    gt_neg = gt_is_neg.sum(axis=0).astype(float)
    true_pos = (gt_is_pos & pt_is_pos).sum(axis=0).astype(float)
    true_neg = (gt_is_neg & pt_is_neg).sum(axis=0).astype(float)
    label_pos_acc = 1.0 * true_pos / gt_pos
    label_neg_acc = 1.0 * true_neg / gt_neg
    result['label_pos_acc'] = label_pos_acc
    result['label_neg_acc'] = label_neg_acc
    result['label_acc'] = (label_pos_acc + label_neg_acc) / 2

    # Instance-based metrics: per sample (axis 1 sums over attributes).
    gt_cnt = gt_is_pos.sum(axis=1).astype(float)
    pt_cnt = pt_is_pos.sum(axis=1).astype(float)
    intersect_pos = (gt_is_pos & pt_is_pos).sum(axis=1).astype(float)
    union_pos = (gt_is_pos | pt_is_pos).sum(axis=1).astype(float)

    # Samples without any positive ground-truth label are excluded from the
    # averages; their denominators are set to 1 so they contribute 0.
    no_gt = gt_cnt == 0
    cnt_eff = float(gt_result.shape[0]) - float(no_gt.sum())
    union_pos[no_gt] = 1
    pt_cnt[no_gt] = 1
    gt_cnt[no_gt] = 1
    # Samples with no predicted positive: avoid 0/0 in the precision term.
    pt_cnt[pt_cnt == 0] = 1

    instance_acc = np.sum(intersect_pos / union_pos) / cnt_eff
    instance_precision = np.sum(intersect_pos / pt_cnt) / cnt_eff
    instance_recall = np.sum(intersect_pos / gt_cnt) / cnt_eff
    pr_sum = instance_precision + instance_recall
    if pr_sum == 0:
        # Precision and recall both 0: report F1 as 0 rather than NaN.
        instance_F1 = 0.0
    else:
        instance_F1 = 2 * instance_precision * instance_recall / pr_sum

    result['instance_acc'] = instance_acc
    result['instance_precision'] = instance_precision
    result['instance_recall'] = instance_recall
    result['instance_F1'] = instance_F1
    return result
|
||||
+347
@@ -0,0 +1,347 @@
|
||||
import os
|
||||
import pickle
|
||||
import datetime
|
||||
import time
|
||||
# from contextlib import contextmanger
|
||||
import torch
|
||||
from torch.autograd import Variable
|
||||
import random
|
||||
import numpy as np
|
||||
import torch.backends.cudnn as cudnn
|
||||
|
||||
def time_str(fmt=None):
    """Format the current local time; default format ``%Y-%m-%d_%H:%M:%S``."""
    pattern = '%Y-%m-%d_%H:%M:%S' if fmt is None else fmt
    now = datetime.datetime.today()
    return now.strftime(pattern)
|
||||
|
||||
def str2bool(v):
    """Return True when ``v`` is "yes", "true" or "1", ignoring case."""
    truthy = ("yes", "true", "1")
    lowered = v.lower()
    return lowered in truthy
|
||||
|
||||
def is_iterable(obj):
    """Return True when ``obj`` exposes a ``__len__`` attribute."""
    try:
        obj.__len__
    except AttributeError:
        return False
    return True
|
||||
|
||||
def to_scalar(vt):
    """Return the first element of a pytorch Variable or Tensor.

    Raises:
        TypeError: if ``vt`` is neither a Variable nor a tensor.
    """
    if isinstance(vt, Variable):
        host = vt.data.cpu()
    elif torch.is_tensor(vt):
        host = vt.cpu()
    else:
        raise TypeError('Input should be a variable or tensor')
    return host.numpy().flatten()[0]
|
||||
|
||||
def set_seed(rand_seed):
    """Seed python, numpy and torch (CPU and CUDA) and enable cudnn."""
    torch.backends.cudnn.enabled = True
    random.seed(rand_seed)
    np.random.seed(rand_seed)
    torch.manual_seed(rand_seed)
    torch.cuda.manual_seed(rand_seed)
|
||||
|
||||
def seed_everything(seed):
    """Seed every random source and request deterministic cudnn behaviour.

    Seeds python ``random``, numpy and torch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED``, sets the cudnn ``deterministic`` flag and
    clears the cudnn ``benchmark`` flag.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    cudnn.deterministic = True
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
|
||||
|
||||
def may_mkdir(fname):
    """Create the parent directory of ``fname`` if it does not exist.

    Args:
        fname: path of a file; only its absolute parent directory is created.
    """
    parent = os.path.dirname(os.path.abspath(fname))
    # exist_ok avoids the check-then-create race when several training
    # processes create the same directory at once.
    os.makedirs(parent, exist_ok=True)
|
||||
|
||||
class AverageMeter(object):
    """Track the latest value and the weighted mean of all values seen."""

    def __init__(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def reset(self):
        """Zero every statistic."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the average."""
        self.val = val
        self.count += n
        self.sum += val * n
        # The small epsilon keeps the division defined when count is 0.
        self.avg = float(self.sum) / (self.count + 1e-10)
|
||||
|
||||
class RunningAverageMeter(object):
    """Track the latest value and an exponential running average.

    Args:
        hist: weight kept from the previous average on each update.
    """

    def __init__(self, hist=0.99):
        self.val = None
        self.avg = None
        self.hist = hist

    def reset(self):
        """Forget the latest value and the average."""
        self.val = None
        self.avg = None

    def update(self, val):
        """Blend ``val`` into the average; the first value seeds it."""
        previous = self.avg
        if previous is None:
            self.avg = val
        else:
            self.avg = previous * self.hist + val * (1 - self.hist)
        self.val = val
|
||||
|
||||
class RecentAverageMeter(object):
    """Track the latest value and the mean of the most recent values.

    Args:
        hist_size: maximum number of values kept in ``fifo``.
    """

    def __init__(self, hist_size=100):
        self.hist_size = hist_size
        self.fifo = []
        self.val = 0

    def reset(self):
        """Drop the stored history and zero the latest value."""
        self.fifo = []
        self.val = 0

    def update(self, val):
        """Append ``val`` and evict the oldest entry when over capacity."""
        self.val = val
        self.fifo.append(val)
        if len(self.fifo) > self.hist_size:
            self.fifo.pop(0)

    @property
    def avg(self):
        """Mean of the stored values; asserts that at least one exists."""
        stored = len(self.fifo)
        assert stored > 0
        return float(sum(self.fifo)) / stored
|
||||
|
||||
class ReDirectSTD(object):
    """Tee ``sys.stdout`` or ``sys.stderr`` into a log file.

    Constructing the object installs it as ``sys.stdout`` / ``sys.stderr``;
    every write goes to the original console and, when ``fpath`` is given,
    to the file as well. ``close()`` (also called when leaving a ``with``
    block) closes the log file and puts the original console back.

    Args:
        fpath: log file path, or None to only forward to the console. An
            existing file at this path is removed.
        console: one of ['stdout', 'stderr'].
        immediately_visiable: if True the file is re-opened in append mode
            for every write, so content is visible immediately; if False a
            single handle is opened on the first write.

    Usage example:
        ReDirectSTD('stdout.txt', 'stdout', False)
        ReDirectSTD('stderr.txt', 'stderr', False)
    """

    def __init__(self, fpath=None, console='stdout', immediately_visiable=False):
        import sys
        import os
        assert console in ['stdout', 'stderr']
        self.console = sys.stdout if console == "stdout" else sys.stderr
        self.file = fpath
        self.f = None
        self.immediately_visiable = immediately_visiable
        if fpath is not None:
            # Remove existing log file
            if os.path.exists(fpath):
                os.remove(fpath)
        if console == 'stdout':
            sys.stdout = self
        else:
            sys.stderr = self

    def __del__(self):
        try:
            self.close()
        except Exception:
            # Finalizers may run during interpreter shutdown, when modules
            # are already torn down; there is nobody to report to.
            pass

    def __enter__(self):
        # Return self so that ``with ReDirectSTD(...) as r`` binds the object.
        return self

    def __exit__(self, *args):
        # The with statement passes (exc_type, exc, tb) positionally.
        self.close()

    def write(self, msg):
        import os
        self.console.write(msg)
        if self.file is not None:
            # makedirs handles nested log directories and existing ones.
            os.makedirs(os.path.dirname(os.path.abspath(self.file)),
                        exist_ok=True)
            if self.immediately_visiable:
                with open(self.file, 'a') as f:
                    f.write(msg)
            else:
                if self.f is None:
                    self.f = open(self.file, 'w')
                self.f.write(msg)

    def flush(self):
        self.console.flush()
        if self.f is not None and not self.f.closed:
            self.f.flush()
            import os
            os.fsync(self.f.fileno())

    def close(self):
        """Close the log file and restore the original console stream.

        The original console is deliberately not closed: it is the process'
        real stdout/stderr and other code still needs it.
        """
        import sys
        log_file = getattr(self, 'f', None)
        if log_file is not None:
            log_file.close()
        console = getattr(self, 'console', None)
        if console is None:
            return
        if sys.stdout is self:
            sys.stdout = console
        if sys.stderr is self:
            sys.stderr = console
|
||||
|
||||
def find_index(seq, item):
    """Return the position of the first element equal to ``item``, or -1."""
    matches = (pos for pos, element in enumerate(seq) if item == element)
    return next(matches, -1)
|
||||
|
||||
def set_devices(sys_device_ids):
    """Export ``CUDA_VISIBLE_DEVICES`` for the given device ids.

    Args:
        sys_device_ids: a tuple; which GPUs to use
            e.g. sys_device_ids = (), only use cpu
                 sys_device_ids = (3,), use the 4-th gpu
                 sys_device_ids = (0, 1, 2, 3,), use the first 4 gpus
                 sys_device_ids = (0, 2, 4,), use the 1, 3 and 5 gpus

    Each id is written followed by ", " (so the value keeps a trailing
    separator). The function returns None.
    """
    import os
    visible = ''.join('{}, '.format(dev) for dev in sys_device_ids)
    os.environ['CUDA_VISIBLE_DEVICES'] = visible
    # NOTE(review): the original comments mention returning wrappers, but
    # nothing is returned and device_id is never used -- confirm whether the
    # rest of this function was dropped.
    # Models and user defined Variables/Tensors would be transferred to
    # the first device
    device_id = -1 if len(sys_device_ids) <= 0 else 0
|
||||
|
||||
def transfer_optims(optims, device_id=-1):
    """Move the state of every optimizer in ``optims`` to ``device_id``.

    Entries that are not ``torch.optim.Optimizer`` instances are skipped.
    """
    real_optims = (o for o in optims
                   if isinstance(o, torch.optim.Optimizer))
    for opt in real_optims:
        transfer_optim_state(opt.state, device_id=device_id)
|
||||
|
||||
def transfer_optim_state(state, device_id=-1):
    """Recursively move the tensors of an optimizer state dict, in place.

    Args:
        state: (possibly nested) dict as found in ``optimizer.state``.
        device_id: -1 moves tensors to the CPU; any other value is passed to
            ``tensor.npu(device=device_id)``.

    Raises:
        RuntimeError: if a value is a ``torch.nn.Parameter``.

    Non-tensor values (e.g. integer step counters) are left untouched.
    """
    for key, val in state.items():
        if isinstance(val, dict):
            transfer_optim_state(val, device_id=device_id)
        elif isinstance(val, torch.nn.Parameter):
            raise RuntimeError("Oops, state[{}] is a Parameter!".format(key))
        elif torch.is_tensor(val):
            # The former ``isinstance(val, Variable)`` guard is gone: since
            # torch 0.4 it is true for every tensor, so it rejected all
            # tensor state instead of moving it.
            try:
                if device_id == -1:
                    state[key] = val.cpu()
                else:
                    state[key] = val.npu(device=device_id)
            except Exception:
                # Best effort, as before: a tensor that cannot be moved
                # (e.g. no NPU support in this build) stays where it is.
                pass
|
||||
|
||||
|
||||
def load_state_dict(model, src_state_dict):
    """Copy matching entries of ``src_state_dict`` into ``model``.

    Arguments:
        model: A torch.nn.Module object
        src_state_dict: a dict containing parameters and persistent buffers

    Entries whose name is unknown to the model are skipped, a failed copy
    only prints a warning, and the names present on one side only are
    printed at the end.
    """
    from torch.nn import Parameter

    target = model.state_dict()
    for key, value in src_state_dict.items():
        if key in target:
            tensor = value.data if isinstance(value, Parameter) else value
            try:
                target[key].copy_(tensor)
            except Exception:
                print("Warning: Error occurs when copying '{}'".format(key))

    only_in_model = set(target.keys()) - set(src_state_dict.keys())
    if only_in_model:
        print ("Keys not found in source state_dict: ")
        for name in only_in_model:
            print('\t', name)

    only_in_source = set(src_state_dict.keys()) - set(target.keys())
    if only_in_source:
        print ("Keys not found in destination state_dict: ")
        for name in only_in_source:
            print('\t', name)
|
||||
|
||||
def load_ckpt(modules_optims, ckpt_file, load_to_cpu=True, verbose=True):
    """Restore modules/optimizers from a checkpoint file.

    Args:
        modules_optims: objects with ``load_state_dict``; paired in order
            with the checkpoint's ``'state_dicts'`` list.
        ckpt_file: the check point file
        load_to_cpu: Boolean, whether to map stored tensors to the cpu
        verbose: print a resume message when True.

    Returns:
        ``(epoch, scores)`` as stored under ``'ep'`` and ``'scores'``.
    """
    if load_to_cpu:
        map_location = lambda storage, loc: storage
    else:
        map_location = None
    # NOTE(review): torch.load unpickles the file; only load trusted files.
    ckpt = torch.load(ckpt_file, map_location=map_location)
    for target, saved_state in zip(modules_optims, ckpt['state_dicts']):
        target.load_state_dict(saved_state)
    epoch, scores = ckpt['ep'], ckpt['scores']
    if verbose:
        print("Resume from ckpt {}, \nepoch: {}, scores: {}".format(
            ckpt_file, epoch, scores))
    return epoch, scores
|
||||
|
||||
def save_ckpt(modules_optims, ep, scores, ckpt_file):
    """Save the state_dict of modules/optimizers to a file.

    Args:
        modules_optims: objects with ``state_dict()`` (modules/optimizers).
        ep: the current epoch number
        scores: the performance of current module
        ckpt_file: the check point file path; missing parent directories
            are created.

    Note:
        torch.save() reserves device type and id of tensors to save.
        So when loading ckpt, you have to inform torch.load() to load these
        tensors to cpu or your desired gpu, if you change devices.
    """
    state_dicts = [m.state_dict() for m in modules_optims]
    ckpt = dict(state_dicts=state_dicts,
                ep=ep,
                scores=scores)
    # makedirs (not mkdir) so nested output directories work, and exist_ok
    # so parallel processes creating the same directory do not collide.
    os.makedirs(os.path.dirname(os.path.abspath(ckpt_file)), exist_ok=True)
    torch.save(ckpt, ckpt_file)
|
||||
|
||||
def adjust_lr_staircase(param_groups, base_lrs, ep, decay_at_epochs, factor):
    """Apply a staircase decay to the learning rate of each param group.

    Nothing happens unless ``ep`` is one of ``decay_at_epochs``. When it is,
    every group's ``'lr'`` is set to ``base_lr * factor ** (k + 1)``, where
    ``k`` is the value returned by ``find_index(decay_at_epochs, ep)``
    (presumably the position of ``ep`` in ``decay_at_epochs`` -- confirm
    against ``find_index``).

    Args:
        param_groups: a list of param-group dicts; their ``'lr'`` is updated
        base_lrs: starting learning rates, len(base_lrs) == len(param_groups)
        ep: current epoch, ep >= 1
        decay_at_epochs: a list or tuple of epochs at whose beginning the
            learning rates are decayed
        factor: the multiplicative decay, a number in range (0, 1)
    Example:
        base_lrs = [0.1, 0.01]
        decay_at_epochs = [51, 101]
        factor = 0.1
    Note:
        Meant to be called at the beginning of an epoch.
    """
    assert len(base_lrs) == len(param_groups), \
        'You should specify base lr for each param group.'
    assert ep >= 1, "Current epoch number should be >= 1"

    if ep in decay_at_epochs:
        exponent = find_index(decay_at_epochs, ep) + 1
        for group_no, (group, start_lr) in enumerate(zip(param_groups, base_lrs)):
            group['lr'] = start_lr * factor ** exponent
            message = '=====> Param group {}: lr adjusted to {:.10f}'.format(
                group_no, group['lr'])
            # Trailing zeros of the formatted rate are trimmed for readability.
            print(message.rstrip('0'))
|
||||
|
||||
def adjust_lr(optimizer, ep, finetuned_params_lr):
    """Set one exponentially decayed learning rate on every param group.

    The rate is ``finetuned_params_lr * 0.96 ** (ep // 8)``, i.e. the initial
    rate shrinks by 4% after every 8 epochs.

    Args:
        optimizer: object whose ``param_groups`` dicts get their ``'lr'`` set
        ep: current epoch number
        finetuned_params_lr: the initial learning rate
    """
    completed_decays = ep // 8
    decayed_lr = finetuned_params_lr * (0.96 ** completed_decays)
    for group in optimizer.param_groups:
        group['lr'] = decayed_lr
|
||||
|
||||
def may_set_mode(maybe_modules, mode):
    """Switch every ``torch.nn.Module`` among the given objects to a mode.

    Args:
        maybe_modules: an object or a list of objects; anything that is not a
            ``torch.nn.Module`` (e.g. an optimizer) is ignored
        mode: either 'train' or 'eval'
    """
    assert mode in ['train', 'eval']
    # A single object is treated as a one-element list.
    candidates = maybe_modules if is_iterable(maybe_modules) else [maybe_modules]
    for candidate in candidates:
        if not isinstance(candidate, torch.nn.Module):
            continue
        if mode == 'train':
            candidate.train()
        else:
            candidate.eval()
|
||||
BIN
Binary file not shown.
|
After Width: | Height: | Size: 8.4 KiB |
@@ -0,0 +1,141 @@
|
||||
import sys
|
||||
import os
|
||||
import numpy as np
|
||||
import random
|
||||
import math
|
||||
|
||||
import torch
|
||||
import torchvision.transforms as transforms
|
||||
from torch.autograd import Variable
|
||||
from torch.nn.parallel import DataParallel
|
||||
import pickle
|
||||
import time
|
||||
import argparse
|
||||
from PIL import Image, ImageFont, ImageDraw
|
||||
|
||||
from baseline.model.DeepMAR import DeepMAR_ResNet50
|
||||
from baseline.utils.utils import str2bool
|
||||
from baseline.utils.utils import save_ckpt, load_ckpt
|
||||
from baseline.utils.utils import load_state_dict
|
||||
from baseline.utils.utils import set_devices
|
||||
from baseline.utils.utils import set_seed
|
||||
|
||||
|
||||
class Config(object):
    """Command-line configuration for the single-image attribute demo.

    Parses ``sys.argv`` on construction and loads the selected dataset pickle
    to obtain the attribute names shown in the demo output.

    Raises:
        ValueError: if model weights are requested without a weight file, or
            the dataset name has no known pickle path.
    """

    def __init__(self):
        parser = argparse.ArgumentParser()
        # NOTE(review): type=eval executes whatever text is passed on the
        # command line; only run this script with trusted arguments.
        parser.add_argument('-d', '--sys_device_ids', type=eval, default=(0,))
        parser.add_argument('--set_seed', type=str2bool, default=False)
        # model
        parser.add_argument('--resize', type=eval, default=(224, 224))
        parser.add_argument('--last_conv_stride', type=int, default=2, choices=[1, 2])
        # demo image
        parser.add_argument('--demo_image', type=str,
                            default='./dataset/demo/demo_image.png')
        # dataset parameter
        parser.add_argument('--dataset', type=str, default='peta',
                            choices=['peta', 'rap', 'pa100k'])
        # utils
        parser.add_argument('--load_model_weight', type=str2bool, default=True)
        parser.add_argument('--model_weight_file', type=str,
                            default='./exp/deepmar_resnet50/peta/partition0/run1/model/ckpt_epoch150.pth')
        args = parser.parse_args()

        # gpu ids
        self.sys_device_ids = args.sys_device_ids

        # random: a fixed seed of 0 when seeding is requested
        self.set_seed = args.set_seed
        self.rand_seed = 0 if self.set_seed else None

        # image preprocessing
        self.resize = args.resize
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]

        # utils
        self.load_model_weight = args.load_model_weight
        self.model_weight_file = args.model_weight_file
        if self.load_model_weight and self.model_weight_file == '':
            raise ValueError(
                'Please input the model_weight_file if you want to load model weight')

        # dataset
        datasets = {
            'peta': './dataset/peta/peta_dataset.pkl',
            'rap': './dataset/rap/rap_dataset.pkl',
            'pa100k': './dataset/pa100k/pa100k_dataset.pkl',
        }
        if args.dataset not in datasets:
            raise ValueError('%s does not exist.' % (args.dataset))
        # Pickle data is binary: open in 'rb' mode and close the file promptly.
        with open(datasets[args.dataset], 'rb') as dataset_file:
            dataset = pickle.load(dataset_file)
        # Names of the attributes selected for recognition, in model output order
        # (assumes the model was trained on the same selection -- confirm).
        self.att_list = [dataset['att_name'][i] for i in dataset['selected_attribute']]

        # demo image
        self.demo_image = args.demo_image

        # model
        self.model_kwargs = {
            'num_att': len(self.att_list),
            'last_conv_stride': args.last_conv_stride,
        }
|
||||
|
||||
### main function ###
# Demo script body: load the model, score one image, then print the attributes
# and draw them on a copy of the image.
cfg = Config()

# dump the configuration to log.
import pprint
print('-' * 60)
print('cfg.__dict__')
pprint.pprint(cfg.__dict__)
print('-' * 60)

# set the random seed
if cfg.set_seed:
    set_seed(cfg.rand_seed)
# init the gpu ids
set_devices(cfg.sys_device_ids)

# dataset
normalize = transforms.Normalize(mean=cfg.mean, std=cfg.std)
test_transform = transforms.Compose([
    transforms.Resize(cfg.resize),
    transforms.ToTensor(),
    normalize,
])

### Att model ###
model = DeepMAR_ResNet50(**cfg.model_kwargs)

# load model weight if necessary
if cfg.load_model_weight:
    # Keep deserialized storages on CPU; the model is moved to GPU below.
    map_location = (lambda storage, loc: storage)
    ckpt = torch.load(cfg.model_weight_file, map_location=map_location)
    model.load_state_dict(ckpt['state_dicts'][0])

model.cuda()
model.eval()

# load one image
# Force 3-channel RGB: PNG inputs may be RGBA, palette or grayscale, which
# would not match the 3-channel normalization (nor the RGB text colour below).
img = Image.open(cfg.demo_image).convert('RGB')
img_trans = test_transform(img)
img_trans = torch.unsqueeze(img_trans, dim=0)
img_var = Variable(img_trans).cuda()
score = model(img_var).data.cpu().numpy()

# Attributes whose score is non-negative are reported as present.
positive_texts = [
    '%s: %.2f' % (att_name, score[0, idx])
    for idx, att_name in enumerate(cfg.att_list)
    if score[0, idx] >= 0
]

# show the score in command line
for txt in positive_texts:
    print(txt)

# show the score in the image, one line of text every 10 pixels
img = img.resize(size=(256, 512), resample=Image.BILINEAR)
draw = ImageDraw.Draw(img)
for positive_cnt, txt in enumerate(positive_texts):
    draw.text((10, 10 + 10 * positive_cnt), txt, (255, 0, 0))
positive_cnt = len(positive_texts)
img.save('./dataset/demo/demo_image_result.png')
|
||||
+483
@@ -0,0 +1,483 @@
|
||||
import sys
|
||||
import os
|
||||
import numpy as np
|
||||
import random
|
||||
import math
|
||||
|
||||
import torch
|
||||
import torch.optim as optim
|
||||
import torchvision.transforms as transforms
|
||||
import torch.nn.functional as F
|
||||
import torch.backends.cudnn as cudnn
|
||||
from torch.autograd import Variable
|
||||
from torch.nn.parallel import DataParallel
|
||||
import pickle
|
||||
import time
|
||||
import argparse
|
||||
import pdb
|
||||
import sys
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
|
||||
from baseline.dataset import add_transforms
|
||||
from baseline.dataset.Dataset import AttDataset
|
||||
from baseline.model.DeepMAR import DeepMAR_ResNet50
|
||||
from baseline.model.DeepMAR import DeepMAR_ResNet50_ExtractFeature
|
||||
from baseline.utils.evaluate import attribute_evaluate
|
||||
from baseline.utils.utils import str2bool
|
||||
from baseline.utils.utils import transfer_optim_state
|
||||
from baseline.utils.utils import time_str
|
||||
from baseline.utils.utils import save_ckpt, load_ckpt
|
||||
from baseline.utils.utils import load_state_dict
|
||||
from baseline.utils.utils import ReDirectSTD
|
||||
from baseline.utils.utils import adjust_lr_staircase
|
||||
from baseline.utils.utils import adjust_lr
|
||||
from baseline.utils.utils import set_devices
|
||||
from baseline.utils.utils import AverageMeter
|
||||
from baseline.utils.utils import to_scalar
|
||||
from baseline.utils.utils import may_set_mode
|
||||
from baseline.utils.utils import may_mkdir
|
||||
from baseline.utils.utils import set_seed
|
||||
from baseline.utils.utils import seed_everything
|
||||
|
||||
# Apex
|
||||
import numpy as np
|
||||
from apex import amp
|
||||
import torch.npu
|
||||
|
||||
from benchmark_log import hwlog
|
||||
from benchmark_log.basic_utils import get_environment_info
|
||||
from benchmark_log.basic_utils import get_model_parameter
|
||||
|
||||
# Benchmark logging preamble: point hwlog at this script's directory and record
# environment, configuration and nominal hyper-parameters as remarks.
hwlog.ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
cpu_info, npu_info, framework_info, os_info, benchmark_version = \
    get_environment_info("pytorch")
config_info = get_model_parameter("pytorch_config")
# NOTE(review): these are fixed values, not read from the parsed arguments.
initinal_data = {
    "base_lr": 0.1,
    "dataset": "imagenet",
    "optimizer": "SGD",
    "loss_scale": 1024,
}
for _remark_key, _remark_value in (
        (hwlog.CPU_INFO, cpu_info),
        (hwlog.NPU_INFO, npu_info),
        (hwlog.OS_INFO, os_info),
        (hwlog.FRAMEWORK_INFO, framework_info),
        (hwlog.BENCHMARK_VERSION, benchmark_version),
        (hwlog.CONFIG_INFO, config_info),
        (hwlog.BASE_LR, initinal_data.get("base_lr")),
        (hwlog.DATASET, initinal_data.get("dataset")),
        (hwlog.OPT_NAME, initinal_data.get("optimizer")),
        (hwlog.LOSS_SCALE, initinal_data.get("loss_scale")),
):
    hwlog.remark_print(key=_remark_key, value=_remark_value)

# Default devices; CALCULATE_DEVICE is reassigned from --npu once args are parsed.
CALCULATE_DEVICE = "npu:7"
PRINT_DEVICE = "cpu"
|
||||
|
||||
class Config(object):
    """Command-line configuration for single-NPU DeepMAR training.

    Parses ``sys.argv`` on construction, copies the options onto attributes,
    derives the experiment directory and log file paths, and creates the log
    directory via ``may_mkdir``.

    Raises:
        ValueError: if the dataset has no configured pickle paths, or if
            resuming / weight loading is requested without the matching file.
    """

    def __init__(self):
        parser = argparse.ArgumentParser()
        parser.add_argument('--npu', type=int, default=0, help='NPU id to use.')
        parser.add_argument('--set_seed', type=str2bool, default=False)
        # dataset parameter
        parser.add_argument('--dataset', type=str, default='peta',
                            choices=['peta', 'rap', 'pa100k', 'rap2'])
        parser.add_argument('--save_dir', type=str, default='/home/zhusiyi/dataset/peta/')
        parser.add_argument('--split', type=str, default='trainval',
                            choices=['trainval', 'train'])
        parser.add_argument('--test_split', type=str, default='test')
        parser.add_argument('--partition_idx', type=int, default=0)
        # NOTE(review): type=eval executes whatever text is passed on the
        # command line; only run this script with trusted arguments.
        parser.add_argument('--resize', type=eval, default=(224, 224))
        parser.add_argument('--mirror', type=str2bool, default=True)
        parser.add_argument('--batch_size', type=int, default=32)
        parser.add_argument('--workers', type=int, default=2)
        # model
        parser.add_argument('--num_att', type=int, default=35)
        # NOTE(review): --pretrained is parsed but not used by this class.
        parser.add_argument('--pretrained', type=str2bool, default=True)
        parser.add_argument('--last_conv_stride', type=int, default=2, choices=[1, 2])
        parser.add_argument('--drop_pool5', type=str2bool, default=True)
        parser.add_argument('--drop_pool5_rate', type=float, default=0.5)
        # optimization
        parser.add_argument('--sgd_weight_decay', type=float, default=0.0005)
        parser.add_argument('--sgd_momentum', type=float, default=0.9)
        parser.add_argument('--new_params_lr', type=float, default=0.001)
        parser.add_argument('--finetuned_params_lr', type=float, default=0.001)
        parser.add_argument('--staircase_decay_at_epochs', type=eval,
                            default=(51, ))
        parser.add_argument('--staircase_decay_multiple_factor', type=float,
                            default=0.1)
        parser.add_argument('--total_epochs', type=int, default=150)
        parser.add_argument('--weighted_entropy', type=str2bool, default=True)
        # utils
        parser.add_argument('--resume', type=str2bool, default=False)
        parser.add_argument('--ckpt_file', type=str, default='')
        parser.add_argument('--load_model_weight', type=str2bool, default=False)
        parser.add_argument('--model_weight_file', type=str, default='')
        parser.add_argument('--test_only', type=str2bool, default=False)
        parser.add_argument('--exp_dir', type=str, default='')
        parser.add_argument('--exp_subpath', type=str, default='deepmar_resnet50')
        parser.add_argument('--log_to_file', type=str2bool, default=True)
        parser.add_argument('--steps_per_log', type=int, default=20)
        parser.add_argument('--epochs_per_val', type=int, default=10)
        parser.add_argument('--epochs_per_save', type=int, default=50)
        parser.add_argument('--run', type=int, default=1)
        # apex
        parser.add_argument('--amp', default=False, action='store_true',
                            help='use amp to train the model')
        parser.add_argument('--loss_scale', default=-1., type=float,
                            help='loss scale using in amp, default -1 means dynamic')
        parser.add_argument('--opt_level', default='O1', type=str,
                            help='opt level using in amp, default O1 means FP16')
        args = parser.parse_args()

        # npu id
        self.npu = args.npu
        # random: a fixed seed of 0 when seeding is requested
        self.set_seed = args.set_seed
        self.seed = 0 if self.set_seed else None
        # amp
        self.amp = args.amp
        self.loss_scale = args.loss_scale
        self.opt_level = args.opt_level
        # run time index
        self.run = args.run

        # Dataset: only 'peta' has pickle paths configured here, so the other
        # names accepted by --dataset are rejected below.
        datasets = {'peta': os.path.join(args.save_dir, 'peta_dataset.pkl')}
        partitions = {'peta': os.path.join(args.save_dir, 'peta_partition.pkl')}

        self.dataset_name = args.dataset
        if args.dataset not in datasets or args.dataset not in partitions:
            raise ValueError("Please select the right dataset name.")
        self.dataset = datasets[args.dataset]
        self.partition = partitions[args.dataset]
        self.partition_idx = args.partition_idx
        self.split = args.split
        self.test_split = args.test_split
        self.resize = args.resize
        self.mirror = args.mirror
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]
        self.batch_size = args.batch_size
        self.workers = args.workers

        # optimization
        self.sgd_momentum = args.sgd_momentum
        self.sgd_weight_decay = args.sgd_weight_decay
        self.new_params_lr = args.new_params_lr
        self.finetuned_params_lr = args.finetuned_params_lr
        self.staircase_decay_at_epochs = args.staircase_decay_at_epochs
        self.staircase_decay_multiple_factor = args.staircase_decay_multiple_factor
        self.total_epochs = args.total_epochs
        self.weighted_entropy = args.weighted_entropy

        # utils
        self.resume = args.resume
        self.ckpt_file = args.ckpt_file
        if self.resume and self.ckpt_file == '':
            raise ValueError(
                'Please input the ckpt_file if you want to resume training')
        self.load_model_weight = args.load_model_weight
        self.model_weight_file = args.model_weight_file
        if self.load_model_weight and self.model_weight_file == '':
            raise ValueError(
                'Please input the model_weight_file if you want to load model weight')
        self.test_only = args.test_only
        self.exp_dir = args.exp_dir
        self.exp_subpath = args.exp_subpath
        self.log_to_file = args.log_to_file
        self.steps_per_log = args.steps_per_log
        self.epochs_per_val = args.epochs_per_val
        self.epochs_per_save = args.epochs_per_save

        # for model
        self.model_kwargs = {
            'num_att': args.num_att,
            'last_conv_stride': args.last_conv_stride,
            'drop_pool5': args.drop_pool5,
            'drop_pool5_rate': args.drop_pool5_rate,
        }
        # for evaluation
        self.test_kwargs = dict()

        # Default experiment directory: exp/<subpath>/<dataset>/partition<i>/run<n>
        if self.exp_dir == '':
            self.exp_dir = os.path.join(
                'exp',
                '{}'.format(self.exp_subpath),
                '{}'.format(self.dataset_name),
                'partition{}'.format(self.partition_idx),
                'run{}'.format(self.run))
        # One timestamp for both files so the stdout/stderr logs of a run pair up.
        log_stamp = time_str()
        self.stdout_file = os.path.join(
            self.exp_dir, 'log', 'stdout_{}.txt'.format(log_stamp))
        self.stderr_file = os.path.join(
            self.exp_dir, 'log', 'stderr_{}.txt'.format(log_stamp))
        may_mkdir(self.stdout_file)
|
||||
|
||||
### main function ###
# Training setup: configuration, logging, data pipeline, model, optimizer,
# loss weights and optional checkpoint restore.
cfg = Config()

# log
if cfg.log_to_file:
    ReDirectSTD(cfg.stdout_file, 'stdout', False)
    ReDirectSTD(cfg.stderr_file, 'stderr', False)

# dump the configuration to log.
import pprint
print('-' * 60)
print('cfg.__dict__')
pprint.pprint(cfg.__dict__)
print('-' * 60)

# set the random seed
print(cfg.seed)
if cfg.set_seed:
    set_seed(cfg.seed)
    seed_everything(cfg.seed)

# init the npu ids
CALCULATE_DEVICE = "npu:{}".format(cfg.npu)
torch.npu.set_device(CALCULATE_DEVICE)

# dataset
normalize = transforms.Normalize(mean=cfg.mean, std=cfg.std)
train_transform_steps = [transforms.Resize(cfg.resize)]
if cfg.mirror:
    # Horizontal-flip augmentation is applied only when --mirror is enabled.
    train_transform_steps.append(transforms.RandomHorizontalFlip())
train_transform_steps.append(transforms.ToTensor())  # 3*H*W, [0, 1]
train_transform_steps.append(normalize)  # normalize with mean/std
transform = transforms.Compose(train_transform_steps)

# by a subset of attributes
train_set = AttDataset(
    dataset=cfg.dataset,
    partition=cfg.partition,
    split=cfg.split,
    partition_idx=cfg.partition_idx,
    transform=transform)

# The number of outputs follows the dataset's selected attributes and
# overrides --num_att.
num_att = len(train_set.dataset['selected_attribute'])
cfg.model_kwargs['num_att'] = num_att

train_loader = torch.utils.data.DataLoader(
    dataset=train_set,
    batch_size=cfg.batch_size,
    shuffle=True,
    num_workers=cfg.workers,
    pin_memory=True,
    drop_last=True)

test_transform = transforms.Compose([
    transforms.Resize(cfg.resize),
    transforms.ToTensor(),
    normalize,
])

test_set = AttDataset(
    dataset=cfg.dataset,
    partition=cfg.partition,
    split=cfg.test_split,
    partition_idx=cfg.partition_idx,
    transform=test_transform)

### Att model ###
model = DeepMAR_ResNet50(**cfg.model_kwargs)

# Optimizer: parameters whose name contains 'classifier' form their own group
# so they can use a separate learning rate from the remaining parameters.
finetuned_params = []
new_params = []
for n, p in model.named_parameters():
    if 'classifier' in n:
        new_params.append(p)
    else:
        finetuned_params.append(p)
param_groups = [{'params': finetuned_params, 'lr': cfg.finetuned_params_lr},
                {'params': new_params, 'lr': cfg.new_params_lr}]

optimizer = optim.SGD(
    param_groups,
    momentum=cfg.sgd_momentum,
    weight_decay=cfg.sgd_weight_decay)

model = model.to(CALCULATE_DEVICE)
# apex
if cfg.amp:
    # Initialization
    model, optimizer = amp.initialize(
        model, optimizer, opt_level=cfg.opt_level, loss_scale=cfg.loss_scale)
    print("=> Using amp mode.")

# using the weighted cross entropy loss
if cfg.weighted_entropy:
    rate = np.array(train_set.partition['weight_' + cfg.split][cfg.partition_idx])
    rate = rate[train_set.dataset['selected_attribute']].tolist()
else:
    rate = None

# compute the weight of positive and negative
if rate is None:
    weight_pos = [1] * num_att
    weight_neg = [1] * num_att
else:
    if len(rate) != num_att:
        raise ValueError("the length of rate should be equal to %d" % (num_att))
    # Positive labels are weighted exp(1 - rate), negative labels exp(rate).
    weight_pos = [math.exp(1.0 - v) for v in rate]
    weight_neg = [math.exp(v) for v in rate]

# bind the model and optimizer
modules_optims = [model, optimizer]

# load model weight if necessary
if cfg.load_model_weight:
    map_location = (lambda storage, loc: storage)
    ckpt = torch.load(cfg.model_weight_file, map_location=map_location)
    # strict=False: keys missing from or unexpected in the checkpoint are tolerated.
    model.load_state_dict(ckpt['state_dicts'][0], strict=False)

### Resume or not ###
if cfg.resume:
    # restore the model, optimizer, epoch
    start_epoch, scores = load_ckpt(modules_optims, cfg.ckpt_file)
else:
    start_epoch = 0

# modules_optims keeps referring to the unwrapped model object.
model = torch.nn.DataParallel(model)
transfer_optim_state(state=optimizer.state, device_id=cfg.npu)

# for evaluation
feat_func_att = DeepMAR_ResNet50_ExtractFeature(model=model)
|
||||
|
||||
def attribute_evaluate_subfunc(feat_func, test_set, device_id, **test_kwargs):
    """Evaluate attribute recognition, print a report and log the metrics.

    Args:
        feat_func: feature/score extractor forwarded to ``attribute_evaluate``
        test_set: dataset to evaluate on
        device_id: device forwarded to ``attribute_evaluate``
        **test_kwargs: extra keyword arguments forwarded unchanged
    Returns:
        The instance-based accuracy (``result['instance_acc']``).
    """
    result = attribute_evaluate(feat_func, test_set, device_id, **test_kwargs)
    print('-' * 60)
    print('Evaluation on %s set:' % (cfg.test_split))
    print('Label-based evaluation: \n mA: %.4f' % (np.mean(result['label_acc'])))
    print('Instance-based evaluation: \n Acc: %.4f, Prec: %.4f, Rec: %.4f, F1: %.4f'
          % (result['instance_acc'], result['instance_precision'],
             result['instance_recall'], result['instance_F1']))
    print('-' * 60)
    hwlog.remark_print(key=hwlog.ACC, value="{:.4f}".format(result['instance_acc']))
    hwlog.remark_print(key=hwlog.PREC, value="{:.4f}".format(result['instance_precision']))
    hwlog.remark_print(key=hwlog.REC, value="{:.4f}".format(result['instance_recall']))
    # The F1 entry must report the F1 score, not recall.
    hwlog.remark_print(key=hwlog.F1, value="{:.4f}".format(result['instance_F1']))
    return result['instance_acc']
|
||||
|
||||
|
||||
# print the model into log
|
||||
# test only: evaluate once on the test split and exit without training
if cfg.test_only:
    print('test with feat_func_att')
    # device_id is a required positional argument of attribute_evaluate_subfunc.
    attribute_evaluate_subfunc(feat_func_att, test_set, CALCULATE_DEVICE, **cfg.test_kwargs)
    sys.exit(0)
|
||||
|
||||
|
||||
# writer = SummaryWriter(os.path.join('runs/deepmar', str(cfg.npu)))
|
||||
|
||||
# training
# Per-attribute loss weights as tensors, built once so each batch can be
# weighted with tensor operations instead of a Python loop per element.
weight_pos_t = torch.tensor(weight_pos, dtype=torch.float32)
weight_neg_t = torch.tensor(weight_neg, dtype=torch.float32)
steps_per_epoch = len(train_loader)

for epoch in range(start_epoch, cfg.total_epochs):
    if cfg.seed is not None:
        # Use a different, still reproducible, seed for every epoch.
        cfg.seed += 1
        seed_everything(cfg.seed)

    # adjust the learning rate
    adjust_lr_staircase(
        optimizer.param_groups,
        [cfg.finetuned_params_lr, cfg.new_params_lr],
        epoch + 1,
        cfg.staircase_decay_at_epochs,
        cfg.staircase_decay_multiple_factor)

    may_set_mode(modules_optims, 'train')
    # recording loss
    loss_meter = AverageMeter()
    dataset_L = steps_per_epoch  # number of batches per epoch
    ep_st = time.time()
    ep_st_mark = ep_st

    # running every batch data
    for step, (imgs, targets) in enumerate(train_loader):
        step_st = time.time()
        # measure data loading time
        data_time = step_st - ep_st

        # compute the weight: weight_pos[j] where the label is 1,
        # weight_neg[j] where it is -1, and 0 for any other label value
        weights = ((targets == 1).to(torch.float32) * weight_pos_t
                   + (targets == -1).to(torch.float32) * weight_neg_t)

        # BCE expects targets in {0, 1}: map the negative label -1 to 0
        targets[targets == -1] = 0
        targets_var = targets.to(CALCULATE_DEVICE)
        imgs_var = imgs.to(CALCULATE_DEVICE)
        weights = weights.to(CALCULATE_DEVICE)
        score = model(imgs_var)

        criterion = torch.nn.BCEWithLogitsLoss(weight=weights).to(CALCULATE_DEVICE)
        loss = criterion(score, targets_var) * num_att
        optimizer.zero_grad()
        if cfg.amp:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        optimizer.step()

        ############
        # step log #
        ############
        loss_meter.update(to_scalar(loss))
        # time for the whole step, including data loading
        batch_time = time.time() - ep_st
        ep_st = time.time()
        fps = cfg.batch_size / batch_time

        if (step + 1) % cfg.steps_per_log == 0 or (step + 1) % steps_per_epoch == 0:
            log = '{}, Step {}/{} in Ep {}, {:.2f}s, datatime:{:.6f}, batchtime:{:.6f}, FPS:{:.2f}, loss:{:.4f}'.format(
                time_str(), step + 1, dataset_L, epoch + 1, time.time() - step_st,
                data_time, batch_time, fps, loss_meter.val)
            print(log)
            hwlog.remark_print(key=hwlog.FPS, value='{:.2f}'.format(fps))

    ##############
    # epoch log #
    ##############
    epoch_time = time.time() - ep_st_mark
    log = 'Ep{}, {:.2f}s, loss {:.4f}'.format(
        epoch + 1, epoch_time, loss_meter.avg)
    print(log)

    # model ckpt
    if (epoch + 1) % cfg.epochs_per_save == 0 or epoch + 1 == cfg.total_epochs:
        ckpt_file = os.path.join(cfg.exp_dir, 'model', 'ckpt_epoch%d.pth' % (epoch + 1))
        # The scores slot of the checkpoint is always written as 0 here.
        save_ckpt(modules_optims, epoch + 1, 0, ckpt_file)

    ##########################
    # test on validation set #
    ##########################
    if (epoch + 1) % cfg.epochs_per_val == 0 or epoch + 1 == cfg.total_epochs:
        print('att test with feat_func_att')
        res = attribute_evaluate_subfunc(feat_func_att, test_set, CALCULATE_DEVICE, **cfg.test_kwargs)
|
||||
+587
@@ -0,0 +1,587 @@
|
||||
import os
|
||||
import random
|
||||
import math
|
||||
|
||||
import torch
|
||||
import torch.optim as optim
|
||||
import torchvision.transforms as transforms
|
||||
import torch.nn.functional as F
|
||||
import torch.backends.cudnn as cudnn
|
||||
from torch.autograd import Variable
|
||||
from torch.nn.parallel import DataParallel
|
||||
import pickle
|
||||
import time
|
||||
import argparse
|
||||
import pdb
|
||||
import sys
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
|
||||
from baseline.dataset import add_transforms
|
||||
from baseline.dataset.Dataset import AttDataset
|
||||
from baseline.model.DeepMAR import DeepMAR_ResNet50
|
||||
from baseline.model.DeepMAR import DeepMAR_ResNet50_ExtractFeature
|
||||
from baseline.utils.evaluate import attribute_evaluate
|
||||
from baseline.utils.utils import str2bool
|
||||
from baseline.utils.utils import transfer_optim_state
|
||||
from baseline.utils.utils import time_str
|
||||
from baseline.utils.utils import save_ckpt, load_ckpt
|
||||
from baseline.utils.utils import load_state_dict
|
||||
from baseline.utils.utils import ReDirectSTD
|
||||
from baseline.utils.utils import adjust_lr_staircase
|
||||
from baseline.utils.utils import adjust_lr
|
||||
from baseline.utils.utils import set_devices
|
||||
from baseline.utils.utils import AverageMeter
|
||||
from baseline.utils.utils import to_scalar
|
||||
from baseline.utils.utils import may_set_mode
|
||||
from baseline.utils.utils import may_mkdir
|
||||
from baseline.utils.utils import set_seed
|
||||
from baseline.utils.utils import seed_everything
|
||||
import torch.distributed as dist
|
||||
import torch.multiprocessing as mp
|
||||
import torch.utils.data.distributed
|
||||
|
||||
# Apex
|
||||
import numpy as np
|
||||
from apex import amp
|
||||
import torch.npu
|
||||
|
||||
from benchmark_log import hwlog
|
||||
from benchmark_log.basic_utils import get_environment_info
|
||||
from benchmark_log.basic_utils import get_model_parameter
|
||||
|
||||
|
||||
class Config(object):
    """Training configuration built from the command line.

    Instantiating the class parses ``sys.argv`` and copies the parsed values
    onto the instance (dataset paths, optimisation hyper-parameters, amp and
    distributed settings, logging/checkpoint options).  It also derives the
    experiment directory and the stdout/stderr log file names, and calls
    ``may_mkdir`` on the stdout log path.

    Raises:
        ValueError: if the selected dataset has no registered pickle files,
            if ``--resume`` is set without ``--ckpt_file``, or if
            ``--load_model_weight`` is set without ``--model_weight_file``.
    """

    def __init__(self):
        parser = argparse.ArgumentParser()
        # NOTE(security): ``type=eval`` evaluates the raw command-line string
        # as a Python expression (here and for --resize and
        # --staircase_decay_at_epochs).  Only run this script with trusted
        # arguments; consider ast.literal_eval if expressions are not needed.
        parser.add_argument('--sys_device_ids', type=eval, default=6)
        parser.add_argument('--npu', default=None, type=int, help='NPU id to use.')
        parser.add_argument('--set_seed', type=str2bool, default=False)
        # dataset parameters
        parser.add_argument('--dataset', type=str, default='peta',
                            choices=['peta', 'rap', 'pa100k', 'rap2'])
        parser.add_argument('--save_dir', type=str, default='/home/zhusiyi/dataset/peta/')
        parser.add_argument('--split', type=str, default='trainval',
                            choices=['trainval', 'train'])
        parser.add_argument('--test_split', type=str, default='test')
        parser.add_argument('--partition_idx', type=int, default=0)
        parser.add_argument('--resize', type=eval, default=(224, 224))
        parser.add_argument('--mirror', type=str2bool, default=True)
        parser.add_argument('--batch_size', type=int, default=32)
        parser.add_argument('--workers', type=int, default=2)
        # model
        parser.add_argument('--num_att', type=int, default=35)
        parser.add_argument('--pretrained', type=str2bool, default=True)
        parser.add_argument('--last_conv_stride', type=int, default=2, choices=[1, 2])
        parser.add_argument('--drop_pool5', type=str2bool, default=True)
        parser.add_argument('--drop_pool5_rate', type=float, default=0.5)
        # optimization
        parser.add_argument('--sgd_weight_decay', type=float, default=0.0005)
        parser.add_argument('--sgd_momentum', type=float, default=0.9)
        parser.add_argument('--new_params_lr', type=float, default=0.001)
        parser.add_argument('--finetuned_params_lr', type=float, default=0.001)
        parser.add_argument('--staircase_decay_at_epochs', type=eval,
                            default=(51, ))
        parser.add_argument('--staircase_decay_multiple_factor', type=float,
                            default=0.1)
        parser.add_argument('--total_epochs', type=int, default=150)
        parser.add_argument('--weighted_entropy', type=str2bool, default=True)
        # utils
        parser.add_argument('--resume', type=str2bool, default=False)
        parser.add_argument('--ckpt_file', type=str, default='')
        parser.add_argument('--load_model_weight', type=str2bool, default=False)
        parser.add_argument('--model_weight_file', type=str, default='')
        parser.add_argument('--test_only', type=str2bool, default=False)
        parser.add_argument('--exp_dir', type=str, default='')
        parser.add_argument('--exp_subpath', type=str, default='deepmar_resnet50')
        parser.add_argument('--log_to_file', type=str2bool, default=True)
        parser.add_argument('--steps_per_log', type=int, default=20)
        parser.add_argument('--epochs_per_val', type=int, default=10)
        parser.add_argument('--epochs_per_save', type=int, default=50)
        parser.add_argument('--run', type=int, default=1)
        # apex
        parser.add_argument('--amp', default=False, action='store_true',
                            help='use amp to train the model')
        parser.add_argument('--loss_scale', default=-1., type=float,
                            help='loss scale using in amp, default -1 means dynamic')
        parser.add_argument('--opt_level', default='O1', type=str,
                            help='opt level using in amp, default O1 means FP16')
        # distributed
        parser.add_argument('--addr', default='90.90.176.152', type=str,
                            help='master addr')
        parser.add_argument('--world_size', default=-1, type=int,
                            help='number of nodes for distributed training')
        parser.add_argument('--rank', default=-1, type=int,
                            help='node rank for distributed training')
        parser.add_argument('--dist_url', default='tcp://224.66.41.62:23456', type=str,
                            help='url used to set up distributed training')
        parser.add_argument('--dist_backend', default='nccl', type=str,
                            help='distributed backend')
        parser.add_argument('--multiprocessing_distributed', action='store_true',
                            help='Use multi-processing distributed training to launch '
                                 'N processes per node, which has N NPUs. This is the '
                                 'fastest way to use PyTorch for either single node or '
                                 'multi node data parallel training')
        parser.add_argument('--npus_per_node', default=None, type=int,
                            help='number of npus to use for distributed train on each node')

        args = parser.parse_args()

        # devices
        self.npus_per_node = args.npus_per_node
        self.npu = args.npu
        # random seed: fixed to 0 when seeding is requested, otherwise unset
        self.set_seed = args.set_seed
        self.seed = 0 if self.set_seed else None
        # amp
        self.amp = args.amp
        self.loss_scale = args.loss_scale
        self.opt_level = args.opt_level
        # run time index
        self.run = args.run

        # Dataset: only 'peta' has registered pickle files, although the
        # parser also accepts the other dataset names.
        datasets = {'peta': args.save_dir + '/peta_dataset.pkl'}
        partitions = {'peta': args.save_dir + '/peta_partition.pkl'}

        self.dataset_name = args.dataset
        if args.dataset not in datasets or args.dataset not in partitions:
            raise ValueError("Please select the right dataset name.")
        self.dataset = datasets[args.dataset]
        self.partition = partitions[args.dataset]
        self.partition_idx = args.partition_idx
        self.split = args.split
        self.test_split = args.test_split
        self.resize = args.resize
        self.mirror = args.mirror
        # per-channel normalisation constants handed to transforms.Normalize
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]
        self.batch_size = args.batch_size
        self.workers = args.workers

        # optimization
        self.sgd_momentum = args.sgd_momentum
        self.sgd_weight_decay = args.sgd_weight_decay
        self.new_params_lr = args.new_params_lr
        self.finetuned_params_lr = args.finetuned_params_lr
        self.staircase_decay_at_epochs = args.staircase_decay_at_epochs
        self.staircase_decay_multiple_factor = args.staircase_decay_multiple_factor
        self.total_epochs = args.total_epochs
        self.weighted_entropy = args.weighted_entropy

        # distributed
        self.addr = args.addr
        self.world_size = args.world_size
        self.rank = args.rank
        self.dist_url = args.dist_url
        self.dist_backend = args.dist_backend
        self.multiprocessing_distributed = args.multiprocessing_distributed

        # utils
        self.resume = args.resume
        self.ckpt_file = args.ckpt_file
        if self.resume and self.ckpt_file == '':
            raise ValueError(
                'Please input the ckpt_file if you want to resume training')
        self.load_model_weight = args.load_model_weight
        self.model_weight_file = args.model_weight_file
        if self.load_model_weight and self.model_weight_file == '':
            raise ValueError(
                'Please input the model_weight_file if you want to load model weight')
        self.test_only = args.test_only
        self.exp_dir = args.exp_dir
        self.exp_subpath = args.exp_subpath
        self.log_to_file = args.log_to_file
        self.steps_per_log = args.steps_per_log
        self.epochs_per_val = args.epochs_per_val
        self.epochs_per_save = args.epochs_per_save

        # keyword arguments forwarded to the model constructor
        self.model_kwargs = {
            'num_att': args.num_att,
            'last_conv_stride': args.last_conv_stride,
            'drop_pool5': args.drop_pool5,
            'drop_pool5_rate': args.drop_pool5_rate,
        }
        # keyword arguments forwarded to the evaluation routine
        self.test_kwargs = dict()

        if self.exp_dir == '':
            self.exp_dir = os.path.join(
                'exp',
                '{}'.format(self.exp_subpath),
                '{}'.format(self.dataset_name),
                'partition{}'.format(self.partition_idx),
                'run{}'.format(self.run))
        # Use one timestamp so both log files of a run share the same suffix.
        stamp = time_str()
        self.stdout_file = os.path.join(
            self.exp_dir, 'log', 'stdout_{}.txt'.format(stamp))
        self.stderr_file = os.path.join(
            self.exp_dir, 'log', 'stderr_{}.txt'.format(stamp))
        may_mkdir(self.stdout_file)
|
||||
|
||||
def main():
    """Build the configuration, prepare the environment and spawn the workers.

    Redirects stdout/stderr to the configured log files when requested, dumps
    the configuration, seeds the random generators when ``cfg.set_seed`` is
    set, exports ``KERNEL_NAME_ID``, ``MASTER_ADDR`` and ``MASTER_PORT``, and
    finally starts one ``main_worker`` process per NPU through ``mp.spawn``.

    Raises:
        ValueError: if multi-process training is requested without a positive
            ``--npus_per_node``.
    """
    cfg = Config()

    # log
    if cfg.log_to_file:
        ReDirectSTD(cfg.stdout_file, 'stdout', False)
        ReDirectSTD(cfg.stderr_file, 'stderr', False)

    # dump the configuration to log.
    import pprint
    print('-' * 60)
    print('cfg.__dict__')
    pprint.pprint(cfg.__dict__)
    print('-' * 60)

    os.environ['KERNEL_NAME_ID'] = str(0)
    print("+++++++++++++++++++++++++++KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID'])

    # set the random seed
    print(cfg.seed)
    if cfg.set_seed:
        set_seed(cfg.seed)
        seed_everything(cfg.seed)

    os.environ['MASTER_ADDR'] = cfg.addr
    os.environ['MASTER_PORT'] = '29501'
    if cfg.dist_url == "env://" and cfg.world_size == -1:
        cfg.world_size = int(os.environ["WORLD_SIZE"])

    if not cfg.multiprocessing_distributed:
        # Training is only started through mp.spawn below; say so instead of
        # exiting silently.
        print('--multiprocessing_distributed is not set; '
              'no training process is launched.')
        return

    npus_per_node = cfg.npus_per_node
    if npus_per_node is None or npus_per_node < 1:
        raise ValueError(
            '--npus_per_node must be a positive integer when '
            '--multiprocessing_distributed is used')

    # One process per NPU on every node, so the total world size is the
    # number of nodes times the NPUs per node.
    cfg.world_size = npus_per_node * cfg.world_size
    # mp.spawn passes the process index as the first argument of main_worker.
    mp.spawn(main_worker, nprocs=npus_per_node, args=(npus_per_node, cfg))
|
||||
|
||||
def _attribute_loss_weights(targets, weight_pos, weight_neg):
    """Return per-element loss weights for a 2-D batch of attribute labels.

    Elements equal to 1 get the matching entry of ``weight_pos``, elements
    equal to -1 get the matching entry of ``weight_neg``, anything else gets 0.
    ``weight_pos`` / ``weight_neg`` are 1-D tensors broadcast over the rows.
    """
    positive = (targets == 1).to(weight_pos.dtype)
    negative = (targets == -1).to(weight_neg.dtype)
    return positive * weight_pos + negative * weight_neg


def main_worker(npu, npus_per_node, cfg):
    """Train (or only evaluate) DeepMAR on one NPU as one distributed process.

    Args:
        npu: index of this process as passed by ``mp.spawn``; used as the NPU
            id and as the distributed rank.
        npus_per_node: number of processes/NPUs on this node; the configured
            batch size and worker count are divided by it.
        cfg: the ``Config`` instance; it is updated in place (``npu``, ``rank``,
            ``batch_size``, ``workers``, ``seed``, ``model_kwargs['num_att']``).

    Raises:
        ValueError: if the number of positive-rate weights does not match the
            number of selected attributes.
    """
    cfg.npu = npu
    print("[npu id:", npu, "]", "+++++++++++++++++++++++++++ before set KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID'])
    os.environ['KERNEL_NAME_ID'] = str(npu)

    print("[npu id:", npu, "]", "+++++++++++++++++++++++++++KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID'])

    if npu is not None:
        print("[npu id:", npu, "]", "Use NPU: {} for training".format(npu))

    if cfg.dist_url == "env://" and cfg.rank == -1:
        cfg.rank = int(os.environ["RANK"])
    if cfg.multiprocessing_distributed:
        # The local process index is used directly as the global rank.
        cfg.rank = npu
    print("rank:", cfg.rank)
    dist.init_process_group(backend=cfg.dist_backend,
                            world_size=cfg.world_size, rank=cfg.rank)

    CALCULATE_DEVICE = 'npu:{}'.format(npu)
    print(CALCULATE_DEVICE)
    torch.npu.set_device(CALCULATE_DEVICE)

    # With DistributedDataParallel the batch size has to be divided by hand
    # between the processes of the node.
    cfg.batch_size = int(cfg.batch_size / npus_per_node)
    cfg.workers = int((cfg.workers + npus_per_node - 1) / npus_per_node)
    print("batchsize:", cfg.batch_size)
    print("workers", cfg.workers)

    # dataset
    normalize = transforms.Normalize(mean=cfg.mean, std=cfg.std)
    transform = transforms.Compose([
        transforms.Resize(cfg.resize),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),  # 3*H*W, [0, 1]
        normalize,
    ])
    train_set = AttDataset(
        dataset=cfg.dataset,
        partition=cfg.partition,
        split=cfg.split,
        partition_idx=cfg.partition_idx,
        transform=transform)

    num_att = len(train_set.dataset['selected_attribute'])
    cfg.model_kwargs['num_att'] = num_att

    distributed = cfg.world_size > 1 or cfg.multiprocessing_distributed
    if distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_set)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        dataset=train_set,
        batch_size=cfg.batch_size,
        shuffle=(train_sampler is None),
        num_workers=cfg.workers,
        pin_memory=True,
        sampler=train_sampler,
        drop_last=True)

    test_transform = transforms.Compose([
        transforms.Resize(cfg.resize),
        transforms.ToTensor(),
        normalize,
    ])
    test_set = AttDataset(
        dataset=cfg.dataset,
        partition=cfg.partition,
        split=cfg.test_split,
        partition_idx=cfg.partition_idx,
        transform=test_transform)

    # model
    model = DeepMAR_ResNet50(**cfg.model_kwargs)

    # Optimizer: parameters whose name contains 'classifier' use the
    # "new" learning rate, all the others the "finetuned" one.
    finetuned_params = []
    new_params = []
    for n, p in model.named_parameters():
        if n.find('classifier') >= 0:
            new_params.append(p)
        else:
            finetuned_params.append(p)
    param_groups = [{'params': finetuned_params, 'lr': cfg.finetuned_params_lr},
                    {'params': new_params, 'lr': cfg.new_params_lr}]

    optimizer = optim.SGD(
        param_groups,
        momentum=cfg.sgd_momentum,
        weight_decay=cfg.sgd_weight_decay)

    model = model.to(CALCULATE_DEVICE)
    # apex
    if cfg.amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level=cfg.opt_level, loss_scale=cfg.loss_scale)
        print("=> Using amp mode.")

    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[cfg.npu], broadcast_buffers=False)

    # positive rate of every selected attribute, used for the weighted loss
    if cfg.weighted_entropy:
        rate = np.array(train_set.partition['weight_' + cfg.split][cfg.partition_idx])
        rate = rate[train_set.dataset['selected_attribute']].tolist()
    else:
        rate = None
    # compute the weight of positive and negative labels
    if rate is None:
        weight_pos = [1 for _ in range(num_att)]
        weight_neg = [1 for _ in range(num_att)]
    else:
        if len(rate) != num_att:
            raise ValueError("the length of rate should be equal to %d" % (num_att))
        weight_pos = [math.exp(1.0 - v) for v in rate]
        weight_neg = [math.exp(v) for v in rate]
    # Built once; combined with every batch of labels inside the step loop.
    weight_pos_t = torch.tensor(weight_pos, dtype=torch.float32)
    weight_neg_t = torch.tensor(weight_neg, dtype=torch.float32)

    # bind the model and optimizer
    modules_optims = [model, optimizer]

    # load model weight if necessary
    if cfg.load_model_weight:
        map_location = (lambda storage, loc: storage)
        ckpt = torch.load(cfg.model_weight_file, map_location=map_location)
        model.load_state_dict(ckpt['state_dicts'][0], strict=False)

    # resume or not
    if cfg.resume:
        start_epoch, _ = load_ckpt(modules_optims, cfg.ckpt_file)
    else:
        start_epoch = 0

    # for evaluation
    feat_func_att = DeepMAR_ResNet50_ExtractFeature(model=model)

    # test only
    if cfg.test_only:
        print ('test with feat_func_att')
        # The evaluation helper requires the device and the configuration too.
        attribute_evaluate_subfunc(feat_func_att, test_set, CALCULATE_DEVICE, cfg, **cfg.test_kwargs)
        sys.exit(0)

    # training
    for epoch in range(start_epoch, cfg.total_epochs):
        if cfg.seed is not None:
            cfg.seed += 1
            seed_everything(cfg.seed)

        if distributed:
            train_sampler.set_epoch(epoch)
        # adjust the learning rate
        adjust_lr_staircase(
            optimizer.param_groups,
            [cfg.finetuned_params_lr, cfg.new_params_lr],
            epoch + 1,
            cfg.staircase_decay_at_epochs,
            cfg.staircase_decay_multiple_factor)

        may_set_mode(modules_optims, 'train')
        # recording loss
        loss_meter = AverageMeter()
        dataset_L = len(train_loader)
        ep_st = time.time()
        ep_st_mark = ep_st
        for step, (imgs, targets) in enumerate(train_loader):
            step_st = time.time()
            # time spent waiting for the batch
            data_time = step_st - ep_st

            # Weights must be derived before -1 labels are rewritten to 0.
            weights = _attribute_loss_weights(targets, weight_pos_t, weight_neg_t)

            # loss for the attribute classification, average over the batch size
            targets[targets == -1] = 0
            targets = targets.to(CALCULATE_DEVICE)
            imgs = imgs.to(CALCULATE_DEVICE)
            weights = weights.to(CALCULATE_DEVICE)
            score = model(imgs)

            criterion = torch.nn.BCEWithLogitsLoss(weight=weights).to(CALCULATE_DEVICE)
            loss = criterion(score, targets) * num_att
            optimizer.zero_grad()
            if cfg.amp:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()

            # step log
            loss_meter.update(to_scalar(loss))
            batch_time = time.time() - ep_st  # includes data load time
            ep_st = time.time()
            fps = npus_per_node * cfg.batch_size / batch_time

            if (step + 1) % cfg.steps_per_log == 0 or (step + 1) % dataset_L == 0:
                log = '{}, Step {}/{} in Ep {}, {:.2f}s, datatime:{:.6f}, batchtime:{:.6f}, FPS:{:.2f}, loss:{:.4f}'.format(
                    time_str(), step + 1, dataset_L, epoch + 1, time.time() - step_st, data_time, batch_time, fps, loss_meter.val)
                print(log)

            hwlog.remark_print(key=hwlog.FPS, value='{:.2f}'.format(fps))

        # epoch log
        epoch_time = time.time() - ep_st_mark
        log = 'Ep{}, {:.2f}s, loss {:.4f}'.format(
            epoch + 1, epoch_time, loss_meter.avg)
        print(log)

        # model ckpt
        if (epoch + 1) % cfg.epochs_per_save == 0 or epoch + 1 == cfg.total_epochs:
            ckpt_file = os.path.join(cfg.exp_dir, 'model', 'ckpt_epoch%d.pth' % (epoch + 1))
            save_ckpt(modules_optims, epoch + 1, 0, ckpt_file)

        # test on validation set
        if (epoch + 1) % cfg.epochs_per_val == 0 or epoch + 1 == cfg.total_epochs:
            print ('att test with feat_func_att')
            attribute_evaluate_subfunc(feat_func_att, test_set, CALCULATE_DEVICE, cfg, **cfg.test_kwargs)
|
||||
|
||||
def attribute_evaluate_subfunc(feat_func, test_set, device_id, cfg, **test_kwargs):
    """Evaluate attribute recognition and report the metrics.

    Runs ``attribute_evaluate`` and prints the label-based mean accuracy and
    the instance-based accuracy / precision / recall / F1, then records the
    instance-based metrics through ``hwlog``.

    Args:
        feat_func: feature extractor passed through to ``attribute_evaluate``.
        test_set: dataset passed through to ``attribute_evaluate``.
        device_id: device passed through to ``attribute_evaluate``.
        cfg: configuration; only ``cfg.test_split`` is read (for the banner).
        **test_kwargs: extra keyword arguments for ``attribute_evaluate``.

    Returns:
        The instance-based accuracy (``result['instance_acc']``).
    """
    result = attribute_evaluate(feat_func, test_set, device_id, **test_kwargs)
    print ('-' * 60)
    print ('Evaluation on %s set:' % (cfg.test_split))
    print ('Label-based evaluation: \n mA: %.4f'%(np.mean(result['label_acc'])))
    print ('Instance-based evaluation: \n Acc: %.4f, Prec: %.4f, Rec: %.4f, F1: %.4f' \
        %(result['instance_acc'], result['instance_precision'], result['instance_recall'], result['instance_F1']))
    print ('-' * 60)
    hwlog.remark_print(key=hwlog.ACC, value="{:.4f}".format(result['instance_acc']))
    hwlog.remark_print(key=hwlog.PREC, value="{:.4f}".format(result['instance_precision']))
    hwlog.remark_print(key=hwlog.REC, value="{:.4f}".format(result['instance_recall']))
    # The F1 record must carry the F1 score, not the recall.
    hwlog.remark_print(key=hwlog.F1, value="{:.4f}".format(result['instance_F1']))
    return result['instance_acc']
|
||||
|
||||
# intermediate variable
|
||||
# Values captured by the hooks built with make_hook, keyed by hook name.
inter_feature = {}
inter_gradient = {}


def make_hook(name, flag):
    """Build a hook that records an intermediate value under ``name``.

    Args:
        name: key under which the captured value is stored.
        flag: ``'forward'`` returns a hook storing its ``input`` argument in
            ``inter_feature``; ``'backward'`` returns a hook storing its
            ``output`` argument in ``inter_gradient``.

    Returns:
        A callable ``hook(m, input, output)``.

    Raises:
        AssertionError: if ``flag`` is neither ``'forward'`` nor ``'backward'``.
    """
    if flag == 'forward':
        def hook(m, input, output):
            inter_feature[name] = input
        return hook
    if flag == 'backward':
        def hook(m, input, output):
            inter_gradient[name] = output
        return hook
    # Raised explicitly: a bare ``assert False`` disappears under ``python -O``
    # and the function would then silently return None.
    raise AssertionError(
        "flag must be 'forward' or 'backward', got {!r}".format(flag))
|
||||
|
||||
if __name__ == '__main__':
    # Directory that contains this script, handed to the benchmark logger.
    hwlog.ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
    cpu_info, npu_info, framework_info, os_info, benchmark_version = get_environment_info("pytorch")
    config_info = get_model_parameter("pytorch_config")
    # NOTE(review): these values are fixed literals and are not derived from
    # the parsed training configuration - confirm they are what should be logged.
    initinal_data = {"base_lr": 0.1, "dataset": "imagenet", "optimizer": "SGD", "loss_scale": 1024}

    # Emit the environment and hyper-parameter records, in this order.
    header_records = (
        (hwlog.CPU_INFO, cpu_info),
        (hwlog.NPU_INFO, npu_info),
        (hwlog.OS_INFO, os_info),
        (hwlog.FRAMEWORK_INFO, framework_info),
        (hwlog.BENCHMARK_VERSION, benchmark_version),
        (hwlog.CONFIG_INFO, config_info),
        (hwlog.BASE_LR, initinal_data.get("base_lr")),
        (hwlog.DATASET, initinal_data.get("dataset")),
        (hwlog.OPT_NAME, initinal_data.get("optimizer")),
        (hwlog.LOSS_SCALE, initinal_data.get("loss_scale")),
    )
    for record_key, record_value in header_records:
        hwlog.remark_print(key=record_key, value=record_value)

    main()
|
||||
+96
@@ -0,0 +1,96 @@
|
||||
import os
|
||||
import numpy as np
|
||||
import random
|
||||
import cPickle as pickle
|
||||
from scipy.io import loadmat
|
||||
|
||||
np.random.seed(0)
|
||||
random.seed(0)
|
||||
|
||||
def make_dir(path):
    """Create the directory ``path`` unless something already exists there."""
    if not os.path.exists(path):
        os.mkdir(path)
|
||||
|
||||
def generate_data_description(save_dir):
    """Create the pa100k dataset description pickle.

    Reads ``./dataset/pa100k/annotation.mat`` and writes
    ``<save_dir>/pa100k_dataset.pkl`` holding the image names and labels of
    the train, val and test parts (in that order), the attribute names and
    the indices of the selected attributes.

    Args:
        save_dir: directory in which ``pa100k_dataset.pkl`` is written.
    """
    # Hand loadmat the path so it opens the file in binary mode and closes it
    # again, instead of leaking a text-mode handle.
    data = loadmat('./dataset/pa100k/annotation.mat')

    dataset = dict()
    dataset['description'] = 'pa100k'
    dataset['root'] = './dataset/pa100k/data/'
    dataset['image'] = []
    dataset['att'] = []
    dataset['att_name'] = []
    # A real list pickles the same way on Python 2 and Python 3.
    dataset['selected_attribute'] = list(range(26))

    for idx in range(26):
        dataset['att_name'].append(data['attributes'][idx][0][0])

    # (key prefix in the .mat file, number of samples) in storage order
    for prefix, count in (('train', 80000), ('val', 10000), ('test', 10000)):
        names = data[prefix + '_images_name']
        labels = data[prefix + '_label']
        for idx in range(count):
            dataset['image'].append(names[idx][0][0])
            dataset['att'].append(labels[idx, :].tolist())

    # Pickle streams are bytes: write in binary mode.
    with open(os.path.join(save_dir, 'pa100k_dataset.pkl'), 'wb') as f:
        pickle.dump(dataset, f)
|
||||
|
||||
def create_trainvaltest_split(traintest_split_file):
    """Create the pa100k split pickle.

    The written dictionary holds one partition: the index lists ``train``
    (0..79999), ``val`` (80000..89999), ``test`` (90000..99999) and
    ``trainval``, plus ``weight_train`` / ``weight_trainval``, the fraction of
    labels equal to 1 for every attribute, computed from
    ``./dataset/pa100k/annotation.mat``.

    Args:
        traintest_split_file: path of the pickle file to write.
    """
    partition = dict()
    partition['trainval'] = []
    partition['train'] = []
    partition['val'] = []
    partition['test'] = []
    partition['weight_trainval'] = []
    partition['weight_train'] = []

    # Path instead of an open text-mode handle: nothing to leak.
    data = loadmat('./dataset/pa100k/annotation.mat')

    # Explicit lists so that concatenation also works on Python 3, where
    # range() is not a list.
    train = list(range(80000))
    val = list(range(80000, 90000))
    test = list(range(90000, 100000))
    trainval = train + val
    partition['train'].append(train)
    partition['val'].append(val)
    partition['trainval'].append(trainval)
    partition['test'].append(test)

    # weight: positive ratio of every attribute
    train_label = data['train_label'].astype('float32')
    trainval_label = np.concatenate(
        (data['train_label'], data['val_label']), axis=0).astype('float32')
    weight_train = np.mean(train_label == 1, axis=0).tolist()
    weight_trainval = np.mean(trainval_label == 1, axis=0).tolist()

    partition['weight_trainval'].append(weight_trainval)
    partition['weight_train'].append(weight_train)

    # Pickle streams are bytes: write in binary mode.
    with open(traintest_split_file, 'wb') as f:
        pickle.dump(partition, f)
|
||||
|
||||
if __name__ == "__main__":
    import argparse

    # Command line: where to write the description and the split pickles.
    parser = argparse.ArgumentParser(description="pa100k dataset")
    parser.add_argument('--save_dir', type=str, default='./dataset/pa100k/')
    parser.add_argument('--traintest_split_file', type=str,
                        default="./dataset/pa100k/pa100k_partition.pkl")
    args = parser.parse_args()
    save_dir = args.save_dir
    traintest_split_file = args.traintest_split_file

    # Description first, then the split file.
    generate_data_description(save_dir)
    create_trainvaltest_split(traintest_split_file)
|
||||
+84
@@ -0,0 +1,84 @@
|
||||
import os
|
||||
import numpy as np
|
||||
import random
|
||||
import pickle
|
||||
from scipy.io import loadmat
|
||||
|
||||
np.random.seed(0)
|
||||
random.seed(0)
|
||||
|
||||
def make_dir(path):
    """Create the directory ``path`` unless something already exists there."""
    if not os.path.exists(path):
        os.mkdir(path)
|
||||
|
||||
def generate_data_description(save_dir):
    """Create the peta dataset description pickle.

    Reads ``<save_dir>/PETA.mat`` and writes ``<save_dir>/peta_dataset.pkl``
    with the image file names (``00001.png`` ... ``19000.png``), the label
    rows (columns 4 onwards of the annotation matrix), the 105 attribute
    names and the indices of the 35 selected attributes.

    Args:
        save_dir: directory that holds ``PETA.mat`` and receives the pickle.
    """
    dataset = dict()
    dataset['description'] = 'peta'
    dataset['root'] = save_dir + '/images/'
    dataset['image'] = []
    dataset['att'] = []
    dataset['att_name'] = []
    # Store a plain list rather than a range object so the pickle does not
    # depend on how the reader treats range.
    dataset['selected_attribute'] = list(range(35))

    # load PETA.MAT
    data = loadmat(save_dir + '/PETA.mat')
    peta = data['peta'][0][0]
    for idx in range(105):
        dataset['att_name'].append(peta[1][idx, 0][0])

    for idx in range(19000):
        dataset['image'].append('%05d.png' % (idx + 1))
        dataset['att'].append(peta[0][idx, 4:].tolist())

    with open(os.path.join(save_dir, 'peta_dataset.pkl'), 'wb') as f:
        pickle.dump(dataset, f)
|
||||
|
||||
def create_trainvaltest_split(traintest_split_file, save_dir=None):
    """Create the peta split pickle with five train/val/test partitions.

    For each of the five partitions stored in ``PETA.mat`` the zero-based
    index lists ``train``, ``val``, ``test`` and ``trainval`` are recorded
    together with ``weight_train`` / ``weight_trainval``, the fraction of
    labels equal to 1 for every attribute.

    Args:
        traintest_split_file: path of the pickle file to write.
        save_dir: directory that holds ``PETA.mat``.  When omitted, the
            module-level ``save_dir`` set by the script entry point is used;
            if that is not defined either, the directory of
            ``traintest_split_file`` is tried.
    """
    if save_dir is None:
        # Keep the script behaviour: the entry point defines a module-level
        # ``save_dir``.  Looked up explicitly so that importing this function
        # no longer fails with NameError.
        save_dir = globals().get('save_dir')
    if save_dir is None:
        # NOTE(review): assumes PETA.mat sits next to the split file, as with
        # the script defaults - confirm for other layouts.
        save_dir = os.path.dirname(os.path.abspath(traintest_split_file))

    partition = dict()
    partition['trainval'] = []
    partition['train'] = []
    partition['val'] = []
    partition['test'] = []
    partition['weight_trainval'] = []
    partition['weight_train'] = []

    # load PETA.MAT
    data = loadmat(save_dir + '/PETA.mat')
    peta = data['peta'][0][0]
    labels = peta[0]
    for idx in range(5):
        split = peta[3][idx][0][0][0]
        # indices in the .mat file are 1-based
        train = (split[0][:, 0] - 1).tolist()
        val = (split[1][:, 0] - 1).tolist()
        test = (split[2][:, 0] - 1).tolist()
        trainval = train + val
        partition['train'].append(train)
        partition['val'].append(val)
        partition['trainval'].append(trainval)
        partition['test'].append(test)
        # weight: positive ratio of every attribute
        weight_trainval = np.mean(labels[trainval, 4:].astype('float32') == 1, axis=0).tolist()
        weight_train = np.mean(labels[train, 4:].astype('float32') == 1, axis=0).tolist()
        partition['weight_trainval'].append(weight_trainval)
        partition['weight_train'].append(weight_train)

    with open(traintest_split_file, 'wb') as f:
        pickle.dump(partition, f)
|
||||
|
||||
if __name__ == "__main__":
    import argparse

    # Command line: dataset directory and target path of the split pickle.
    parser = argparse.ArgumentParser(description="peta dataset")
    parser.add_argument('--save_dir', type=str,
                        default='/home/zhusiyi/dataset/peta/')
    parser.add_argument('--traintest_split_file', type=str,
                        default="/home/zhusiyi/dataset/peta/peta_partition.pkl")
    args = parser.parse_args()
    # ``save_dir`` stays a module-level name: create_trainvaltest_split
    # refers to it.
    save_dir = args.save_dir
    traintest_split_file = args.traintest_split_file

    # Description first, then the split file.
    generate_data_description(save_dir)
    create_trainvaltest_split(traintest_split_file)
|
||||
+76
@@ -0,0 +1,76 @@
|
||||
import os
|
||||
import numpy as np
|
||||
import random
|
||||
import cPickle as pickle
|
||||
from scipy.io import loadmat
|
||||
|
||||
np.random.seed(0)
|
||||
random.seed(0)
|
||||
|
||||
def make_dir(path):
    """Create the directory ``path`` unless something already exists there."""
    if not os.path.exists(path):
        os.mkdir(path)
|
||||
|
||||
def generate_data_description(save_dir):
    """Create the rap dataset description pickle.

    Reads ``./dataset/rap/RAP_annotation/RAP_annotation.mat`` and writes
    ``<save_dir>/rap_dataset.pkl`` with the 41585 image names and label rows,
    the 51 attribute names and the indices of the selected attributes.

    Args:
        save_dir: directory in which ``rap_dataset.pkl`` is written.
    """
    dataset = dict()
    dataset['description'] = 'rap'
    dataset['root'] = './dataset/rap/RAP_dataset/'
    dataset['image'] = []
    dataset['att'] = []
    dataset['att_name'] = []
    # A real list pickles the same way on Python 2 and Python 3.
    dataset['selected_attribute'] = list(range(51))

    # Hand loadmat the path so it opens the file in binary mode and closes it
    # again, instead of leaking a text-mode handle.
    data = loadmat('./dataset/rap/RAP_annotation/RAP_annotation.mat')
    annotation = data['RAP_annotation'][0][0]
    for idx in range(51):
        dataset['att_name'].append(annotation[6][idx][0][0])

    for idx in range(41585):
        dataset['image'].append(annotation[5][idx][0][0])
        dataset['att'].append(annotation[1][idx, :].tolist())

    # Pickle streams are bytes: write in binary mode.
    with open(os.path.join(save_dir, 'rap_dataset.pkl'), 'wb') as f:
        pickle.dump(dataset, f)
|
||||
|
||||
def create_trainvaltest_split(traintest_split_file):
    """Create the rap split pickle with five trainval/test partitions.

    For each of the five partitions stored in the annotation file the
    zero-based index lists ``trainval`` and ``test`` are recorded together
    with ``weight_trainval``, the fraction of labels equal to 1 for every
    attribute over the trainval samples.

    Args:
        traintest_split_file: path of the pickle file to write.
    """
    partition = dict()
    partition['trainval'] = []
    partition['test'] = []
    partition['weight_trainval'] = []

    # Path instead of an open text-mode handle: nothing to leak.
    data = loadmat('./dataset/rap/RAP_annotation/RAP_annotation.mat')
    annotation = data['RAP_annotation'][0][0]
    labels = annotation[1]
    for idx in range(5):
        split = annotation[0][idx][0][0][0]
        # indices in the .mat file are 1-based
        trainval = (split[0][0, :] - 1).tolist()
        test = (split[1][0, :] - 1).tolist()
        partition['trainval'].append(trainval)
        partition['test'].append(test)
        # weight: positive ratio of every attribute
        weight_trainval = np.mean(labels[trainval, :].astype('float32') == 1, axis=0).tolist()
        partition['weight_trainval'].append(weight_trainval)

    # Pickle streams are bytes: write in binary mode.
    with open(traintest_split_file, 'wb') as f:
        pickle.dump(partition, f)
|
||||
|
||||
if __name__ == "__main__":
    import argparse

    # Command line: where to write the description and the split pickles.
    parser = argparse.ArgumentParser(description="rap dataset")
    parser.add_argument('--save_dir', type=str, default='./dataset/rap/')
    parser.add_argument('--traintest_split_file', type=str,
                        default="./dataset/rap/rap_partition.pkl")
    args = parser.parse_args()
    save_dir = args.save_dir
    traintest_split_file = args.traintest_split_file

    # Description first, then the split file.
    generate_data_description(save_dir)
    create_trainvaltest_split(traintest_split_file)
|
||||
+86
@@ -0,0 +1,86 @@
|
||||
import os
|
||||
import numpy as np
|
||||
import random
|
||||
import cPickle as pickle
|
||||
from scipy.io import loadmat
|
||||
|
||||
np.random.seed(0)
|
||||
random.seed(0)
|
||||
|
||||
def make_dir(path):
    """
    Create directory ``path`` (including missing parents) if it does not exist.

    Creation is attempted first and an "already exists" failure is tolerated,
    which avoids the race between a separate existence check and the mkdir.

    Raises:
        OSError: if the directory cannot be created and does not already exist.
    """
    try:
        os.makedirs(path)
    except OSError:
        # Only swallow the error when the directory is really there.
        if not os.path.isdir(path):
            raise
|
||||
|
||||
def generate_data_description(save_dir):
    """
    Create the RAP2 dataset description file ``rap2_dataset.pkl`` in ``save_dir``.

    The pickled dict holds the description tag, the image root directory, the
    image names, the attribute rows, the attribute names and the selected
    attribute indices, all read from
    ./dataset/rap2/RAP_annotation/RAP_annotation.mat.

    Args:
        save_dir: directory in which ``rap2_dataset.pkl`` is written.
    """
    dataset = dict()
    dataset['description'] = 'rap2'
    dataset['root'] = './dataset/rap2/RAP_dataset/'
    dataset['image'] = []
    dataset['att'] = []
    dataset['att_name'] = []
    # Hand the path to loadmat so scipy opens the file in binary mode and
    # closes it again, instead of leaking a text-mode file handle.
    data = loadmat('./dataset/rap2/RAP_annotation/RAP_annotation.mat')
    annotation = data['RAP_annotation'][0][0]
    # Stored indices minus 1 (presumably MATLAB one-based -> zero-based; confirm).
    dataset['selected_attribute'] = (annotation[3][0, :] - 1).tolist()
    for idx in range(152):
        dataset['att_name'].append(annotation[2][idx][0][0])

    for idx in range(84928):
        dataset['image'].append(annotation[0][idx][0][0])
        dataset['att'].append(annotation[1][idx, :].tolist())

    # Pickle data is binary; 'wb' is correct on every platform and Python version.
    with open(os.path.join(save_dir, 'rap2_dataset.pkl'), 'wb') as f:
        pickle.dump(dataset, f)
|
||||
|
||||
def create_trainvaltest_split(traintest_split_file):
    """
    Create the RAP2 split file and pickle it to ``traintest_split_file``.

    For each of the 5 partitions stored in
    ./dataset/rap2/RAP_annotation/RAP_annotation.mat the result records the
    'train', 'val', 'test' and 'trainval' (train + val) index lists, plus
    'weight_train' / 'weight_trainval': the per-attribute fraction of samples
    in that subset whose label equals 1.

    Args:
        traintest_split_file: path of the pickle file to write.
    """
    partition = dict()
    partition['train'] = []
    partition['val'] = []
    partition['trainval'] = []
    partition['test'] = []
    partition['weight_train'] = []
    partition['weight_trainval'] = []
    # Hand the path to loadmat so scipy opens the file in binary mode and
    # closes it again, instead of leaking a text-mode file handle.
    data = loadmat('./dataset/rap2/RAP_annotation/RAP_annotation.mat')
    annotation = data['RAP_annotation'][0][0]
    labels = annotation[1]
    for idx in range(5):
        split = annotation[4][0, idx][0][0]
        # Stored indices minus 1 (presumably MATLAB one-based -> zero-based; confirm).
        train = (split[0][0, :] - 1).tolist()
        val = (split[1][0, :] - 1).tolist()
        test = (split[2][0, :] - 1).tolist()
        trainval = train + val
        partition['trainval'].append(trainval)
        partition['train'].append(train)
        partition['val'].append(val)
        partition['test'].append(test)
        # weight: share of positive labels per attribute over the selected rows
        weight_train = np.mean(
            labels[train, :].astype('float32') == 1, axis=0).tolist()
        weight_trainval = np.mean(
            labels[trainval, :].astype('float32') == 1, axis=0).tolist()
        partition['weight_train'].append(weight_train)
        partition['weight_trainval'].append(weight_trainval)

    # Pickle data is binary; 'wb' is correct on every platform and Python version.
    with open(traintest_split_file, 'wb') as f:
        pickle.dump(partition, f)
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: build the RAP2 description file, then the
    # train/val/test partition file.
    import argparse

    parser = argparse.ArgumentParser(description="rap2 dataset")
    for flag, default_value in (
            ('--save_dir', './dataset/rap2/'),
            ('--traintest_split_file', "./dataset/rap2/rap2_partition.pkl")):
        parser.add_argument(flag, type=str, default=default_value)
    args = parser.parse_args()
    save_dir, traintest_split_file = args.save_dir, args.traintest_split_file

    generate_data_description(save_dir)
    create_trainvaltest_split(traintest_split_file)
|
||||
+31
@@ -0,0 +1,31 @@
|
||||
# Environment setup for Ascend training; meant to be sourced.
# Uses the nnae installation when /usr/local/Ascend/nnae/latest exists and
# falls back to the ascend-toolkit installation otherwise.

############## toolkit situation ################
#export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
#export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/
#export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
#export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
#export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH


############## nnae situation ################


if [ -d /usr/local/Ascend/nnae/latest ];then
    # The multiarch library directory is aarch64-linux-gnu (the original
    # "aarch64_64-linux-gnu" is not a real directory name), matching the
    # toolkit branch below.
    export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH
    export PATH=$PATH:/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/nnae/latest/toolkit/tools/ide_daemon/bin/
    export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp/
    export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
    export PYTHONPATH=/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
else
    export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH
    export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/
    export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
    export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
    export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
fi

# ln -s /usr/local/Ascend/ascend-toolkit/latest/toolkit/bin/adc /usr/local/bin/

export SLOG_PRINT_TO_STDOUT=0
#su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 0"

export TASK_QUEUE_ENABLE=1
|
||||
@@ -0,0 +1,62 @@
|
||||
#!/bin/bash
# Launch DeepMar (PyTorch) training, either directly on this host or through
# mpirun when CLUSTER is "True".
#
# Positional arguments:
#   $1  rank size (number of devices)
#   $2  path of the yaml configuration file
#   $3  directory containing get_params_for_yaml.sh
#   $4  CLUSTER flag        (only read inside docker)
#   $5  list of cluster IPs (only read inside docker)

rank_size=$1
yamlPath=$2
toolsPath=$3

currentDir=$(cd "$(dirname "$0")/.."; pwd)
model_name=$(cd "${currentDir}/.."; basename "$(pwd)")
if [ -f /.dockerenv ];then
    CLUSTER=$4
    MPIRUN_ALL_IP="$5"
    export CLUSTER=${CLUSTER}
fi

# Load the configuration from the yaml file. The command substitution is left
# unquoted on purpose so that eval sees the helper's output exactly as before.
eval $("${toolsPath}/get_params_for_yaml.sh" "${yamlPath}" "pytorch_config")

# Remove old logs
rm -rf /var/log/npu/slog/host-0/*
rm -rf "${currentDir}"/result/*.log

# mkdir train job path
currtime=$(date +%Y%m%d%H%M%S)
mkdir -p "${currentDir%train*}/train/result/pt_deepmar/training_job_${currtime}/"
export train_job_dir="${currentDir%train*}/train/result/pt_deepmar/training_job_${currtime}/"
echo "[$(date +%Y%m%d-%H:%M:%S)] [INFO] ${train_job_dir} &"

# Device list; when none is configured for this rank size (or rank_size >= 8)
# pick devices 0..rank_size-1 in order.
eval device_group=\$device_group_${rank_size}p
if [ x"${device_group}" == x"" ] || [ ${rank_size} -ge 8 ];then
    device_group="$(seq 0 "$(expr $rank_size - 1)")"
fi

# Take the first character of the device list as the first device id; the hw
# log is linked from the directory named after it.
device_group_str=$(echo ${device_group} | sed 's/ //g')
first_device_id=${device_group_str:0:1}

if [ x"${CLUSTER}" == x"True" ];then
    this_ip=$(hostname -I | awk '{print $1}')
    ln -snf "${train_job_dir}0/hw_deepmar.log" "${train_job_dir}"
    # Copy the configuration to every other node (word splitting of the IP
    # list is intentional).
    for ip in $MPIRUN_ALL_IP;do
        if [ x"$ip" != x"$this_ip" ];then
            scp "$yamlPath" "root@$ip:$yamlPath"
            # jsonFilePath is not set in this script; presumably it comes
            # from the yaml configuration -- confirm.
            scp "${jsonFilePath}" "root@$ip:${jsonFilePath}"
        fi
    done
    export PATH=$PATH:/usr/local/mpirun4.0/bin
    # Every continuation backslash is preceded by a space so arguments can
    # never be glued together, whatever the indentation of the next line.
    mpirun -H ${mpirun_ip} \
        --bind-to none -map-by slot \
        --allow-run-as-root \
        --mca btl_tcp_if_exclude lo,docker0,endvnic,virbr0,vethf40501b,docker_gwbridge,br-f42ac38052b4 \
        --prefix /usr/local/mpirun4.0/ \
        "${currentDir}/scripts/train.sh" 0 $rank_size "$yamlPath" $currtime "${toolsPath}" ${CLUSTER}
else
    rank_id=0
    #for device_id in $device_group;do
    ln -snf "${train_job_dir}${first_device_id}/hw_deepmar.log" "${train_job_dir}"
    "${currentDir}/scripts/train.sh" 0 $rank_size "$yamlPath" $currtime "${toolsPath}" $rank_id &
    # let rank_id++
    # done
fi
wait
|
||||
|
||||
|
||||
@@ -0,0 +1,173 @@
|
||||
#!/usr/bin/env bash
# Run one DeepMar (PyTorch) training job on Ascend NPU and record its result.
#
# Positional arguments:
#   $1  device id
#   $2  rank size (number of devices)
#   $3  path of the yaml configuration file
#   $4  timestamp used to name the training job directory
#   $5  directory containing get_params_for_yaml.sh
#   $6  "True" in cluster mode, otherwise the rank id of this process

device_id=$1
rank_size=$2
yamlPath=$3

currentDir=$(cd "$(dirname "$0")/.."; pwd)
currtime=$4
toolsPath=$5
export YAML_PATH=$3
mkdir -p ${currentDir%train*}/train/result/pt_deepmar/training_job_${currtime}/
export train_job_dir=${currentDir%train*}/train/result/pt_deepmar/training_job_${currtime}/

# Load the configuration from the yaml file
eval $(${toolsPath}/get_params_for_yaml.sh ${yamlPath} "pytorch_config")

# Name of the remark log file; must be "hw_" followed by the lowercase model name
export REMARK_LOG_FILE=hw_deepmar.log
benchmark_log_path=${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils
export PYTHONPATH=$PYTHONPATH:${benchmark_log_path}


#source ${currentDir}/config/npu_set_env.sh
source ${currentDir}/config/set_env_b023.sh
# user env
export HCCL_CONNECT_TIMEOUT=600
export JOB_ID=9999001
export HCCL_RANK_TABLE_PATH=${currentDir}/config/${rank_size}p.json
export RANK_SIZE=${rank_size}
export SLOG_PRINT_TO_STDOUT=0
export DEVICE_ID=${device_id}
# RANK_INDEX is not set in this script; an unset variable counts as 0 in
# shell arithmetic.
DEVICE_INDEX=$(( DEVICE_ID + RANK_INDEX * 8 ))
export DEVICE_INDEX=${DEVICE_INDEX}

cd ${train_job_dir}
curd_dir=${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils/atlasboost
export PYTHONPATH=$PYTHONPATH:${curd_dir}

if [ x"$6" != x"True" ];then
    rank_id=$6
    export RANK_ID=$6
else
    # Cluster mode: ask atlasboost for this process's rank and device id.
    device_id_mo=$(python3.7 -c "import src.tensorflow.mpi_ops as atlasboost;atlasboost.init(); \
        device_id = atlasboost.local_rank();cluster_device_id = str(device_id); \
        atlasboost.set_device_id(device_id);print(atlasboost.rank())")
    device_id_mo=$(echo $device_id_mo)
    # The last word of the captured output is the rank printed above.
    rank_id=${device_id_mo##* }
    export RANK_ID=${rank_id}
    # The device id is the text between "deviceid = " and " phyid=".
    device=${device_id_mo##*deviceid = }
    device_id=${device%% phyid=*}
    export DEVICE_ID=${device_id}
    hccljson=${train_job_dir}/*.json
    cp ${hccljson} ${currentDir}/config/${rank_size}p.json
fi

#mkdir exec path
mkdir -p ${train_job_dir}/${device_id}
cd ${train_job_dir}/${device_id}

startTime=$(date +%Y%m%d-%H:%M:%S)
startTime_s=$(date +%s)

# Dataset preprocessing
python3.7 ${currentDir}/code/transform_peta.py \
    --save_dir=${data_url} \
    --traintest_split_file=${data_url}/peta_partition.pkl

# Choose the call parameters for single-device / multi-device training
if [ x"$6" == x"True" ];then
    # multiple devices on multiple hosts
    export CLUSTER=True
fi

if [ x"${mode}" == x"evaluate" ];then
    # Evaluation is not implemented. The original placeholder ("pass") is not
    # a shell command; report the situation explicitly and keep the non-zero
    # status so the run is still logged as failed.
    echo "deepmar evaluate mode is not implemented" >&2
    false

elif [ x"${rank_size}" == x"1" ];then
    # single device
    # --npu takes ${device_id}: ${device} is only assigned in cluster mode and
    # would expand to an empty string here.
    python3.7 ${currentDir}/code/train_deepmar_resnet50.py \
        --dataset=peta \
        --save_dir=${data_url} \
        --workers=32 \
        --npu=${device_id} \
        --partition_idx=0 \
        --split=trainval \
        --test_split=test \
        --batch_size=${batch_size} \
        --resize="(224,224)" \
        --exp_subpath=deepmar_resnet50 \
        --new_params_lr=0.01 \
        --finetuned_params_lr=0.01 \
        --staircase_decay_at_epochs="(50,100)" \
        --total_epochs=${epoches} \
        --epochs_per_val=10 \
        --epochs_per_save=50 \
        --steps_per_log=10 \
        --drop_pool5=True \
        --drop_pool5_rate=0.5 \
        --run=1 \
        --resume=False \
        --ckpt_file= \
        --load_model_weight=False \
        --model_weight_file= \
        --amp \
        --opt_level O2 \
        --loss_scale 512 \
        --set_seed True \
        --pretrained True \
        --test_only=False > ${train_job_dir}/train_${rank_size}p.log 2>&1

elif [ ${rank_size} -le 8 ];then
    # multiple devices on a single host
    #source ${currentDir}/config/set_env_b023.sh
    python3.7 ${currentDir}/code/train_deepmar_resnet50_8p.py \
        --addr=$(hostname -I |awk '{print $1}') \
        --save_dir=${data_url} \
        --dataset=peta \
        --workers=80 \
        --partition_idx=0 \
        --split=trainval \
        --test_split=test \
        --batch_size=${batch_size} \
        --resize="(224,224)" \
        --exp_subpath=deepmar_resnet50 \
        --new_params_lr=${lr} \
        --finetuned_params_lr=${lr} \
        --staircase_decay_at_epochs="(50,100)" \
        --total_epochs=${epoches} \
        --epochs_per_val=10 \
        --epochs_per_save=50 \
        --steps_per_log=10 \
        --drop_pool5=True \
        --drop_pool5_rate=0.5 \
        --run=1 \
        --resume=False \
        --ckpt_file= \
        --load_model_weight=False \
        --model_weight_file=ckpt_epoch101.pth \
        --amp \
        --opt_level O2 \
        --loss_scale 512.0 \
        --set_seed True \
        --pretrained True \
        --test_only=False \
        --dist_url 'tcp://127.0.0.1:50000' \
        --dist_backend 'hccl' \
        --multiprocessing_distributed \
        --world_size 1 \
        --npus_per_node=${rank_size} \
        --rank 0 > ${train_job_dir}/train_${rank_size}p.log 2>&1

fi
# Capture the status of the branch above immediately, before any other
# command can overwrite $?.
train_status=$?

#taskset -c 0-20 python3.7 ${currentDir}/code/densenet121.py > ./train.log 2>&1

if [ ${train_status} -eq 0 ];then
    echo ":::ABK 1.0.0 deepmar train success"
    echo ":::ABK 1.0.0 deepmar train success" >> ${train_job_dir}/train_${rank_size}p.log
    echo ":::ABK 1.0.0 deepmar train success" >> ./hw_deepmar.log
else
    # The console message now agrees with the two log files on failure.
    echo ":::ABK 1.0.0 deepmar train failed"
    echo ":::ABK 1.0.0 deepmar train failed" >> ${train_job_dir}/train_${rank_size}p.log
    echo ":::ABK 1.0.0 deepmar train failed" >> ./hw_deepmar.log
fi

endTime=$(date +%Y%m%d-%H:%M:%S)
endTime_s=$(date +%s)
sumTime=$(( endTime_s - startTime_s ))
hour=$(( sumTime/3600 ))
min=$(( (sumTime-hour*3600)/60 ))
sec=$(( sumTime-hour*3600-min*60 ))
echo ":::ABK 1.0.0 deepmar train total time: ${hour}:${min}:${sec}" >> ${train_job_dir}/${device_id}/hw_deepmar.log
|
||||
@@ -0,0 +1,25 @@
|
||||
# DenseNet121_pytorch训练说明
|
||||
|
||||
### 1. 模型训练参数配置
|
||||
|
||||
在train/yaml/DenseNet121.yaml中修改相应配置, 配置项含义:
|
||||
|
||||
```
|
||||
pytorch_config:
|
||||
data_url: 数据集路径
|
||||
epoches: 跑多少个epoch
|
||||
batch_size: 1p 参数为256 2p 512 4p 1024 8p为2048
|
||||
lr: 默认参数1p 0.1 2p 0.2 4p 0.4 8p 0.8
|
||||
seed: 49
|
||||
docker_image: docker 镜像名称:版本号
|
||||
```
|
||||
|
||||
------
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
+515
@@ -0,0 +1,515 @@
|
||||
import argparse
|
||||
import os
|
||||
import random
|
||||
import shutil
|
||||
import time
|
||||
import warnings
|
||||
import sys
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.parallel
|
||||
import torch.backends.cudnn as cudnn
|
||||
import torch.distributed as dist
|
||||
import torch.optim
|
||||
import torch.multiprocessing as mp
|
||||
import torch.utils.data
|
||||
import torch.utils.data.distributed
|
||||
import torchvision.transforms as transforms
|
||||
import torchvision.datasets as datasets
|
||||
import torchvision.models as models
|
||||
import torch.npu
|
||||
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
from densenet_0_2_2 import densenet121
|
||||
|
||||
import numpy as np
|
||||
from apex import amp
|
||||
|
||||
|
||||
from benchmark_log import hwlog
|
||||
from benchmark_log.basic_utils import get_environment_info
|
||||
from benchmark_log.basic_utils import get_model_parameter
|
||||
|
||||
|
||||
warnings.filterwarnings('ignore')
|
||||
|
||||
# Names of every lowercase, non-dunder callable exposed by torchvision.models;
# used as the allowed values of --arch.
model_names = sorted(name for name in models.__dict__
                     if name.islower() and not name.startswith("__")
                     and callable(models.__dict__[name]))

# Command-line interface. Help strings state the defaults actually configured
# below (several of them previously quoted values from a different script).
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--data', metavar='DIR', default='/opt/npu/dataset/imagenet',
                    help='path to dataset')
parser.add_argument('-a', '--arch', metavar='ARCH', default='densenet121',
                    choices=model_names,
                    help='model architecture: ' +
                         ' | '.join(model_names) +
                         ' (default: densenet121)')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                    help='number of data loading workers (default: 4)')
parser.add_argument('--epochs', default=90, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=128, type=int,
                    metavar='N',
                    help='mini-batch size (default: 128), this is the total '
                         'batch size of all GPUs on the current node when '
                         'using Data Parallel or Distributed Data Parallel')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)',
                    dest='weight_decay')
parser.add_argument('-p', '--print-freq', default=1, type=int,
                    metavar='N', help='print frequency (default: 1)')
parser.add_argument('-ef', '--eval-freq', default=5, type=int,
                    metavar='N', help='evaluate frequency (default: 5)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                    help='evaluate model on validation set')
parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                    help='use pre-trained model')
parser.add_argument('--world-size', default=-1, type=int,
                    help='number of nodes for distributed training')
parser.add_argument('--rank', default=-1, type=int,
                    help='node rank for distributed training')
parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
                    help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str,
                    help='distributed backend')
parser.add_argument('--seed', default=None, type=int,
                    help='seed for initializing training. ')
parser.add_argument('--gpu', default=None, type=int,
                    help='GPU id to use.')
parser.add_argument('--multiprocessing-distributed', action='store_true',
                    help='Use multi-processing distributed training to launch '
                         'N processes per node, which has N GPUs. This is the '
                         'fastest way to use PyTorch for either single node or '
                         'multi node data parallel training')
parser.add_argument('--npu', default=None, type=int,
                    help='NPU id to use.')

# apex
parser.add_argument('--amp', default=False, action='store_true',
                    help='use amp to train the model')
parser.add_argument('--loss-scale', default=1024., type=float,
                    help='loss scale used by amp (default: 1024)')
parser.add_argument('--opt-level', default='O2', type=str,
                    help='amp optimization level (default: O2)')
|
||||
|
||||
|
||||
|
||||
def main():
    """Parse the command line, select the NPU, seed the RNGs and start training.

    Sets the module globals ``CALCULATE_DEVICE`` ("npu:<id>", id 0 when --npu
    is not given) and ``best_acc1`` (reset to 0), then either spawns one
    ``main_worker`` process per detected device (--multiprocessing-distributed)
    or calls ``main_worker`` directly in this process.
    """
    global CALCULATE_DEVICE
    global best_acc1

    args = parser.parse_args()
    print(args)

    if args.npu is None:
        args.npu = 0

    best_acc1 = 0
    CALCULATE_DEVICE = "npu:{}".format(args.npu)
    torch.npu.set_device(CALCULATE_DEVICE)

    if args.seed is not None:
        # Use args.seed: the bare name `seed` is not defined anywhere in this
        # function and raised NameError whenever --seed was passed.
        random.seed(args.seed)
        os.environ['PYTHONHASHSEED'] = str(args.seed)
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.npu.device_count()
    print('{} node found.'.format(ngpus_per_node))
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
|
||||
|
||||
|
||||
def main_worker(gpu, ngpus_per_node, args):
    """Build model, optimizer and data loaders, then run the train/eval loop.

    Args:
        gpu: device index for this worker, or None; stored in ``args.gpu`` and,
            in multiprocessing-distributed mode, added to the scaled node rank
            to form the global rank.
        ngpus_per_node: number of devices per node; used to compute the global
            rank and to split batch size and workers per process.
        args: parsed command-line namespace. Mutated in place (``gpu``,
            ``rank``, ``batch_size``, ``workers``, ``start_epoch``).

    Reads the module global ``CALCULATE_DEVICE`` and updates ``best_acc1``.
    """
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)
    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        # model = models.__dict__[args.arch]()
        # Without --pretrained the local densenet121 is always built,
        # whatever --arch says.
        model = densenet121()

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            # Single-device path: move the model to the selected NPU.
            model = model.to(CALCULATE_DEVICE)
            #for item in model.npu_unsupport_list:
            #    print("npu_unsupport: ", item)
            #    item.cpu()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(CALCULATE_DEVICE)

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.amp:
        # Wrap model and optimizer for mixed-precision training with apex amp.
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=CALCULATE_DEVICE)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            if args.amp:
                amp.load_state_dict(checkpoint['amp'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code: expects 'train' and 'val' sub-directories under args.data
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    # Shuffle in the loader only when no sampler is in charge of the ordering.
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=False, sampler=train_sampler, drop_last=True)

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=False, drop_last=True)

    if args.evaluate:
        # Evaluation-only run: validate once and stop.
        validate(val_loader, model, criterion, args)
        return

    # TensorBoard events are written under runs/densenet121.
    writer = SummaryWriter(os.path.join('runs/densenet121'))
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args, writer)

        # Evaluate every args.eval_freq epochs and always after the last epoch.
        if (epoch+1)%(args.eval_freq)==0 or epoch==args.epochs-1 :
            # evaluate on validation set
            acc1 = validate(val_loader, model, criterion, args, epoch, writer)

            # remember best acc@1 and save checkpoint
            is_best = acc1 > best_acc1
            best_acc1 = max(acc1, best_acc1)

            # NOTE(review): the checkpoint step is placed inside the evaluation
            # branch because it reads is_best, which is only assigned here --
            # confirm against the intended layout.
            if not args.multiprocessing_distributed or (args.multiprocessing_distributed
                    and args.rank % ngpus_per_node == 0 and epoch == args.epochs - 1):
                if args.amp:
                    save_checkpoint({
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_acc1': best_acc1,
                        'optimizer' : optimizer.state_dict(),
                        'amp': amp.state_dict(),
                    }, is_best)
                else:
                    save_checkpoint({
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_acc1': best_acc1,
                        'optimizer' : optimizer.state_dict(),
                    }, is_best)

    writer.close()
|
||||
|
||||
def train(train_loader, model, criterion, optimizer, epoch, args, writer):
    """Run one training epoch.

    For every batch: move the data to ``CALCULATE_DEVICE``, run the forward
    pass, record loss / top-1 / top-5 and the learning rate to ``writer``,
    back-propagate (through apex amp when ``args.amp`` is set) and step the
    optimizer. After the epoch the throughput (``args.batch_size`` divided by
    the average batch time) is printed and sent to ``hwlog``.
    """
    steps_per_epoch = len(train_loader)

    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        steps_per_epoch,
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    tick = time.time()
    for step, (images, target) in enumerate(train_loader):
        # x-axis position shared by every TensorBoard scalar of this batch
        global_step = epoch * steps_per_epoch + step

        # time spent waiting for the data loader
        data_time.update(time.time() - tick)

        target = target.to(torch.int32)
        images = images.to(CALCULATE_DEVICE, non_blocking=False)
        target = target.to(CALCULATE_DEVICE, non_blocking=False)

        # forward pass
        output = model(images)
        loss = criterion(output, target)

        # accuracy and loss bookkeeping
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        n_samples = images.size(0)
        losses.update(loss.item(), n_samples)
        top1.update(acc1[0], n_samples)
        top5.update(acc5[0], n_samples)

        # tensorboard scalars
        writer.add_scalar('Train/Loss', losses.val, global_step)
        writer.add_scalar('Train/Acc@1', top1.val, global_step)
        writer.add_scalar('Train/Acc@5', top5.val, global_step)
        writer.add_scalar('Train/LR', optimizer.param_groups[0]['lr'], global_step)

        # backward pass and SGD step
        optimizer.zero_grad()
        if not args.amp:
            loss.backward()
        else:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        optimizer.step()

        # elapsed time for the whole batch
        batch_time.update(time.time() - tick)
        writer.add_scalar('Train/Time', batch_time.val, global_step)
        writer.add_scalar('Train/Time_Data', data_time.val, global_step)
        tick = time.time()

        if step % args.print_freq == 0:
            progress.display(step)

    fps_text = ' * FPS@all {:.3f}'.format(args.batch_size/batch_time.avg)
    print(fps_text)
    hwlog.remark_print(key=hwlog.FPS, value=fps_text)
|
||||
|
||||
def validate(val_loader, model, criterion, args, epoch=0, writer=None):
    """Evaluate ``model`` on ``val_loader`` and return the averaged top-1 accuracy.

    A progress line is printed every ``args.print_freq`` batches. After the
    loop the averaged top-1/top-5 accuracies are printed and reported through
    ``hwlog``; when ``writer`` is truthy the averaged time, loss and accuracies
    are also written to it under step ``epoch``.
    """
    time_meter = AverageMeter('Time', ':6.3f')
    loss_meter = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [time_meter, loss_meter, top1, top5],
        prefix='Test: ')

    # Put the model into evaluation mode before running any batch.
    model.eval()

    with torch.no_grad():
        tick = time.time()
        for step, (images, target) in enumerate(val_loader):
            # Labels are cast to int32 before being moved to the device.
            target = target.to(torch.int32)
            images = images.to(CALCULATE_DEVICE, non_blocking=False)
            target = target.to(CALCULATE_DEVICE, non_blocking=False)

            # Forward pass and loss.
            output = model(images)
            loss = criterion(output, target)

            # Accuracy and loss bookkeeping, weighted by the batch size.
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            samples = images.size(0)
            loss_meter.update(loss.item(), samples)
            top1.update(acc1[0], samples)
            top5.update(acc5[0], samples)

            # Wall-clock time spent on this batch.
            time_meter.update(time.time() - tick)
            tick = time.time()

            if step % args.print_freq == 0:
                progress.display(step)

    # TODO: this should also be done with the ProgressMeter
    print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))
    hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP1, value="{top1.avg:.3f}".format(top1=top1))
    hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP5, value="{top5.avg:.3f}".format(top5=top5))

    if writer:  # and args.gpu==0:
        for tag, meter in (('Val/Time', time_meter),
                           ('Val/Loss', loss_meter),
                           ('Val/Acc@1', top1),
                           ('Val/Acc@5', top5)):
            writer.add_scalar(tag, meter.avg, epoch)

    return top1.avg
|
||||
|
||||
|
||||
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Serialize ``state`` to ``filename``; keep an extra copy when it is the best so far.

    The copy is written to the current working directory and is named after
    ``state['best_acc1']`` and ``state['epoch']``.
    """
    torch.save(state, filename)
    if not is_best:
        return
    best_name = 'model_best_acc%.4f_epoch%d.pth.tar' % (state['best_acc1'], state['epoch'])
    shutil.copyfile(filename, best_name)
|
||||
|
||||
|
||||
class AverageMeter(object):
    """Tracks the latest value and a running average that skips warm-up updates.

    The first ``start_count_index`` calls to :meth:`update` are treated as
    warm-up: they change ``val`` and ``count`` but are excluded from ``sum``
    and ``avg``. ``avg`` therefore stays 0 until the warm-up is over.
    """

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()
        # Number of initial update() calls excluded from the average.
        self.start_count_index = 10

    def reset(self):
        """Clear all statistics; the warm-up applies again afterwards."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0
        # Bookkeeping for the warm-up exclusion.
        self.updates = 0          # update() calls seen since the last reset
        self.warmup_samples = 0   # samples consumed by the skipped updates

    def update(self, val, n=1):
        """Record ``val`` observed over ``n`` samples.

        The samples of the skipped warm-up updates are tracked explicitly.
        Deriving them from the current ``n`` (``start_count_index * n``) is only
        correct when every update uses the same ``n`` and gives a wrong
        denominator as soon as one batch has a different size.
        """
        self.val = val
        self.count += n
        self.updates += 1
        if self.updates <= self.start_count_index:
            self.warmup_samples += n
            return
        self.sum += val * n
        measured = self.count - self.warmup_samples
        if measured > 0:
            self.avg = self.sum / measured

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)
|
||||
|
||||
|
||||
class ProgressMeter(object):
    """Prints one progress line per call and reports the current accuracy to hwlog."""

    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        """Print the state of all meters for ``batch`` and log the accuracy meters.

        The values reported through ``hwlog`` are read from the meters named
        ``Acc@1`` / ``Acc@5``. A meter set without them simply skips that log
        point instead of raising IndexError.
        """
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))
        # Log points: current (not averaged) top-1 / top-5 accuracy.
        train_acc1 = self._current_value("Acc@1")
        train_acc5 = self._current_value("Acc@5")
        if train_acc1 is not None:
            hwlog.remark_print(key=hwlog.TRAIN_ACCURACY_TOP1, value=train_acc1)
        if train_acc5 is not None:
            hwlog.remark_print(key=hwlog.TRAIN_ACCURACY_TOP5, value=train_acc5)

    def _current_value(self, meter_name):
        """Return the formatted current value of the meter called ``meter_name``, or None."""
        for meter in self.meters:
            if getattr(meter, 'name', None) == meter_name:
                # Same text the meter prints for its current value, without padding.
                text = ('{val' + meter.fmt + '}').format(val=meter.val)
                return text.strip().split(" ")[0]
        return None

    def _get_batch_fmtstr(self, num_batches):
        """Build the ``[ cur/total]`` template, padded to the width of ``num_batches``."""
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'
|
||||
|
||||
|
||||
def adjust_learning_rate(optimizer, epoch, args):
    """Apply the step schedule: ``args.lr`` scaled by 0.1 for every 30 completed epochs."""
    decay_steps = epoch // 30
    scheduled_lr = args.lr * (0.1 ** decay_steps)
    for group in optimizer.param_groups:
        group['lr'] = scheduled_lr
|
||||
|
||||
|
||||
def accuracy(output, target, topk=(1,)):
    """Compute the top-k accuracy (in percent) for each k in ``topk``.

    ``output`` is indexed as (sample, class) and ``target`` holds one class
    index per sample. Returns a list with one single-element tensor per entry
    of ``topk``, in the same order.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the maxk highest scores per sample, transposed so that
        # row r holds the rank-r prediction of every sample.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # reshape, not view: the slice comes from a transposed tensor and
            # is not contiguous, so view(-1) raises for k > 1.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: report the environment and fixed run settings through
    # hwlog, then start training.
    hwlog.ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
    cpu_info, npu_info, framework_info, os_info, benchmark_version = get_environment_info("pytorch")
    config_info = get_model_parameter("pytorch_config")
    initinal_data = {"base_lr": 0.1, "dataset": "imagenet", "optimizer": "SGD", "loss_scale": 1024}

    # (log key, value) pairs, emitted in this order.
    startup_records = (
        (hwlog.CPU_INFO, cpu_info),
        (hwlog.NPU_INFO, npu_info),
        (hwlog.OS_INFO, os_info),
        (hwlog.FRAMEWORK_INFO, framework_info),
        (hwlog.BENCHMARK_VERSION, benchmark_version),
        (hwlog.CONFIG_INFO, config_info),
        (hwlog.BASE_LR, initinal_data.get("base_lr")),
        (hwlog.DATASET, initinal_data.get("dataset")),
        (hwlog.OPT_NAME, initinal_data.get("optimizer")),
        (hwlog.LOSS_SCALE, initinal_data.get("loss_scale")),
    )
    for record_key, record_value in startup_records:
        hwlog.remark_print(key=record_key, value=record_value)

    main()
|
||||
+538
@@ -0,0 +1,538 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import random
|
||||
import shutil
|
||||
import time
|
||||
import warnings
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.parallel
|
||||
import torch.backends.cudnn as cudnn
|
||||
import torch.distributed as dist
|
||||
import torch.optim
|
||||
import torch.multiprocessing as mp
|
||||
import torch.utils.data
|
||||
import torch.utils.data.distributed
|
||||
import torchvision.transforms as transforms
|
||||
import torchvision.datasets as datasets
|
||||
import torchvision.models as models
|
||||
from densenet_0_2_2 import densenet121
|
||||
|
||||
from apex import amp
|
||||
|
||||
from benchmark_log import hwlog
|
||||
from benchmark_log.basic_utils import get_environment_info
|
||||
from benchmark_log.basic_utils import get_model_parameter
|
||||
|
||||
# Default total mini-batch size offered by the command line.
BATCH_SIZE = 512
# Number of samples accumulated before an optimizer step when --benchmark is 1.
OPTIMIZER_BATCH_SIZE = 2048
# Lower-case, non-dunder callables exposed by torchvision.models, sorted by name.
model_names = sorted(
    name for name, member in vars(models).items()
    if name.islower() and not name.startswith("__") and callable(member))
|
||||
|
||||
# Command-line interface of the training script. Option names, types and
# defaults are unchanged; only help texts that contradicted the real defaults
# (or were copy-pasted from another option) were corrected.
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--data', metavar='DIR', default='/opt/npu/dataset/imagenet',
                    help='path to dataset')
# NOTE(review): main_worker in this file builds densenet121() regardless of
# --arch; the option only affects the printed name and the checkpoint metadata.
parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet50',
                    choices=model_names,
                    help='model architecture: ' +
                         ' | '.join(model_names) +
                         ' (default: resnet50)')
parser.add_argument('-j', '--workers', default=32, type=int, metavar='N',
                    help='number of data loading workers (default: 32)')
parser.add_argument('--epochs', default=90, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=BATCH_SIZE, type=int,
                    metavar='N',
                    help='mini-batch size (default: {}), this is the total '
                         'batch size of all GPUs on the current node when '
                         'using Data Parallel or Distributed Data Parallel'.format(BATCH_SIZE))
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)',
                    dest='weight_decay')
parser.add_argument('--workspace', type=str, default='./', metavar='DIR',
                    help='path to directory where checkpoints will be stored')
parser.add_argument('-p', '--print-freq', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('-ef', '--eval-freq', default=5, type=int,
                    metavar='N', help='evaluate frequency (default: 5)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                    help='evaluate model on validation set')
parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                    help='use pre-trained model')
parser.add_argument('--world-size', default=-1, type=int,
                    help='number of nodes for distributed training')
parser.add_argument('--rank', default=-1, type=int,
                    help='node rank for distributed training')
parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
                    help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str,
                    help='distributed backend')
parser.add_argument('--seed', default=None, type=int,
                    help='seed for initializing training. ')
parser.add_argument('--gpu', default=None, type=int,
                    help='GPU id to use.')
parser.add_argument('--multiprocessing-distributed', action='store_true',
                    help='Use multi-processing distributed training to launch '
                         'N processes per node, which has N GPUs. This is the '
                         'fastest way to use PyTorch for either single node or '
                         'multi node data parallel training')
parser.add_argument('-bm', '--benchmark', default=0, type=int,
                    metavar='N',
                    help='benchmark status (default: 0): 0 steps the optimizer '
                         'every iteration, 1 accumulates gradients and steps '
                         'once per OPTIMIZER_BATCH_SIZE samples')
parser.add_argument('--device', default='npu', type=str,
                    help='npu or gpu')
parser.add_argument('--addr', default='10.136.181.115', type=str,
                    help='master addr')
parser.add_argument('--checkpoint-nameprefix', default='checkpoint', type=str,
                    help='checkpoint-nameprefix')
parser.add_argument('--checkpoint-freq', default=0, type=int,
                    metavar='N',
                    help='checkpoint frequency (default: 0). '
                         '0: save only one file, overwritten each epoch; '
                         'n: save a different file every n epochs; '
                         '-1: no checkpoint (not supported)')
parser.add_argument('--device-list', default='0,1,2,3,4,5,6,7', type=str, help='device id list')
# apex
parser.add_argument('--amp', default=False, action='store_true',
                    help='use amp to train the model')
parser.add_argument('--loss-scale', default=1024., type=float,
                    help='loss scale used in amp (default: 1024)')
parser.add_argument('--opt-level', default='O2', type=str,
                    help='amp optimization level (default: O2)')

# All warnings are silenced for the whole process.
warnings.filterwarnings('ignore')
# Best top-1 validation accuracy seen so far; updated by main_worker.
best_acc1 = 0
|
||||
def device_id_to_process_device_map(device_list):
    """Map process indices 0..N-1 to the device ids in ``device_list``, in ascending order.

    ``device_list`` is a comma-separated string of integer device ids.
    """
    ordered_ids = sorted(int(token) for token in device_list.split(","))
    return dict(enumerate(ordered_ids))
|
||||
|
||||
def main():
    """Parse the command line, prepare the process environment and start the worker(s)."""
    args = parser.parse_args()
    banner = "===============main()================="
    print(banner)
    print(args)
    print(banner)

    os.environ['KERNEL_NAME_ID'] = str(0)
    print("+++++++++++++++++++++++++++KERNEL_NAME_ID:", os.environ['KERNEL_NAME_ID'])

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    # Rendezvous address for torch.distributed; the port is fixed.
    os.environ['MASTER_ADDR'] = args.addr  # '10.136.181.51'
    os.environ['MASTER_PORT'] = '29688'

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed
    args.process_device_map = device_id_to_process_device_map(args.device_list)

    # On NPU the per-node device count comes from --device-list.
    ngpus_per_node = (len(args.process_device_map) if args.device == 'npu'
                      else torch.cuda.device_count())

    if not args.multiprocessing_distributed:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
        return

    # Since we have ngpus_per_node processes per node, the total world_size
    # needs to be adjusted accordingly.
    args.world_size = ngpus_per_node * args.world_size
    # Use torch.multiprocessing.spawn to launch distributed processes: the
    # main_worker process function.
    # The child process uses the environment variables of the parent process,
    # we have to set KERNEL_NAME_ID for every proc.
    mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
|
||||
|
||||
|
||||
def main_worker(gpu, ngpus_per_node, args):
    """Run training (or evaluation only) in one process bound to one NPU.

    Args:
        gpu: process index on this node. ``mp.spawn`` passes 0..N-1; on the
            non-spawn path ``main`` passes ``args.gpu``, which may be None.
        ngpus_per_node: number of devices (processes) per node.
        args: parsed command-line namespace. ``gpu``, ``rank``, ``batch_size``,
            ``workers`` and ``start_epoch`` are updated in place.

    Fixes relative to the previous version:
      * ``--evaluate`` called ``validate`` without the required
        ``ngpus_per_node`` argument and raised TypeError.
      * ``gpu=None`` raised KeyError on the device-map lookup; it now selects
        the first entry of the device map.
    """
    global best_acc1
    # The device map is keyed by integer process index, so None cannot be looked up.
    process_index = 0 if gpu is None else gpu
    args.gpu = args.process_device_map[process_index]
    print("[npu id:",args.gpu,"]","+++++++++++++++++++++++++++ before set KERNEL_NAME_ID:",os.environ['KERNEL_NAME_ID'])
    os.environ['KERNEL_NAME_ID'] = str(process_index)
    print("[npu id:",args.gpu,"]","+++++++++++++++++++++++++++KERNEL_NAME_ID:",os.environ['KERNEL_NAME_ID'])

    if args.gpu is not None:
        print("[npu id:",args.gpu,"]","Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + process_index

        if args.device == 'npu':
            # No init_method on NPU: the default rendezvous is used, which reads
            # MASTER_ADDR / MASTER_PORT set in main().
            dist.init_process_group(backend=args.dist_backend,
                                    world_size=args.world_size, rank=args.rank)
        else:
            dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                    world_size=args.world_size, rank=args.rank)

    loc = 'npu:{}'.format(args.gpu)
    torch.npu.set_device(loc)

    # --batch-size and --workers are per-node totals; split them across processes.
    args.batch_size = int(args.batch_size / ngpus_per_node)
    args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)

    print("[npu id:",args.gpu,"]","===============main_worker()=================")
    print("[npu id:",args.gpu,"]",args)
    print("[npu id:",args.gpu,"]","===============main_worker()=================")

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=False, sampler=train_sampler, drop_last=True)

    # NOTE(review): the validation loader shuffles and drops the last partial
    # batch, so a few validation images are not scored. Kept as-is.
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=False, drop_last=True)

    # create model (always densenet121; args.arch is only printed / stored)
    print("[npu id:",args.gpu,"]","=> creating model '{}'".format(args.arch))
    model = densenet121()
    model = model.to(loc)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(loc)
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level, loss_scale=args.loss_scale)
    # NOTE(review): the model is wrapped unconditionally, which needs an
    # initialized process group — confirm whether non-distributed runs are
    # meant to be supported.
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], broadcast_buffers=False)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            if args.amp:
                amp.load_state_dict(checkpoint['amp'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    if args.evaluate:
        # validate() requires ngpus_per_node as its fifth positional argument.
        validate(val_loader, model, criterion, args, ngpus_per_node)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args, ngpus_per_node)

        if (epoch + 1) % (args.eval_freq) == 0 or epoch == args.epochs - 1:
            # evaluate on validation set
            acc1 = validate(val_loader, model, criterion, args, ngpus_per_node)

            # remember best acc@1 and save checkpoint
            is_best = acc1 > best_acc1
            best_acc1 = max(acc1, best_acc1)

            # Without multiprocessing every evaluation saves; with it, only the
            # first process of each node saves, and only after the last epoch.
            if not args.multiprocessing_distributed or (args.multiprocessing_distributed
                    and args.rank % ngpus_per_node == 0 and epoch == args.epochs - 1):
                state = {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                }
                if args.amp:
                    # The loss-scaler state is needed to resume amp training.
                    state['amp'] = amp.state_dict()
                save_checkpoint(state, is_best)
|
||||
|
||||
def train(train_loader, model, criterion, optimizer, epoch, args,ngpus_per_node):
    """Train ``model`` for one epoch and report throughput.

    With ``args.benchmark == 0`` the optimizer steps after every batch. With
    ``args.benchmark == 1`` gradients are accumulated and the optimizer steps
    once per ``OPTIMIZER_BATCH_SIZE / args.batch_size`` batches, after the
    accumulated gradients are averaged.

    Fixes relative to the previous version:
      * the FPS report divided by ``batch_time.avg``, which is still 0 when no
        iteration has been averaged yet (ZeroDivisionError on short epochs);
      * gradient averaging crashed on parameters whose ``grad`` is None;
      * the accumulation multiplier could be 0 (modulo by zero) when the
        per-process batch is larger than ``OPTIMIZER_BATCH_SIZE``.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    # Only the first process of each node prints/logs in multiprocessing mode.
    is_reporting_rank = (not args.multiprocessing_distributed
                         or args.rank % ngpus_per_node == 0)
    loc = 'npu:{}'.format(args.gpu)

    accumulate_steps = 1
    if args.benchmark == 1:
        # Loop-invariant; clamped so the modulo below can never divide by zero.
        accumulate_steps = max(1, int(OPTIMIZER_BATCH_SIZE / args.batch_size))

    # switch to train mode
    model.train()
    end = time.time()
    if args.benchmark == 1:
        optimizer.zero_grad()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        target = target.to(torch.int32)
        images, target = images.to(loc, non_blocking=False), target.to(loc, non_blocking=False)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        if args.benchmark == 0:
            optimizer.zero_grad()

        if args.amp:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        if args.benchmark == 0:
            optimizer.step()
        elif args.benchmark == 1:
            if (i + 1) % accumulate_steps == 0:
                # Turn the summed gradients into their mean before stepping.
                for param_group in optimizer.param_groups:
                    for param in param_group['params']:
                        if param.grad is not None:
                            param.grad /= accumulate_steps
                optimizer.step()
                optimizer.zero_grad()

        if i % args.print_freq == 0 and is_reporting_rank:
            progress.display(i)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

    if is_reporting_rank:
        if batch_time.avg > 0:
            fps = ngpus_per_node * args.batch_size / batch_time.avg
            print("[npu id:",args.gpu,"]",'* FPS@all {:.3f}'.format(fps))
            hwlog.remark_print(key=hwlog.FPS, value=' * FPS@all {:.3f}'.format(fps))
        else:
            # AverageMeter ignores its first updates, so a very short epoch has
            # no averaged batch time to divide by.
            print("[npu id:",args.gpu,"]",'* FPS@all n/a (no timed iterations were averaged)')
|
||||
|
||||
def validate(val_loader, model, criterion, args,ngpus_per_node):
    """Evaluate ``model`` on ``val_loader`` and return the averaged top-1 accuracy.

    In multiprocessing mode only the first process of each node prints progress
    and reports the final accuracies; every process returns its own average.
    """
    time_meter = AverageMeter('Time', ':6.3f')
    loss_meter = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [time_meter, loss_meter, top1, top5],
        prefix='Test: ')

    # Same reporting condition as the training loop.
    reports = (not args.multiprocessing_distributed
               or args.rank % ngpus_per_node == 0)
    loc = 'npu:{}'.format(args.gpu)

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        tick = time.time()
        for step, (images, target) in enumerate(val_loader):
            target = target.to(torch.int32)
            images = images.to(loc, non_blocking=False)
            target = target.to(loc, non_blocking=False)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            samples = images.size(0)
            loss_meter.update(loss.item(), samples)
            top1.update(acc1[0], samples)
            top5.update(acc5[0], samples)

            # measure elapsed time
            time_meter.update(time.time() - tick)
            tick = time.time()

            if step % args.print_freq == 0 and reports:
                progress.display(step)

    # TODO: this should also be done with the ProgressMeter
    if reports:
        print("[npu id:",args.gpu,"]",'[AVG-ACC] * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))
        hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP1, value="{top1.avg:.3f}".format(top1=top1))
        hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP5, value="{top5.avg:.3f}".format(top5=top5))

    return top1.avg
|
||||
|
||||
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Serialize ``state`` to ``filename``; keep an extra copy when it is the best so far.

    The copy is written to the current working directory and is named after
    ``state['best_acc1']`` and ``state['epoch']``.
    """
    torch.save(state, filename)
    if not is_best:
        return
    best_name = 'model_best_acc%.4f_epoch%d.pth.tar' % (state['best_acc1'], state['epoch'])
    shutil.copyfile(filename, best_name)
|
||||
|
||||
class AverageMeter(object):
    """Tracks the latest value and a running average that skips warm-up updates.

    The first ``start_count_index`` calls to :meth:`update` are treated as
    warm-up: they change ``val`` and ``count`` but are excluded from ``sum``
    and ``avg``. ``avg`` therefore stays 0 until the warm-up is over.
    """

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()
        # Number of initial update() calls excluded from the average.
        self.start_count_index = 10

    def reset(self):
        """Clear all statistics; the warm-up applies again afterwards."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0
        # Bookkeeping for the warm-up exclusion.
        self.updates = 0          # update() calls seen since the last reset
        self.warmup_samples = 0   # samples consumed by the skipped updates

    def update(self, val, n=1):
        """Record ``val`` observed over ``n`` samples.

        The samples of the skipped warm-up updates are tracked explicitly.
        Deriving them from the current ``n`` (``start_count_index * n``) is only
        correct when every update uses the same ``n`` and gives a wrong
        denominator as soon as one batch has a different size.
        """
        self.val = val
        self.count += n
        self.updates += 1
        if self.updates <= self.start_count_index:
            self.warmup_samples += n
            return
        self.sum += val * n
        measured = self.count - self.warmup_samples
        if measured > 0:
            self.avg = self.sum / measured

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)
|
||||
|
||||
|
||||
class ProgressMeter(object):
    """Prints one progress line per call and reports the current accuracy to hwlog."""

    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        """Print the state of all meters for ``batch`` and log the accuracy meters.

        The line is tagged with the ``KERNEL_NAME_ID`` environment variable,
        which must be set. The values reported through ``hwlog`` are read from
        the meters named ``Acc@1`` / ``Acc@5``. A meter set without them simply
        skips that log point instead of raising IndexError.
        """
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print("[npu id:",os.environ['KERNEL_NAME_ID'],"]",'\t'.join(entries))
        # Log points: current (not averaged) top-1 / top-5 accuracy.
        train_acc1 = self._current_value("Acc@1")
        train_acc5 = self._current_value("Acc@5")
        if train_acc1 is not None:
            hwlog.remark_print(key=hwlog.TRAIN_ACCURACY_TOP1, value=train_acc1)
        if train_acc5 is not None:
            hwlog.remark_print(key=hwlog.TRAIN_ACCURACY_TOP5, value=train_acc5)

    def _current_value(self, meter_name):
        """Return the formatted current value of the meter called ``meter_name``, or None."""
        for meter in self.meters:
            if getattr(meter, 'name', None) == meter_name:
                # Same text the meter prints for its current value, without padding.
                text = ('{val' + meter.fmt + '}').format(val=meter.val)
                return text.strip().split(" ")[0]
        return None

    def _get_batch_fmtstr(self, num_batches):
        """Build the ``[ cur/total]`` template, padded to the width of ``num_batches``."""
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'
|
||||
|
||||
|
||||
def adjust_learning_rate(optimizer, epoch, args):
    """Apply the step schedule: ``args.lr`` scaled by 0.1 for every 30 completed epochs."""
    decay_steps = epoch // 30
    scheduled_lr = args.lr * (0.1 ** decay_steps)
    for group in optimizer.param_groups:
        group['lr'] = scheduled_lr
|
||||
|
||||
|
||||
def accuracy(output, target, topk=(1,)):
    """Compute the top-k accuracy (in percent) for each k in ``topk``.

    ``output`` is indexed as (sample, class) and ``target`` holds one class
    index per sample. Returns a list with one single-element tensor per entry
    of ``topk``, in the same order.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the maxk highest scores per sample, transposed so that
        # row r holds the rank-r prediction of every sample.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # reshape, not view: the slice comes from a transposed tensor and
            # is not contiguous, so view(-1) raises for k > 1.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: report the environment and fixed run settings through
    # hwlog, then start training.
    hwlog.ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
    cpu_info, npu_info, framework_info, os_info, benchmark_version = get_environment_info("pytorch")
    config_info = get_model_parameter("pytorch_config")
    initinal_data = {"base_lr": 0.1, "dataset": "imagenet", "optimizer": "SGD", "loss_scale": 1024}

    # (log key, value) pairs, emitted in this order.
    startup_records = (
        (hwlog.CPU_INFO, cpu_info),
        (hwlog.NPU_INFO, npu_info),
        (hwlog.OS_INFO, os_info),
        (hwlog.FRAMEWORK_INFO, framework_info),
        (hwlog.BENCHMARK_VERSION, benchmark_version),
        (hwlog.CONFIG_INFO, config_info),
        (hwlog.BASE_LR, initinal_data.get("base_lr")),
        (hwlog.DATASET, initinal_data.get("dataset")),
        (hwlog.OPT_NAME, initinal_data.get("optimizer")),
        (hwlog.LOSS_SCALE, initinal_data.get("loss_scale")),
    )
    for record_key, record_value in startup_records:
        hwlog.remark_print(key=record_key, value=record_value)

    main()
|
||||
|
||||
+225
@@ -0,0 +1,225 @@
|
||||
import re
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.utils.model_zoo as model_zoo
|
||||
from collections import OrderedDict
|
||||
|
||||
# Names exported by ``from <this module> import *``.
__all__ = ['DenseNet', 'densenet121', 'densenet169', 'densenet201', 'densenet161']


# Checkpoint URLs keyed by architecture name.
# NOTE(review): presumably read by the ``pretrained=True`` path of the factory
# functions below — those bodies are outside this chunk, confirm there.
model_urls = {
    'densenet121': 'https://download.pytorch.org/models/densenet121-a639ec97.pth',
    'densenet169': 'https://download.pytorch.org/models/densenet169-b2777c0a.pth',
    'densenet201': 'https://download.pytorch.org/models/densenet201-c1103571.pth',
    'densenet161': 'https://download.pytorch.org/models/densenet161-8d451a50.pth',
}
|
||||
|
||||
|
||||
class _DenseLayer(nn.Sequential):
|
||||
def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):
|
||||
super(_DenseLayer, self).__init__()
|
||||
self.add_module('norm1', nn.BatchNorm2d(num_input_features)),
|
||||
self.add_module('relu1', nn.ReLU(inplace=True)),
|
||||
self.add_module('conv1', nn.Conv2d(num_input_features, bn_size *
|
||||
growth_rate, kernel_size=1, stride=1, bias=False)),
|
||||
self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate)),
|
||||
self.add_module('relu2', nn.ReLU(inplace=True)),
|
||||
self.add_module('conv2', nn.Conv2d(bn_size * growth_rate, growth_rate,
|
||||
kernel_size=3, stride=1, padding=1, bias=False)),
|
||||
self.drop_rate = drop_rate
|
||||
|
||||
def forward(self, x):
|
||||
new_features = super(_DenseLayer, self).forward(x)
|
||||
if self.drop_rate > 0:
|
||||
new_features = F.dropout(new_features, p=self.drop_rate, training=self.training)
|
||||
return torch.cat([x, new_features], 1)
|
||||
|
||||
|
||||
class _DenseBlock(nn.Sequential):
    """A chain of ``num_layers`` dense layers named ``denselayer1`` .. ``denselayerN``.

    Every layer receives ``growth_rate`` more input channels than the one before it.
    """

    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate):
        super().__init__()
        for position in range(1, num_layers + 1):
            in_channels = num_input_features + (position - 1) * growth_rate
            self.add_module(
                'denselayer%d' % position,
                _DenseLayer(in_channels, growth_rate, bn_size, drop_rate))
|
||||
|
||||
|
||||
class _Transition(nn.Sequential):
|
||||
def __init__(self, num_input_features, num_output_features):
|
||||
super(_Transition, self).__init__()
|
||||
self.add_module('norm', nn.BatchNorm2d(num_input_features))
|
||||
self.add_module('relu', nn.ReLU(inplace=True))
|
||||
self.add_module('conv', nn.Conv2d(num_input_features, num_output_features,
|
||||
kernel_size=1, stride=1, bias=False))
|
||||
self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))
|
||||
|
||||
|
||||
class DenseNet(nn.Module):
    r"""Densenet-BC model, after
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        growth_rate (int) - number of filters added by each dense layer (`k` in the paper)
        block_config (list of 4 ints) - number of layers in each dense block
        num_init_features (int) - number of filters of the first convolution
        bn_size (int) - multiplier for the bottleneck width
          (the bottleneck layer has bn_size * k features)
        drop_rate (float) - dropout rate applied after each dense layer
        num_classes (int) - number of output classes
    """

    def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
                 num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000):

        super().__init__()

        # Stem: 7x7 stride-2 convolution, batch norm, ReLU, 3x3 stride-2 max pool.
        stem = OrderedDict()
        stem['conv0'] = nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)
        stem['norm0'] = nn.BatchNorm2d(num_init_features)
        stem['relu0'] = nn.ReLU(inplace=True)
        stem['pool0'] = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.features = nn.Sequential(stem)

        # Dense blocks; every block except the last is followed by a transition
        # that halves the channel count.
        channels = num_init_features
        last_block = len(block_config)
        for block_no, num_layers in enumerate(block_config, start=1):
            dense = _DenseBlock(num_layers=num_layers, num_input_features=channels,
                                bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate)
            self.features.add_module('denseblock%d' % block_no, dense)
            channels = channels + num_layers * growth_rate
            if block_no != last_block:
                halved = channels // 2
                transition = _Transition(num_input_features=channels, num_output_features=halved)
                self.features.add_module('transition%d' % block_no, transition)
                channels = halved

        # Final batch norm
        self.features.add_module('norm5', nn.BatchNorm2d(channels))

        # Linear layer
        self.classifier = nn.Linear(channels, num_classes)

        # Official init from torch repo.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Return the classifier output for the image batch ``x``."""
        features = self.features(x)
        activated = F.relu(features, inplace=True)
        pooled = F.adaptive_avg_pool2d(activated, (1, 1))
        flat = pooled.view(features.size(0), -1)
        return self.classifier(flat)
|
||||
|
||||
|
||||
def densenet121(pretrained=False, **kwargs):
    r"""Build a Densenet-121 model, see
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, load the weights referenced by
            ``model_urls['densenet121']`` into the model
        **kwargs: forwarded to ``DenseNet``
    """
    model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 24, 16),
                     **kwargs)
    if not pretrained:
        return model

    # The published checkpoints still name dense-layer entries 'norm.1',
    # 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'. Module names may no
    # longer contain '.', so the separator before the digit is dropped.
    legacy_key = re.compile(
        r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
    weights = model_zoo.load_url(model_urls['densenet121'])
    for old_name in list(weights.keys()):
        found = legacy_key.match(old_name)
        if found is None:
            continue
        weights[found.group(1) + found.group(2)] = weights.pop(old_name)
    model.load_state_dict(weights)
    return model
|
||||
|
||||
|
||||
def densenet169(pretrained=False, **kwargs):
    r"""Build a Densenet-169 model, see
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, load the weights referenced by
            ``model_urls['densenet169']`` into the model
        **kwargs: forwarded to ``DenseNet``
    """
    model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 32, 32),
                     **kwargs)
    if not pretrained:
        return model

    # The published checkpoints still name dense-layer entries 'norm.1',
    # 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'. Module names may no
    # longer contain '.', so the separator before the digit is dropped.
    legacy_key = re.compile(
        r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
    weights = model_zoo.load_url(model_urls['densenet169'])
    for old_name in list(weights.keys()):
        found = legacy_key.match(old_name)
        if found is None:
            continue
        weights[found.group(1) + found.group(2)] = weights.pop(old_name)
    model.load_state_dict(weights)
    return model
|
||||
|
||||
|
||||
def densenet201(pretrained=False, **kwargs):
    r"""Build a Densenet-201 model, see
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, load the weights referenced by
            ``model_urls['densenet201']`` into the model
        **kwargs: forwarded to ``DenseNet``
    """
    model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 48, 32),
                     **kwargs)
    if not pretrained:
        return model

    # The published checkpoints still name dense-layer entries 'norm.1',
    # 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'. Module names may no
    # longer contain '.', so the separator before the digit is dropped.
    legacy_key = re.compile(
        r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
    weights = model_zoo.load_url(model_urls['densenet201'])
    for old_name in list(weights.keys()):
        found = legacy_key.match(old_name)
        if found is None:
            continue
        weights[found.group(1) + found.group(2)] = weights.pop(old_name)
    model.load_state_dict(weights)
    return model
|
||||
|
||||
|
||||
def densenet161(pretrained=False, **kwargs):
    r"""Build a Densenet-161 model, see
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, load the weights referenced by
            ``model_urls['densenet161']`` into the model
        **kwargs: forwarded to ``DenseNet``
    """
    model = DenseNet(num_init_features=96, growth_rate=48, block_config=(6, 12, 36, 24),
                     **kwargs)
    if not pretrained:
        return model

    # The published checkpoints still name dense-layer entries 'norm.1',
    # 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'. Module names may no
    # longer contain '.', so the separator before the digit is dropped.
    legacy_key = re.compile(
        r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
    weights = model_zoo.load_url(model_urls['densenet161'])
    for old_name in list(weights.keys()):
        found = legacy_key.match(old_name)
        if found is None:
            continue
        weights[found.group(1) + found.group(2)] = weights.pop(old_name)
    model.load_state_dict(weights)
    return model
|
||||
+279
@@ -0,0 +1,279 @@
|
||||
import re
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.utils.checkpoint as cp
|
||||
from collections import OrderedDict
|
||||
#from .utils import load_state_dict_from_url
|
||||
from torch import Tensor
|
||||
from torch.jit.annotations import List
|
||||
|
||||
|
||||
__all__ = ['DenseNet', 'densenet121', 'densenet169', 'densenet201', 'densenet161']
|
||||
|
||||
# Download location of the checkpoint for each supported architecture.
model_urls = dict(
    densenet121='https://download.pytorch.org/models/densenet121-a639ec97.pth',
    densenet169='https://download.pytorch.org/models/densenet169-b2777c0a.pth',
    densenet201='https://download.pytorch.org/models/densenet201-c1103571.pth',
    densenet161='https://download.pytorch.org/models/densenet161-8d451a50.pth',
)
|
||||
|
||||
|
||||
class _DenseLayer(nn.Module):
    """Bottleneck dense layer: BN-ReLU-Conv(1x1) followed by BN-ReLU-Conv(3x3).

    ``forward`` accepts either one tensor or a list of tensors, concatenates
    them along dim 1 and returns only the newly computed feature maps.

    Args:
        num_input_features: channels of the concatenated input.
        growth_rate: output channels of the 3x3 convolution.
        bn_size: the 1x1 convolution outputs ``bn_size * growth_rate`` channels.
        drop_rate: dropout probability applied to the output when > 0.
        memory_efficient: if True, the bottleneck is evaluated through
            ``torch.utils.checkpoint`` whenever an input requires grad.
    """

    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate, memory_efficient=False):
        super(_DenseLayer, self).__init__()
        self.add_module('norm1', nn.BatchNorm2d(num_input_features))
        self.add_module('relu1', nn.ReLU(inplace=True))
        self.add_module('conv1', nn.Conv2d(num_input_features, bn_size *
                                           growth_rate, kernel_size=1, stride=1,
                                           bias=False))
        self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate))
        self.add_module('relu2', nn.ReLU(inplace=True))
        self.add_module('conv2', nn.Conv2d(bn_size * growth_rate, growth_rate,
                                           kernel_size=3, stride=1, padding=1,
                                           bias=False))
        self.drop_rate = float(drop_rate)
        self.memory_efficient = memory_efficient

    def bn_function(self, inputs):
        # type: (List[Tensor]) -> Tensor
        """Concatenate ``inputs`` along dim 1 and apply norm1 -> relu1 -> conv1."""
        concated_features = torch.cat(inputs, 1)
        bottleneck_output = self.conv1(self.relu1(self.norm1(concated_features)))  # noqa: T484
        return bottleneck_output

    # todo: rewrite when torchscript supports any
    def any_requires_grad(self, input):
        # type: (List[Tensor]) -> bool
        """Return True if at least one tensor in ``input`` requires grad."""
        for tensor in input:
            if tensor.requires_grad:
                return True
        return False

    @torch.jit.unused  # noqa: T484
    def call_checkpoint_bottleneck(self, input):
        # type: (List[Tensor]) -> Tensor
        """Evaluate ``bn_function`` through ``torch.utils.checkpoint``."""
        def closure(*inputs):
            return self.bn_function(inputs)

        # Pass the tensors as individual positional arguments. Handing the
        # list over as one argument hides the tensors from checkpoint, so no
        # gradient is propagated back to the incoming feature maps.
        return cp.checkpoint(closure, *input)

    @torch.jit._overload_method  # noqa: F811
    def forward(self, input):
        # type: (List[Tensor]) -> (Tensor)
        pass

    @torch.jit._overload_method  # noqa: F811
    def forward(self, input):
        # type: (Tensor) -> (Tensor)
        pass

    # torchscript does not yet support *args, so we overload method
    # allowing it to take either a List[Tensor] or single Tensor
    def forward(self, input):  # noqa: F811
        """Return the new feature maps computed from ``input``.

        Raises:
            Exception: if checkpointing is requested while scripting.
        """
        if isinstance(input, Tensor):
            prev_features = [input]
        else:
            prev_features = input

        if self.memory_efficient and self.any_requires_grad(prev_features):
            if torch.jit.is_scripting():
                raise Exception("Memory Efficient not supported in JIT")

            bottleneck_output = self.call_checkpoint_bottleneck(prev_features)
        else:
            bottleneck_output = self.bn_function(prev_features)

        new_features = self.conv2(self.relu2(self.norm2(bottleneck_output)))
        if self.drop_rate > 0:
            new_features = F.dropout(new_features, p=self.drop_rate,
                                     training=self.training)
        return new_features
|
||||
|
||||
|
||||
class _DenseBlock(nn.ModuleDict):
    """Dense block: every layer receives the list of all earlier feature maps.

    Layers are stored under the keys ``denselayer1..N``; each is built with
    ``growth_rate`` more input channels than the previous one.

    Args:
        num_layers: number of dense layers to create.
        num_input_features: channels of the block input.
        bn_size, growth_rate, drop_rate, memory_efficient: forwarded to every
            ``_DenseLayer``.
    """

    _version = 2

    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate, memory_efficient=False):
        super(_DenseBlock, self).__init__()
        in_channels = num_input_features
        for index in range(1, num_layers + 1):
            self.add_module(
                'denselayer%d' % index,
                _DenseLayer(
                    in_channels,
                    growth_rate=growth_rate,
                    bn_size=bn_size,
                    drop_rate=drop_rate,
                    memory_efficient=memory_efficient,
                ),
            )
            in_channels = in_channels + growth_rate

    def forward(self, init_features):
        """Return the input and every layer output concatenated along dim 1."""
        collected = [init_features]
        for _name, layer in self.items():
            collected.append(layer(collected))
        return torch.cat(collected, 1)
|
||||
|
||||
|
||||
class _Transition(nn.Sequential):
    """BatchNorm -> ReLU -> 1x1 convolution -> 2x2 average pooling.

    Args:
        num_input_features: channels entering the transition.
        num_output_features: channels produced by the 1x1 convolution.
    """

    def __init__(self, num_input_features, num_output_features):
        super(_Transition, self).__init__()
        stages = (
            ('norm', nn.BatchNorm2d(num_input_features)),
            ('relu', nn.ReLU(inplace=True)),
            ('conv', nn.Conv2d(num_input_features, num_output_features,
                               kernel_size=1, stride=1, bias=False)),
            ('pool', nn.AvgPool2d(kernel_size=2, stride=2)),
        )
        # Registration order defines the execution order of the Sequential.
        for stage_name, stage in stages:
            self.add_module(stage_name, stage)
|
||||
|
||||
|
||||
class DenseNet(nn.Module):
    r"""DenseNet-BC classifier, following
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        growth_rate (int) - channels added by every dense layer (`k` in the paper)
        block_config (sequence of ints) - number of dense layers in each dense block
        num_init_features (int) - output channels of the first convolution
        bn_size (int) - bottleneck multiplier; bottleneck layers use
          bn_size * growth_rate channels
        drop_rate (float) - dropout rate forwarded to every dense layer
        num_classes (int) - size of the classifier output
        memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient,
          but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_
    """

    def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
                 num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000, memory_efficient=False):

        super(DenseNet, self).__init__()

        # Stem: 7x7 strided convolution, batch norm, ReLU and max pooling.
        self.features = nn.Sequential(OrderedDict([
            ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2,
                                padding=3, bias=False)),
            ('norm0', nn.BatchNorm2d(num_init_features)),
            ('relu0', nn.ReLU(inplace=True)),
            ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
        ]))

        # Dense blocks; every block except the last is followed by a
        # transition that halves the channel count.
        channels = num_init_features
        final_stage = len(block_config)
        for stage, depth in enumerate(block_config, 1):
            dense = _DenseBlock(
                num_layers=depth,
                num_input_features=channels,
                bn_size=bn_size,
                growth_rate=growth_rate,
                drop_rate=drop_rate,
                memory_efficient=memory_efficient
            )
            self.features.add_module('denseblock%d' % stage, dense)
            channels = channels + depth * growth_rate
            if stage == final_stage:
                continue
            shrink = _Transition(num_input_features=channels,
                                 num_output_features=channels // 2)
            self.features.add_module('transition%d' % stage, shrink)
            channels = channels // 2

        # Batch norm applied to the output of the last dense block.
        self.features.add_module('norm5', nn.BatchNorm2d(channels))

        # Classification head.
        self.classifier = nn.Linear(channels, num_classes)

        # Parameter initialisation (same scheme as the upstream torch code).
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Return the classifier output for the input batch ``x``."""
        features = self.features(x)
        activated = F.relu(features, inplace=True)
        pooled = F.adaptive_avg_pool2d(activated, (1, 1))
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)
|
||||
|
||||
|
||||
def _load_state_dict(model, model_url, progress):
    """Download the checkpoint at ``model_url`` and load it into ``model``.

    Args:
        model: module whose ``load_state_dict`` receives the renamed weights.
        model_url: URL of the checkpoint file.
        progress: forwarded to the download helper as ``progress``.
    """
    # The module-level import of this helper is commented out in this file,
    # so resolve it here; otherwise the call below fails with NameError.
    try:
        from torch.hub import load_state_dict_from_url
    except ImportError:
        from torch.utils.model_zoo import load_url as load_state_dict_from_url

    # '.'s are no longer allowed in module names, but previous _DenseLayer
    # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.
    # They are also in the checkpoints in model_urls. This pattern is used
    # to find such keys.
    pattern = re.compile(
        r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')

    state_dict = load_state_dict_from_url(model_url, progress=progress)
    for key in list(state_dict.keys()):
        res = pattern.match(key)
        if res:
            # Drop the '.' between the module name and its index.
            new_key = res.group(1) + res.group(2)
            state_dict[new_key] = state_dict.pop(key)
    model.load_state_dict(state_dict)
|
||||
|
||||
|
||||
def _densenet(arch, growth_rate, block_config, num_init_features, pretrained, progress,
              **kwargs):
    """Construct a ``DenseNet`` and optionally load its checkpoint.

    Args:
        arch: key into ``model_urls`` used when ``pretrained`` is true.
        growth_rate, block_config, num_init_features: passed positionally
            to ``DenseNet``.
        pretrained: if true, weights are loaded via ``_load_state_dict``.
        progress: forwarded to ``_load_state_dict``.
        **kwargs: forwarded to ``DenseNet``.
    """
    model = DenseNet(growth_rate, block_config, num_init_features, **kwargs)
    if not pretrained:
        return model
    checkpoint_url = model_urls[arch]
    _load_state_dict(model, checkpoint_url, progress)
    return model
|
||||
|
||||
|
||||
def densenet121(pretrained=False, progress=True, **kwargs):
    r"""Build a Densenet-121 model, see
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, load the checkpoint registered for this architecture
        progress (bool): forwarded to the checkpoint download helper
        **kwargs: forwarded to ``DenseNet`` (for example ``memory_efficient``)
    """
    growth_rate, block_config, num_init_features = 32, (6, 12, 24, 16), 64
    return _densenet('densenet121', growth_rate, block_config, num_init_features,
                     pretrained, progress, **kwargs)
|
||||
|
||||
|
||||
def densenet161(pretrained=False, progress=True, **kwargs):
    r"""Build a Densenet-161 model, see
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, load the checkpoint registered for this architecture
        progress (bool): forwarded to the checkpoint download helper
        **kwargs: forwarded to ``DenseNet`` (for example ``memory_efficient``)
    """
    growth_rate, block_config, num_init_features = 48, (6, 12, 36, 24), 96
    return _densenet('densenet161', growth_rate, block_config, num_init_features,
                     pretrained, progress, **kwargs)
|
||||
|
||||
|
||||
def densenet169(pretrained=False, progress=True, **kwargs):
    r"""Build a Densenet-169 model, see
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, load the checkpoint registered for this architecture
        progress (bool): forwarded to the checkpoint download helper
        **kwargs: forwarded to ``DenseNet`` (for example ``memory_efficient``)
    """
    growth_rate, block_config, num_init_features = 32, (6, 12, 32, 32), 64
    return _densenet('densenet169', growth_rate, block_config, num_init_features,
                     pretrained, progress, **kwargs)
|
||||
|
||||
|
||||
def densenet201(pretrained=False, progress=True, **kwargs):
    r"""Build a Densenet-201 model, see
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, load the checkpoint registered for this architecture
        progress (bool): forwarded to the checkpoint download helper
        **kwargs: forwarded to ``DenseNet`` (for example ``memory_efficient``)
    """
    growth_rate, block_config, num_init_features = 32, (6, 12, 48, 32), 64
    return _densenet('densenet201', growth_rate, block_config, num_init_features,
                     pretrained, progress, **kwargs)
|
||||
@@ -0,0 +1,22 @@
|
||||
# Launches densenet121_1p_main.py in --evaluate mode on a single NPU,
# resuming from checkpoint.pth.tar.

# Directory appended to PYTHONPATH below. It was referenced but never set in
# this script; keep a value inherited from the caller, otherwise use the
# directory that contains this script.
currentDir=${currentDir:-$(cd "$(dirname "$0")" && pwd)}

# Device id shared by the adc call and the --npu argument.
device_id=7

# Ascend toolkit / driver environment.
export ASCEND_HOME=/usr/local/Ascend
export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/
export PYTHONPATH=$PYTHONPATH:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/te:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/topi:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/hccl:/usr/local/Ascend/ascend-toolkit/latest/tfplugin/python/site-packages:$currentDir
export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin
export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/

export SLOG_PRINT_TO_STDOUT=0
# Issue the adc SetLogLevel request for the selected device as HwHiAiUser.
su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device ${device_id}"

export TASK_QUEUE_ENABLE=0
# taskset restricts the process to CPU cores 111-150.
taskset -c 111-150 python3 densenet121_1p_main.py \
    --workers 40 \
    --arch densenet121 \
    --npu "${device_id}" \
    --lr 0.1 \
    --momentum 0.9 \
    --amp \
    --batch-size 256 \
    --epoch 90 \
    --evaluate \
    --resume checkpoint.pth.tar \
    --data /opt/npu/dataset/imagenet
|
||||
+275
@@ -0,0 +1,275 @@
|
||||
import re
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.utils.model_zoo as model_zoo
|
||||
from collections import OrderedDict
|
||||
|
||||
__all__ = ['DenseNet', 'densenet121', 'densenet169', 'densenet201', 'densenet161']
|
||||
|
||||
|
||||
# Download location of the checkpoint for each supported architecture.
model_urls = dict(
    densenet121='https://download.pytorch.org/models/densenet121-a639ec97.pth',
    densenet169='https://download.pytorch.org/models/densenet169-b2777c0a.pth',
    densenet201='https://download.pytorch.org/models/densenet201-c1103571.pth',
    densenet161='https://download.pytorch.org/models/densenet161-8d451a50.pth',
)
|
||||
|
||||
|
||||
class _DenseLayer(nn.Sequential):
    """Bottleneck dense layer: BN-ReLU-Conv(1x1) followed by BN-ReLU-Conv(3x3).

    ``forward`` returns the input concatenated (along dim 1) with the
    ``growth_rate`` feature maps computed from it.

    Args:
        num_input_features: channels of the input.
        growth_rate: output channels of the 3x3 convolution.
        bn_size: the 1x1 convolution outputs ``bn_size * growth_rate`` channels.
        drop_rate: dropout probability applied to the new features when > 0.
    """

    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):
        super(_DenseLayer, self).__init__()
        bottleneck_channels = bn_size * growth_rate
        self.add_module('norm1', nn.BatchNorm2d(num_input_features))
        self.add_module('relu1', nn.ReLU(inplace=True))
        self.add_module('conv1', nn.Conv2d(num_input_features, bottleneck_channels,
                                           kernel_size=1, stride=1, bias=False))
        self.add_module('norm2', nn.BatchNorm2d(bottleneck_channels))
        self.add_module('relu2', nn.ReLU(inplace=True))
        self.add_module('conv2', nn.Conv2d(bottleneck_channels, growth_rate,
                                           kernel_size=3, stride=1, padding=1, bias=False))
        self.drop_rate = drop_rate

    def forward(self, x):
        """Return ``x`` with the newly computed features appended on dim 1."""
        produced = super(_DenseLayer, self).forward(x)
        if self.drop_rate > 0:
            produced = F.dropout(produced, p=self.drop_rate, training=self.training)
        return torch.cat([x, produced], 1)
|
||||
|
||||
|
||||
class _DenseBlock(nn.Sequential):
    """Sequential stack of dense layers registered as ``denselayer1..N``.

    Every successive ``_DenseLayer`` is constructed with ``growth_rate`` more
    input channels than the previous one.

    Args:
        num_layers: number of dense layers to create.
        num_input_features: input channels of the first layer.
        bn_size: forwarded to each ``_DenseLayer``.
        growth_rate: forwarded to each ``_DenseLayer`` and used as the
            per-layer increase of the input channel count.
        drop_rate: forwarded to each ``_DenseLayer``.
    """

    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate):
        super(_DenseBlock, self).__init__()
        in_channels = num_input_features
        for index in range(1, num_layers + 1):
            self.add_module(
                'denselayer%d' % index,
                _DenseLayer(in_channels, growth_rate, bn_size, drop_rate),
            )
            in_channels = in_channels + growth_rate
|
||||
|
||||
|
||||
class _Transition(nn.Sequential):
    """BatchNorm -> ReLU -> 1x1 convolution, without a pooling stage.

    Unlike the usual DenseNet transition, no average pooling is registered
    here; in this file ``DenseNet.forward`` applies its own ``avg_pool``
    after each transition.

    Args:
        num_input_features: channels entering the transition.
        num_output_features: channels produced by the 1x1 convolution.
    """

    def __init__(self, num_input_features, num_output_features):
        super(_Transition, self).__init__()
        stages = (
            ('norm', nn.BatchNorm2d(num_input_features)),
            ('relu', nn.ReLU(inplace=True)),
            ('conv', nn.Conv2d(num_input_features, num_output_features,
                               kernel_size=1, stride=1, bias=False)),
        )
        # Registration order defines the execution order of the Sequential.
        for stage_name, stage in stages:
            self.add_module(stage_name, stage)
|
||||
|
||||
|
||||
class DenseNet(nn.Module):
    r"""DenseNet-BC classifier split into four feature stages, following
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    The network is stored as ``features0`` .. ``features3`` and the 2x2
    average pooling between stages is applied in ``forward`` through the
    shared ``avg_pool`` module. Exactly the first four entries of
    ``block_config`` are used.

    Args:
        growth_rate (int) - channels added by every dense layer (`k` in the paper)
        block_config (sequence of 4 ints) - number of dense layers in each dense block
        num_init_features (int) - output channels of the first convolution
        bn_size (int) - bottleneck multiplier; bottleneck layers use
          bn_size * growth_rate channels
        drop_rate (float) - dropout rate forwarded to every dense layer
        num_classes (int) - size of the classifier output
    """

    def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
                 num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000):

        super(DenseNet, self).__init__()

        self.avg_pool = nn.AvgPool2d(kernel_size=2, stride=2)

        channels = num_init_features
        for stage in range(4):
            depth = block_config[stage]
            dense = _DenseBlock(num_layers=depth, num_input_features=channels, bn_size=bn_size,
                                growth_rate=growth_rate, drop_rate=drop_rate)
            channels = channels + depth * growth_rate

            # Stages 0-2 end with a channel-halving transition, the last
            # stage ends with the final batch norm.
            if stage < 3:
                closing = ('transition%d' % (stage + 1),
                           _Transition(num_input_features=channels, num_output_features=channels // 2))
            else:
                closing = ('norm5', nn.BatchNorm2d(channels))

            members = []
            if stage == 0:
                # The stem (first convolution and max pooling) belongs to stage 0.
                members.extend([
                    ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)),
                    ('norm0', nn.BatchNorm2d(num_init_features)),
                    ('relu0', nn.ReLU(inplace=True)),
                    ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
                ])
            members.append(('denseblock%d' % (stage + 1), dense))
            members.append(closing)
            setattr(self, 'features%d' % stage, nn.Sequential(OrderedDict(members)))

            if stage < 3:
                channels = channels // 2

        # Classification head.
        self.classifier = nn.Linear(channels, num_classes)

        # Parameter initialisation (same scheme as the upstream torch code).
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Return the classifier output for the input batch ``x``."""
        # Average pooling follows each of the first three stages.
        hidden = self.avg_pool(self.features0(x))
        hidden = self.avg_pool(self.features1(hidden))
        hidden = self.avg_pool(self.features2(hidden))
        features3 = self.features3(hidden)

        activated = F.relu(features3, inplace=True)
        pooled = F.adaptive_avg_pool2d(activated, (1, 1))
        flat = pooled.view(features3.size(0), -1)
        return self.classifier(flat)
|
||||
|
||||
|
||||
def densenet121(pretrained=False, **kwargs):
    r"""Densenet-121 model from
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, load the weights referenced by
            ``model_urls['densenet121']`` into the model
        **kwargs: forwarded to ``DenseNet``
    """
    model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 24, 16),
                     **kwargs)
    if pretrained:
        # '.'s are no longer allowed in module names, but previous _DenseLayer
        # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.
        # They are also in the checkpoints in model_urls. This pattern is used
        # to find such keys.
        pattern = re.compile(
            r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
        # DenseNet in this file registers features0..features3 instead of a
        # single 'features' container, so 'features.<module>' checkpoint keys
        # must be moved under the stage that owns <module>; otherwise
        # load_state_dict rejects them as unexpected/missing keys.
        # NOTE(review): mapping derived from DenseNet.__init__ in this file.
        stage_of = {
            'conv0': 0, 'norm0': 0, 'relu0': 0, 'pool0': 0,
            'denseblock1': 0, 'transition1': 0,
            'denseblock2': 1, 'transition2': 1,
            'denseblock3': 2, 'transition3': 2,
            'denseblock4': 3, 'norm5': 3,
        }
        state_dict = model_zoo.load_url(model_urls['densenet121'])
        for key in list(state_dict.keys()):
            new_key = key
            res = pattern.match(new_key)
            if res:
                new_key = res.group(1) + res.group(2)
            parts = new_key.split('.', 2)
            if len(parts) == 3 and parts[0] == 'features' and parts[1] in stage_of:
                new_key = 'features%d.%s.%s' % (stage_of[parts[1]], parts[1], parts[2])
            if new_key != key:
                state_dict[new_key] = state_dict.pop(key)
        model.load_state_dict(state_dict)
    return model
|
||||
|
||||
|
||||
def densenet169(pretrained=False, **kwargs):
    r"""Densenet-169 model from
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, load the weights referenced by
            ``model_urls['densenet169']`` into the model
        **kwargs: forwarded to ``DenseNet``
    """
    model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 32, 32),
                     **kwargs)
    if pretrained:
        # '.'s are no longer allowed in module names, but previous _DenseLayer
        # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.
        # They are also in the checkpoints in model_urls. This pattern is used
        # to find such keys.
        pattern = re.compile(
            r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
        # DenseNet in this file registers features0..features3 instead of a
        # single 'features' container, so 'features.<module>' checkpoint keys
        # must be moved under the stage that owns <module>; otherwise
        # load_state_dict rejects them as unexpected/missing keys.
        # NOTE(review): mapping derived from DenseNet.__init__ in this file.
        stage_of = {
            'conv0': 0, 'norm0': 0, 'relu0': 0, 'pool0': 0,
            'denseblock1': 0, 'transition1': 0,
            'denseblock2': 1, 'transition2': 1,
            'denseblock3': 2, 'transition3': 2,
            'denseblock4': 3, 'norm5': 3,
        }
        state_dict = model_zoo.load_url(model_urls['densenet169'])
        for key in list(state_dict.keys()):
            new_key = key
            res = pattern.match(new_key)
            if res:
                new_key = res.group(1) + res.group(2)
            parts = new_key.split('.', 2)
            if len(parts) == 3 and parts[0] == 'features' and parts[1] in stage_of:
                new_key = 'features%d.%s.%s' % (stage_of[parts[1]], parts[1], parts[2])
            if new_key != key:
                state_dict[new_key] = state_dict.pop(key)
        model.load_state_dict(state_dict)
    return model
|
||||
|
||||
|
||||
def densenet201(pretrained=False, **kwargs):
    r"""Densenet-201 model from
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, load the weights referenced by
            ``model_urls['densenet201']`` into the model
        **kwargs: forwarded to ``DenseNet``
    """
    model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 48, 32),
                     **kwargs)
    if pretrained:
        # '.'s are no longer allowed in module names, but previous _DenseLayer
        # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.
        # They are also in the checkpoints in model_urls. This pattern is used
        # to find such keys.
        pattern = re.compile(
            r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
        # DenseNet in this file registers features0..features3 instead of a
        # single 'features' container, so 'features.<module>' checkpoint keys
        # must be moved under the stage that owns <module>; otherwise
        # load_state_dict rejects them as unexpected/missing keys.
        # NOTE(review): mapping derived from DenseNet.__init__ in this file.
        stage_of = {
            'conv0': 0, 'norm0': 0, 'relu0': 0, 'pool0': 0,
            'denseblock1': 0, 'transition1': 0,
            'denseblock2': 1, 'transition2': 1,
            'denseblock3': 2, 'transition3': 2,
            'denseblock4': 3, 'norm5': 3,
        }
        state_dict = model_zoo.load_url(model_urls['densenet201'])
        for key in list(state_dict.keys()):
            new_key = key
            res = pattern.match(new_key)
            if res:
                new_key = res.group(1) + res.group(2)
            parts = new_key.split('.', 2)
            if len(parts) == 3 and parts[0] == 'features' and parts[1] in stage_of:
                new_key = 'features%d.%s.%s' % (stage_of[parts[1]], parts[1], parts[2])
            if new_key != key:
                state_dict[new_key] = state_dict.pop(key)
        model.load_state_dict(state_dict)
    return model
|
||||
|
||||
|
||||
def densenet161(pretrained=False, **kwargs):
    r"""Densenet-161 model from
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, load the weights referenced by
            ``model_urls['densenet161']`` into the model
        **kwargs: forwarded to ``DenseNet``
    """
    model = DenseNet(num_init_features=96, growth_rate=48, block_config=(6, 12, 36, 24),
                     **kwargs)
    if pretrained:
        # '.'s are no longer allowed in module names, but previous _DenseLayer
        # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.
        # They are also in the checkpoints in model_urls. This pattern is used
        # to find such keys.
        pattern = re.compile(
            r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
        # DenseNet in this file registers features0..features3 instead of a
        # single 'features' container, so 'features.<module>' checkpoint keys
        # must be moved under the stage that owns <module>; otherwise
        # load_state_dict rejects them as unexpected/missing keys.
        # NOTE(review): mapping derived from DenseNet.__init__ in this file.
        stage_of = {
            'conv0': 0, 'norm0': 0, 'relu0': 0, 'pool0': 0,
            'denseblock1': 0, 'transition1': 0,
            'denseblock2': 1, 'transition2': 1,
            'denseblock3': 2, 'transition3': 2,
            'denseblock4': 3, 'norm5': 3,
        }
        state_dict = model_zoo.load_url(model_urls['densenet161'])
        for key in list(state_dict.keys()):
            new_key = key
            res = pattern.match(new_key)
            if res:
                new_key = res.group(1) + res.group(2)
            parts = new_key.split('.', 2)
            if len(parts) == 3 and parts[0] == 'features' and parts[1] in stage_of:
                new_key = 'features%d.%s.%s' % (stage_of[parts[1]], parts[1], parts[2])
            if new_key != key:
                state_dict[new_key] = state_dict.pop(key)
        model.load_state_dict(state_dict)
    return model
|
||||
+275
@@ -0,0 +1,275 @@
|
||||
import re
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.utils.model_zoo as model_zoo
|
||||
from collections import OrderedDict
|
||||
|
||||
__all__ = ['DenseNet', 'densenet121', 'densenet169', 'densenet201', 'densenet161']
|
||||
|
||||
|
||||
# Checkpoint download URLs, keyed by the architecture name used by the
# densenetXXX() constructors below.
model_urls = {
    'densenet121': 'https://download.pytorch.org/models/densenet121-a639ec97.pth',
    'densenet169': 'https://download.pytorch.org/models/densenet169-b2777c0a.pth',
    'densenet201': 'https://download.pytorch.org/models/densenet201-c1103571.pth',
    'densenet161': 'https://download.pytorch.org/models/densenet161-8d451a50.pth',
}
|
||||
|
||||
|
||||
class _DenseLayer(nn.Sequential):
    """One dense layer: BN-ReLU-Conv(1x1) followed by BN-ReLU-Conv(3x3).

    ``forward`` returns the input concatenated (along dim 1) with the
    ``growth_rate`` new feature maps produced by the layer.

    Args:
        num_input_features (int): channels of the incoming tensor
        growth_rate (int): channels produced by the 3x3 convolution
        bn_size (int): the 1x1 bottleneck emits ``bn_size * growth_rate`` channels
        drop_rate (float): dropout probability applied to the new features
    """

    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):
        super(_DenseLayer, self).__init__()
        bottleneck_channels = bn_size * growth_rate
        # Built and registered in execution order; the names are the
        # state_dict keys the pretrained-weight loaders rely on.
        stages = (
            ('norm1', nn.BatchNorm2d(num_input_features)),
            ('relu1', nn.ReLU(inplace=True)),
            ('conv1', nn.Conv2d(num_input_features, bottleneck_channels,
                                kernel_size=1, stride=1, bias=False)),
            ('norm2', nn.BatchNorm2d(bottleneck_channels)),
            ('relu2', nn.ReLU(inplace=True)),
            ('conv2', nn.Conv2d(bottleneck_channels, growth_rate,
                                kernel_size=3, stride=1, padding=1, bias=False)),
        )
        for name, module in stages:
            self.add_module(name, module)
        self.drop_rate = drop_rate

    def forward(self, x):
        produced = super(_DenseLayer, self).forward(x)
        if self.drop_rate > 0:
            produced = F.dropout(produced, p=self.drop_rate, training=self.training)
        # Dense connectivity: keep the input and append the new feature maps.
        return torch.cat([x, produced], 1)
|
||||
|
||||
|
||||
class _DenseBlock(nn.Sequential):
    """Sequence of ``num_layers`` dense layers named ``denselayer1..N``.

    Layer ``k`` (0-based) is built for ``num_input_features + k * growth_rate``
    input channels, i.e. ``growth_rate`` more than the layer before it.
    """

    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate):
        super(_DenseBlock, self).__init__()
        for index in range(num_layers):
            in_channels = num_input_features + index * growth_rate
            self.add_module(
                'denselayer%d' % (index + 1),
                _DenseLayer(in_channels, growth_rate, bn_size, drop_rate),
            )
|
||||
|
||||
|
||||
class _Transition(nn.Sequential):
    """Transition between dense blocks: BN -> ReLU -> 1x1 Conv.

    No pooling module is registered here (the 'pool' entry is deliberately
    disabled); in this file DenseNet.forward applies its own ``avg_pool``
    to the output of each stage that ends with a transition.
    """

    def __init__(self, num_input_features, num_output_features):
        super(_Transition, self).__init__()
        members = [
            ('norm', nn.BatchNorm2d(num_input_features)),
            ('relu', nn.ReLU(inplace=True)),
            ('conv', nn.Conv2d(num_input_features, num_output_features,
                               kernel_size=1, stride=1, bias=False)),
        ]
        for name, module in members:
            self.add_module(name, module)
        # Intentionally no: self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))
|
||||
|
||||
|
||||
class DenseNet(nn.Module):
    r"""Densenet-BC model class, based on
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    The feature extractor is split into four ``nn.Sequential`` stages,
    ``features0`` .. ``features3``. The 2x2 average pooling that follows each
    of the first three stages is not part of the stages: ``forward`` runs it
    on the CPU and moves the result back to the device the input came from.

    Args:
        growth_rate (int) - how many filters to add each layer (`k` in paper)
        block_config (list of 4 ints) - how many layers in each pooling block
        num_init_features (int) - the number of filters to learn in the first convolution layer
        bn_size (int) - multiplicative factor for number of bottle neck layers
          (i.e. bn_size * k features in the bottleneck layer)
        drop_rate (float) - dropout rate after each dense layer
        num_classes (int) - number of classification classes
    """

    def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
                 num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000):

        super(DenseNet, self).__init__()

        # Parameter-free pooling shared by all stages; applied in forward().
        self.avg_pool = nn.AvgPool2d(kernel_size=2, stride=2)

        num_stages = 4
        num_features = num_init_features
        for i in range(num_stages):
            num_layers = block_config[i]
            block = _DenseBlock(num_layers=num_layers, num_input_features=num_features,
                                bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate)
            num_features = num_features + num_layers * growth_rate

            # Every stage except the last ends with a channel-halving transition.
            trans = None
            if i != num_stages - 1:
                trans = _Transition(num_input_features=num_features,
                                    num_output_features=num_features // 2)

            layers = []
            if i == 0:
                # The stem (first convolution + max pooling) lives in stage 0.
                layers.extend([
                    ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2,
                                        padding=3, bias=False)),
                    ('norm0', nn.BatchNorm2d(num_init_features)),
                    ('relu0', nn.ReLU(inplace=True)),
                    ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
                ])
            layers.append(('denseblock%d' % (i + 1), block))
            if trans is not None:
                layers.append(('transition%d' % (i + 1), trans))
                num_features = num_features // 2
            else:
                # Final batch norm closes the last stage.
                layers.append(('norm5', nn.BatchNorm2d(num_features)))

            # Registers self.features0 .. self.features3.
            setattr(self, 'features%d' % i, nn.Sequential(OrderedDict(layers)))

        # Linear layer
        self.classifier = nn.Linear(num_features, num_classes)

        # Official init from torch repo.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # Go back to whichever device the input lives on instead of a
        # hard-coded "npu:0", so the model also runs on other cards and on CPU.
        device = x.device

        out = x
        for stage in (self.features0, self.features1, self.features2):
            out = stage(out)
            # The inter-stage average pooling is executed on the CPU and the
            # pooled tensor is then handed back to the compute device.
            out = self.avg_pool(out.cpu()).to(device)

        features3 = self.features3(out)

        out = F.relu(features3, inplace=True)
        out = F.adaptive_avg_pool2d(out, (1, 1)).view(features3.size(0), -1)
        out = self.classifier(out)
        return out
|
||||
|
||||
|
||||
def densenet121(pretrained=False, **kwargs):
    r"""Densenet-121 model from
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 24, 16),
                     **kwargs)
    if not pretrained:
        return model

    # Module names may no longer contain '.', but checkpoints written by the
    # previous _DenseLayer use 'norm.1', 'relu.1', 'conv.1', 'norm.2',
    # 'relu.2', 'conv.2'. The pattern finds those keys so the inner dot can
    # be dropped.
    # NOTE(review): DenseNet in this file names its stages features0..features3;
    # confirm the downloaded checkpoint uses the same prefixes, otherwise the
    # strict load_state_dict below will raise.
    legacy_key = re.compile(
        r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
    state_dict = model_zoo.load_url(model_urls['densenet121'])
    renames = []
    for key in state_dict.keys():
        found = legacy_key.match(key)
        if found:
            renames.append((key, found.group(1) + found.group(2)))
    # Apply the renames after the scan so the dict is not edited while iterated.
    for old_key, new_key in renames:
        state_dict[new_key] = state_dict[old_key]
        del state_dict[old_key]
    model.load_state_dict(state_dict)
    return model
|
||||
|
||||
|
||||
def densenet169(pretrained=False, **kwargs):
    r"""Densenet-169 model from
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 32, 32),
                     **kwargs)
    if not pretrained:
        return model

    # Checkpoints written by the previous _DenseLayer contain keys such as
    # 'norm.1.weight'; '.' is no longer allowed inside a module name, so those
    # keys are rewritten to 'norm1.weight' etc. before loading.
    # NOTE(review): DenseNet in this file names its stages features0..features3;
    # confirm the downloaded checkpoint uses the same prefixes, otherwise the
    # strict load_state_dict below will raise.
    legacy_key = re.compile(
        r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
    weights = model_zoo.load_url(model_urls['densenet169'])
    # Snapshot the keys: entries are moved while looping.
    for old_name in tuple(weights.keys()):
        hit = legacy_key.match(old_name)
        if hit is None:
            continue
        weights[hit.group(1) + hit.group(2)] = weights.pop(old_name)
    model.load_state_dict(weights)
    return model
|
||||
|
||||
|
||||
def densenet201(pretrained=False, **kwargs):
    r"""Densenet-201 model from
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 48, 32),
                     **kwargs)
    if pretrained:
        # '.' is no longer allowed in module names, but the previous
        # _DenseLayer (and therefore its checkpoints) uses keys 'norm.1',
        # 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'. The pattern
        # splits such a key around the dot that has to go.
        # NOTE(review): DenseNet in this file names its stages
        # features0..features3; confirm the downloaded checkpoint uses the
        # same prefixes, otherwise the strict load_state_dict will raise.
        dotted = re.compile(
            r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
        checkpoint = model_zoo.load_url(model_urls['densenet201'])
        for name in list(checkpoint.keys()):
            parts = dotted.match(name)
            if not parts:
                continue
            prefix, suffix = parts.group(1), parts.group(2)
            checkpoint[prefix + suffix] = checkpoint[name]
            del checkpoint[name]
        model.load_state_dict(checkpoint)
    return model
|
||||
|
||||
|
||||
def densenet161(pretrained=False, **kwargs):
    r"""Densenet-161 model from
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = DenseNet(num_init_features=96, growth_rate=48, block_config=(6, 12, 36, 24),
                     **kwargs)
    if pretrained:
        # Keys of the form '...denselayerN.norm.1.weight' come from the
        # previous _DenseLayer naming; module names may no longer contain
        # '.', so the dot between e.g. 'norm' and '1' is removed.
        # NOTE(review): DenseNet in this file names its stages
        # features0..features3; confirm the downloaded checkpoint uses the
        # same prefixes, otherwise the strict load_state_dict will raise.
        old_style = re.compile(
            r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
        params = model_zoo.load_url(model_urls['densenet161'])
        for param_name in list(params.keys()):
            matched = old_style.match(param_name)
            if matched is not None:
                params["".join(matched.groups())] = params.pop(param_name)
        model.load_state_dict(params)
    return model
|
||||
+300
@@ -0,0 +1,300 @@
|
||||
import re
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.utils.checkpoint as cp
|
||||
from collections import OrderedDict
|
||||
#from .utils import load_state_dict_from_url
|
||||
from torch import Tensor
|
||||
from torch.jit.annotations import List
|
||||
|
||||
|
||||
__all__ = ['DenseNet', 'densenet121', 'densenet169', 'densenet201', 'densenet161']
|
||||
|
||||
# Checkpoint download URLs, keyed by the ``arch`` string that _densenet()
# receives from the densenetXXX() constructors.
model_urls = {
    'densenet121': 'https://download.pytorch.org/models/densenet121-a639ec97.pth',
    'densenet169': 'https://download.pytorch.org/models/densenet169-b2777c0a.pth',
    'densenet201': 'https://download.pytorch.org/models/densenet201-c1103571.pth',
    'densenet161': 'https://download.pytorch.org/models/densenet161-8d451a50.pth',
}
|
||||
|
||||
|
||||
class _DenseLayer(nn.Module):
    """One dense layer: BN-ReLU-Conv(1x1) bottleneck, then BN-ReLU-Conv(3x3).

    ``forward`` accepts either a single tensor or a list of tensors (the
    feature maps of all preceding layers); a list is concatenated along
    dim 1 before the bottleneck. Only the ``growth_rate`` new feature maps
    are returned; concatenation with the inputs is left to the caller.

    Args:
        num_input_features (int): total channels of the concatenated input
        growth_rate (int): channels produced by the 3x3 convolution
        bn_size (int): the bottleneck emits ``bn_size * growth_rate`` channels
        drop_rate (float): dropout probability applied to the new features
        memory_efficient (bool): if True, the bottleneck is run through
            ``torch.utils.checkpoint`` whenever an input requires grad
    """

    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate, memory_efficient=False):
        super(_DenseLayer, self).__init__()
        self.add_module('norm1', nn.BatchNorm2d(num_input_features))
        self.add_module('relu1', nn.ReLU(inplace=True))
        self.add_module('conv1', nn.Conv2d(num_input_features, bn_size *
                                           growth_rate, kernel_size=1, stride=1,
                                           bias=False))
        self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate))
        self.add_module('relu2', nn.ReLU(inplace=True))
        self.add_module('conv2', nn.Conv2d(bn_size * growth_rate, growth_rate,
                                           kernel_size=3, stride=1, padding=1,
                                           bias=False))
        self.drop_rate = float(drop_rate)
        self.memory_efficient = memory_efficient

    def bn_function(self, inputs):
        # type: (List[Tensor]) -> Tensor
        """Concatenate ``inputs`` along dim 1 and apply norm1 -> relu1 -> conv1."""
        concated_features = torch.cat(inputs, 1)
        bottleneck_output = self.conv1(self.relu1(self.norm1(concated_features)))  # noqa: T484
        return bottleneck_output

    # todo: rewrite when torchscript supports any
    def any_requires_grad(self, input):
        # type: (List[Tensor]) -> bool
        """Return True if at least one tensor in ``input`` requires grad."""
        for tensor in input:
            if tensor.requires_grad:
                return True
        return False

    @torch.jit.unused  # noqa: T484
    def call_checkpoint_bottleneck(self, input):
        # type: (List[Tensor]) -> Tensor
        """Run the bottleneck under activation checkpointing."""
        def closure(*inputs):
            # checkpoint hands the tensors back as separate positional
            # arguments; bn_function expects them as one sequence.
            return self.bn_function(inputs)

        # Unpack the list so every feature map is a tensor argument of
        # checkpoint(). Passing the list itself would hide the tensors from
        # checkpoint and the recomputed bottleneck would get no gradient.
        return cp.checkpoint(closure, *input)

    @torch.jit._overload_method  # noqa: F811
    def forward(self, input):
        # type: (List[Tensor]) -> (Tensor)
        pass

    @torch.jit._overload_method  # noqa: F811
    def forward(self, input):
        # type: (Tensor) -> (Tensor)
        pass

    # torchscript does not yet support *args, so we overload method
    # allowing it to take either a List[Tensor] or single Tensor
    def forward(self, input):  # noqa: F811
        if isinstance(input, Tensor):
            prev_features = [input]
        else:
            prev_features = input

        if self.memory_efficient and self.any_requires_grad(prev_features):
            if torch.jit.is_scripting():
                raise Exception("Memory Efficient not supported in JIT")

            bottleneck_output = self.call_checkpoint_bottleneck(prev_features)
        else:
            bottleneck_output = self.bn_function(prev_features)

        new_features = self.conv2(self.relu2(self.norm2(bottleneck_output)))
        if self.drop_rate > 0:
            new_features = F.dropout(new_features, p=self.drop_rate,
                                     training=self.training)
        return new_features
|
||||
|
||||
|
||||
class _DenseBlock(nn.ModuleDict):
    """``num_layers`` dense layers stored as ``denselayer1..N``.

    Each layer receives the list of all feature maps produced so far; the
    block returns every one of them (including the block input)
    concatenated along dim 1.
    """

    _version = 2

    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate, memory_efficient=False):
        super(_DenseBlock, self).__init__()
        for index in range(num_layers):
            # Layer ``index`` sees the block input plus ``index`` earlier outputs.
            in_channels = num_input_features + index * growth_rate
            self.add_module(
                'denselayer%d' % (index + 1),
                _DenseLayer(
                    in_channels,
                    growth_rate=growth_rate,
                    bn_size=bn_size,
                    drop_rate=drop_rate,
                    memory_efficient=memory_efficient,
                ),
            )

    def forward(self, init_features):
        collected = [init_features]
        for layer in self.values():
            collected.append(layer(collected))
        return torch.cat(collected, 1)
|
||||
|
||||
|
||||
class _Transition(nn.Sequential):
    """Transition between dense blocks: BN -> ReLU -> 1x1 Conv -> 2x2 AvgPool."""

    def __init__(self, num_input_features, num_output_features):
        super(_Transition, self).__init__()
        members = [
            ('norm', nn.BatchNorm2d(num_input_features)),
            ('relu', nn.ReLU(inplace=True)),
            ('conv', nn.Conv2d(num_input_features, num_output_features,
                               kernel_size=1, stride=1, bias=False)),
            ('pool', nn.AvgPool2d(kernel_size=2, stride=2)),
        ]
        for name, module in members:
            self.add_module(name, module)
|
||||
|
||||
class PrintLayer(nn.Module):
    """Identity layer for debugging: prints the mean of whatever passes through.

    Args:
        name (str): label printed in front of the mean value
    """

    def __init__(self, name):
        super(PrintLayer, self).__init__()
        self.name = name

    def forward(self, x):
        # .item() copies the scalar to the host before it is printed.
        mean_value = x.mean().item()
        print("{} mean data: {}".format(self.name, mean_value))
        return x
|
||||
|
||||
class DenseNet(nn.Module):
    r"""Densenet-BC model class, based on
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    This variant is instrumented for debugging: a ``PrintLayer`` follows the
    stem layers, every dense block and every transition, and ``forward``
    prints the mean of the feature map and of the flattened vector.

    Args:
        growth_rate (int) - how many filters to add each layer (`k` in paper)
        block_config (list of 4 ints) - how many layers in each pooling block
        num_init_features (int) - the number of filters to learn in the first convolution layer
        bn_size (int) - multiplicative factor for number of bottle neck layers
          (i.e. bn_size * k features in the bottleneck layer)
        drop_rate (float) - dropout rate after each dense layer
        num_classes (int) - number of classification classes
        memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient,
          but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_
    """

    def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
                 num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000, memory_efficient=False):

        super(DenseNet, self).__init__()

        # Stem: first convolution and max pooling, with debug probes.
        stem = OrderedDict()
        stem['conv0'] = nn.Conv2d(3, num_init_features, kernel_size=7, stride=2,
                                  padding=3, bias=False)
        stem['conv0_p'] = PrintLayer('conv0_p')
        stem['norm0'] = nn.BatchNorm2d(num_init_features)
        stem['norm0_p'] = PrintLayer('norm0_p')
        stem['relu0'] = nn.ReLU(inplace=True)
        stem['pool0'] = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        stem['pool0_p'] = PrintLayer('pool0_p')
        self.features = nn.Sequential(stem)

        # Dense blocks, each followed by a transition except the last one.
        channels = num_init_features
        final_index = len(block_config) - 1
        for idx, depth in enumerate(block_config):
            stage = idx + 1
            dense_block = _DenseBlock(
                num_layers=depth,
                num_input_features=channels,
                bn_size=bn_size,
                growth_rate=growth_rate,
                drop_rate=drop_rate,
                memory_efficient=memory_efficient
            )
            self.features.add_module('denseblock%d' % stage, dense_block)
            self.features.add_module('denseblock%d_p' % stage, PrintLayer('denseblock%d_p' % stage))
            channels += depth * growth_rate
            if idx == final_index:
                continue
            # The transition halves the channel count.
            transition = _Transition(num_input_features=channels,
                                     num_output_features=channels // 2)
            self.features.add_module('transition%d' % stage, transition)
            self.features.add_module('transition%d_p' % stage, PrintLayer('transition%d_p' % stage))
            channels //= 2

        # Final batch norm
        self.features.add_module('norm5', nn.BatchNorm2d(channels))

        # Linear layer
        self.classifier = nn.Linear(channels, num_classes)

        # Official init from torch repo.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        features = self.features(x)
        # Debug output; printed before the in-place ReLU overwrites the tensor.
        print('the features mean: {}'.format(features.mean().item()))

        pooled = F.adaptive_avg_pool2d(F.relu(features, inplace=True), (1, 1))
        flat = torch.flatten(pooled, 1)
        print('the flatten mean: {}'.format(flat.mean().item()))
        return self.classifier(flat)
|
||||
|
||||
|
||||
def _load_state_dict(model, model_url, progress):
    """Download the checkpoint at ``model_url`` and load it into ``model``.

    Args:
        model (nn.Module): network that receives the weights (modified in place)
        model_url (str): URL of the checkpoint
        progress (bool): forwarded to the downloader as its ``progress`` flag
    """
    # '.'s are no longer allowed in module names, but previous _DenseLayer
    # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.
    # They are also in the checkpoints in model_urls. This pattern is used
    # to find such keys.
    pattern = re.compile(
        r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')

    # The relative ``.utils`` import of load_state_dict_from_url is disabled
    # in this file, so the bare name is undefined here; call the torch.hub
    # function directly instead.
    state_dict = torch.hub.load_state_dict_from_url(model_url, progress=progress)
    # Iterate over a snapshot of the keys because entries are renamed in place.
    for key in list(state_dict.keys()):
        res = pattern.match(key)
        if res:
            # Drop the dot between e.g. 'norm' and '1'.
            new_key = res.group(1) + res.group(2)
            state_dict[new_key] = state_dict[key]
            del state_dict[key]
    model.load_state_dict(state_dict)
|
||||
|
||||
|
||||
def _densenet(arch, growth_rate, block_config, num_init_features, pretrained, progress,
              **kwargs):
    """Build a DenseNet and, if ``pretrained``, load the weights registered for ``arch``.

    ``arch`` is the key into ``model_urls``; extra keyword arguments are
    forwarded to the ``DenseNet`` constructor.
    """
    net = DenseNet(growth_rate, block_config, num_init_features, **kwargs)
    if not pretrained:
        return net
    _load_state_dict(net, model_urls[arch], progress)
    return net
|
||||
|
||||
|
||||
def densenet121(pretrained=False, progress=True, **kwargs):
    r"""Densenet-121 model from
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient,
          but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_
    """
    growth_rate, num_init_features = 32, 64
    block_config = (6, 12, 24, 16)
    return _densenet('densenet121', growth_rate, block_config, num_init_features,
                     pretrained, progress, **kwargs)
|
||||
|
||||
|
||||
def densenet161(pretrained=False, progress=True, **kwargs):
    r"""Densenet-161 model from
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient,
          but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_
    """
    growth_rate, num_init_features = 48, 96
    block_config = (6, 12, 36, 24)
    return _densenet('densenet161', growth_rate, block_config, num_init_features,
                     pretrained, progress, **kwargs)
|
||||
|
||||
|
||||
def densenet169(pretrained=False, progress=True, **kwargs):
    r"""Densenet-169 model from
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient,
          but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_
    """
    growth_rate, num_init_features = 32, 64
    block_config = (6, 12, 32, 32)
    return _densenet('densenet169', growth_rate, block_config, num_init_features,
                     pretrained, progress, **kwargs)
|
||||
|
||||
|
||||
def densenet201(pretrained=False, progress=True, **kwargs):
    r"""Densenet-201 model from
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient,
          but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_
    """
    growth_rate, num_init_features = 32, 64
    block_config = (6, 12, 48, 32)
    return _densenet('densenet201', growth_rate, block_config, num_init_features,
                     pretrained, progress, **kwargs)
|
||||
+32
@@ -0,0 +1,32 @@
|
||||
{
|
||||
"board_id": "0x0000",
|
||||
"chip_info": "910",
|
||||
"deploy_mode": "lab",
|
||||
"group_count": "1",
|
||||
"group_list": [
|
||||
{
|
||||
"device_num": "1",
|
||||
"server_num": "1",
|
||||
"group_name": "",
|
||||
"instance_count": "1",
|
||||
"instance_list": [
|
||||
{
|
||||
"devices": [
|
||||
{
|
||||
"device_id": "0",
|
||||
"device_ip": "192.168.100.101"
|
||||
}
|
||||
],
|
||||
"rank_id": "0",
|
||||
"server_id": "10.246.246.76"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"para_plane_nic_location": "device",
|
||||
"para_plane_nic_name": [
|
||||
"eth0"
|
||||
],
|
||||
"para_plane_nic_num": "1",
|
||||
"status": "completed"
|
||||
}
|
||||
+44
@@ -0,0 +1,44 @@
|
||||
{
|
||||
"board_id": "0x0000",
|
||||
"chip_info": "910",
|
||||
"deploy_mode": "lab",
|
||||
"group_count": "1",
|
||||
"group_list": [
|
||||
{
|
||||
"device_num": "2",
|
||||
"server_num": "1",
|
||||
"group_name": "",
|
||||
"instance_count": "2",
|
||||
"instance_list": [
|
||||
{
|
||||
"devices": [
|
||||
{
|
||||
"device_id": "0",
|
||||
"device_ip": "192.168.100.101"
|
||||
}
|
||||
],
|
||||
"rank_id": "0",
|
||||
"server_id": "10.246.246.76"
|
||||
},
|
||||
{
|
||||
"devices": [
|
||||
{
|
||||
"device_id": "1",
|
||||
"device_ip": "192.168.101.101"
|
||||
}
|
||||
],
|
||||
"rank_id": "1",
|
||||
"server_id": "10.246.246.76"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"para_plane_nic_location": "device",
|
||||
"para_plane_nic_name": [
|
||||
"eth0",
|
||||
"eth1"
|
||||
],
|
||||
"para_plane_nic_num": "2",
|
||||
"status": "completed"
|
||||
}
|
||||
+65
@@ -0,0 +1,65 @@
|
||||
{
|
||||
"board_id": "0x0000",
|
||||
"chip_info": "910",
|
||||
"deploy_mode": "lab",
|
||||
"group_count": "1",
|
||||
"group_list": [
|
||||
{
|
||||
"device_num": "4",
|
||||
"server_num": "1",
|
||||
"group_name": "",
|
||||
"instance_count": "4",
|
||||
"instance_list": [
|
||||
{
|
||||
"devices": [
|
||||
{
|
||||
"device_id": "0",
|
||||
"device_ip": "192.168.190.102"
|
||||
}
|
||||
],
|
||||
"rank_id": "0",
|
||||
"server_id": "10.246.246.76"
|
||||
},
|
||||
{
|
||||
"devices": [
|
||||
{
|
||||
"device_id": "1",
|
||||
"device_ip": "192.168.191.102"
|
||||
}
|
||||
],
|
||||
"rank_id": "1",
|
||||
"server_id": "10.246.246.76"
|
||||
},
|
||||
{
|
||||
"devices": [
|
||||
{
|
||||
"device_id": "2",
|
||||
"device_ip": "192.168.192.102"
|
||||
}
|
||||
],
|
||||
"rank_id": "2",
|
||||
"server_id": "10.246.246.76"
|
||||
},
|
||||
{
|
||||
"devices": [
|
||||
{
|
||||
"device_id": "3",
|
||||
"device_ip": "192.168.193.102"
|
||||
}
|
||||
],
|
||||
"rank_id": "3",
|
||||
"server_id": "10.246.246.76"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"para_plane_nic_location": "device",
|
||||
"para_plane_nic_name": [
|
||||
"eth0",
|
||||
"eth1",
|
||||
"eth2",
|
||||
"eth3"
|
||||
],
|
||||
"para_plane_nic_num": "4",
|
||||
"status": "completed"
|
||||
}
|
||||
+109
@@ -0,0 +1,109 @@
|
||||
{
|
||||
"board_id": "0x002f",
|
||||
"chip_info": "910",
|
||||
"deploy_mode": "lab",
|
||||
"group_count": "1",
|
||||
"group_list": [
|
||||
{
|
||||
"device_num": "8",
|
||||
"server_num": "1",
|
||||
"group_name": "",
|
||||
"instance_count": "8",
|
||||
"instance_list": [
|
||||
{
|
||||
"devices": [
|
||||
{
|
||||
"device_id": "0",
|
||||
"device_ip": "192.168.100.101"
|
||||
}
|
||||
],
|
||||
"rank_id": "0",
|
||||
"server_id": "10.246.246.76"
|
||||
},
|
||||
{
|
||||
"devices": [
|
||||
{
|
||||
"device_id": "1",
|
||||
"device_ip": "192.168.101.101"
|
||||
}
|
||||
],
|
||||
"rank_id": "1",
|
||||
"server_id": "10.246.246.76"
|
||||
},
|
||||
{
|
||||
"devices": [
|
||||
{
|
||||
"device_id": "2",
|
||||
"device_ip": "192.168.102.101"
|
||||
}
|
||||
],
|
||||
"rank_id": "2",
|
||||
"server_id": "10.246.246.76"
|
||||
},
|
||||
{
|
||||
"devices": [
|
||||
{
|
||||
"device_id": "3",
|
||||
"device_ip": "192.168.103.101"
|
||||
}
|
||||
],
|
||||
"rank_id": "3",
|
||||
"server_id": "10.246.246.76"
|
||||
},
|
||||
{
|
||||
"devices": [
|
||||
{
|
||||
"device_id": "4",
|
||||
"device_ip": "192.168.100.100"
|
||||
}
|
||||
],
|
||||
"rank_id": "4",
|
||||
"server_id": "10.246.246.76"
|
||||
},
|
||||
{
|
||||
"devices": [
|
||||
{
|
||||
"device_id": "5",
|
||||
"device_ip": "192.168.101.100"
|
||||
}
|
||||
],
|
||||
"rank_id": "5",
|
||||
"server_id": "10.246.246.76"
|
||||
},
|
||||
{
|
||||
"devices": [
|
||||
{
|
||||
"device_id": "6",
|
||||
"device_ip": "192.168.102.100"
|
||||
}
|
||||
],
|
||||
"rank_id": "6",
|
||||
"server_id": "10.246.246.76"
|
||||
},
|
||||
{
|
||||
"devices": [
|
||||
{
|
||||
"device_id": "7",
|
||||
"device_ip": "192.168.103.100"
|
||||
}
|
||||
],
|
||||
"rank_id": "7",
|
||||
"server_id": "10.246.246.76"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"para_plane_nic_location": "device",
|
||||
"para_plane_nic_name": [
|
||||
"eth0",
|
||||
"eth1",
|
||||
"eth2",
|
||||
"eth3",
|
||||
"eth4",
|
||||
"eth5",
|
||||
"eth6",
|
||||
"eth7"
|
||||
],
|
||||
"para_plane_nic_num": "8",
|
||||
"status": "completed"
|
||||
}
|
||||
+40
@@ -0,0 +1,40 @@
|
||||
import os
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
from torchvision import transforms
|
||||
import torchvision.models as models
|
||||
|
||||
"""
|
||||
alexnet | densenet121 |
|
||||
densenet161 | densenet169 | densenet201 |
|
||||
resnet101 | resnet152 | resnet18 | resnet34 |
|
||||
resnet50 | squeezenet1_0 | squeezenet1_1 | vgg11 |
|
||||
vgg11_bn | vgg13 | vgg13_bn | vgg16 | vgg16_bn | vgg19 |
|
||||
mobilenet_v2 | shufflenet_v2_x0_5 |
|
||||
vgg19_bn (default: resnet18)
|
||||
"""
|
||||
# Build the selected torchvision model, profile one forward/backward pass on
# the CPU, dump the trace and export the graph for TensorBoard.
model_name = 'densenet121'
model = models.__dict__[model_name]()

# Single random image batch: (batch, channels, height, width).
img = torch.rand(size=(1, 3, 224, 224))

#print(model(img))

# CrossEntropyLoss needs integer class indices (dtype long) for logits of
# shape (batch, classes); a float tensor of shape (1,) is rejected.
# Class 0 is a valid target for any classifier width.
labels = torch.zeros(1, dtype=torch.long)
criterion = nn.CrossEntropyLoss()
with torch.autograd.profiler.profile(record_shapes=True) as prof:
    outputs = model(img)
    loss = criterion(outputs, labels)
    # Label the backward pass so it is easy to find in the trace.
    with torch.autograd.profiler.record_function("label-bp"):
        loss.backward()

#print(prof.key_averages().table())
print(prof)
prof.export_chrome_trace(model_name + ".prof")

# Write the model graph under runs/<model_name> for TensorBoard.
with SummaryWriter(os.path.join('runs', model_name)) as w:
    w.add_graph(model, img)
|
||||
|
||||
+20
@@ -0,0 +1,20 @@
|
||||
# Environment for the Ascend toolkit installed under /usr/local/Ascend,
# followed by the launch of the CPU model-inspection script.

# Shared libraries: Ascend framework/driver directories are put in front of
# the caller's LD_LIBRARY_PATH.
export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
# Make the ccec compiler binaries reachable.
export PATH=$PATH:/usr/local/Ascend/fwkacllib/ccec_compiler/bin
export ASCEND_OPP_PATH=/usr/local/Ascend/opp
export NEW_GE_FE_ID=1
export GE_AICPU_FLAG=1
# NOTE(review): this replaces any existing PYTHONPATH, and the value ends in a
# bare ":/usr/local", which looks truncated - confirm against the original script.
export PYTHONPATH=/usr/local/Ascend/atc/python/site-packages/te.egg:/usr/local/Ascend/atc/python/site-packages/topi.egg:/usr/local/Ascend/atc/python/site-packages/auto_tune.egg:/usr/local/Ascend/atc/python/site-packages/schedule_search.egg:/usr/local
export CUSTOM_OP_LIB_PATH=/usr/local/Ascend/ops/framework/built-in/tensorflow
# Plugin libraries; PLUGIN_LOAD_PATH lists the same three plus librts_engine.so.
export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libaicpu_plugin.so:/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
export PLUGIN_LOAD_PATH=/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libaicpu_plugin.so:/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/librts_engine.so

# Optional settings, disabled by default.
#export DEVICE_ID=0
#export SLOG_PRINT_TO_STDOUT=1

#su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 0"

# Only one entry point is active; the others are kept for quick switching.
#python3 pytorch-benchmark-resnet50.py
python3 net_show_cpu.py
#python3 pytorch-resnet50-profiling.py
|
||||
|
||||
|
||||
+51
@@ -0,0 +1,51 @@
|
||||
import os
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
from torchvision import transforms
|
||||
import torchvision.models as models
|
||||
|
||||
# Profile one forward/backward pass of a torchvision model on an NPU device
# and export the chrome trace.
CALCULATE_DEVICE = "npu:0"
torch.npu.set_device(CALCULATE_DEVICE)

"""
alexnet | densenet121 |
densenet161 | densenet169 | densenet201 |
resnet101 | resnet152 | resnet18 | resnet34 |
resnet50 | squeezenet1_0 | squeezenet1_1 | vgg11 |
vgg11_bn | vgg13 | vgg13_bn | vgg16 | vgg16_bn | vgg19 |
mobilenet_v2 | shufflenet_v2_x0_5 |
vgg19_bn (default: resnet18)
"""

# One random float32 input image, moved to the NPU.
host_img = torch.rand(size=(1, 3, 224, 224), dtype=torch.float32)
img = host_img.to(CALCULATE_DEVICE, non_blocking=True)
print("img prepared")

model_name = 'densenet121'
model = models.__dict__[model_name]()
model = model.to(CALCULATE_DEVICE)
model.train()
print("model prepared")

# Warm-up forward pass outside the profiler.
outputs = model(img)
print(f"cal done, results is {outputs}")

# NOTE(review): torch.rand() returns values in [0, 1), so the int32 cast
# always produces label 0 - confirm that a constant label is intended.
host_labels = torch.rand(size=(1,)).to(torch.int32)
labels = host_labels.to(CALCULATE_DEVICE, non_blocking=True)
criterion = nn.CrossEntropyLoss().to(CALCULATE_DEVICE)

with torch.autograd.profiler.profile(record_shapes=True, use_npu=True) as prof:
    outputs = model(img)
    print("output ok")
    loss = criterion(outputs, labels)
    print("loss ok")
    # Tag the backward pass so it shows up as its own range in the trace.
    with torch.autograd.profiler.record_function("label-bp"):
        loss.backward()

#print(prof.key_averages().table())
print(prof)
prof.export_chrome_trace(model_name + ".prof")

# with SummaryWriter(os.path.join('runs',model_name)) as w:
#     w.add_graph(model, img)
#     print("tenorboard add graph ok")
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"server_count": "1",
|
||||
"server_list": [{
|
||||
"device": [
|
||||
{
|
||||
"device_id": "0",
|
||||
"device_ip": "192.168.10.103",
|
||||
"rank_id": "0"
|
||||
}],
|
||||
"server_id": "127.0.0.1"
|
||||
}],
|
||||
"status": "completed",
|
||||
"version": "1.0"
|
||||
}
|
||||
+9
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"server_count": "1",
|
||||
"server_list": [{
|
||||
"device": [{devices}],
|
||||
"server_id": "127.0.0.1"
|
||||
}],
|
||||
"status": "completed",
|
||||
"version": "1.0"
|
||||
}
|
||||
+52
@@ -0,0 +1,52 @@
|
||||
# main env
|
||||
export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe
|
||||
export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/local/lib/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:$LD_LIBRARY_PATH
|
||||
export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/
|
||||
export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
|
||||
export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
|
||||
export PLUGIN_LOAD_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/librts_engine.so
|
||||
export TASK_QUEUE_ENABLE=0
|
||||
export CUSTOM_OP_LIB_PATH=/usr/local/Ascend/ascend-toolkit/20.10.0.B022/arm64-linux_gcc7.3.0/opp/framework/built-in/tensorflow/
|
||||
|
||||
|
||||
export NEW_GE_FE_ID=1
|
||||
export GE_AICPU_FLAG=1
|
||||
export GEN_TO_SOURCE=1
|
||||
|
||||
|
||||
|
||||
|
||||
#export LD_LIBRARY_PATH=/usr/local/OpenBLAS/lib/:/usr/local/lib/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/aarch64-linux-gnu/
|
||||
#export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin
|
||||
#export ASCEND_OPP_PATH=/usr/local/Ascend/opp
|
||||
|
||||
|
||||
#export DDK_VERSION_FLAG=1.60.T17.B830
|
||||
#export NEW_GE_FE_ID=1
|
||||
#export GE_AICPU_FLAG=1
|
||||
#export SOC_VERSION=Ascend910
|
||||
|
||||
#export DUMP_GE_GRAPH=2
|
||||
|
||||
|
||||
#export DEVICE_ID=0
|
||||
#export DEVICE_INDEX=0
|
||||
|
||||
#export PRINT_MODEL=0
|
||||
#export ENABLE_DATA_PRE_PROC=1
|
||||
#export RANK_ID=0
|
||||
#export RANK_SIZE=1
|
||||
#export JOB_ID=10087
|
||||
#export FUSION_TENSOR_SIZE=1000000000
|
||||
#PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/atc/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe
|
||||
|
||||
#export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe
|
||||
|
||||
|
||||
#export CUSTOM_OP_LIB_PATH=/usr/local/Ascend/ascend-toolkit/20.10.0.B023/arm64-linux_gcc7.3.0/opp/framework/built-in/tensorflow/
|
||||
#export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
|
||||
#export PLUGIN_LOAD_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/librts_engine.so
|
||||
#export WHICH_OP=GEOP
|
||||
#export NEW_GE_FE_ID=1
|
||||
#export GE_AICPU_FLAG=1
|
||||
|
||||
+9
@@ -0,0 +1,9 @@
|
||||
export ASCEND_HOME=/usr/local/Ascend
|
||||
export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/
|
||||
export PYTHONPATH=$PYTHONPATH:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/te:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/topi:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/hccl:/usr/local/Ascend/ascend-toolkit/latest/tfplugin/python/site-packages:$currentDir
|
||||
export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin
|
||||
export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
|
||||
|
||||
export SLOG_PRINT_TO_STDOUT=0
|
||||
|
||||
export TASK_QUEUE_ENABLE=0
|
||||
+21
@@ -0,0 +1,21 @@
|
||||
############## toolkit situation ################
|
||||
#export ASCEND_HOME=/usr/local/Ascend
|
||||
#export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/
|
||||
#export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/hccl
|
||||
#export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin
|
||||
#export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
|
||||
|
||||
############## nnae situation ################
|
||||
export ASCEND_HOME=/usr/local/Ascend
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/:/usr/local/python3.7.5/lib/:/usr/lib/:/usr/local/Ascend/nnae/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/
|
||||
export PYTHONPATH=/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/hccl
|
||||
export PATH=$PATH:/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin
|
||||
export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp/
|
||||
|
||||
# pip3.7 install --upgrade /usr/local/Ascend/nnae/latest/fwkacllib/lib64/topi-0.4.0-py3-none-any.whl
|
||||
# pip3.7 install --upgrade /usr/local/Ascend/nnae/latest/fwkacllib/lib64/te-0.4.0-py3-none-any.whl
|
||||
|
||||
export SLOG_PRINT_TO_STDOUT=0
|
||||
#su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 0"
|
||||
|
||||
export TASK_QUEUE_ENABLE=0
|
||||
+31
@@ -0,0 +1,31 @@
|
||||
############## toolkit situation ################
|
||||
#export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
|
||||
#export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/
|
||||
#export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
|
||||
#export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
|
||||
#export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
|
||||
|
||||
|
||||
############## nnae situation ################
|
||||
|
||||
|
||||
# Export the Ascend runtime paths from the nnae installation when it exists,
# otherwise from ascend-toolkit.
if [ -d /usr/local/Ascend/nnae/latest ];then
    # Fixed: the multiarch library directory is aarch64-linux-gnu (was the non-existent aarch64_64-linux-gnu).
    export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH
    export PATH=$PATH:/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/nnae/latest/toolkit/tools/ide_daemon/bin/
    export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp/
    export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
    export PYTHONPATH=/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
else
    export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH
    export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/
    export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
    export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
    export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
fi
|
||||
|
||||
# ln -s /usr/local/Ascend/ascend-toolkit/latest/toolkit/bin/adc /usr/local/bin/
|
||||
|
||||
export SLOG_PRINT_TO_STDOUT=0
|
||||
#su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 0"
|
||||
|
||||
export TASK_QUEUE_ENABLE=1
|
||||
@@ -0,0 +1,22 @@
|
||||
export ASCEND_HOME=/usr/local/Ascend
|
||||
export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/
|
||||
export PYTHONPATH=$PYTHONPATH:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/te:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/topi:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/hccl:/usr/local/Ascend/ascend-toolkit/latest/tfplugin/python/site-packages:$currentDir
|
||||
export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin
|
||||
export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
|
||||
|
||||
export SLOG_PRINT_TO_STDOUT=0
|
||||
su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 7"
|
||||
|
||||
export TASK_QUEUE_ENABLE=0
|
||||
taskset -c 111-150 python3 densenet121_1p_main.py \
|
||||
--workers 40 \
|
||||
--arch densenet121 \
|
||||
--npu 7 \
|
||||
--lr 0.1 \
|
||||
--momentum 0.9 \
|
||||
--amp \
|
||||
--batch-size 256 \
|
||||
--epoch 90 \
|
||||
--evaluate \
|
||||
--resume checkpoint.pth.tar \
|
||||
--data /opt/npu/dataset/imagenet
|
||||
@@ -0,0 +1,62 @@
|
||||
#!/bin/bash
# Launch DenseNet121 (PyTorch) training: read the yaml configuration, create a
# timestamped job directory and start scripts/train.sh, either through mpirun
# (CLUSTER == "True") or as a local background process.
#
# Positional arguments:
#   $1  rank_size      number of devices
#   $2  yamlPath       path of the yaml configuration file
#   $3  toolsPath      directory that contains get_params_for_yaml.sh
#   $4  CLUSTER        read only when /.dockerenv exists
#   $5  MPIRUN_ALL_IP  read only when /.dockerenv exists; iterated word by word

rank_size=$1
yamlPath=$2
toolsPath=$3

currentDir=$(cd "$(dirname "$0")/.."; pwd)
model_name=$(cd $currentDir/..; basename $(pwd))
if [ -f /.dockerenv ]; then
    CLUSTER=$4
    MPIRUN_ALL_IP="$5"
    export CLUSTER=${CLUSTER}
fi

# Get the configuration from the yaml file.
eval $(${toolsPath}/get_params_for_yaml.sh ${yamlPath} "pytorch_config")

# Remove old logs.
rm -rf /var/log/npu/slog/host-0/*
rm -rf ${currentDir}/result/*.log

# Create the training job directory, named after the start time.
currtime=$(date +%Y%m%d%H%M%S)
export train_job_dir=${currentDir%train*}/train/result/pt_densenet121/training_job_${currtime}/
mkdir -p ${train_job_dir}
echo "[$(date +%Y%m%d-%H:%M:%S)] [INFO] ${train_job_dir} &"

# Device list: when no device_group_<rank_size>p is configured, or when
# rank_size >= 8, use devices 0..rank_size-1 in order.
eval device_group=\$device_group_${rank_size}p
if [ -z "${device_group}" ] || [ ${rank_size} -ge 8 ]; then
    device_group="$(seq 0 $((rank_size - 1)))"
fi

# First character of the space-stripped device list; the hw log is linked
# from the directory with that name.
device_group_str=$(echo ${device_group} | sed 's/ //g')
first_device_id=${device_group_str:0:1}

if [ "${CLUSTER}" == "True" ]; then
    this_ip=$(hostname -I | awk '{print $1}')
    ln -snf ${train_job_dir}0/hw_densenet121.log ${train_job_dir}
    # Copy the yaml file and the file at jsonFilePath to every other node.
    for ip in $MPIRUN_ALL_IP; do
        [ "$ip" == "$this_ip" ] && continue
        scp $yamlPath root@$ip:$yamlPath
        scp ${jsonFilePath} root@$ip:${jsonFilePath}
    done
    export PATH=$PATH:/usr/local/mpirun4.0/bin
    mpirun -H ${mpirun_ip} \
        --bind-to none -map-by slot \
        --allow-run-as-root \
        --mca btl_tcp_if_exclude lo,docker0,endvnic,virbr0,vethf40501b,docker_gwbridge,br-f42ac38052b4 \
        --prefix /usr/local/mpirun4.0/ \
        ${currentDir}/scripts/train.sh 0 $rank_size $yamlPath $currtime ${toolsPath} ${CLUSTER}
else
    rank_id=0
    #for device_id in $device_group;do
    ln -snf ${train_job_dir}${first_device_id}/hw_densenet121.log ${train_job_dir}
    ${currentDir}/scripts/train.sh 0 $rank_size $yamlPath $currtime ${toolsPath} $rank_id &
    #    let rank_id++
    #done
fi

# Wait for the background training process.
wait
|
||||
|
||||
|
||||
+141
@@ -0,0 +1,141 @@
|
||||
#!/usr/bin/env bash
# Per-process setup for DenseNet121 (PyTorch) training: parse the arguments,
# export the environment read by the training code and enter the per-device
# working directory.
#
# Positional arguments:
#   $1 device_id   $2 rank_size   $3 yamlPath   $4 currtime (job timestamp)
#   $5 toolsPath   $6 rank id, or the literal "True" for a cluster run

device_id=$1
rank_size=$2
yamlPath=$3
currtime=$4
toolsPath=$5

currentDir=$(cd "$(dirname "$0")/.."; pwd)
export YAML_PATH=$3
export train_job_dir=${currentDir%train*}/train/result/pt_densenet121/training_job_${currtime}/
mkdir -p ${train_job_dir}

# Get the configuration from the yaml file.
eval $(${toolsPath}/get_params_for_yaml.sh ${yamlPath} "pytorch_config")

# Name of the instrumentation log file; must be hw_ followed by the lower-case model name.
export REMARK_LOG_FILE=hw_densenet121.log
benchmark_log_path=${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils
export PYTHONPATH=$PYTHONPATH:${benchmark_log_path}

#source ${currentDir}/config/npu_set_env.sh
source ${currentDir}/config/set_env_b023.sh

# user env
export HCCL_CONNECT_TIMEOUT=600
export JOB_ID=9999001
export HCCL_RANK_TABLE_PATH=${currentDir}/config/${rank_size}p.json
export RANK_SIZE=${rank_size}
export SLOG_PRINT_TO_STDOUT=0
export DEVICE_ID=${device_id}
export DEVICE_INDEX=$(( DEVICE_ID + RANK_INDEX * 8 ))

cd ${train_job_dir}
curd_dir=${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils/atlasboost
export PYTHONPATH=$PYTHONPATH:${curd_dir}

if [ "$6" != "True" ]; then
    # Local run: the sixth argument is the rank id.
    rank_id=$6
    export RANK_ID=$6
else
    # Cluster run: rank and device id are taken from the atlasboost output.
    device_id_mo=$(python3.7 -c "import src.tensorflow.mpi_ops as atlasboost;atlasboost.init(); device_id = atlasboost.local_rank();cluster_device_id = str(device_id); atlasboost.set_device_id(device_id);print(atlasboost.rank())")
    # Collapse the captured output onto a single line.
    device_id_mo=$(echo $device_id_mo)
    # The last word of the output is the rank.
    rank_id=${device_id_mo##* }
    export RANK_ID=${rank_id}
    # The device id is the text between "deviceid = " and " phyid=".
    device=${device_id_mo##*deviceid = }
    device_id=${device%% phyid=*}
    export DEVICE_ID=${device_id}
    hccljson=${train_job_dir}/*.json
    cp ${hccljson} ${currentDir}/config/${rank_size}p.json
fi

# Create and enter the execution directory for this device.
mkdir -p ${train_job_dir}/${device_id}
cd ${train_job_dir}/${device_id}

startTime=$(date +%Y%m%d-%H:%M:%S)
startTime_s=$(date +%s)

# Distinguish the call parameters for single-device and multi-device runs.
if [ "$6" == "True" ]; then
    # Multiple devices on multiple nodes.
    export CLUSTER=True
fi
|
||||
|
||||
# Start the training command that matches the configured mode and rank_size.
# The exit status of this whole if-statement is read right after it to decide
# between "train success" and "train failed".
if [ x"${mode}" == x"evaluate" ];then
    # Evaluation of an existing checkpoint.
    # NOTE(review): device 7, cores 111-150, batch size and epoch count are hard-coded here - confirm.
    taskset -c 111-150 python3.7 ${currentDir}/code/densenet121_1p_main.py \
        --workers 40 \
        --arch densenet121 \
        --npu 7 \
        --lr 0.1 \
        --momentum 0.9 \
        --amp \
        --batch-size 256 \
        --epoch 90 \
        --evaluate \
        --resume checkpoint.pth.tar \
        --data ${data_url} > ${train_job_dir}/train_${rank_size}p.log 2>&1

elif [ x"${rank_size}" == x"1" ];then
    # Single device.
    #source ${currentDir}/config/set_env_b023.sh

    taskset -c 1-40 python3.7 ${currentDir}/code/densenet121_1p_main.py \
        --workers 40 \
        --arch densenet121 \
        --npu ${device_single} \
        --lr 0.1 \
        --momentum 0.9 \
        --amp \
        --batch-size ${batch_size} \
        --epoch ${epoches} \
        --data ${data_url} > ${train_job_dir}/train_${rank_size}p.log 2>&1

elif [ ${rank_size} -le 8 ];then
    # Single node, multiple devices.
    #source ${currentDir}/config/set_env_b023.sh
    python3.7 ${currentDir}/code/densenet121_8p_main.py \
        --addr=$(hostname -I |awk '{print $1}') \
        --seed 49 \
        --workers 160 \
        --lr ${lr} \
        --print-freq 1 \
        --eval-freq 5 \
        --arch densenet121 \
        --dist-url 'tcp://127.0.0.1:50000' \
        --dist-backend 'hccl' \
        --multiprocessing-distributed \
        --world-size 1 \
        --batch-size ${batch_size} \
        --epochs ${epoches} \
        --rank 0 \
        --amp \
        --benchmark 0 \
        --device-list ${device_group_multi} \
        --data ${data_url} > ${train_job_dir}/train_${rank_size}p.log 2>&1

else
    # No branch matched. An if-statement whose conditions are all false exits
    # with status 0, which would be reported as "train success" although
    # nothing ran; record the problem and fail explicitly instead.
    echo "[ERROR] unsupported rank_size '${rank_size}': no training command was started" > ${train_job_dir}/train_${rank_size}p.log
    false
fi
|
||||
|
||||
#taskset -c 0-20 python3.7 ${currentDir}/code/densenet121.py > ./train.log 2>&1
|
||||
|
||||
# Report the outcome of the preceding training command on stdout, in the
# training log and in the hw log of the current directory.
if [ $? -eq 0 ]; then
    train_verdict="success"
else
    train_verdict="failed"
fi
train_result_line=":::ABK 1.0.0 densenet121 train ${train_verdict}"
echo "${train_result_line}"
echo "${train_result_line}" >> ${train_job_dir}/train_${rank_size}p.log
echo "${train_result_line}" >> ./hw_densenet121.log

# Elapsed wall-clock time since startTime_s, split into hours, minutes, seconds.
endTime=$(date +%Y%m%d-%H:%M:%S)
endTime_s=$(date +%s)
sumTime=$(( endTime_s - startTime_s ))
hour=$(( sumTime / 3600 ))
min=$(( (sumTime - hour * 3600) / 60 ))
sec=$(( sumTime - hour * 3600 - min * 60 ))
echo ":::ABK 1.0.0 densenet121 train total time: ${hour}:${min}:${sec}" >> ${train_job_dir}/${device_id}/hw_densenet121.log
|
||||
@@ -0,0 +1,46 @@
|
||||
# DenseNet121_tensorflow训练说明
|
||||
|
||||
### 1. 模型训练参数配置
|
||||
|
||||
在train/yaml/DenseNet121.yaml中修改相应配置, 配置项含义:
|
||||
|
||||
```
|
||||
tensorflow_config:
|
||||
# 基本参数
|
||||
data_url: 数据集路径
|
||||
epoches: 跑多少个epoch
|
||||
epochs_between_evals: 1
|
||||
batch_size: 32
|
||||
log_dir: ./ckpt
|
||||
|
||||
# 1p参数
|
||||
mode_1p: train # train、evaluate、train_and_evaluate三种模式
|
||||
max_train_steps_1p: 100
|
||||
iterations_per_loop_1p: 10
|
||||
display_every: 10
|
||||
log_name_1p: densenet121_1p.log
|
||||
|
||||
# 8p参数
|
||||
mode_8p: train_and_evaluate # train、evaluate、train_and_evaluate三种模式
|
||||
iterations_per_loop_8p: 5004
|
||||
lr: 0.1
|
||||
log_name_8p: densenet121_8p.log
|
||||
|
||||
mpirun_ip: 仅多机执行需要配置: ip1:卡数量1,ip2:卡数量2
|
||||
docker_image: docker 镜像名称:版本号
|
||||
|
||||
# 指定 device id, 多个 id 使用空格分隔, 数量需与 rank_size 相同
|
||||
device_group_1p: 0
|
||||
device_group_2p: 0 1
|
||||
device_group_4p: 0 1 2 3
|
||||
```
|
||||
|
||||
------
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
+22
@@ -0,0 +1,22 @@
|
||||
import tensorflow as tf
|
||||
import os,sys
|
||||
|
||||
|
||||
class CreateSession():
    """Build the TensorFlow session configuration and set TF_* environment variables."""

    def __init__(self):
        """Create ``estimator_config`` and apply the environment settings."""
        # Fixed thread-pool sizes; soft placement is enabled.
        session_config = tf.ConfigProto(
            inter_op_parallelism_threads=10,
            intra_op_parallelism_threads=10,
            allow_soft_placement=True,
        )
        session_config.gpu_options.allow_growth = True
        self.estimator_config = session_config

        self.set_env()

    def set_env(self):
        """Write the GPU-related TF_* settings into ``os.environ``."""
        gpu_thread_count = 2
        os.environ.update({
            'TF_GPU_THREAD_MODE': 'gpu_private',
            'TF_GPU_THREAD_COUNT': str(gpu_thread_count),
            'TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT': '1',
            'TF_ENABLE_WINOGRAD_NONFUSED': '1',
        })
|
||||
|
||||
+133
@@ -0,0 +1,133 @@
|
||||
import numpy as np
|
||||
import preprocessing
|
||||
import tensorflow as tf
|
||||
from tensorflow.python.util import nest
|
||||
import os,sys
|
||||
import numpy as np
|
||||
|
||||
|
||||
class DataLoader:
    """Count the train/validation TFRecord files' records and build input datasets."""

    def __init__(self, args):
        """Count the records of both splits and store the totals.

        Args:
            args: options object. ``args.data_dir`` is read; the attributes
                ``num_training_samples`` and ``num_evaluating_samples`` are
                written back to it.
        """
        self.args = args

        def _count_split(split):
            # Files of a split are matched as "<data_dir>/<split>-*".
            pattern = os.path.join(args.data_dir, '%s-*')
            return get_num_records(sorted(tf.gfile.Glob(pattern % split)))

        self.num_training_samples = _count_split('train')
        self.args.num_training_samples = self.num_training_samples

        self.num_evaluating_samples = _count_split('validation')
        self.args.num_evaluating_samples = self.num_evaluating_samples

        print( 'total num_training_sampels: %d' % self.num_training_samples )
        print( 'total num_evaluating_sampels: %d' % self.num_evaluating_samples )

        # Every rank is given the full training-set size.
        self.training_samples_per_rank = self.num_training_samples

    def get_train_input_fn(self):
        """Return the training dataset built by ``make_dataset``."""
        return make_dataset(self.args, self.training_samples_per_rank,
                            self.args.batch_size, training=True)

    def get_eval_input_fn(self):
        """Return the validation dataset built by ``make_dataset``."""
        return make_dataset(self.args, self.num_evaluating_samples,
                            self.args.batch_size, training=False)
|
||||
|
||||
|
||||
def get_num_records(filenames):
    """Estimate the total number of records in a list of TFRecord files.

    Only the first and the last file are actually read: every file except the
    last is assumed to hold as many records as the first one.

    Args:
        filenames: sequence of TFRecord file paths.

    Returns:
        The estimated record count; 0 when ``filenames`` is empty.
    """
    def count_records(tf_record_filename):
        # Iterate the whole file once, counting its records.
        return sum(1 for _ in tf.python_io.tf_record_iterator(tf_record_filename))

    # An empty list used to fail with IndexError on filenames[0].
    if not filenames:
        return 0

    last_count = count_records(filenames[-1])
    nfile = len(filenames)
    if nfile == 1:
        # The first and last file are the same; do not read it a second time.
        return last_count
    return count_records(filenames[0]) * (nfile - 1) + last_count
|
||||
|
||||
|
||||
def _parse_example_proto(example_serialized):
    """Parse one serialized Example into image bytes, label and bounding boxes.

    Args:
        example_serialized: serialized Example passed to ``tf.parse_single_example``.

    Returns:
        A tuple ``(encoded_image, label, bbox)``: the 'image/encoded' feature,
        the label cast to int32, and the boxes as a [1, num_boxes, coords]
        tensor with coordinates ordered (ymin, xmin, ymax, xmax).
    """
    bbox_keys = ['image/object/bbox/xmin',
                 'image/object/bbox/ymin',
                 'image/object/bbox/xmax',
                 'image/object/bbox/ymax']

    # Dense features.
    feature_map = {
        'image/encoded': tf.FixedLenFeature([], dtype=tf.string,
                                            default_value=''),
        'image/class/label': tf.FixedLenFeature([], dtype=tf.int64, default_value=-1),
        'image/class/text': tf.FixedLenFeature([], dtype=tf.string,
                                               default_value=''),
    }
    # Sparse features in Example proto: one shared spec for all four coordinates.
    sparse_float32 = tf.VarLenFeature(dtype=tf.float32)
    for key in bbox_keys:
        feature_map[key] = sparse_float32

    features = tf.parse_single_example(example_serialized, feature_map)
    label = tf.cast(features['image/class/label'], dtype=tf.int32)

    # Each coordinate list becomes a [1, num_boxes] row.
    xmin, ymin, xmax, ymax = [
        tf.expand_dims(features[key].values, 0) for key in bbox_keys
    ]

    # Note that the rows are stacked in (y, x) order.
    bbox = tf.concat([ymin, xmin, ymax, xmax], 0)

    # Force the variable number of bounding boxes into the shape
    # [1, num_boxes, coords].
    bbox = tf.transpose(tf.expand_dims(bbox, 0), [0, 2, 1])

    return features['image/encoded'], label, bbox
|
||||
|
||||
|
||||
# since the preprocessing is done here, we add args file
|
||||
def parse_record(raw_record, is_training):
    """Turn one raw record into a ``(image, label)`` pair.

    Args:
        raw_record: serialized Example.
        is_training: forwarded to the preprocessing step as ``training``.

    Returns:
        The preprocessed image and the label shifted down by one.
    """
    encoded_image, raw_label, boxes = _parse_example_proto(raw_record)
    image = preprocessing.parse_and_preprocess_image_record(
        encoded_image, boxes, training=is_training)

    # The original comment reads "label-1 for VGG16".
    # NOTE(review): presumably the stored labels are 1-based - confirm.
    shifted_label = raw_label - 1
    return image, shifted_label
|
||||
|
||||
|
||||
def make_dataset(args, take_count, batch_size,
                 training=False, shard=False):
    """Build the batched ``tf.data`` pipeline for the train or validation split.

    Args:
        args: options object; ``args.data_dir`` is read.
        take_count: for validation only, passed to ``take`` on the dataset of
            file names (before the records are read).
        batch_size: batch size; incomplete final batches are dropped.
        training: selects the 'train' files and enables shuffling/repeat.
        shard: not used by this function.

    Returns:
        A dataset of ``parse_record`` outputs grouped into batches.
    """
    shuffle_buffer_size = 10000
    num_readers = 10

    # Both variables must be set; int(None) raises otherwise.
    # NOTE(review): rank_size is read but never used below.
    rank_size = int(os.getenv('RANK_SIZE'))
    rank_id = int(os.getenv('DEVICE_INDEX'))

    split = 'train' if training else 'validation'
    filename_pattern = os.path.join(args.data_dir, '%s-*')
    filenames = sorted(tf.gfile.Glob(filename_pattern % split))

    ds = tf.data.Dataset.from_tensor_slices(filenames)
    if training:
        # Shuffle the file order with a seed that depends on DEVICE_INDEX.
        ds = ds.shuffle(1000, seed=7*(1+rank_id))
    else:
        ds = ds.take(take_count)

    # Read records from num_readers files at a time, one record per turn.
    ds = ds.interleave(tf.data.TFRecordDataset, cycle_length=num_readers, block_length=1)
    counter = tf.data.Dataset.range(sys.maxsize)
    ds = tf.data.Dataset.zip((ds, counter))

    if training:
        ds = ds.apply(tf.data.experimental.shuffle_and_repeat(shuffle_buffer_size, seed=5*(1+rank_id)))

    # The zipped counter is ignored by the mapping function.
    ds = ds.map(lambda image, counter: parse_record(image, training), num_parallel_calls=14)

    return ds.batch(batch_size, drop_remainder=True)
|
||||
|
||||
|
||||
+158
@@ -0,0 +1,158 @@
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib.layers import batch_norm, flatten
|
||||
from tensorflow.contrib.framework import arg_scope
|
||||
import numpy as np
|
||||
|
||||
class_num = 1000
|
||||
nb_blocks = 4
|
||||
nb_blocks_layers = (6, 12, 24, 16)
|
||||
bn_size = 4
|
||||
growth_rate = 32
|
||||
init_layers = 64
|
||||
|
||||
|
||||
'''
|
||||
denseNet:121,169,201,264
|
||||
return _densenet('densenet121', 32, (6, 12, 24, 16), 64, pretrained, progress,
|
||||
**kwargs)
|
||||
return _densenet('densenet161', 48, (6, 12, 36, 24), 96, pretrained, progress,
|
||||
**kwargs)
|
||||
return _densenet('densenet169', 32, (6, 12, 32, 32), 64, pretrained, progress,
|
||||
**kwargs)
|
||||
return _densenet('densenet201', 32, (6, 12, 48, 32), 64, pretrained, progress,
|
||||
**kwargs)
|
||||
'''
|
||||
|
||||
|
||||
|
||||
def conv_layer(input, filter, kernel, stride=1, layer_name="conv"):
    """Apply a bias-free, SAME-padded 2-D convolution inside a name scope.

    Args:
        input: input tensor.
        filter: number of output filters.
        kernel: kernel size.
        stride: convolution stride.
        layer_name: name of the enclosing ``tf.name_scope``.

    Returns:
        The convolution output.
    """
    with tf.name_scope(layer_name):
        # scale=5.0, mode='fan_out'
        weight_init = tf.initializers.variance_scaling(scale=5.0, mode='fan_out')
        return tf.layers.conv2d(
            inputs=input,
            filters=filter,
            kernel_size=kernel,
            strides=stride,
            padding='SAME',
            use_bias=False,
            kernel_initializer=weight_init,
        )
|
||||
|
||||
def Global_Average_Pooling(x, stride=1):
    """Average-pool ``x`` with a window that spans axes 1 and 2 completely.

    Args:
        x: input tensor; its sizes along axes 1 and 2 are read with ``np.shape``.
        stride: forwarded to the pooling layer.

    Returns:
        The pooled tensor.
    """
    dims = np.shape(x)
    # The pooling window is as large as the input along axes 1 and 2.
    pool_size = [dims[1], dims[2]]
    return tf.layers.average_pooling2d(inputs=x, pool_size=pool_size, strides=stride)  # The stride value does not matter
|
||||
#It is global average pooling without tflearn
|
||||
|
||||
|
||||
#return global_avg_pool(x, name='Global_avg_pooling')
|
||||
# But maybe you need to install h5py and curses or not
|
||||
|
||||
|
||||
def Batch_Normalization(x, training, scope):
    """Batch-normalise ``x`` with ``tf.contrib.layers.batch_norm``.

    Args:
        x: Input tensor.
        training: Training flag; it is cast to a boolean tensor and used both
            as the ``tf.cond`` predicate and as ``is_training``.
        scope: Scope name handed to ``batch_norm`` through ``arg_scope``.

    Returns:
        The normalised tensor selected by ``tf.cond``.
    """
    bn_defaults = dict(
        scope=scope,
        updates_collections=None,
        decay=0.9,
        center=True,
        scale=True,
        zero_debias_moving_mean=True)

    with arg_scope([batch_norm], **bn_defaults):
        training = tf.cast(training, tf.bool)

        # The two branches differ only in ``reuse``: the training branch uses
        # reuse=None, the inference branch reuse=True.
        def _training_branch():
            return batch_norm(inputs=x, is_training=training, reuse=None)

        def _inference_branch():
            return batch_norm(inputs=x, is_training=training, reuse=True)

        return tf.cond(training, _training_branch, _inference_branch)
|
||||
|
||||
def Drop_out(x, rate, training):
    """Apply ``tf.layers.dropout`` to ``x``.

    Args:
        x: Input tensor.
        rate: Dropout rate forwarded to the layer.
        training: Flag forwarded to the layer's ``training`` argument.

    Returns:
        The output tensor of the dropout layer.
    """
    dropped = tf.layers.dropout(inputs=x, rate=rate, training=training)
    return dropped
|
||||
|
||||
def Relu(x):
    """Return ``tf.nn.relu`` applied to ``x``."""
    activated = tf.nn.relu(x)
    return activated
|
||||
|
||||
def Average_pooling(x, pool_size=(2, 2), stride=2, padding='VALID'):
    """Apply 2-D average pooling to ``x``.

    Args:
        x: Input tensor.
        pool_size: Pooling window; any value accepted by
            ``tf.layers.average_pooling2d`` (tuple or list of two ints).
            The default is an immutable tuple so it cannot be mutated
            between calls.
        stride: Pooling stride.
        padding: Padding mode forwarded to the layer.

    Returns:
        The pooled tensor.
    """
    return tf.layers.average_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding=padding)
|
||||
|
||||
|
||||
def Max_Pooling(x, pool_size=(3, 3), stride=2, padding='VALID'):
    """Apply 2-D max pooling to ``x``.

    Args:
        x: Input tensor.
        pool_size: Pooling window; any value accepted by
            ``tf.layers.max_pooling2d`` (tuple or list of two ints).
            The default is an immutable tuple so it cannot be mutated
            between calls.
        stride: Pooling stride.
        padding: Padding mode forwarded to the layer.

    Returns:
        The pooled tensor.
    """
    return tf.layers.max_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding=padding)
|
||||
|
||||
def Concatenation(layers):
    """Concatenate the tensors in ``layers`` along axis 3."""
    merged = tf.concat(layers, axis=3)
    return merged
|
||||
|
||||
def Linear(x):
    """Project ``x`` to ``class_num`` outputs with a dense layer named 'linear'."""
    logits = tf.layers.dense(inputs=x, units=class_num, name='linear')
    return logits
|
||||
|
||||
|
||||
def bottleneck_layer(x, is_training, scope):
    """Run the BN-ReLU-conv(1x1) then BN-ReLU-conv(3x3) bottleneck.

    Args:
        x: Input tensor.
        is_training: Training flag forwarded to batch normalisation.
        scope: Name used for the enclosing name scope and as the prefix of
            the inner batch-norm / conv names.

    Returns:
        The output of the 3x3 convolution, with ``growth_rate`` filters.
    """
    with tf.name_scope(scope):
        # 1x1 convolution with growth_rate * bn_size filters.
        squeezed = Batch_Normalization(x, training=is_training, scope=scope + '_batch1')
        squeezed = Relu(squeezed)
        squeezed = conv_layer(squeezed, filter=growth_rate * bn_size, kernel=[1, 1],
                              layer_name=scope + '_conv1')

        # 3x3 convolution with growth_rate filters.
        grown = Batch_Normalization(squeezed, training=is_training, scope=scope + '_batch2')
        grown = Relu(grown)
        grown = conv_layer(grown, filter=growth_rate, kernel=[3, 3],
                           layer_name=scope + '_conv2')

        return grown
|
||||
|
||||
def transition_layer(x, is_training, scope):
    """Halve the channel count and the spatial size between dense blocks.

    Applies BN, ReLU, a 1x1 convolution with half the input channels, and a
    2x2 average pooling with stride 2.

    Args:
        x: Input tensor; its last dimension must be statically known.
        is_training: Training flag forwarded to batch normalisation.
        scope: Name used for the enclosing name scope and as the prefix of
            the inner batch-norm / conv names.

    Returns:
        The pooled tensor.
    """
    with tf.name_scope(scope):
        x = Batch_Normalization(x, training=is_training, scope=scope + '_batch1')
        x = Relu(x)

        # https://github.com/taki0112/Densenet-Tensorflow/issues/10
        in_channel = int(x.shape[-1])
        # The filter count must be an integer; ``in_channel * 0.5`` produced a
        # float. Floor division yields the same value as int(in_channel * 0.5).
        x = conv_layer(x, filter=in_channel // 2, kernel=[1, 1], layer_name=scope + '_conv1')
        x = Average_pooling(x, pool_size=[2, 2], stride=2)

        return x
|
||||
|
||||
def dense_block(input_x, nb_layers, is_training, layer_name):
    """Stack ``nb_layers`` bottleneck layers with dense connectivity.

    The first bottleneck consumes ``input_x`` directly; each later bottleneck
    consumes the concatenation of the block input and all earlier bottleneck
    outputs.

    Args:
        input_x: Input tensor of the block.
        nb_layers: Number of bottleneck layers in the block.
        is_training: Training flag forwarded to the bottlenecks.
        layer_name: Name scope of the block and prefix of bottleneck scopes.

    Returns:
        The concatenation of the block input and every bottleneck output.
    """
    with tf.name_scope(layer_name):
        collected = [input_x]

        first = bottleneck_layer(input_x, is_training, scope=layer_name + '_bottleN_' + str(0))
        collected.append(first)

        for idx in range(1, nb_layers):
            stacked = Concatenation(collected)
            produced = bottleneck_layer(stacked, is_training,
                                        scope=layer_name + '_bottleN_' + str(idx))
            collected.append(produced)

        return Concatenation(collected)
|
||||
|
||||
def Dense_net(input_x, is_training):
    """Build the DenseNet graph and return the classifier logits.

    Layout: 7x7 stride-2 convolution, 3x3 stride-2 max pooling,
    ``nb_blocks - 1`` dense blocks each followed by a transition layer, a
    final dense block, then BN, ReLU, global average pooling, flatten and the
    linear classifier.

    Args:
        input_x: Input image tensor.
        is_training: Training flag forwarded to every batch-norm layer.

    Returns:
        The output of ``Linear`` (``class_num`` units).
    """
    net = conv_layer(input_x, filter=init_layers, kernel=[7, 7], stride=2, layer_name='conv0')
    net = Max_Pooling(net, pool_size=[3, 3], stride=2)

    # Every block except the last one is followed by a transition layer.
    for block_idx in range(nb_blocks - 1):
        net = dense_block(input_x=net,
                          nb_layers=nb_blocks_layers[block_idx],
                          is_training=is_training,
                          layer_name='dense_' + str(block_idx))
        net = transition_layer(net, is_training, scope='trans_' + str(block_idx))

    net = dense_block(input_x=net,
                      nb_layers=nb_blocks_layers[nb_blocks - 1],
                      is_training=is_training,
                      layer_name='dense_final')

    # Classification head.
    net = Batch_Normalization(net, training=is_training, scope='linear_batch')
    net = Relu(net)
    net = Global_Average_Pooling(net)
    net = flatten(net)
    net = Linear(net)

    return net
|
||||
+44
@@ -0,0 +1,44 @@
|
||||
import tensorflow as tf
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr):
    """Return the rate at ``current_step`` of a linear ramp from ``init_lr`` to ``base_lr``."""
    lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps)
    return float(init_lr) + lr_inc * current_step


def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, eta_min=0):
    """Build a per-step learning-rate table: linear warm-up, then cosine annealing.

    The cosine term is evaluated once per epoch (from ``step // steps_per_epoch``),
    so the rate is constant within an epoch after the warm-up.

    Args:
        lr: Base learning rate reached at the end of the warm-up.
        steps_per_epoch: Number of steps in one epoch.
        warmup_epochs: Number of warm-up epochs (0 disables the warm-up).
        max_epoch: Number of epochs the table covers.
        T_max: Period parameter of the cosine schedule, in epochs.
        eta_min: Lower bound of the cosine schedule.

    Returns:
        A float32 numpy array with ``int(max_epoch * steps_per_epoch)`` entries.
    """
    base_lr = lr
    warmup_init_lr = 0
    total_steps = int(max_epoch * steps_per_epoch)
    warmup_steps = int(warmup_epochs * steps_per_epoch)

    lr_each_step = []
    for i in range(total_steps):
        if i < warmup_steps:
            # Warm-up steps are 1-based so the last one reaches base_lr.
            step_lr = linear_warmup_lr(i + 1, warmup_steps, base_lr, warmup_init_lr)
        else:
            last_epoch = i // steps_per_epoch
            step_lr = eta_min + (base_lr - eta_min) * (1. + math.cos(math.pi * last_epoch / T_max)) / 2
        lr_each_step.append(step_lr)

    return np.array(lr_each_step).astype(np.float32)
|
||||
|
||||
|
||||
class HyperParams:
    """Derive step counts from ``args`` and expose the learning-rate tensor."""

    def __init__(self, args):
        """Store ``args``, annotate it with step counts and build the LR table.

        Sets ``args.nsteps_per_epoch`` and ``args.nstep``. ``args.nstep`` is
        epochs * steps-per-epoch when ``args.max_epochs`` is truthy, otherwise
        ``args.max_train_steps``.
        """
        self.args = args

        steps_per_epoch = args.num_training_samples // args.global_batch_size
        args.nsteps_per_epoch = steps_per_epoch

        args.nstep = (steps_per_epoch * args.max_epochs
                      if args.max_epochs else args.max_train_steps)

        # No warm-up; the table covers T_max epochs.
        self.cos_lr = warmup_cosine_annealing_lr(
            args.lr, steps_per_epoch, 0, args.T_max, args.T_max, 0.0)

    def get_learning_rate(self):
        """Return a tensor named 'learning_rate' holding the rate of the current global step."""
        step = tf.train.get_global_step()
        schedule = tf.convert_to_tensor(self.cos_lr)
        current_lr = tf.gather(schedule, step)
        return tf.identity(current_lr, 'learning_rate')
|
||||
|
||||
+25
@@ -0,0 +1,25 @@
|
||||
import tensorflow as tf
|
||||
#from tensorflow.contrib.hccl.python.ops import hccl_ops
|
||||
#from npu_bridge.hccl import hccl_ops
|
||||
from benchmark_log import hwlog
|
||||
|
||||
class Layers:
    """Evaluation metric helpers."""

    def get_accuracy(self, labels, predicted_classes, logits, args):
        """Build top-1 and top-5 accuracy metrics for evaluation.

        Args:
            labels: Ground-truth class ids.
            predicted_classes: Predicted class ids.
            logits: Class scores used for the top-5 test.
            args: Namespace providing ``rank_size``.

        Returns:
            Dict with keys 'val-top1acc' and 'val-top5acc', each a
            (value, update_op) pair. With more than one rank the value is
            all-reduced (sum) over ranks and divided by ``rank_size``.
        """
        top1 = tf.metrics.accuracy(labels=labels, predictions=predicted_classes)
        hit_top5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
        top5 = tf.metrics.mean(hit_top5)

        if args.rank_size != 1:
            # Imported lazily so single-rank runs do not need the HCCL ops.
            from npu_bridge.hccl import hccl_ops
            top1_pair = (hccl_ops.allreduce(top1[0], "sum") / args.rank_size, top1[1])
            top5_pair = (hccl_ops.allreduce(top5[0], "sum") / args.rank_size, top5[1])
        else:
            top1_pair = (top1[0], top1[1])
            top5_pair = (top5[0], top5[1])

        return {'val-top1acc': top1_pair, 'val-top5acc': top5_pair}
|
||||
|
||||
|
||||
|
||||
|
||||
+92
@@ -0,0 +1,92 @@
|
||||
from __future__ import print_function
|
||||
import tensorflow as tf
|
||||
from benchmark_log import hwlog
|
||||
import logging
|
||||
import numpy as np
|
||||
import time
|
||||
import sys,os
|
||||
|
||||
class LogSessionRunHook(tf.train.SessionRunHook):
    """Session hook that logs step, epoch, throughput, losses and learning rate."""

    def __init__(self, args, warmup_steps=5):
        """Read logging settings from ``args`` and create the logger.

        Args:
            args: Namespace providing global_batch_size, iterations_per_loop,
                nsteps_per_epoch, num_training_samples, display_every,
                log_name and log_dir.
            warmup_steps: Number of leading timings ignored by
                ``get_average_speed``.
        """
        self.global_batch_size = args.global_batch_size
        # Fall back to one epoch per loop when iterations_per_loop is unset.
        self.iterations_per_loop = (
            args.iterations_per_loop
            if args.iterations_per_loop is not None
            else args.nsteps_per_epoch)
        self.warmup_steps = warmup_steps
        self.iter_times = []
        self.num_records = args.num_training_samples
        self.display_every = args.display_every
        self.logger = get_logger(args.log_name, args.log_dir)
        rank0log(self.logger, 'PY' + str(sys.version) + 'TF' + str(tf.__version__))

    def after_create_session(self, session, coord):
        """Log the column header and reset the timing accumulators."""
        rank0log(self.logger, 'Step Epoch Speed Loss FinLoss LR')
        self.elapsed_secs = 0.
        self.count = 0

    def before_run(self, run_context):
        """Start the timer and request step, losses and learning rate."""
        self.t0 = time.time()
        wanted = [tf.train.get_global_step(), 'loss:0', 'total_loss:0', 'learning_rate:0']
        return tf.train.SessionRunArgs(fetches=wanted)

    def after_run(self, run_context, run_values):
        """Record the run time and, on display steps, log the statistics."""
        batch_time = time.time() - self.t0
        self.iter_times.append(batch_time)
        self.elapsed_secs += batch_time
        self.count += 1

        global_step, loss, total_loss, lr = run_values.results
        if global_step != 1 and global_step % self.display_every != 0:
            return

        # Mean time per session run since the last display.
        dt = self.elapsed_secs / self.count
        img_per_sec = self.global_batch_size * self.iterations_per_loop / dt
        epoch = global_step * self.global_batch_size / self.num_records
        self.logger.info('step:%6i epoch:%5.1f FPS:%7.1f loss:%6.3f total_loss:%6.3f lr:%7.5f' %
                         (global_step, epoch, img_per_sec, loss, total_loss, lr))
        self.elapsed_secs = 0.
        self.count = 0

        # Benchmark records; epoch and img_per_sec only exist on display steps.
        hwlog.remark_print(key=hwlog.GLOBAL_STEP, value=int(global_step))
        hwlog.remark_print(key=hwlog.CURRENT_EPOCH, value=epoch)
        hwlog.remark_print(key=hwlog.FPS, value=img_per_sec)

    def get_average_speed(self):
        """Return global_batch_size divided by the mean run time after warm-up."""
        steady_times = self.iter_times[self.warmup_steps:]
        return self.global_batch_size / np.mean(steady_times)
|
||||
|
||||
|
||||
|
||||
def rank0log(logger, *args, **kwargs):
    """Log the positional arguments, or print them when no logger is given.

    With a truthy ``logger`` the arguments are converted with ``str`` and
    joined without a separator into one INFO message; ``kwargs`` are ignored.
    Otherwise everything is forwarded to ``print``.
    """
    if not logger:
        print(*args, **kwargs)
        return
    logger.info(''.join(map(str, args)))
|
||||
|
||||
|
||||
def get_logger(log_name, log_dir):
    """Return an INFO-level logger writing to the console and to a file.

    ``logging.getLogger`` returns the same object for the same name, so a
    repeated call must not attach a second pair of handlers; otherwise every
    message would be emitted multiple times. Handlers are therefore only
    added when an equivalent one is not already attached.

    Args:
        log_name: Logger name, also used as the log file name.
        log_dir: Directory of the log file; created when missing.

    Returns:
        The configured ``logging.Logger``.
    """
    logger = logging.getLogger(log_name)
    logger.setLevel(logging.INFO)  # INFO, ERROR

    # exist_ok covers the case where several ranks share log_dir (e.g. on NFS).
    os.makedirs(log_dir, exist_ok=True)

    formatter = logging.Formatter('%(message)s')
    log_path = os.path.abspath(os.path.join(log_dir, log_name))

    # FileHandler subclasses StreamHandler, so exclude it from the console test.
    has_console = any(
        isinstance(h, logging.StreamHandler) and not isinstance(h, logging.FileHandler)
        for h in logger.handlers)
    has_file = any(
        isinstance(h, logging.FileHandler) and h.baseFilename == log_path
        for h in logger.handlers)

    if not has_console:
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    if not has_file:
        fh = logging.FileHandler(log_path)
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(formatter)
        logger.addHandler(fh)

    return logger
|
||||
|
||||
+72
@@ -0,0 +1,72 @@
|
||||
import tensorflow as tf
|
||||
from densenet import Dense_net
|
||||
|
||||
|
||||
class Model(object):
    """Estimator model function for DenseNet training and evaluation."""

    def __init__(self, args, data, hyper_param, layers, logger):
        """Keep references to the collaborating objects."""
        self.args, self.data = args, data
        self.hyper_param, self.layers = hyper_param, layers
        self.logger = logger

    def get_estimator_model_func(self, features, labels, mode, params=None):
        """Build the graph for ``mode`` and return its ``EstimatorSpec``.

        Args:
            features: Input image batch.
            labels: Integer class labels; flattened to one dimension.
            mode: ``tf.estimator.ModeKeys.TRAIN`` or ``EVAL``.
            params: Unused.

        Returns:
            For EVAL, a spec with the cross-entropy loss and the accuracy
            metrics; for TRAIN, a spec with the total loss (cross-entropy plus
            weighted L2) and the train op.
        """
        labels = tf.reshape(labels, (-1,))  # squash the unary dimension

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        inputs = tf.cast(features, self.args.dtype)

        logits = Dense_net(inputs, is_training)
        predicted_classes = tf.argmax(logits, axis=1, output_type=tf.int32)
        logits = tf.cast(logits, tf.float32)

        labels_one_hot = tf.one_hot(labels, depth=1000)
        loss = tf.losses.softmax_cross_entropy(
            logits=logits,
            onehot_labels=labels_one_hot,
            label_smoothing=self.args.label_smoothing)

        # Named tensors 'loss' and 'total_loss' are fetched by the logging hook.
        base_loss = tf.identity(loss, name='loss')

        l2_terms = [tf.nn.l2_loss(tf.cast(v, tf.float32)) for v in tf.trainable_variables()]
        l2_loss = tf.multiply(tf.add_n(l2_terms), self.args.weight_decay)
        total_loss = tf.identity(base_loss + l2_loss, name='total_loss')

        if mode == tf.estimator.ModeKeys.EVAL:
            with tf.device(None):
                metrics = self.layers.get_accuracy(labels, predicted_classes, logits, self.args)
            return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)

        assert (mode == tf.estimator.ModeKeys.TRAIN)

        # Not used below; kept so the constructed graph stays unchanged.
        batch_size = tf.shape(inputs)[0]

        global_step = tf.train.get_global_step()
        learning_rate = self.hyper_param.get_learning_rate()

        opt = tf.train.MomentumOptimizer(
            learning_rate, self.args.momentum, use_nesterov=self.args.use_nesterov)

        from npu_bridge.estimator.npu.npu_optimizer import NPUDistributedOptimizer
        opt = NPUDistributedOptimizer(opt)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) or []
        with tf.control_dependencies(update_ops):
            grads_and_vars = opt.compute_gradients(
                total_loss, gate_gradients=tf.train.Optimizer.GATE_NONE)
            train_op = opt.apply_gradients(grads_and_vars, global_step=global_step)

        train_op = tf.group(train_op)

        return tf.estimator.EstimatorSpec(mode, loss=total_loss, train_op=train_op)
|
||||
|
||||
+72
@@ -0,0 +1,72 @@
|
||||
import tensorflow as tf
|
||||
from tensorflow.contrib.image.python.ops import distort_image_ops
|
||||
import math
|
||||
import random
|
||||
|
||||
def decode_jpeg(imgdata, channels=3):
    """Decode JPEG bytes using the fast integer DCT without fancy upscaling.

    Args:
        imgdata: Encoded JPEG data.
        channels: Number of colour channels in the decoded image.

    Returns:
        The tensor produced by ``tf.image.decode_jpeg``.
    """
    decode_options = dict(fancy_upscaling=False, dct_method='INTEGER_FAST')
    return tf.image.decode_jpeg(imgdata, channels=channels, **decode_options)
|
||||
|
||||
|
||||
def random_horizontal_flip(image, prob):
    """Flip ``image`` left-right with probability ``prob``, decided per image.

    The decision is drawn inside the graph. A Python-level ``random.random()``
    would be evaluated only once, when the graph is built, so every image of
    the run would share the same flip decision.

    Args:
        image: Image tensor.
        prob: Probability of flipping.

    Returns:
        The flipped or the untouched image tensor.
    """
    should_flip = tf.less(tf.random_uniform([], minval=0.0, maxval=1.0), prob)
    return tf.cond(should_flip,
                   lambda: tf.image.flip_left_right(image),
                   lambda: image)
|
||||
|
||||
|
||||
def decode_crop_and_resize(record, bbox, size, scale, ratio):
    """Decode a random crop of a JPEG record and resize it to 224x224.

    Args:
        record: Encoded JPEG data.
        bbox: Bounding boxes passed to ``sample_distorted_bounding_box``.
        size: Currently unused; the output size is fixed at 224x224.
        scale: Allowed area range of the crop.
        ratio: Allowed aspect-ratio range of the crop.

    Returns:
        The cropped and resized image tensor.
    """
    with tf.name_scope('decode_crop_and_resize'):
        height = 224
        width = 224

        # Extract the shape once and reuse it for the box sampling.
        jpeg_shape = tf.image.extract_jpeg_shape(record)

        bbox_begin, bbox_size, _ = tf.image.sample_distorted_bounding_box(
            jpeg_shape,
            bounding_boxes=bbox,
            min_object_covered=0.1,
            aspect_ratio_range=ratio,
            area_range=scale,
            max_attempts=10,
            use_image_if_no_bounding_boxes=True)

        # Reassemble the bounding box in the format the crop op requires.
        offset_y, offset_x, _ = tf.unstack(bbox_begin)
        target_height, target_width, _ = tf.unstack(bbox_size)
        crop_window = tf.stack([offset_y, offset_x, target_height, target_width])

        image = tf.image.decode_and_crop_jpeg(record, crop_window, channels=3)
        image = tf.image.resize_images(image, [height, width])

        return image
|
||||
|
||||
|
||||
def parse_and_preprocess_image_record(record, bbox, training):
    """Turn an encoded JPEG record into a normalised 224x224 image.

    Training: random crop and resize followed by a random horizontal flip.
    Evaluation: full decode, resize to 256x256 and a central crop of
    fraction 224/256.

    Args:
        record: Encoded JPEG data.
        bbox: Bounding boxes used for the training crop.
        training: Python bool selecting the training pipeline.

    Returns:
        The normalised image tensor.
    """
    with tf.name_scope('preprocess'):
        if not training:
            image = decode_jpeg(record, channels=3)
            image = tf.image.resize_images(image, [256, 256])
            image = tf.image.central_crop(image, 224.0/256)
        else:
            image = decode_crop_and_resize(record, bbox, 224, (0.08, 1.0), (0.75, 1.333))
            image = random_horizontal_flip(image, 0.5)

        return normalize(image)
|
||||
|
||||
|
||||
def normalize(inputs):
    """Subtract the per-channel mean and divide by the per-channel std.

    The constants are the 0-1 range statistics scaled by 255.

    Args:
        inputs: Image tensor whose last dimension holds the three channels.

    Returns:
        The normalised tensor.
    """
    channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    # Add two leading unit dimensions so the constants broadcast over the image.
    mean = tf.expand_dims(tf.expand_dims(channel_mean, 0), 0)
    std = tf.expand_dims(tf.expand_dims(channel_std, 0), 0)

    centered = inputs - mean
    return centered * (1.0 / std)
|
||||
|
||||
+140
@@ -0,0 +1,140 @@
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
import os
|
||||
import sys
|
||||
import ast
|
||||
sys.path.append(os.path.realpath(os.path.join(os.path.dirname(__file__), '../')))
|
||||
sys.path.append(os.path.realpath(os.path.join(os.path.dirname(__file__), '../config')))
|
||||
sys.path.append(os.path.realpath(os.path.join(os.path.dirname(__file__), '../../../../utils')))
|
||||
sys.path.append(os.path.realpath(os.path.join(os.path.dirname(__file__), '../../../../utils/atlasboost')))
|
||||
|
||||
import data_loader as dl
|
||||
|
||||
import model as ml
|
||||
import hyper_param as hp
|
||||
import layers as ly
|
||||
import logger as lg
|
||||
import trainer as tr
|
||||
import create_session as cs
|
||||
import argparse
|
||||
|
||||
from benchmark_log import hwlog
|
||||
from benchmark_log.basic_utils import get_environment_info
|
||||
from benchmark_log.basic_utils import get_model_parameter
|
||||
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command-line options of the training script.

    Returns:
        The parsed ``argparse.Namespace``.

    Raises:
        ValueError: If any unknown command-line argument is present.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('--rank_size', default=1, type=int,
                        help="""number of NPUs to use.""")

    # mode and parameters related
    parser.add_argument('--mode', default='train_and_evaluate',
                        help="""mode to run the program e.g. train, evaluate, and
                        train_and_evaluate""")
    parser.add_argument('--max_train_steps', default=100, type=int,
                        help="""train steps for one NPU""")
    parser.add_argument('--iterations_per_loop', default=10, type=int,
                        help="""the number of steps in devices for each iteration""")
    parser.add_argument('--max_epochs', default=None, type=int,
                        help="""total epochs for training""")
    parser.add_argument('--epochs_between_evals', default=5, type=int,
                        help="""the interval between train and evaluation , only meaningful
                        when the mode is train_and_evaluate""")

    # dataset
    parser.add_argument('--data_dir', default='path/data',
                        help="""directory to data.""")

    # path for evaluation
    parser.add_argument('--eval_dir', default='path/eval',
                        help="""directory to evaluate.""")

    parser.add_argument('--dtype', default=tf.float32,
                        help="""data type of inputs.""")
    parser.add_argument('--use_nesterov', default=True, type=ast.literal_eval,
                        help=""" used in optimizer""")
    parser.add_argument('--label_smoothing', default=0.1, type=float,
                        help="""label smoothing factor""")
    # type=float so a value given on the command line is not left as a string.
    parser.add_argument('--weight_decay', default=0.0001, type=float,
                        help="""weight decay""")
    parser.add_argument('--batch_size', default=32, type=int,
                        help="""batch size for one NPU""")

    # learning rate and momentum
    parser.add_argument('--lr', default=0.1, type=float,
                        help="""learning rate""")
    parser.add_argument('--T_max', default=150, type=int,
                        help="""T_max for cosing_annealing learning rate""")
    parser.add_argument('--momentum', default=0.9, type=float,
                        help="""momentum used in optimizer.""")

    # display frequency
    parser.add_argument('--display_every', default=1, type=int,
                        help="""the frequency to display info""")

    # log file
    parser.add_argument('--log_name', default='densenet121_training.log',
                        help="""name of log file""")
    parser.add_argument('--log_dir', default='./model_1p',
                        help="""log directory""")

    args, unknown_args = parser.parse_known_args()

    print(args, unknown_args)
    if unknown_args:
        for bad_arg in unknown_args:
            print("ERROR: Unknown command line arg: %s" % bad_arg)
        raise ValueError("Invalid command line arg(s)")

    return args
|
||||
|
||||
|
||||
def main():
    """Build the training components and run the mode selected on the command line.

    Raises:
        ValueError: If ``args.mode`` is not 'train', 'evaluate' or
            'train_and_evaluate'.
    """
    args = parse_args()
    args.global_batch_size = args.batch_size * args.rank_size

    session = cs.CreateSession()
    data = dl.DataLoader(args)
    hyper_param = hp.HyperParams(args)
    layers = ly.Layers()
    logger = lg.LogSessionRunHook(args)
    model = ml.Model(args, data, hyper_param, layers, logger)

    trainer = tr.Trainer(session, args, data, model, logger)

    # Dispatch on the requested mode.
    runners = {
        'train': trainer.train,
        'evaluate': trainer.evaluate,
        'train_and_evaluate': trainer.train_and_evaluate,
    }
    if args.mode not in runners:
        raise ValueError("Invalid mode.")
    runners[args.mode]()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Record environment and configuration details in the benchmark log,
    # then start the run.
    hwlog.ROOT_DIR = os.path.split(os.path.abspath(__file__))[0]
    cpu_info, npu_info, framework_info, os_info, benchmark_version = get_environment_info("tensorflow")
    config_info = get_model_parameter("tensorflow_config")
    initinal_data = {"base_lr": 0.128, "dataset": "imagenet1024", "optimizer": "SGD", "loss_scale": 512,
                     "batchsize": 32}

    remarks = (
        (hwlog.CPU_INFO, cpu_info),
        (hwlog.NPU_INFO, npu_info),
        (hwlog.OS_INFO, os_info),
        (hwlog.FRAMEWORK_INFO, framework_info),
        (hwlog.BENCHMARK_VERSION, benchmark_version),
        (hwlog.CONFIG_INFO, config_info),
        (hwlog.BASE_LR, initinal_data.get("base_lr")),
        (hwlog.DATASET, initinal_data.get("dataset")),
        (hwlog.OPT_NAME, initinal_data.get("optimizer")),
        (hwlog.LOSS_SCALE, initinal_data.get("loss_scale")),
        (hwlog.INPUT_BATCH_SIZE, initinal_data.get("batchsize")),
    )
    for remark_key, remark_value in remarks:
        hwlog.remark_print(key=remark_key, value=remark_value)

    main()
|
||||
|
||||
+22
@@ -0,0 +1,22 @@
|
||||
import tensorflow as tf
|
||||
from tensorflow.python.ops import data_flow_ops
|
||||
import re
|
||||
import os
|
||||
from operator import itemgetter
|
||||
|
||||
|
||||
def sort_and_load_ckpts(log_dir):
    """List the checkpoints found in ``log_dir``, ordered by step.

    Only files named exactly ``model.ckpt-<step>.index`` are considered. The
    dots are escaped and the whole name must match, so names such as
    ``model.ckpt-7.index.bak`` are ignored.

    Args:
        log_dir: Directory to scan.

    Returns:
        A list of dicts with keys 'step' (int), 'path' (the index file path
        without its '.index' extension) and 'mtime' (modification time of the
        index file), sorted by ascending step.
    """
    index_name = re.compile(r'model\.ckpt-([0-9]+)\.index')

    ckpts = []
    for fname in os.listdir(log_dir):
        m = index_name.fullmatch(fname)
        if m is None:
            continue
        fullpath = os.path.join(log_dir, fname)
        ckpts.append({'step': int(m.group(1)),
                      'path': os.path.splitext(fullpath)[0],
                      'mtime': os.stat(fullpath).st_mtime,
                      })
    ckpts.sort(key=itemgetter('step'))
    return ckpts
|
||||
|
||||
|
||||
+128
@@ -0,0 +1,128 @@
|
||||
import tensorflow as tf
|
||||
import math
|
||||
import time
|
||||
import os
|
||||
import train_helper
|
||||
from logger import rank0log
|
||||
from benchmark_log import hwlog
|
||||
|
||||
class Trainer(object):
    """Drive training and evaluation through an ``NPUEstimator``."""

    # Column header and row format of the evaluation summary.
    _RESULT_HEADER = ' step epoch top1 top5 loss checkpoint_time(UTC)'
    _RESULT_ROW = '{:5d} {:5.1f} {:5.3f} {:6.2f} {:6.2f} {time}'

    def __init__(self, session, args, data, model, logger):
        """Store the collaborators and build the estimator.

        Args:
            session: Object exposing ``estimator_config``.
            args: Run configuration namespace.
            data: Object providing ``get_train_input_fn`` / ``get_eval_input_fn``.
            model: Object providing ``get_estimator_model_func``.
            logger: Logging hook exposing a ``logger`` attribute.
        """
        self.sess = session
        self.args = args
        self.data = data
        self.model = model
        self.logger = logger
        self.print_logger = self.logger.logger
        self.all_preds = []
        self.all_targets = []

        self.classifier, self.training_hook = self.get_npu_classifier()

    def get_npu_classifier(self):
        """Create the ``NPUEstimator`` and the list of training hooks."""
        from npu_bridge.estimator.npu.npu_config import NPURunConfig
        from npu_bridge.estimator.npu.npu_estimator import NPUEstimator

        run_config = NPURunConfig(
            hcom_parallel=True,
            precision_mode="allow_mix_precision",
            enable_data_pre_proc=True,
            save_checkpoints_steps=self.args.nsteps_per_epoch,
            session_config=self.sess.estimator_config,
            model_dir=self.args.log_dir,
            iterations_per_loop=self.args.iterations_per_loop,
            keep_checkpoint_max=5)

        classifier = NPUEstimator(
            model_fn=self.model.get_estimator_model_func,
            config=run_config
        )

        training_hooks = []
        training_hooks.append(self.logger)

        return classifier, training_hooks

    def _epoch_of_step(self, step):
        """Return the epoch containing global ``step``, using the global batch size."""
        global_batch = self.args.batch_size * self.args.rank_size
        return math.ceil(step / (self.args.num_training_samples / global_batch))

    def _evaluate_ckpt(self, ckpt):
        """Evaluate one checkpoint dict, store its metrics in it and return the raw result."""
        eval_result = self.classifier.evaluate(
            input_fn=lambda: self.data.get_eval_input_fn(),
            checkpoint_path=ckpt['path'])
        ckpt['epoch'] = self._epoch_of_step(ckpt['step'])
        ckpt['top1'] = eval_result['val-top1acc']
        ckpt['top5'] = eval_result['val-top5acc']
        ckpt['loss'] = eval_result['loss']
        return eval_result

    def _log_ckpt_result(self, ckpt):
        """Log one summary row; the timestamp is UTC, as the header states."""
        stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(ckpt['mtime']))
        rank0log(self.print_logger,
                 self._RESULT_ROW.format(ckpt['step'],
                                         ckpt['epoch'],
                                         ckpt['top1'] * 100,
                                         ckpt['top5'] * 100,
                                         ckpt['loss'],
                                         time=stamp))

    def train(self):
        """Train until ``args.nstep`` global steps are reached."""
        print('training steps: %d' % self.args.nstep)
        self.classifier.train(input_fn=lambda: self.data.get_train_input_fn(),
                              max_steps=self.args.nstep,
                              hooks=self.training_hook
                              )

    def evaluate(self):
        """Evaluate every checkpoint found in ``args.eval_dir`` and log a summary."""
        rank0log(self.print_logger, "Evaluating")
        rank0log(self.print_logger, "Validation dataset size: {}".format(self.args.num_evaluating_samples))
        time.sleep(5)  # a little extra margin...
        try:
            ckpts = train_helper.sort_and_load_ckpts(self.args.eval_dir)
            print("=========ckpt==========")
            print(ckpts)
            print("=========ckpt==========")
            for c in ckpts:
                self._evaluate_ckpt(c)

            rank0log(self.print_logger, self._RESULT_HEADER)
            for c in ckpts:
                if 'top1' not in c:
                    continue
                self._log_ckpt_result(c)
            rank0log(self.print_logger, "Finished evaluation")
        except KeyboardInterrupt:
            self.print_logger.error("Keyboard interrupt")

    def train_and_evaluate(self):
        """Alternate training cycles of ``epochs_between_evals`` epochs with evaluations.

        Raises:
            ValueError: If ``args.max_epochs`` is not set; the number of
                cycles is derived from it.
        """
        if self.args.max_epochs is None:
            raise ValueError("--max_epochs must be set for mode train_and_evaluate")

        epochs_between_evals = self.args.epochs_between_evals

        for _ in range(self.args.max_epochs // epochs_between_evals):
            rank0log(self.print_logger, "Starting a training cycle")

            self.classifier.train(input_fn=lambda: self.data.get_train_input_fn(),
                                  steps=self.args.nsteps_per_epoch * epochs_between_evals,
                                  hooks=self.training_hook)

            rank0log(self.print_logger, "Starting to evaluate")
            rank0log(self.print_logger, "Validation dataset size: {}".format(self.args.num_evaluating_samples))
            time.sleep(5)  # a little extra margin...

            # Evaluate the most recent checkpoint only.
            ckpts = train_helper.sort_and_load_ckpts(self.args.log_dir)
            c = ckpts[-1]
            eval_result = self._evaluate_ckpt(c)

            # Benchmark records for top-1 and top-5 accuracy.
            # NOTE(review): top-5 was logged under the TOP1 key. The TOP5 key
            # is used when hwlog defines it; otherwise the previous key is
            # kept. Confirm the constant name against benchmark_log.
            top5_key = getattr(hwlog, 'EVAL_ACCURACY_TOP5', hwlog.EVAL_ACCURACY_TOP1)
            hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP1, value=float(eval_result.get("val-top1acc")))
            hwlog.remark_print(key=top5_key, value=float(eval_result.get("val-top5acc")))

            rank0log(self.print_logger, self._RESULT_HEADER)
            self._log_ckpt_result(c)
|
||||
|
||||
+23
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"group_count": "1",
|
||||
"group_list": [
|
||||
{
|
||||
"group_name": "worker",
|
||||
"device_count": "1",
|
||||
"instance_count": "1",
|
||||
"instance_list": [
|
||||
{
|
||||
"devices": [
|
||||
{
|
||||
"device_id": "7",
|
||||
"device_ip": "192.168.193.103"
|
||||
}
|
||||
],
|
||||
"pod_name": "npu1p",
|
||||
"server_id": "127.0.0.1"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"status": "completed"
|
||||
}
|
||||
+51
@@ -0,0 +1,51 @@
|
||||
{
|
||||
"group_count": "1",
|
||||
"group_list": [
|
||||
{
|
||||
"group_name": "worker",
|
||||
"device_count": "8",
|
||||
"instance_count": "1",
|
||||
"instance_list": [
|
||||
{
|
||||
"devices": [
|
||||
{
|
||||
"device_id": "0",
|
||||
"device_ip": "192.168.190.102"
|
||||
},
|
||||
{
|
||||
"device_id": "1",
|
||||
"device_ip": "192.168.191.102"
|
||||
},
|
||||
{
|
||||
"device_id": "2",
|
||||
"device_ip": "192.168.192.102"
|
||||
},
|
||||
{
|
||||
"device_id": "3",
|
||||
"device_ip": "192.168.193.102"
|
||||
},
|
||||
{
|
||||
"device_id": "4",
|
||||
"device_ip": "192.168.190.103"
|
||||
},
|
||||
{
|
||||
"device_id": "5",
|
||||
"device_ip": "192.168.191.103"
|
||||
},
|
||||
{
|
||||
"device_id": "6",
|
||||
"device_ip": "192.168.192.103"
|
||||
},
|
||||
{
|
||||
"device_id": "7",
|
||||
"device_ip": "192.168.193.103"
|
||||
}
|
||||
],
|
||||
"pod_name": "npu8p",
|
||||
"server_id": "127.0.0.1"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"status": "completed"
|
||||
}
|
||||
+9
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"server_count": "1",
|
||||
"server_list": [{
|
||||
"device": [{devices}],
|
||||
"server_id": "127.0.0.1"
|
||||
}],
|
||||
"status": "completed",
|
||||
"version": "1.0"
|
||||
}
|
||||
+36
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
|
||||
|
||||
rm -rf /var/log/npu/slog/host-0/*
|
||||
# Environment when ascend-toolkit is installed:
|
||||
#export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/
|
||||
#export PYTHONPATH=$PYTHONPATH:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest//fwkacllib/python/site-packages/te:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/topi:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/hccl:/usr/local/Ascend/ascend-toolkit/latest/tfplugin/python/site-packages
|
||||
#export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin
|
||||
#export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
|
||||
|
||||
# Environment when nnae (and related packages) is installed:
|
||||
#export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib/:/usr/local/Ascend/nnae/latest/x86_64-linux_gcc7.3.0/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/
|
||||
#export PYTHONPATH=/home/train/resnet50_tf/code:/usr/local/Ascend/nnae/latest/x86_64-linux_gcc7.3.0/opp/op_impl/built-in/ai_core/tbe/:/usr/local/Ascend/nnae/latest/x86_64-linux_gcc7.3.0/fwkacllib/python/site-packages/te/:/usr/local/Ascend/nnae/latest/x86_64-linux_gcc7.3.0/fwkacllib/python/site-packages/topi/:/usr/local/Ascend/nnae/latest/x86_64-linux_gcc7.3.0/fwkacllib/python/site-packages/hccl/:/usr/local/Ascend/tfplugin/latest/x86_64-linux_gcc7.3.0/tfplugin/python/site-packages/:/usr/local/Ascend/tfplugin/latest/x86_64-linux_gcc7.3.0/tfplugin/python/site-packages/npu_bridge:/code
|
||||
#export PATH=$PATH:/usr/local/Ascend/nnae/latest/x86_64-linux_gcc7.3.0/fwkacllib/ccec_compiler/bin/
|
||||
#export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/x86_64-linux_gcc7.3.0/opp/
|
||||
|
||||
|
||||
# Select the Ascend software stack: use the nnae install when
# /usr/local/Ascend/nnae/latest exists, otherwise fall back to ascend-toolkit.
# NOTE(review): both branches overwrite LD_LIBRARY_PATH (any value inherited from
# the caller is dropped) while PYTHONPATH and PATH are appended to — confirm intended.
if [ -d /usr/local/Ascend/nnae/latest ];then
|
||||
|
||||
export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib/:/usr/local/Ascend/nnae/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/local/Ascend/driver/tools/hccn_tool/:/usr/local/mpirun4.0/lib
|
||||
# NOTE(review): the tfplugin site-packages directory is listed twice (first and last entry).
export PYTHONPATH=$PYTHONPATH:/usr/local/Ascend/tfplugin/latest/tfplugin/python/site-packages:/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/tfplugin/latest/tfplugin/python/site-packages
|
||||
export PATH=$PATH:/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin:/usr/local/mpirun4.0/bin
|
||||
export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp
|
||||
else
|
||||
# ascend-toolkit fallback. Unlike the nnae branch, LD_LIBRARY_PATH here omits
# /usr/local/ and the hccn_tool directory.
export LD_LIBRARY_PATH=/usr/local/lib/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/local/mpirun4.0/lib
|
||||
# Only this branch appends $projectDir, which is not set anywhere in this script
# — presumably provided by the sourcing script; TODO confirm.
export PYTHONPATH=$PYTHONPATH:/usr/local/Ascend/tfplugin/latest/tfplugin/python/site-packages:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest//fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/tfplugin/python/site-packages:$projectDir
|
||||
export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:/usr/local/mpirun4.0/bin
|
||||
export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
|
||||
|
||||
fi
|
||||
|
||||
# Fixed settings exported regardless of which stack was selected above.
export DDK_VERSION_FLAG=1.60.T17.B830
|
||||
# presumably a timeout in seconds — TODO confirm against the HCCL documentation
export HCCL_CONNECT_TIMEOUT=600
|
||||
export JOB_ID=9999001
|
||||
|
||||
export SLOG_PRINT_TO_STDOUT=0
|
||||
|
||||
+70
@@ -0,0 +1,70 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Positional arguments:
#   $1  rank_size  - number of devices to train on
#   $2  yamlPath   - path of the model yaml configuration
#   $3  toolsPath  - directory containing get_params_for_yaml.sh
#   $4  CLUSTER    - only read when running inside docker
#   $5  MPIRUN_ALL_IP - only read when running inside docker
rank_size=$1
yamlPath=$2
toolsPath=$3

# Parent directory of the directory holding this script.
currentDir=$(cd "$(dirname "$0")/.."; pwd)
model_name=$(cd "$currentDir/.."; basename "$(pwd)")

# /.dockerenv exists only inside a docker container; in that case the cluster
# settings arrive as positional arguments and CLUSTER is exported from them.
if [ -f /.dockerenv ]; then
    CLUSTER=$4
    MPIRUN_ALL_IP="$5"
    export CLUSTER
fi

# Load the "tensorflow_config" section of the yaml as shell variables.
# The helper's exit status is captured BEFORE eval: with `eval $(helper ...)`
# the value of $? is eval's own status, so a helper that fails and prints
# nothing would still be reported as success.
yaml_params=$("${toolsPath}/get_params_for_yaml.sh" "${yamlPath}" "tensorflow_config")
yaml_status=$?
if [ ${yaml_status} -eq 0 ]; then
    # Left unquoted on purpose: keeps the same word splitting as the original eval.
    eval ${yaml_params}
    yaml_status=$?
fi

if [ ${yaml_status} -eq 0 ]; then
    echo "modify inner config file success"
else
    echo "modify inner config file fail"
    exit 1
fi
|
||||
|
||||
# Create the per-run output directory, named after the launch timestamp.
currtime=$(date +%Y%m%d%H%M%S)
train_job_dir=${currentDir%train*}/train/result/tf_densenet121/training_job_${currtime}/
mkdir -p "${train_job_dir}"
echo "[$(date +%Y%m%d-%H:%M:%S)] [INFO] ${train_job_dir} &"

# Device list: taken from the variable device_group_<rank_size>p when it is set
# (it is not assigned in this script); when it is empty, or rank_size >= 8,
# devices 0..rank_size-1 are used in order.
eval device_group=\$device_group_${rank_size}p
if [ x"${device_group}" == x"" ] || [ ${rank_size} -ge 8 ]; then
    device_group="$(seq 0 $(( rank_size - 1 )))"
fi

# First device id in device_group; the hw log symlink created later points into
# the directory named after it. The first whitespace-separated token is taken
# rather than the first character, so ids with more than one digit stay intact.
device_group_str=$(echo ${device_group} | sed 's/ //g')
first_device_id=$(echo ${device_group} | awk '{print $1}')
|
||||
|
||||
# Start training: one mpirun launch in cluster mode, otherwise one background
# train.sh process per device on this host.
job_dir=${currentDir%train*}/train/result/tf_densenet121/training_job_${currtime}

if [ x"${CLUSTER}" == x"True" ]; then
    # Link the hw log of directory 0 into the job directory.
    ln -snf ${job_dir}/0/hw_densenet121.log ${job_dir}/

    # Copy the yaml and the json file to every other node listed in MPIRUN_ALL_IP.
    # jsonFilePath and mpirun_ip are not assigned in this script
    # — presumably they come from the yaml configuration; TODO confirm.
    this_ip=$(hostname -I | awk '{print $1}')
    for ip in $MPIRUN_ALL_IP; do
        if [ x"$ip" != x"$this_ip" ]; then
            scp $yamlPath root@$ip:$yamlPath
            scp ${jsonFilePath} root@$ip:${jsonFilePath}
        fi
    done

    export PATH=$PATH:/usr/local/mpirun4.0/bin
    # Every continuation line ends with " \" (space before the backslash) so an
    # option can never be glued to the one on the following line.
    # train.sh receives device id 0 and ${CLUSTER} as its sixth argument.
    mpirun -H ${mpirun_ip} \
        --bind-to none -map-by slot \
        --allow-run-as-root \
        --mca btl_tcp_if_exclude lo,docker0,endvnic,virbr0,vethf40501b,docker_gwbridge,br-f42ac38052b4 \
        --prefix /usr/local/mpirun4.0/ \
        ${currentDir}/scripts/train.sh 0 $rank_size $yamlPath $currtime ${toolsPath} ${CLUSTER}
else
    # Link the hw log of the first device into the job directory.
    ln -snf ${job_dir}/${first_device_id}/hw_densenet121.log ${job_dir}/

    # Ranks are numbered 0,1,2,... in device_group order; the rank id is passed
    # as the sixth argument of train.sh.
    rank_id=0
    for device_id in $device_group; do
        ${currentDir}/scripts/train.sh $device_id $rank_size $yamlPath $currtime ${toolsPath} $rank_id &
        rank_id=$(( rank_id + 1 ))
    done
fi

# Block until all background training processes have exited.
wait
|
||||
|
||||
#echo "[`date +%Y%m%d-%H:%M:%S`] [INFO] all train exit " >> ${currentDir}/result/main.log
|
||||
|
||||
+97
@@ -0,0 +1,97 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Per-device training setup. Positional arguments:
#   $1 device id, $2 rank size, $3 yaml path, $4 launch timestamp, $5 tools directory.
# ($6 is read further down in this script.)
device_id=$1
|
||||
rank_size=$2
|
||||
yamlPath=$3
|
||||
|
||||
# Parent directory of the directory holding this script.
currentDir=$(cd "$(dirname "$0")/.."; pwd)
|
||||
model_name="densenet121"
|
||||
currtime=$4
|
||||
toolsPath=$5
|
||||
|
||||
export YAML_PATH=$3
|
||||
|
||||
|
||||
# Output directory of this run, keyed by the launch timestamp.
mkdir -p ${currentDir%train*}/train/result/tf_densenet121/training_job_${currtime}/
|
||||
export train_job_dir=${currentDir%train*}/train/result/tf_densenet121/training_job_${currtime}/
|
||||
|
||||
# Load configuration from the yaml
|
||||
# NOTE(review): the result of this eval is not checked here.
eval $(${toolsPath}/get_params_for_yaml.sh ${yamlPath} "tensorflow_config")
|
||||
|
||||
export REMARK_LOG_FILE=hw_densenet121.log # marker-log file name; must be "hw_" followed by the lowercase model name
|
||||
# Add the benchmark utils directory to PYTHONPATH.
benchmark_log_path=${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils
|
||||
export PYTHONPATH=$PYTHONPATH:${benchmark_log_path}
|
||||
|
||||
source ${currentDir}/config/npu_set_env.sh
|
||||
|
||||
# user env
|
||||
export HCCL_CONNECT_TIMEOUT=600
|
||||
export JOB_ID=9999001
|
||||
# Rank table file is selected by rank size: <rank_size>p.json.
export RANK_TABLE_FILE=${currentDir}/config/${rank_size}p.json
|
||||
export RANK_SIZE=${rank_size}
|
||||
export SLOG_PRINT_TO_STDOUT=0
|
||||
export DEVICE_ID=${device_id}
|
||||
# NOTE(review): RANK_INDEX is not assigned in this script; when it is unset the
# arithmetic treats it as 0, so DEVICE_INDEX equals DEVICE_ID — confirm intended.
DEVICE_INDEX=$(( DEVICE_ID + RANK_INDEX * 8 ))
|
||||
export DEVICE_INDEX=${DEVICE_INDEX}
|
||||
|
||||
cd ${train_job_dir}
|
||||
# Add the atlasboost directory to PYTHONPATH.
curd_dir=${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils/atlasboost
|
||||
export PYTHONPATH=$PYTHONPATH:${curd_dir}
|
||||
|
||||
# Resolve RANK_ID (and, in cluster mode, DEVICE_ID).
# $6 is either the rank id, or the literal string "True" which marks a cluster run.
if [ x"$6" != x"True" ];then
|
||||
rank_id=$6
|
||||
export RANK_ID=$6
|
||||
else
|
||||
# Cluster run: a python one-liner initialises atlasboost, sets the device id to
# the local rank, and prints the global rank.
device_id_mo=$(python3.7 -c "import src.tensorflow.mpi_ops as atlasboost;atlasboost.init(); \
|
||||
device_id = atlasboost.local_rank();cluster_device_id = str(device_id); \
|
||||
atlasboost.set_device_id(device_id);print(atlasboost.rank())")
|
||||
# Unquoted echo collapses newlines and repeated whitespace into single spaces.
device_id_mo=`echo $device_id_mo`
|
||||
# Rank id = everything after the last space of the captured output.
rank_id=${device_id_mo##* }
|
||||
export RANK_ID=${rank_id}
|
||||
# Device id = text between the last "deviceid = " and the following " phyid=".
# presumably that text is printed by the atlasboost calls above — TODO confirm.
device=${device_id_mo##*deviceid = }
|
||||
device_id=${device%% phyid=*}
|
||||
# NOTE(review): DEVICE_ID is replaced here, but DEVICE_INDEX, computed earlier in
# this script from the initial DEVICE_ID, is not recomputed — confirm intended.
export DEVICE_ID=${device_id}
|
||||
# Copy the json found in the job directory over the rank table for this rank size.
# NOTE(review): the glob is expanded by cp; this assumes exactly one *.json match.
hccljson=${train_job_dir}/*.json
|
||||
cp ${hccljson} ${currentDir}/config/${rank_size}p.json
|
||||
fi
|
||||
|
||||
# Create the per-device working directory and switch into it.
#mkdir exec path
|
||||
mkdir -p ${train_job_dir}/${device_id}
|
||||
cd ${train_job_dir}/${device_id}
|
||||
|
||||
# Record the start time (human-readable and epoch seconds).
startTime=$(date +%Y%m%d-%H:%M:%S)
startTime_s=$(date +%s)

train_log=${train_job_dir}/train_${device_id}.log

# Pick the train.py arguments by run type. The exit status of train.py is
# captured immediately in each branch: reading $? after the whole if/elif chain
# yields 0 when no branch matched, which would report success although nothing ran.
if [ x"$6" == x"True" ]; then
    # Multi-device, multi-node (cluster)
    export CLUSTER=True
    python3.7 ${currentDir}/code/train.py --rank_size=${rank_size} --mode=${mode_8p} --max_epochs=${epoches} --iterations_per_loop=${iterations_per_loop_8p} --epochs_between_evals=${epochs_between_evals} --data_dir=${data_url} --lr=${lr} --log_dir=${log_dir} --log_name=${log_name_8p} > ${train_log} 2>&1
    train_status=$?
elif [ x"${rank_size}" == x"1" ]; then
    # Single device
    python3.7 ${currentDir}/code/train.py --rank_size=${rank_size} --mode=${mode_1p} --max_train_steps=${max_train_steps_1p} --iterations_per_loop=${iterations_per_loop_1p} --data_dir=${data_url} --display_every=${display_every} --log_dir=${log_dir} --log_name=${log_name_1p} > ${train_log} 2>&1
    train_status=$?
elif [ ${rank_size} -le 8 ]; then
    # Multi-device, single node
    python3.7 ${currentDir}/code/train.py --rank_size=${rank_size} --mode=${mode_8p} --max_epochs=${epoches} --iterations_per_loop=${iterations_per_loop_8p} --epochs_between_evals=${epochs_between_evals} --data_dir=${data_url} --lr=${lr} --log_dir=${log_dir} --log_name=${log_name_8p} > ${train_log} 2>&1
    train_status=$?
else
    # No supported configuration matched, so no training was started.
    echo "unsupported rank_size: ${rank_size}" >> ${train_log}
    train_status=1
fi

# Report the result on stdout, in the training log, and in the hw log of the
# current (per-device) directory.
if [ ${train_status} -eq 0 ]; then
    result_msg=":::ABK 1.0.0 densenet121 train success"
else
    result_msg=":::ABK 1.0.0 densenet121 train failed"
fi
echo "${result_msg}"
echo "${result_msg}" >> ${train_log}
echo "${result_msg}" >> ./hw_densenet121.log
|
||||
|
||||
# Compute the elapsed wall-clock time since startTime_s and report it as
# hours:minutes:seconds on stdout and in the per-device hw log.
# Uses $(( ... )) instead of the deprecated $[ ... ] arithmetic form.
endTime=$(date +%Y%m%d-%H:%M:%S)
endTime_s=$(date +%s)
sumTime=$(( endTime_s - startTime_s ))
hour=$(( sumTime / 3600 ))
min=$(( (sumTime - hour * 3600) / 60 ))
sec=$(( sumTime - hour * 3600 - min * 60 ))
echo ":::ABK 1.0.0 densenet121 train total time: ${hour}:${min}:${sec}"
echo ":::ABK 1.0.0 densenet121 train total time: ${hour}:${min}:${sec}" >> ${train_job_dir}/${device_id}/hw_densenet121.log
|
||||
@@ -0,0 +1,25 @@
|
||||
# EfficientNet_pytorch训练说明
|
||||
|
||||
### 1. 模型训练参数配置
|
||||
|
||||
在train/yaml/EfficientNet.yaml中修改相应配置, 配置项含义:
|
||||
|
||||
```
|
||||
pytorch_config:
|
||||
data_url: 数据集路径
|
||||
epoches: 跑多少个epoch
|
||||
batch_size: 1p 参数为256 2p 512 4p 1024 8p为2048
|
||||
seed: 49
|
||||
lr: 默认参数1p 0.2 2p 0.4 4p 0.8 8p 1.6
|
||||
docker_image: docker 镜像名称:版本号
|
||||
```
|
||||
|
||||
------
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,202 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
+253
@@ -0,0 +1,253 @@
|
||||
# EfficientNet PyTorch
|
||||
|
||||
### Quickstart
|
||||
|
||||
Install with `pip install efficientnet_pytorch` and load a pretrained EfficientNet with:
|
||||
```python
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
model = EfficientNet.from_pretrained('efficientnet-b0')
|
||||
```
|
||||
|
||||
### Updates
|
||||
|
||||
#### Update (May 14, 2020)
|
||||
|
||||
This update adds comprehensive comments and documentation (thanks to @workingcoder).
|
||||
|
||||
#### Update (January 23, 2020)
|
||||
|
||||
This update adds a new category of pre-trained model based on adversarial training, called _advprop_. It is important to note that the preprocessing required for the advprop pretrained models is slightly different from normal ImageNet preprocessing. As a result, by default, advprop models are not used. To load a model with advprop, use:
|
||||
```
|
||||
model = EfficientNet.from_pretrained("efficientnet-b0", advprop=True)
|
||||
```
|
||||
There is also a new, large `efficientnet-b8` pretrained model that is only available in advprop form. When using these models, replace ImageNet preprocessing code as follows:
|
||||
```
|
||||
if advprop: # for models using advprop pretrained weights
|
||||
normalize = transforms.Lambda(lambda img: img * 2.0 - 1.0)
|
||||
else:
|
||||
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
|
||||
std=[0.229, 0.224, 0.225])
|
||||
|
||||
```
|
||||
This update also addresses multiple other issues ([#115](https://github.com/lukemelas/EfficientNet-PyTorch/issues/115), [#128](https://github.com/lukemelas/EfficientNet-PyTorch/issues/128)).
|
||||
|
||||
#### Update (October 15, 2019)
|
||||
|
||||
This update allows you to choose whether to use a memory-efficient Swish activation. The memory-efficient version is chosen by default, but it cannot be used when exporting using PyTorch JIT. For this purpose, we have also included a standard (export-friendly) swish activation function. To switch to the export-friendly version, simply call `model.set_swish(memory_efficient=False)` after loading your desired model. This update addresses issues [#88](https://github.com/lukemelas/EfficientNet-PyTorch/pull/88) and [#89](https://github.com/lukemelas/EfficientNet-PyTorch/pull/89).
|
||||
|
||||
#### Update (October 12, 2019)
|
||||
|
||||
This update makes the Swish activation function more memory-efficient. It also addresses pull requests [#72](https://github.com/lukemelas/EfficientNet-PyTorch/pull/72), [#73](https://github.com/lukemelas/EfficientNet-PyTorch/pull/73), [#85](https://github.com/lukemelas/EfficientNet-PyTorch/pull/85), and [#86](https://github.com/lukemelas/EfficientNet-PyTorch/pull/86). Thanks to the authors of all the pull requests!
|
||||
|
||||
#### Update (July 31, 2019)
|
||||
|
||||
_Upgrade the pip package with_ `pip install --upgrade efficientnet-pytorch`
|
||||
|
||||
The B6 and B7 models are now available. Additionally, _all_ pretrained models have been updated to use AutoAugment preprocessing, which translates to better performance across the board. Usage is the same as before:
|
||||
```python
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
model = EfficientNet.from_pretrained('efficientnet-b7')
|
||||
```
|
||||
|
||||
#### Update (June 29, 2019)
|
||||
|
||||
This update adds easy model exporting ([#20](https://github.com/lukemelas/EfficientNet-PyTorch/issues/20)) and feature extraction ([#38](https://github.com/lukemelas/EfficientNet-PyTorch/issues/38)).
|
||||
|
||||
* [Example: Export to ONNX](#example-export-to-onnx)
|
||||
* [Example: Extract features](#example-feature-extraction)
|
||||
* Also: fixed a CUDA/CPU bug ([#32](https://github.com/lukemelas/EfficientNet-PyTorch/issues/32))
|
||||
|
||||
It is also now incredibly simple to load a pretrained model with a new number of classes for transfer learning:
|
||||
```python
|
||||
model = EfficientNet.from_pretrained('efficientnet-b1', num_classes=23)
|
||||
```
|
||||
|
||||
|
||||
#### Update (June 23, 2019)
|
||||
|
||||
The B4 and B5 models are now available. Their usage is identical to the other models:
|
||||
```python
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
model = EfficientNet.from_pretrained('efficientnet-b4')
|
||||
```
|
||||
|
||||
### Overview
|
||||
This repository contains an op-for-op PyTorch reimplementation of [EfficientNet](https://arxiv.org/abs/1905.11946), along with pre-trained models and examples.
|
||||
|
||||
The goal of this implementation is to be simple, highly extensible, and easy to integrate into your own projects. This implementation is a work in progress -- new features are currently being implemented.
|
||||
|
||||
At the moment, you can easily:
|
||||
* Load pretrained EfficientNet models
|
||||
* Use EfficientNet models for classification or feature extraction
|
||||
* Evaluate EfficientNet models on ImageNet or your own images
|
||||
|
||||
_Upcoming features_: In the next few days, you will be able to:
|
||||
* Train new models from scratch on ImageNet with a simple command
|
||||
* Quickly finetune an EfficientNet on your own dataset
|
||||
* Export EfficientNet models for production
|
||||
|
||||
### Table of contents
|
||||
1. [About EfficientNet](#about-efficientnet)
|
||||
2. [About EfficientNet-PyTorch](#about-efficientnet-pytorch)
|
||||
3. [Installation](#installation)
|
||||
4. [Usage](#usage)
|
||||
* [Load pretrained models](#loading-pretrained-models)
|
||||
* [Example: Classify](#example-classification)
|
||||
* [Example: Extract features](#example-feature-extraction)
|
||||
* [Example: Export to ONNX](#example-export-to-onnx)
|
||||
5. [Contributing](#contributing)
|
||||
|
||||
### About EfficientNet
|
||||
|
||||
If you're new to EfficientNets, here is an explanation straight from the official TensorFlow implementation:
|
||||
|
||||
EfficientNets are a family of image classification models, which achieve state-of-the-art accuracy, yet being an order-of-magnitude smaller and faster than previous models. We develop EfficientNets based on AutoML and Compound Scaling. In particular, we first use [AutoML Mobile framework](https://ai.googleblog.com/2018/08/mnasnet-towards-automating-design-of.html) to develop a mobile-size baseline network, named as EfficientNet-B0; Then, we use the compound scaling method to scale up this baseline to obtain EfficientNet-B1 to B7.
|
||||
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td>
|
||||
<img src="https://raw.githubusercontent.com/tensorflow/tpu/master/models/official/efficientnet/g3doc/params.png" width="100%" />
|
||||
</td>
|
||||
<td>
|
||||
<img src="https://raw.githubusercontent.com/tensorflow/tpu/master/models/official/efficientnet/g3doc/flops.png" width="90%" />
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
EfficientNets achieve state-of-the-art accuracy on ImageNet with an order of magnitude better efficiency:
|
||||
|
||||
|
||||
* In high-accuracy regime, our EfficientNet-B7 achieves state-of-the-art 84.4% top-1 / 97.1% top-5 accuracy on ImageNet with 66M parameters and 37B FLOPS, being 8.4x smaller and 6.1x faster on CPU inference than previous best [Gpipe](https://arxiv.org/abs/1811.06965).
|
||||
|
||||
* In middle-accuracy regime, our EfficientNet-B1 is 7.6x smaller and 5.7x faster on CPU inference than [ResNet-152](https://arxiv.org/abs/1512.03385), with similar ImageNet accuracy.
|
||||
|
||||
* Compared with the widely used [ResNet-50](https://arxiv.org/abs/1512.03385), our EfficientNet-B4 improves the top-1 accuracy from 76.3% of ResNet-50 to 82.6% (+6.3%), under similar FLOPS constraint.
|
||||
|
||||
### About EfficientNet PyTorch
|
||||
|
||||
EfficientNet PyTorch is a PyTorch re-implementation of EfficientNet. It is consistent with the [original TensorFlow implementation](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet), such that it is easy to load weights from a TensorFlow checkpoint. At the same time, we aim to make our PyTorch implementation as simple, flexible, and extensible as possible.
|
||||
|
||||
If you have any feature requests or questions, feel free to leave them as GitHub issues!
|
||||
|
||||
### Installation
|
||||
|
||||
Install via pip:
|
||||
```bash
|
||||
pip install efficientnet_pytorch
|
||||
```
|
||||
|
||||
Or install from source:
|
||||
```bash
|
||||
git clone https://github.com/lukemelas/EfficientNet-PyTorch
|
||||
cd EfficientNet-PyTorch
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
### Usage
|
||||
|
||||
#### Loading pretrained models
|
||||
|
||||
Load an EfficientNet:
|
||||
```python
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
model = EfficientNet.from_name('efficientnet-b0')
|
||||
```
|
||||
|
||||
Load a pretrained EfficientNet:
|
||||
```python
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
model = EfficientNet.from_pretrained('efficientnet-b0')
|
||||
```
|
||||
|
||||
Note that pretrained models have been released for `N=0,1,2,3,4,5,6,7`, so `.from_pretrained` supports `'efficientnet-b{N}'` for `N=0,1,2,3,4,5,6,7`. The `efficientnet-b8` model is only available with `advprop=True`.
|
||||
|
||||
Details about the models are below:
|
||||
|
||||
| *Name* |*# Params*|*Top-1 Acc.*|*Pretrained?*|
|
||||
|:-----------------:|:--------:|:----------:|:-----------:|
|
||||
| `efficientnet-b0` | 5.3M | 76.3 | ✓ |
|
||||
| `efficientnet-b1` | 7.8M | 78.8 | ✓ |
|
||||
| `efficientnet-b2` | 9.2M | 79.8 | ✓ |
|
||||
| `efficientnet-b3` | 12M | 81.1 | ✓ |
|
||||
| `efficientnet-b4` | 19M | 82.6 | ✓ |
|
||||
| `efficientnet-b5` | 30M | 83.3 | ✓ |
|
||||
| `efficientnet-b6` | 43M | 84.0 | ✓ |
|
||||
| `efficientnet-b7` | 66M | 84.4 | ✓ |
|
||||
|
||||
|
||||
#### Example: Classification
|
||||
|
||||
Below is a simple, complete example. It may also be found as a jupyter notebook in `examples/simple` or as a [Colab Notebook](https://colab.research.google.com/drive/1Jw28xZ1NJq4Cja4jLe6tJ6_F5lCzElb4).
|
||||
|
||||
We assume that in your current directory, there is a `img.jpg` file and a `labels_map.txt` file (ImageNet class names). These are both included in `examples/simple`.
|
||||
|
||||
```python
|
||||
import json
|
||||
from PIL import Image
|
||||
import torch
|
||||
from torchvision import transforms
|
||||
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
model = EfficientNet.from_pretrained('efficientnet-b0')
|
||||
|
||||
# Preprocess image
|
||||
tfms = transforms.Compose([transforms.Resize(224), transforms.ToTensor(),
|
||||
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),])
|
||||
img = tfms(Image.open('img.jpg')).unsqueeze(0)
|
||||
print(img.shape) # torch.Size([1, 3, 224, 224])
|
||||
|
||||
# Load ImageNet class names
|
||||
labels_map = json.load(open('labels_map.txt'))
|
||||
labels_map = [labels_map[str(i)] for i in range(1000)]
|
||||
|
||||
# Classify
|
||||
model.eval()
|
||||
with torch.no_grad():
|
||||
outputs = model(img)
|
||||
|
||||
# Print predictions
|
||||
print('-----')
|
||||
for idx in torch.topk(outputs, k=5).indices.squeeze(0).tolist():
|
||||
prob = torch.softmax(outputs, dim=1)[0, idx].item()
|
||||
print('{label:<75} ({p:.2f}%)'.format(label=labels_map[idx], p=prob*100))
|
||||
```
|
||||
|
||||
#### Example: Feature Extraction
|
||||
|
||||
You can easily extract features with `model.extract_features`:
|
||||
```python
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
model = EfficientNet.from_pretrained('efficientnet-b0')
|
||||
|
||||
# ... image preprocessing as in the classification example ...
|
||||
print(img.shape) # torch.Size([1, 3, 224, 224])
|
||||
|
||||
features = model.extract_features(img)
|
||||
print(features.shape) # torch.Size([1, 1280, 7, 7])
|
||||
```
|
||||
|
||||
#### Example: Export to ONNX
|
||||
|
||||
Exporting to ONNX for deploying to production is now simple:
|
||||
```python
|
||||
import torch
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
|
||||
model = EfficientNet.from_pretrained('efficientnet-b1')
|
||||
dummy_input = torch.randn(10, 3, 240, 240)
|
||||
|
||||
torch.onnx.export(model, dummy_input, "test-b1.onnx", verbose=True)
|
||||
```
|
||||
|
||||
[Here](https://colab.research.google.com/drive/1rOAEXeXHaA8uo3aG2YcFDHItlRJMV0VP) is a Colab example.
|
||||
|
||||
|
||||
#### ImageNet
|
||||
|
||||
See `examples/imagenet` for details about evaluating on ImageNet.
|
||||
|
||||
### Contributing
|
||||
|
||||
If you find a bug, create a GitHub issue, or even better, submit a pull request. Similarly, if you have questions, simply post them as GitHub issues.
|
||||
|
||||
I look forward to seeing what the community does with these models!
|
||||
+45
@@ -0,0 +1,45 @@
|
||||
# EfficientNet PyTorch
|
||||
|
||||
## About EfficientNet
|
||||
|
||||
If you're new to EfficientNets, here is an explanation straight from the official TensorFlow implementation:
|
||||
|
||||
EfficientNets are a family of image classification models, which achieve state-of-the-art accuracy, yet being an order-of-magnitude smaller and faster than previous models. We develop EfficientNets based on AutoML and Compound Scaling. In particular, we first use [AutoML Mobile framework](https://ai.googleblog.com/2018/08/mnasnet-towards-automating-design-of.html) to develop a mobile-size baseline network, named as EfficientNet-B0; Then, we use the compound scaling method to scale up this baseline to obtain EfficientNet-B1 to B7.
|
||||
|
||||
EfficientNets achieve state-of-the-art accuracy on ImageNet with an order of magnitude better efficiency:
|
||||
|
||||
|
||||
* In high-accuracy regime, our EfficientNet-B7 achieves state-of-the-art 84.4% top-1 / 97.1% top-5 accuracy on ImageNet with 66M parameters and 37B FLOPS, being 8.4x smaller and 6.1x faster on CPU inference than previous best [Gpipe](https://arxiv.org/abs/1811.06965).
|
||||
|
||||
* In middle-accuracy regime, our EfficientNet-B1 is 7.6x smaller and 5.7x faster on CPU inference than [ResNet-152](https://arxiv.org/abs/1512.03385), with similar ImageNet accuracy.
|
||||
|
||||
* Compared with the widely used [ResNet-50](https://arxiv.org/abs/1512.03385), our EfficientNet-B4 improves the top-1 accuracy from 76.3% of ResNet-50 to 82.6% (+6.3%), under similar FLOPS constraint.
|
||||
|
||||
## About EfficientNet PyTorch NPU
|
||||
|
||||
The source code is based on the open-source project https://github.com/lukemelas/EfficientNet-PyTorch, with as few modifications as possible.
|
||||
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Train on 1 NPU:
|
||||
|
||||
(1) modify the last line in npu_1p.sh with the particular params:
|
||||
|
||||
* fp32: taskset -c 0-64 python3.7 examples/imagenet/main.py --data=/data/imagenet --arch=efficientnet-b0 --batch-size=256 --lr=0.2 --epochs=200 --autoaug --npu=0
|
||||
* O1: taskset -c 0-64 python3.7 examples/imagenet/main.py --data=/data/imagenet --arch=efficientnet-b0 --batch-size=256 --lr=0.2 --epochs=200 --autoaug --npu=0 --amp --pm=O1 --loss_scale=1024
|
||||
* O2: taskset -c 0-64 python3.7 examples/imagenet/main.py --data=/data/imagenet --arch=efficientnet-b0 --batch-size=256 --lr=0.2 --epochs=200 --autoaug --npu=0 --amp --pm=O2 --loss_scale=128
|
||||
|
||||
(2) Execute run.sh. All training logs will be recorded in nohup.out.
|
||||
|
||||
## Known issues:
|
||||
|
||||
* Distributed training is NOT available.
|
||||
* top1/top5 accuracy is about 2% lower than on GPU in the same setting (dropout).
|
||||
* O2 performance is about 50 fps lower than on GPU in the same setting (dropout, depthwiseconv2d).
|
||||
* torch.rand is replaced with numpy implementation due to the lack of AICPU operator (aicpu).
|
||||
* momentum has to be set to 0 due to logsoftmax precision (logsoftmax).
|
||||
|
||||
|
||||
|
||||
|
||||
+12
@@ -0,0 +1,12 @@
|
||||
__version__ = "0.7.0"
|
||||
from .model import EfficientNet
|
||||
from .utils import (
|
||||
GlobalParams,
|
||||
BlockArgs,
|
||||
BlockDecoder,
|
||||
efficientnet,
|
||||
get_model_params,
|
||||
)
|
||||
from .auto_augment import rand_augment_transform, augment_and_mix_transform, auto_augment_transform
|
||||
from .rmsprop_tf import RMSpropTF
|
||||
|
||||
+817
@@ -0,0 +1,817 @@
|
||||
""" AutoAugment, RandAugment, and AugMix for PyTorch
|
||||
|
||||
This code implements the searched ImageNet policies with various tweaks and improvements and
|
||||
does not include any of the search code.
|
||||
|
||||
AA and RA Implementation adapted from:
|
||||
https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/autoaugment.py
|
||||
|
||||
AugMix adapted from:
|
||||
https://github.com/google-research/augmix
|
||||
|
||||
Papers:
|
||||
AutoAugment: Learning Augmentation Policies from Data - https://arxiv.org/abs/1805.09501
|
||||
Learning Data Augmentation Strategies for Object Detection - https://arxiv.org/abs/1906.11172
|
||||
RandAugment: Practical automated data augmentation... - https://arxiv.org/abs/1909.13719
|
||||
AugMix: A Simple Data Processing Method to Improve Robustness and Uncertainty - https://arxiv.org/abs/1912.02781
|
||||
|
||||
Hacked together by Ross Wightman
|
||||
"""
|
||||
import random
|
||||
import math
|
||||
import re
|
||||
from PIL import Image, ImageOps, ImageEnhance, ImageChops
|
||||
import PIL
|
||||
import numpy as np
|
||||
|
||||
|
||||
# (major, minor) of the installed Pillow; compared against version tuples below
# to decide which keyword arguments the transform calls may receive.
_PIL_VER = tuple([int(x) for x in PIL.__version__.split('.')[:2]])

# Default fill colour passed to the geometric transforms as `fillcolor`.
_FILL = (128, 128, 128)

# This signifies the max integer that the controller RNN could predict for the
# augmentation scheme.
_MAX_LEVEL = 10.

# Hyper-parameters used whenever a caller passes no (or an empty) hparams dict.
_HPARAMS_DEFAULT = dict(
    translate_const=250,
    img_mean=_FILL,
)

# Candidate resampling modes; _interpolation() draws one at random per call.
_RANDOM_INTERPOLATION = (Image.BILINEAR, Image.BICUBIC)
|
||||
|
||||
|
||||
def _interpolation(kwargs):
    """Pop ``'resample'`` from *kwargs* and resolve it to one interpolation mode.

    A list or tuple is treated as a set of candidates and one entry is drawn at
    random; any other value is returned unchanged. ``Image.BILINEAR`` is used
    when the key is absent. *kwargs* is mutated (the key is removed).
    """
    mode = kwargs.pop('resample', Image.BILINEAR)
    return random.choice(mode) if isinstance(mode, (list, tuple)) else mode
|
||||
|
||||
|
||||
def _check_args_tf(kwargs):
    """Adjust transform keyword arguments in place for the installed Pillow.

    ``fillcolor`` is removed when Pillow is older than 5.0, and ``resample`` is
    replaced by the single mode chosen by ``_interpolation``.
    """
    drop_fill = 'fillcolor' in kwargs and _PIL_VER < (5, 0)
    if drop_fill:
        del kwargs['fillcolor']
    resolved = _interpolation(kwargs)
    kwargs['resample'] = resolved
|
||||
|
||||
|
||||
def shear_x(img, factor, **kwargs):
    """Apply an affine shear along x (x' = x + factor * y), keeping the image size."""
    _check_args_tf(kwargs)
    affine = (1, factor, 0, 0, 1, 0)
    return img.transform(img.size, Image.AFFINE, affine, **kwargs)
|
||||
|
||||
|
||||
def shear_y(img, factor, **kwargs):
    """Apply an affine shear along y (y' = factor * x + y), keeping the image size."""
    _check_args_tf(kwargs)
    affine = (1, 0, 0, factor, 1, 0)
    return img.transform(img.size, Image.AFFINE, affine, **kwargs)
|
||||
|
||||
|
||||
def translate_x_rel(img, pct, **kwargs):
    """Translate *img* horizontally by *pct* times its width (``img.size[0]``)."""
    offset = pct * img.size[0]
    _check_args_tf(kwargs)
    affine = (1, 0, offset, 0, 1, 0)
    return img.transform(img.size, Image.AFFINE, affine, **kwargs)
|
||||
|
||||
|
||||
def translate_y_rel(img, pct, **kwargs):
    """Translate *img* vertically by *pct* times its height (``img.size[1]``)."""
    offset = pct * img.size[1]
    _check_args_tf(kwargs)
    affine = (1, 0, 0, 0, 1, offset)
    return img.transform(img.size, Image.AFFINE, affine, **kwargs)
|
||||
|
||||
|
||||
def translate_x_abs(img, pixels, **kwargs):
    """Translate *img* horizontally by an absolute offset of *pixels*."""
    _check_args_tf(kwargs)
    affine = (1, 0, pixels, 0, 1, 0)
    return img.transform(img.size, Image.AFFINE, affine, **kwargs)
|
||||
|
||||
|
||||
def translate_y_abs(img, pixels, **kwargs):
    """Translate *img* vertically by an absolute offset of *pixels*."""
    _check_args_tf(kwargs)
    affine = (1, 0, 0, 0, 1, pixels)
    return img.transform(img.size, Image.AFFINE, affine, **kwargs)
|
||||
|
||||
|
||||
def rotate(img, degrees, **kwargs):
    """Rotate *img* by *degrees*, choosing the call that the installed Pillow supports.

    * Pillow >= 5.2: every keyword is forwarded to ``img.rotate``.
    * Pillow 5.0 / 5.1: an affine matrix about the image centre is built by hand
      and applied through ``img.transform``.
    * Older Pillow: ``img.rotate`` is called with ``resample`` only.
    """
    _check_args_tf(kwargs)
    if _PIL_VER >= (5, 2):
        return img.rotate(degrees, **kwargs)
    if not _PIL_VER >= (5, 0):
        return img.rotate(degrees, resample=kwargs['resample'])

    width, height = img.size
    shift = (0, 0)
    centre = (width / 2.0, height / 2.0)
    theta = -math.radians(degrees)
    coeffs = [
        round(math.cos(theta), 15),
        round(math.sin(theta), 15),
        0.0,
        round(-math.sin(theta), 15),
        round(math.cos(theta), 15),
        0.0,
    ]

    # Push the negated centre through the linear part of the matrix, then add
    # the centre back to obtain the two translation terms.
    px = -centre[0] - shift[0]
    py = -centre[1] - shift[1]
    a, b, c, d, e, f = coeffs
    coeffs[2] = a * px + b * py + c
    coeffs[5] = d * px + e * py + f
    coeffs[2] += centre[0]
    coeffs[5] += centre[1]
    return img.transform(img.size, Image.AFFINE, coeffs, **kwargs)
|
||||
|
||||
|
||||
def auto_contrast(img, **__):
    """Return ``ImageOps.autocontrast(img)``; extra keyword arguments are ignored."""
    result = ImageOps.autocontrast(img)
    return result
|
||||
|
||||
|
||||
def invert(img, **__):
    """Return ``ImageOps.invert(img)``; extra keyword arguments are ignored."""
    result = ImageOps.invert(img)
    return result
|
||||
|
||||
|
||||
def equalize(img, **__):
    """Return ``ImageOps.equalize(img)``; extra keyword arguments are ignored."""
    result = ImageOps.equalize(img)
    return result
|
||||
|
||||
|
||||
def solarize(img, thresh, **__):
    """Return ``ImageOps.solarize(img, thresh)``; extra keyword arguments are ignored."""
    result = ImageOps.solarize(img, thresh)
    return result
|
||||
|
||||
|
||||
def solarize_add(img, add, thresh=128, **__):
    """Add *add* (clamped at 255) to every channel value below *thresh*.

    Only images whose mode is ``"L"`` or ``"RGB"`` are processed, via a lookup
    table handed to ``img.point``; any other mode is returned untouched.
    """
    table = [min(255, value + add) if value < thresh else value for value in range(256)]
    if img.mode not in ("L", "RGB"):
        return img
    if img.mode == "RGB" and len(table) == 256:
        # img.point takes one 256-entry table per band for RGB.
        table = table * 3
    return img.point(table)
|
||||
|
||||
|
||||
def posterize(img, bits_to_keep, **__):
    """Posterize *img* to *bits_to_keep* bits; 8 or more bits returns *img* as is."""
    return img if bits_to_keep >= 8 else ImageOps.posterize(img, bits_to_keep)
|
||||
|
||||
|
||||
def contrast(img, factor, **__):
    """Return *img* enhanced by ``ImageEnhance.Contrast`` with *factor*."""
    enhancer = ImageEnhance.Contrast(img)
    return enhancer.enhance(factor)
|
||||
|
||||
|
||||
def color(img, factor, **__):
    """Return *img* enhanced by ``ImageEnhance.Color`` with *factor*."""
    enhancer = ImageEnhance.Color(img)
    return enhancer.enhance(factor)
|
||||
|
||||
|
||||
def brightness(img, factor, **__):
    """Return *img* enhanced by ``ImageEnhance.Brightness`` with *factor*."""
    enhancer = ImageEnhance.Brightness(img)
    return enhancer.enhance(factor)
|
||||
|
||||
|
||||
def sharpness(img, factor, **__):
    """Return *img* enhanced by ``ImageEnhance.Sharpness`` with *factor*."""
    enhancer = ImageEnhance.Sharpness(img)
    return enhancer.enhance(factor)
|
||||
|
||||
|
||||
def _randomly_negate(v):
|
||||
"""With 50% prob, negate the value"""
|
||||
return -v if random.random() > 0.5 else v
|
||||
|
||||
|
||||
def _rotate_level_to_arg(level, _hparams):
    """Scale *level* to degrees (``_MAX_LEVEL`` maps to 30), randomly negate, return a 1-tuple."""
    degrees = (level / _MAX_LEVEL) * 30.
    return (_randomly_negate(degrees),)
|
||||
|
||||
|
||||
def _enhance_level_to_arg(level, _hparams):
    """Map *level* linearly to an enhance factor: 0 -> 0.1, ``_MAX_LEVEL`` -> 1.9 (1-tuple)."""
    factor = (level / _MAX_LEVEL) * 1.8 + 0.1
    return (factor,)
|
||||
|
||||
|
||||
def _enhance_increasing_level_to_arg(level, _hparams):
    """Return a 1-tuple enhance factor that moves away from 1.0 as *level* grows.

    1.0 is the 'no change' factor; the offset (up to 0.9 at ``_MAX_LEVEL``) is
    randomly negated, so the result lies in [0.1, 1.9] for levels in
    [0, ``_MAX_LEVEL``].
    """
    offset = (level / _MAX_LEVEL) * .9
    factor = 1.0 + _randomly_negate(offset)
    return (factor,)
|
||||
|
||||
|
||||
def _shear_level_to_arg(level, _hparams):
    """Scale *level* to a shear factor (``_MAX_LEVEL`` maps to 0.3), randomly negate, return a 1-tuple."""
    shear = (level / _MAX_LEVEL) * 0.3
    return (_randomly_negate(shear),)
|
||||
|
||||
|
||||
def _translate_abs_level_to_arg(level, hparams):
    """Scale *level* by ``hparams['translate_const']`` (required key), randomly negate, return a 1-tuple."""
    max_shift = float(hparams['translate_const'])
    shift = (level / _MAX_LEVEL) * max_shift
    return (_randomly_negate(shift),)
|
||||
|
||||
|
||||
def _translate_rel_level_to_arg(level, hparams):
    """Scale *level* by ``hparams['translate_pct']`` (default 0.45), randomly negate, return a 1-tuple."""
    max_fraction = hparams.get('translate_pct', 0.45)
    fraction = (level / _MAX_LEVEL) * max_fraction
    return (_randomly_negate(fraction),)
|
||||
|
||||
|
||||
def _posterize_level_to_arg(level, _hparams):
    """Return ``(bits_to_keep,)`` as per the Tensorflow TPU EfficientNet impl.

    Range [0, 4]: 'keep 0 up to 4 MSB of original image', so the
    intensity/severity of the augmentation decreases with level.
    """
    bits = int((level / _MAX_LEVEL) * 4)
    return (bits,)
|
||||
|
||||
|
||||
def _posterize_increasing_level_to_arg(level, hparams):
    """Return ``(bits_to_keep,)`` as per Tensorflow models research and UDA impl.

    Range [4, 0]: 'keep 4 down to 0 MSB of original image', so the
    intensity/severity of the augmentation increases with level.
    """
    decreasing_bits = _posterize_level_to_arg(level, hparams)[0]
    return (4 - decreasing_bits,)
|
||||
|
||||
|
||||
def _posterize_original_level_to_arg(level, _hparams):
    """Return ``(bits_to_keep,)`` as per the original AutoAugment paper description.

    Range [4, 8]: 'keep 4 up to 8 MSB of image', so the intensity/severity of
    the augmentation decreases with level.
    """
    bits = int((level / _MAX_LEVEL) * 4) + 4
    return (bits,)
|
||||
|
||||
|
||||
def _solarize_level_to_arg(level, _hparams):
    """Return ``(threshold,)`` in [0, 256]; augmentation severity decreases with level."""
    threshold = int((level / _MAX_LEVEL) * 256)
    return (threshold,)
|
||||
|
||||
|
||||
def _solarize_increasing_level_to_arg(level, _hparams):
    """Return ``(threshold,)`` in [0, 256]; augmentation severity increases with level."""
    decreasing_threshold = _solarize_level_to_arg(level, _hparams)[0]
    return (256 - decreasing_threshold,)
|
||||
|
||||
|
||||
def _solarize_add_level_to_arg(level, _hparams):
    """Return ``(add,)`` with *level* scaled to the integer range [0, 110]."""
    amount = int((level / _MAX_LEVEL) * 110)
    return (amount,)
|
||||
|
||||
|
||||
# Maps each op name to the function that converts a magnitude level into the
# positional argument tuple for that op. None means the op takes no
# level-derived argument (AugmentOp then passes an empty tuple).
LEVEL_TO_ARG = {
    'AutoContrast': None,
    'Equalize': None,
    'Invert': None,
    'Rotate': _rotate_level_to_arg,
    # There are several variations of the posterize level scaling in various Tensorflow/Google repositories/papers
    'Posterize': _posterize_level_to_arg,
    'PosterizeIncreasing': _posterize_increasing_level_to_arg,
    'PosterizeOriginal': _posterize_original_level_to_arg,
    'Solarize': _solarize_level_to_arg,
    'SolarizeIncreasing': _solarize_increasing_level_to_arg,
    'SolarizeAdd': _solarize_add_level_to_arg,
    'Color': _enhance_level_to_arg,
    'ColorIncreasing': _enhance_increasing_level_to_arg,
    'Contrast': _enhance_level_to_arg,
    'ContrastIncreasing': _enhance_increasing_level_to_arg,
    'Brightness': _enhance_level_to_arg,
    'BrightnessIncreasing': _enhance_increasing_level_to_arg,
    'Sharpness': _enhance_level_to_arg,
    'SharpnessIncreasing': _enhance_increasing_level_to_arg,
    'ShearX': _shear_level_to_arg,
    'ShearY': _shear_level_to_arg,
    'TranslateX': _translate_abs_level_to_arg,
    'TranslateY': _translate_abs_level_to_arg,
    'TranslateXRel': _translate_rel_level_to_arg,
    'TranslateYRel': _translate_rel_level_to_arg,
}
|
||||
|
||||
|
||||
# Maps each op name to the image function that performs it. The keys mirror
# LEVEL_TO_ARG; the "Increasing"/"Original" variants reuse the same image
# function and differ only in how the level is converted to an argument.
NAME_TO_OP = {
    'AutoContrast': auto_contrast,
    'Equalize': equalize,
    'Invert': invert,
    'Rotate': rotate,
    'Posterize': posterize,
    'PosterizeIncreasing': posterize,
    'PosterizeOriginal': posterize,
    'Solarize': solarize,
    'SolarizeIncreasing': solarize,
    'SolarizeAdd': solarize_add,
    'Color': color,
    'ColorIncreasing': color,
    'Contrast': contrast,
    'ContrastIncreasing': contrast,
    'Brightness': brightness,
    'BrightnessIncreasing': brightness,
    'Sharpness': sharpness,
    'SharpnessIncreasing': sharpness,
    'ShearX': shear_x,
    'ShearY': shear_y,
    'TranslateX': translate_x_abs,
    'TranslateY': translate_y_abs,
    'TranslateXRel': translate_x_rel,
    'TranslateYRel': translate_y_rel,
}
|
||||
|
||||
|
||||
class AugmentOp:
    """One named augmentation applied with a probability and a magnitude.

    The image function and the level-to-argument converter are looked up by
    *name* in ``NAME_TO_OP`` and ``LEVEL_TO_ARG``.
    """

    def __init__(self, name, prob=0.5, magnitude=10, hparams=None):
        """Bind op *name* with its probability, magnitude and hyper-parameters.

        :param name: key into ``NAME_TO_OP`` / ``LEVEL_TO_ARG``
        :param prob: probability of applying the op on a call
        :param magnitude: level handed to the level-to-argument converter
        :param hparams: optional dict; ``_HPARAMS_DEFAULT`` is used when falsy
        """
        settings = hparams or _HPARAMS_DEFAULT
        self.aug_fn = NAME_TO_OP[name]
        self.level_fn = LEVEL_TO_ARG[name]
        self.prob = prob
        self.magnitude = magnitude
        self.hparams = settings.copy()
        self.kwargs = {
            'fillcolor': settings.get('img_mean', _FILL),
            'resample': settings.get('interpolation', _RANDOM_INTERPOLATION),
        }

        # If magnitude_std is > 0, we introduce some randomness
        # in the usually fixed policy and sample magnitude from a normal distribution
        # with mean `magnitude` and std-dev of `magnitude_std`.
        # NOTE This is my own hack, being tested, not in papers or reference impls.
        self.magnitude_std = self.hparams.get('magnitude_std', 0)

    def __call__(self, img):
        """Return *img* unchanged with probability ``1 - prob``, else the augmented image."""
        skipped = self.prob < 1.0 and random.random() > self.prob
        if skipped:
            return img

        level = self.magnitude
        if self.magnitude_std and self.magnitude_std > 0:
            level = random.gauss(level, self.magnitude_std)
        # clip to valid range
        level = min(_MAX_LEVEL, max(0, level))

        if self.level_fn is None:
            op_args = tuple()
        else:
            op_args = self.level_fn(level, self.hparams)
        return self.aug_fn(img, *op_args, **self.kwargs)
|
||||
|
||||
|
||||
def auto_augment_policy_v0(hparams):
    """Build the ImageNet v0 policy from the TPU EfficientNet impl (no paper reference found).

    :param hparams: hyper-parameter dict forwarded to every ``AugmentOp``
    :return: list of sub-policies, each a list of two ``AugmentOp`` instances
    """
    # Each row is one sub-policy of (op name, probability, magnitude) triples.
    table = (
        (('Equalize', 0.8, 1), ('ShearY', 0.8, 4)),
        (('Color', 0.4, 9), ('Equalize', 0.6, 3)),
        (('Color', 0.4, 1), ('Rotate', 0.6, 8)),
        (('Solarize', 0.8, 3), ('Equalize', 0.4, 7)),
        (('Solarize', 0.4, 2), ('Solarize', 0.6, 2)),
        (('Color', 0.2, 0), ('Equalize', 0.8, 8)),
        (('Equalize', 0.4, 8), ('SolarizeAdd', 0.8, 3)),
        (('ShearX', 0.2, 9), ('Rotate', 0.6, 8)),
        (('Color', 0.6, 1), ('Equalize', 1.0, 2)),
        (('Invert', 0.4, 9), ('Rotate', 0.6, 0)),
        (('Equalize', 1.0, 9), ('ShearY', 0.6, 3)),
        (('Color', 0.4, 7), ('Equalize', 0.6, 0)),
        (('Posterize', 0.4, 6), ('AutoContrast', 0.4, 7)),
        (('Solarize', 0.6, 8), ('Color', 0.6, 9)),
        (('Solarize', 0.2, 4), ('Rotate', 0.8, 9)),
        (('Rotate', 1.0, 7), ('TranslateYRel', 0.8, 9)),
        (('ShearX', 0.0, 0), ('Solarize', 0.8, 4)),
        (('ShearY', 0.8, 0), ('Color', 0.6, 4)),
        (('Color', 1.0, 0), ('Rotate', 0.6, 2)),
        (('Equalize', 0.8, 4), ('Equalize', 0.0, 8)),
        (('Equalize', 1.0, 4), ('AutoContrast', 0.6, 2)),
        (('ShearY', 0.4, 7), ('SolarizeAdd', 0.6, 7)),
        (('Posterize', 0.8, 2), ('Solarize', 0.6, 10)),  # This results in black image with Tpu posterize
        (('Solarize', 0.6, 8), ('Equalize', 0.6, 1)),
        (('Color', 0.8, 6), ('Rotate', 0.4, 5)),
    )
    built = []
    for sub_policy in table:
        ops = [AugmentOp(name, prob, magnitude, hparams=hparams) for name, prob, magnitude in sub_policy]
        built.append(ops)
    return built
|
||||
|
||||
|
||||
def auto_augment_policy_v0r(hparams):
    """Build the ImageNet v0 policy with the research Posterize variation.

    Same table as the TPU EfficientNet v0 policy, but using
    ``PosterizeIncreasing`` as in the Google research implementation (number of
    bits discarded increases with magnitude).

    :param hparams: hyper-parameter dict forwarded to every ``AugmentOp``
    :return: list of sub-policies, each a list of two ``AugmentOp`` instances
    """
    # Each row is one sub-policy of (op name, probability, magnitude) triples.
    table = (
        (('Equalize', 0.8, 1), ('ShearY', 0.8, 4)),
        (('Color', 0.4, 9), ('Equalize', 0.6, 3)),
        (('Color', 0.4, 1), ('Rotate', 0.6, 8)),
        (('Solarize', 0.8, 3), ('Equalize', 0.4, 7)),
        (('Solarize', 0.4, 2), ('Solarize', 0.6, 2)),
        (('Color', 0.2, 0), ('Equalize', 0.8, 8)),
        (('Equalize', 0.4, 8), ('SolarizeAdd', 0.8, 3)),
        (('ShearX', 0.2, 9), ('Rotate', 0.6, 8)),
        (('Color', 0.6, 1), ('Equalize', 1.0, 2)),
        (('Invert', 0.4, 9), ('Rotate', 0.6, 0)),
        (('Equalize', 1.0, 9), ('ShearY', 0.6, 3)),
        (('Color', 0.4, 7), ('Equalize', 0.6, 0)),
        (('PosterizeIncreasing', 0.4, 6), ('AutoContrast', 0.4, 7)),
        (('Solarize', 0.6, 8), ('Color', 0.6, 9)),
        (('Solarize', 0.2, 4), ('Rotate', 0.8, 9)),
        (('Rotate', 1.0, 7), ('TranslateYRel', 0.8, 9)),
        (('ShearX', 0.0, 0), ('Solarize', 0.8, 4)),
        (('ShearY', 0.8, 0), ('Color', 0.6, 4)),
        (('Color', 1.0, 0), ('Rotate', 0.6, 2)),
        (('Equalize', 0.8, 4), ('Equalize', 0.0, 8)),
        (('Equalize', 1.0, 4), ('AutoContrast', 0.6, 2)),
        (('ShearY', 0.4, 7), ('SolarizeAdd', 0.6, 7)),
        (('PosterizeIncreasing', 0.8, 2), ('Solarize', 0.6, 10)),
        (('Solarize', 0.6, 8), ('Equalize', 0.6, 1)),
        (('Color', 0.8, 6), ('Rotate', 0.4, 5)),
    )
    built = []
    for sub_policy in table:
        ops = [AugmentOp(name, prob, magnitude, hparams=hparams) for name, prob, magnitude in sub_policy]
        built.append(ops)
    return built
|
||||
|
||||
|
||||
def auto_augment_policy_original(hparams):
    """Build the ImageNet policy from https://arxiv.org/abs/1805.09501.

    :param hparams: hyper-parameter dict forwarded to every ``AugmentOp``
    :return: list of sub-policies, each a list of two ``AugmentOp`` instances
    """
    # Each row is one sub-policy of (op name, probability, magnitude) triples.
    table = (
        (('PosterizeOriginal', 0.4, 8), ('Rotate', 0.6, 9)),
        (('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)),
        (('Equalize', 0.8, 8), ('Equalize', 0.6, 3)),
        (('PosterizeOriginal', 0.6, 7), ('PosterizeOriginal', 0.6, 6)),
        (('Equalize', 0.4, 7), ('Solarize', 0.2, 4)),
        (('Equalize', 0.4, 4), ('Rotate', 0.8, 8)),
        (('Solarize', 0.6, 3), ('Equalize', 0.6, 7)),
        (('PosterizeOriginal', 0.8, 5), ('Equalize', 1.0, 2)),
        (('Rotate', 0.2, 3), ('Solarize', 0.6, 8)),
        (('Equalize', 0.6, 8), ('PosterizeOriginal', 0.4, 6)),
        (('Rotate', 0.8, 8), ('Color', 0.4, 0)),
        (('Rotate', 0.4, 9), ('Equalize', 0.6, 2)),
        (('Equalize', 0.0, 7), ('Equalize', 0.8, 8)),
        (('Invert', 0.6, 4), ('Equalize', 1.0, 8)),
        (('Color', 0.6, 4), ('Contrast', 1.0, 8)),
        (('Rotate', 0.8, 8), ('Color', 1.0, 2)),
        (('Color', 0.8, 8), ('Solarize', 0.8, 7)),
        (('Sharpness', 0.4, 7), ('Invert', 0.6, 8)),
        (('ShearX', 0.6, 5), ('Equalize', 1.0, 9)),
        (('Color', 0.4, 0), ('Equalize', 0.6, 3)),
        (('Equalize', 0.4, 7), ('Solarize', 0.2, 4)),
        (('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)),
        (('Invert', 0.6, 4), ('Equalize', 1.0, 8)),
        (('Color', 0.6, 4), ('Contrast', 1.0, 8)),
        (('Equalize', 0.8, 8), ('Equalize', 0.6, 3)),
    )
    built = []
    for sub_policy in table:
        ops = [AugmentOp(name, prob, magnitude, hparams=hparams) for name, prob, magnitude in sub_policy]
        built.append(ops)
    return built
|
||||
|
||||
|
||||
def auto_augment_policy_originalr(hparams):
    """Build the ImageNet policy from https://arxiv.org/abs/1805.09501 with research posterize variation.

    :param hparams: hyper-parameter dict forwarded to every ``AugmentOp``
    :return: list of sub-policies, each a list of two ``AugmentOp`` instances
    """
    # Each row is one sub-policy of (op name, probability, magnitude) triples.
    table = (
        (('PosterizeIncreasing', 0.4, 8), ('Rotate', 0.6, 9)),
        (('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)),
        (('Equalize', 0.8, 8), ('Equalize', 0.6, 3)),
        (('PosterizeIncreasing', 0.6, 7), ('PosterizeIncreasing', 0.6, 6)),
        (('Equalize', 0.4, 7), ('Solarize', 0.2, 4)),
        (('Equalize', 0.4, 4), ('Rotate', 0.8, 8)),
        (('Solarize', 0.6, 3), ('Equalize', 0.6, 7)),
        (('PosterizeIncreasing', 0.8, 5), ('Equalize', 1.0, 2)),
        (('Rotate', 0.2, 3), ('Solarize', 0.6, 8)),
        (('Equalize', 0.6, 8), ('PosterizeIncreasing', 0.4, 6)),
        (('Rotate', 0.8, 8), ('Color', 0.4, 0)),
        (('Rotate', 0.4, 9), ('Equalize', 0.6, 2)),
        (('Equalize', 0.0, 7), ('Equalize', 0.8, 8)),
        (('Invert', 0.6, 4), ('Equalize', 1.0, 8)),
        (('Color', 0.6, 4), ('Contrast', 1.0, 8)),
        (('Rotate', 0.8, 8), ('Color', 1.0, 2)),
        (('Color', 0.8, 8), ('Solarize', 0.8, 7)),
        (('Sharpness', 0.4, 7), ('Invert', 0.6, 8)),
        (('ShearX', 0.6, 5), ('Equalize', 1.0, 9)),
        (('Color', 0.4, 0), ('Equalize', 0.6, 3)),
        (('Equalize', 0.4, 7), ('Solarize', 0.2, 4)),
        (('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)),
        (('Invert', 0.6, 4), ('Equalize', 1.0, 8)),
        (('Color', 0.6, 4), ('Contrast', 1.0, 8)),
        (('Equalize', 0.8, 8), ('Equalize', 0.6, 3)),
    )
    built = []
    for sub_policy in table:
        ops = [AugmentOp(name, prob, magnitude, hparams=hparams) for name, prob, magnitude in sub_policy]
        built.append(ops)
    return built
|
||||
|
||||
|
||||
def auto_augment_policy(name='v0', hparams=None):
    """Build the AutoAugment policy called *name*.

    :param name: one of 'original', 'originalr', 'v0', 'v0r'
    :param hparams: hyper-parameter dict forwarded to the policy builder;
        ``_HPARAMS_DEFAULT`` is used when falsy
    :return: list of sub-policies (lists of ``AugmentOp``)
    :raises AssertionError: if *name* is not a known policy
    """
    hparams = hparams or _HPARAMS_DEFAULT
    if name == 'original':
        return auto_augment_policy_original(hparams)
    if name == 'originalr':
        return auto_augment_policy_originalr(hparams)
    if name == 'v0':
        return auto_augment_policy_v0(hparams)
    if name == 'v0r':
        return auto_augment_policy_v0r(hparams)
    # Raised explicitly rather than via `assert False`: assert statements are
    # removed under `python -O`, which would make an unknown name return None.
    raise AssertionError('Unknown AA policy (%s)' % name)
|
||||
|
||||
|
||||
class AutoAugment:
    """Transform that applies one randomly chosen sub-policy per image."""

    def __init__(self, policy):
        """Store *policy*, a sequence of sub-policies (each a sequence of callables)."""
        self.policy = policy

    def __call__(self, img):
        """Pick a sub-policy with ``random.choice`` and run its ops on *img* in order."""
        chosen = random.choice(self.policy)
        result = img
        for step in chosen:
            result = step(result)
        return result
|
||||
|
||||
|
||||
def auto_augment_transform(config_str, hparams):
    """
    Create an AutoAugment transform

    :param config_str: String defining configuration of auto augmentation. Consists of multiple sections separated by
    dashes ('-'). The first section defines the AutoAugment policy (one of 'v0', 'v0r', 'original', 'originalr').
    The remaining sections, not order specific, determine
        'mstd' -  float std deviation of magnitude noise applied
    Ex 'original-mstd0.5' results in AutoAugment with original policy, magnitude_std 0.5

    :param hparams: Other hparams (kwargs) for the AutoAugmentation scheme. The dict is updated in place:
    'magnitude_std' is added when an 'mstd' section is present and the key is not already set.

    :return: A PyTorch compatible Transform

    :raises AssertionError: if a section has an unknown key or the policy name is unknown
    """
    policy_name, *sections = config_str.split('-')
    for section in sections:
        # Split at the first digit: 'mstd0.5' -> ['mstd', '0.5', ''].
        parts = re.split(r'(\d.*)', section)
        if len(parts) < 2:
            # No numeric value in this section; nothing to configure.
            continue
        key, val = parts[:2]
        if key == 'mstd':
            # noise param injected via hparams for now
            hparams.setdefault('magnitude_std', float(val))
        else:
            # Raised explicitly rather than via `assert False`, which would be
            # stripped under `python -O` and let bad sections pass silently.
            raise AssertionError('Unknown AutoAugment config section')
    aa_policy = auto_augment_policy(policy_name, hparams=hparams)
    return AutoAugment(aa_policy)
|
||||
|
||||
|
||||
# Default op names for RandAugment (used by rand_augment_ops and
# _select_rand_weights when no explicit transform list is given).
_RAND_TRANSFORMS = [
    'AutoContrast',
    'Equalize',
    'Invert',
    'Rotate',
    'Posterize',
    'Solarize',
    'SolarizeAdd',
    'Color',
    'Contrast',
    'Brightness',
    'Sharpness',
    'ShearX',
    'ShearY',
    'TranslateXRel',
    'TranslateYRel',
    #'Cutout'  # NOTE I've implemented this as random erasing separately
]
|
||||
|
||||
|
||||
# RandAugment op names selected by the 'inc' config section: the same list as
# _RAND_TRANSFORMS, position for position, with the "Increasing" variants
# substituted where they exist.
_RAND_INCREASING_TRANSFORMS = [
    'AutoContrast',
    'Equalize',
    'Invert',
    'Rotate',
    'PosterizeIncreasing',
    'SolarizeIncreasing',
    'SolarizeAdd',
    'ColorIncreasing',
    'ContrastIncreasing',
    'BrightnessIncreasing',
    'SharpnessIncreasing',
    'ShearX',
    'ShearY',
    'TranslateXRel',
    'TranslateYRel',
    #'Cutout'  # NOTE I've implemented this as random erasing separately
]
|
||||
|
||||
|
||||
|
||||
# These experimental weights are based loosely on the relative improvements mentioned in the paper.
# They may not result in increased performance, but could likely be tuned to do so.
# Keyed by the op names in _RAND_TRANSFORMS; _select_rand_weights normalises
# them by their sum before use.
_RAND_CHOICE_WEIGHTS_0 = {
    'Rotate': 0.3,
    'ShearX': 0.2,
    'ShearY': 0.2,
    'TranslateXRel': 0.1,
    'TranslateYRel': 0.1,
    'Color': .025,
    'Sharpness': 0.025,
    'AutoContrast': 0.025,
    'Solarize': .005,
    'SolarizeAdd': .005,
    'Contrast': .005,
    'Brightness': .005,
    'Equalize': .005,
    'Posterize': 0,
    'Invert': 0,
}
|
||||
|
||||
|
||||
def _select_rand_weights(weight_idx=0, transforms=None):
    """Return per-op selection probabilities, normalised to sum to 1.

    :param weight_idx: index of the weight set; only 0 exists (asserted)
    :param transforms: op names to look up; ``_RAND_TRANSFORMS`` when falsy
    :return: numpy array with one probability per entry of *transforms*
    """
    names = transforms or _RAND_TRANSFORMS
    assert weight_idx == 0  # only one set of weights currently
    weight_table = _RAND_CHOICE_WEIGHTS_0
    raw = [weight_table[name] for name in names]
    total = np.sum(raw)
    return np.asarray(raw) / total
|
||||
|
||||
|
||||
def rand_augment_ops(magnitude=10, hparams=None, transforms=None):
    """Build one ``AugmentOp`` (prob 0.5) per transform name for RandAugment.

    :param magnitude: magnitude given to every op
    :param hparams: hyper-parameter dict; ``_HPARAMS_DEFAULT`` when falsy
    :param transforms: op names; ``_RAND_TRANSFORMS`` when falsy
    :return: list of ``AugmentOp`` in the order of *transforms*
    """
    settings = hparams or _HPARAMS_DEFAULT
    names = transforms or _RAND_TRANSFORMS
    ops = []
    for op_name in names:
        ops.append(AugmentOp(op_name, prob=0.5, magnitude=magnitude, hparams=settings))
    return ops
|
||||
|
||||
|
||||
class RandAugment:
    """Transform that applies ``num_layers`` randomly selected ops per image."""

    def __init__(self, ops, num_layers=2, choice_weights=None):
        """Store the candidate *ops*, the number drawn per call, and optional weights."""
        self.ops = ops
        self.num_layers = num_layers
        self.choice_weights = choice_weights

    def __call__(self, img):
        """Draw ``num_layers`` ops with ``np.random.choice`` and apply them to *img* in order."""
        # no replacement when using weighted choice
        with_replacement = self.choice_weights is None
        selected = np.random.choice(
            self.ops, self.num_layers, replace=with_replacement, p=self.choice_weights)
        result = img
        for step in selected:
            result = step(result)
        return result
|
||||
|
||||
|
||||
def rand_augment_transform(config_str, hparams):
    """
    Create a RandAugment transform

    :param config_str: String defining configuration of random augmentation. Consists of multiple sections separated by
    dashes ('-'). The first section defines the specific variant of rand augment (currently only 'rand'). The remaining
    sections, not order specific, determine
        'm' - integer magnitude of rand augment
        'n' - integer num layers (number of transform ops selected per image)
        'w' - integer probability weight index (index of a set of weights to influence choice of op)
        'mstd' -  float std deviation of magnitude noise applied
        'inc' - integer (bool), use augmentations that increase in severity with magnitude (default: 0)
    Ex 'rand-m9-n3-mstd0.5' results in RandAugment with magnitude 9, num_layers 3, magnitude_std 0.5
    'rand-mstd1-w0' results in magnitude_std 1.0, weights 0, default magnitude of 10 and num_layers 2

    :param hparams: Other hparams (kwargs) for the RandAugmentation scheme. The dict is updated in place:
    'magnitude_std' is added when an 'mstd' section is present and the key is not already set.

    :return: A PyTorch compatible Transform

    :raises AssertionError: if the first section is not 'rand' or a section has an unknown key
    """
    magnitude = _MAX_LEVEL  # default to _MAX_LEVEL for magnitude (currently 10)
    num_layers = 2  # default to 2 ops per image
    weight_idx = None  # default to no probability weights for op choice
    transforms = _RAND_TRANSFORMS
    variant, *sections = config_str.split('-')
    # Explicit raises instead of assert statements, which `python -O` strips.
    if variant != 'rand':
        raise AssertionError
    for section in sections:
        # Split at the first digit: 'mstd0.5' -> ['mstd', '0.5', ''].
        parts = re.split(r'(\d.*)', section)
        if len(parts) < 2:
            # No numeric value in this section; nothing to configure.
            continue
        key, val = parts[:2]
        if key == 'mstd':
            # noise param injected via hparams for now
            hparams.setdefault('magnitude_std', float(val))
        elif key == 'inc':
            # val is a digit string; bool('0') is True, so it must be parsed
            # as an integer for 'inc0' to actually disable the option.
            if int(val):
                transforms = _RAND_INCREASING_TRANSFORMS
        elif key == 'm':
            magnitude = int(val)
        elif key == 'n':
            num_layers = int(val)
        elif key == 'w':
            weight_idx = int(val)
        else:
            raise AssertionError('Unknown RandAugment config section')
    ra_ops = rand_augment_ops(magnitude=magnitude, hparams=hparams, transforms=transforms)
    choice_weights = None if weight_idx is None else _select_rand_weights(weight_idx)
    return RandAugment(ra_ops, num_layers, choice_weights=choice_weights)
|
||||
|
||||
|
||||
# Default op names for AugMix (used by augmix_ops when no explicit transform
# list is given).
_AUGMIX_TRANSFORMS = [
    'AutoContrast',
    'ColorIncreasing',  # not in paper
    'ContrastIncreasing',  # not in paper
    'BrightnessIncreasing',  # not in paper
    'SharpnessIncreasing',  # not in paper
    'Equalize',
    'Rotate',
    'PosterizeIncreasing',
    'SolarizeIncreasing',
    'ShearX',
    'ShearY',
    'TranslateXRel',
    'TranslateYRel',
]
|
||||
|
||||
|
||||
def augmix_ops(magnitude=10, hparams=None, transforms=None):
    """Build one ``AugmentOp`` (prob 1.0) per transform name for AugMix.

    :param magnitude: magnitude given to every op
    :param hparams: hyper-parameter dict; ``_HPARAMS_DEFAULT`` when falsy
    :param transforms: op names; ``_AUGMIX_TRANSFORMS`` when falsy
    :return: list of ``AugmentOp`` in the order of *transforms*
    """
    settings = hparams or _HPARAMS_DEFAULT
    names = transforms or _AUGMIX_TRANSFORMS
    ops = []
    for op_name in names:
        ops.append(AugmentOp(op_name, prob=1.0, magnitude=magnitude, hparams=settings))
    return ops
|
||||
|
||||
|
||||
class AugMixAugment:
    """AugMix transform.

    Adapted and improved from impl here: https://github.com/google-research/augmix/blob/master/imagenet.py
    From paper: 'AugMix: A Simple Data Processing Method to Improve Robustness and Uncertainty' -
    https://arxiv.org/abs/1912.02781

    Args:
        ops: Sequence of callables; each takes an image and returns an image.
        alpha: Concentration used for both the Dirichlet draw of per-chain
            mixing weights and the Beta draw of the final blend factor.
        width: Number of augmentation chains mixed together.
        depth: Number of ops per chain; a value <= 0 draws a random depth in
            [1, 3] for every chain.
        blended: Use the per-chain PIL blend path instead of the NumPy
            accumulation path.
    """

    def __init__(self, ops, alpha=1., width=3, depth=-1, blended=False):
        self.ops = ops
        self.alpha = alpha
        self.width = width
        self.depth = depth
        self.blended = blended  # blended mode is faster but not well tested

    def _calc_blended_weights(self, ws, m):
        """Turn mixing weights into one blend factor per chain.

        The weights are scaled by ``m`` and then walked from last to first:
        each factor is the weight divided by the running product of the
        ``(1 - factor)`` terms already produced.

        Returns:
            float32 array of factors, in the same order as ``ws``.
        """
        ws = ws * m
        cump = 1.
        rws = []
        for w in ws[::-1]:
            alpha = w / cump
            cump *= (1 - alpha)
            rws.append(alpha)
        return np.array(rws[::-1], dtype=np.float32)

    def _augment_chain(self, img):
        """Apply one randomly sampled chain of ops to ``img`` and return the result."""
        depth = self.depth if self.depth > 0 else np.random.randint(1, 4)
        ops = np.random.choice(self.ops, depth, replace=True)
        img_aug = img  # no ops are in-place, deep copy not necessary
        for op in ops:
            img_aug = op(img_aug)
        return img_aug

    def _apply_blended(self, img, mixing_weights, m):
        # This is my first crack and implementing a slightly faster mixed augmentation. Instead
        # of accumulating the mix for each chain in a Numpy array and then blending with original,
        # it recomputes the blending coefficients and applies one PIL image blend per chain.
        # TODO the results appear in the right ballpark but they differ by more than rounding.
        img_orig = img.copy()
        ws = self._calc_blended_weights(mixing_weights, m)
        for w in ws:
            img_aug = self._augment_chain(img_orig)
            img = Image.blend(img, img_aug, w)
        return img

    def _apply_basic(self, img, mixing_weights, m):
        # This is a literal adaptation of the paper/official implementation without normalizations and
        # PIL <-> Numpy conversions between every op. It is still quite CPU compute heavy compared to the
        # typical augmentation transforms, could use a GPU / Kornia implementation.
        # PIL reports size as (width, height) while np.asarray(img) is laid out
        # (height, width, channels); the accumulator must use the NumPy layout
        # or non-square images cannot be added to it.
        img_width, img_height = img.size
        mixed = np.zeros((img_height, img_width, len(img.getbands())), dtype=np.float32)
        for mw in mixing_weights:
            img_aug = self._augment_chain(img)
            mixed += mw * np.asarray(img_aug, dtype=np.float32)
        np.clip(mixed, 0, 255., out=mixed)
        mixed = Image.fromarray(mixed.astype(np.uint8))
        return Image.blend(img, mixed, m)

    def __call__(self, img):
        """Return an AugMix-augmented version of ``img``."""
        mixing_weights = np.float32(np.random.dirichlet([self.alpha] * self.width))
        m = np.float32(np.random.beta(self.alpha, self.alpha))
        if self.blended:
            mixed = self._apply_blended(img, mixing_weights, m)
        else:
            mixed = self._apply_basic(img, mixing_weights, m)
        return mixed
|
||||
|
||||
|
||||
def augment_and_mix_transform(config_str, hparams):
    """Create AugMix PyTorch transform

    :param config_str: String defining configuration of the AugMix augmentation. Consists of multiple
    sections separated by dashes ('-'). The first section must be 'augmix'. The remaining sections,
    not order specific, determine
        'm' - integer magnitude (severity) of augmentation mix (default: 3)
        'w' - integer width of augmentation chain (default: 3)
        'd' - integer depth of augmentation chain (-1 is random [1, 3], default: -1)
        'a' - float alpha of the Dirichlet / Beta sampling distributions (default: 1.0)
        'b' - integer (bool), blend each branch of chain into end result without a final blend, less CPU (default: 0)
        'mstd' - float std deviation of magnitude noise applied (default: 0)
    Ex 'augmix-m5-w4-d2' results in AugMix with severity 5, chain width 4, chain depth 2

    :param hparams: Other hparams (kwargs) for the Augmentation transforms. NOTE: when an 'mstd'
    section is present this dict is updated in place (``magnitude_std`` is set if absent).

    :return: A PyTorch compatible Transform
    """
    magnitude = 3
    width = 3
    depth = -1
    alpha = 1.
    blended = False
    config = config_str.split('-')
    assert config[0] == 'augmix'
    config = config[1:]
    for c in config:
        # Split "<key><value>" at the first digit, e.g. 'mstd0.5' -> 'mstd', '0.5'.
        cs = re.split(r'(\d.*)', c)
        if len(cs) < 2:
            # Section without a numeric value: ignored.
            continue
        key, val = cs[:2]
        if key == 'mstd':
            # noise param injected via hparams for now
            hparams.setdefault('magnitude_std', float(val))
        elif key == 'm':
            magnitude = int(val)
        elif key == 'w':
            width = int(val)
        elif key == 'd':
            depth = int(val)
        elif key == 'a':
            alpha = float(val)
        elif key == 'b':
            # val is a string; bool('0') is True, so convert to int first.
            blended = bool(int(val))
        else:
            assert False, 'Unknown AugMix config section'
    ops = augmix_ops(magnitude=magnitude, hparams=hparams)
    return AugMixAugment(ops, alpha=alpha, width=width, depth=depth, blended=blended)
|
||||
+432
@@ -0,0 +1,432 @@
|
||||
"""model.py - Model and module class for EfficientNet.
|
||||
They are built to mirror those in the official TensorFlow implementation.
|
||||
"""
|
||||
|
||||
# Author: lukemelas (github username)
|
||||
# Github repo: https://github.com/lukemelas/EfficientNet-PyTorch
|
||||
# With adjustments and added comments by workingcoder (github username).
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
from .utils import (
|
||||
round_filters,
|
||||
round_repeats,
|
||||
drop_connect,
|
||||
get_same_padding_conv2d,
|
||||
get_model_params,
|
||||
efficientnet_params,
|
||||
load_pretrained_weights,
|
||||
Swish,
|
||||
MemoryEfficientSwish,
|
||||
calculate_output_image_size
|
||||
)
|
||||
|
||||
class MBConvBlock(nn.Module):
    """Mobile Inverted Residual Bottleneck (MBConv) block.

    Args:
        block_args (namedtuple): BlockArgs, defined in utils.py.
        global_params (namedtuple): GlobalParam, defined in utils.py.
        image_size (tuple or list): [image_height, image_width].

    References:
        [1] https://arxiv.org/abs/1704.04861 (MobileNet v1)
        [2] https://arxiv.org/abs/1801.04381 (MobileNet v2)
        [3] https://arxiv.org/abs/1905.02244 (MobileNet v3)
    """

    def __init__(self, block_args, global_params, image_size=None):
        super().__init__()
        self._block_args = block_args
        # PyTorch's batch-norm momentum is the complement of TensorFlow's.
        self._bn_mom = 1 - global_params.batch_norm_momentum
        self._bn_eps = global_params.batch_norm_epsilon
        se_ratio = self._block_args.se_ratio
        self.has_se = (se_ratio is not None) and (0 < se_ratio <= 1)
        # Whether the block may use the skip connection (and drop connect).
        self.id_skip = block_args.id_skip

        # --- Expansion phase (inverted bottleneck) ---
        in_ch = self._block_args.input_filters
        mid_ch = self._block_args.input_filters * self._block_args.expand_ratio
        if self._block_args.expand_ratio != 1:
            expand_conv_cls = get_same_padding_conv2d(image_size=image_size)
            self._expand_conv = expand_conv_cls(
                in_channels=in_ch, out_channels=mid_ch, kernel_size=1, bias=False)
            self._bn0 = nn.BatchNorm2d(
                num_features=mid_ch, momentum=self._bn_mom, eps=self._bn_eps)
            # A 1x1 stride-1 conv leaves image_size unchanged, so no update here.

        # --- Depthwise convolution phase ---
        kernel = self._block_args.kernel_size
        stride = self._block_args.stride
        depthwise_conv_cls = get_same_padding_conv2d(image_size=image_size)
        # groups == channels makes the convolution depthwise.
        self._depthwise_conv = depthwise_conv_cls(
            in_channels=mid_ch, out_channels=mid_ch, groups=mid_ch,
            kernel_size=kernel, stride=stride, bias=False)
        self._bn1 = nn.BatchNorm2d(
            num_features=mid_ch, momentum=self._bn_mom, eps=self._bn_eps)
        image_size = calculate_output_image_size(image_size, stride)

        # --- Squeeze and Excitation layer, if desired ---
        if self.has_se:
            se_conv_cls = get_same_padding_conv2d(image_size=(1,1))
            squeezed_ch = max(
                1, int(self._block_args.input_filters * self._block_args.se_ratio))
            self._se_reduce = se_conv_cls(
                in_channels=mid_ch, out_channels=squeezed_ch, kernel_size=1)
            self._se_expand = se_conv_cls(
                in_channels=squeezed_ch, out_channels=mid_ch, kernel_size=1)

        # --- Pointwise (projection) convolution phase ---
        out_ch = self._block_args.output_filters
        project_conv_cls = get_same_padding_conv2d(image_size=image_size)
        self._project_conv = project_conv_cls(
            in_channels=mid_ch, out_channels=out_ch, kernel_size=1, bias=False)
        self._bn2 = nn.BatchNorm2d(
            num_features=out_ch, momentum=self._bn_mom, eps=self._bn_eps)
        self._swish = MemoryEfficientSwish()

    def forward(self, inputs, drop_connect_rate=None):
        """MBConvBlock's forward function.

        Args:
            inputs (tensor): Input tensor.
            drop_connect_rate (float, optional): Drop connect rate, between 0 and 1.
                A falsy value disables drop connect.

        Returns:
            Output of this block after processing.
        """
        args = self._block_args

        # Expansion (only when the block expands) followed by depthwise conv.
        out = inputs
        if args.expand_ratio != 1:
            out = self._swish(self._bn0(self._expand_conv(inputs)))
        out = self._swish(self._bn1(self._depthwise_conv(out)))

        # Squeeze and Excitation: gate the features with a sigmoid of the pooled
        # and transformed features.
        if self.has_se:
            gate = F.adaptive_avg_pool2d(out, 1)
            gate = self._se_reduce(gate)
            gate = self._swish(gate)
            gate = self._se_expand(gate)
            out = torch.sigmoid(gate) * out

        # Pointwise projection.
        out = self._bn2(self._project_conv(out))

        # Skip connection and drop connect; together they give stochastic depth.
        can_skip = (self.id_skip and args.stride == 1
                    and args.input_filters == args.output_filters)
        if not can_skip:
            return out
        if drop_connect_rate:
            out = drop_connect(out, p=drop_connect_rate, training=self.training)
        return out + inputs

    def set_swish(self, memory_efficient=True):
        """Sets swish function as memory efficient (for training) or standard (for export).

        Args:
            memory_efficient (bool): Whether to use memory-efficient version of swish.
        """
        if memory_efficient:
            self._swish = MemoryEfficientSwish()
        else:
            self._swish = Swish()
|
||||
|
||||
|
||||
class EfficientNet(nn.Module):
    """EfficientNet model.
       Most easily loaded with the .from_name or .from_pretrained methods.

    Args:
        blocks_args (list[namedtuple]): A list of BlockArgs to construct blocks.
        global_params (namedtuple): A set of GlobalParams shared between blocks.

    References:
        [1] https://arxiv.org/abs/1905.11946 (EfficientNet)

    Example:
        >>> import torch
        >>> from efficientnet.model import EfficientNet
        >>> inputs = torch.rand(1, 3, 224, 224)
        >>> model = EfficientNet.from_pretrained('efficientnet-b0')
        >>> model.eval()
        >>> outputs = model(inputs)
    """

    def __init__(self, blocks_args=None, global_params=None):
        super().__init__()
        assert isinstance(blocks_args, list), 'blocks_args should be a list'
        assert len(blocks_args) > 0, 'block args must be greater than 0'
        self._global_params = global_params
        self._blocks_args = blocks_args

        # Batch norm parameters (PyTorch momentum is the complement of TensorFlow's).
        bn_mom = 1 - self._global_params.batch_norm_momentum
        bn_eps = self._global_params.batch_norm_epsilon

        # Get stem static or dynamic convolution depending on image size
        image_size = global_params.image_size
        Conv2d = get_same_padding_conv2d(image_size=image_size)

        # Stem
        in_channels = 3  # rgb
        out_channels = round_filters(32, self._global_params)  # number of output channels
        self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
        self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
        image_size = calculate_output_image_size(image_size, 2)

        # Build blocks
        self._blocks = nn.ModuleList([])
        for block_args in self._blocks_args:

            # Update block input and output filters based on depth multiplier.
            block_args = block_args._replace(
                input_filters=round_filters(block_args.input_filters, self._global_params),
                output_filters=round_filters(block_args.output_filters, self._global_params),
                num_repeat=round_repeats(block_args.num_repeat, self._global_params)
            )

            # The first block needs to take care of stride and filter size increase.
            self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size))
            image_size = calculate_output_image_size(image_size, block_args.stride)
            if block_args.num_repeat > 1:  # modify block_args to keep same output size
                block_args = block_args._replace(input_filters=block_args.output_filters, stride=1)
            for _ in range(block_args.num_repeat - 1):
                # Repeats use stride 1, so image_size does not change here.
                self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size))

        # Head
        in_channels = block_args.output_filters  # output of final block
        out_channels = round_filters(1280, self._global_params)
        Conv2d = get_same_padding_conv2d(image_size=image_size)
        self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)

        # Final linear layer
        self._avg_pooling = nn.AdaptiveAvgPool2d(1)
        self._dropout = nn.Dropout(self._global_params.dropout_rate)
        self._fc = nn.Linear(out_channels, self._global_params.num_classes)
        self._swish = MemoryEfficientSwish()

    def set_swish(self, memory_efficient=True):
        """Sets swish function as memory efficient (for training) or standard (for export).

        Args:
            memory_efficient (bool): Whether to use memory-efficient version of swish.
        """
        self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
        for block in self._blocks:
            block.set_swish(memory_efficient)

    def _scaled_drop_connect_rate(self, idx):
        """Return the drop connect rate for block ``idx``.

        The global rate is scaled by ``idx / number_of_blocks``; a falsy global
        rate (None or 0) is returned unchanged.
        """
        drop_connect_rate = self._global_params.drop_connect_rate
        if drop_connect_rate:
            drop_connect_rate *= float(idx) / len(self._blocks)  # scale drop connect_rate
        return drop_connect_rate

    def extract_endpoints(self, inputs):
        """Use convolution layer to extract features
        from reduction levels i in [1, 2, 3, 4, 5].

        Args:
            inputs (tensor): Input tensor.

        Returns:
            Dictionary of last intermediate features
            with reduction levels i in [1, 2, 3, 4, 5].
            Example:
                >>> import torch
                >>> from efficientnet.model import EfficientNet
                >>> inputs = torch.rand(1, 3, 224, 224)
                >>> model = EfficientNet.from_pretrained('efficientnet-b0')
                >>> endpoints = model.extract_endpoints(inputs)
                >>> print(endpoints['reduction_1'].shape)  # torch.Size([1, 16, 112, 112])
                >>> print(endpoints['reduction_2'].shape)  # torch.Size([1, 24, 56, 56])
                >>> print(endpoints['reduction_3'].shape)  # torch.Size([1, 40, 28, 28])
                >>> print(endpoints['reduction_4'].shape)  # torch.Size([1, 112, 14, 14])
                >>> print(endpoints['reduction_5'].shape)  # torch.Size([1, 1280, 7, 7])
        """
        endpoints = dict()

        # Stem
        x = self._swish(self._bn0(self._conv_stem(inputs)))
        prev_x = x

        # Blocks
        for idx, block in enumerate(self._blocks):
            x = block(x, drop_connect_rate=self._scaled_drop_connect_rate(idx))
            # A drop in size along dim 2 means the previous output was the last
            # feature map at its resolution: record it as an endpoint.
            if prev_x.size(2) > x.size(2):
                endpoints[f'reduction_{len(endpoints)+1}'] = prev_x
            prev_x = x

        # Head
        x = self._swish(self._bn1(self._conv_head(x)))
        endpoints[f'reduction_{len(endpoints)+1}'] = x

        return endpoints

    def extract_features(self, inputs):
        """use convolution layer to extract feature .

        Args:
            inputs (tensor): Input tensor.

        Returns:
            Output of the final convolution
            layer in the efficientnet model.
        """
        # Stem
        x = self._swish(self._bn0(self._conv_stem(inputs)))

        # Blocks
        for idx, block in enumerate(self._blocks):
            x = block(x, drop_connect_rate=self._scaled_drop_connect_rate(idx))

        # Head
        x = self._swish(self._bn1(self._conv_head(x)))

        return x

    def forward(self, inputs):
        """EfficientNet's forward function.
           Calls extract_features to extract features, applies final linear layer, and returns logits.

        Args:
            inputs (tensor): Input tensor.

        Returns:
            Output of this model after processing.
        """
        # Convolution layers
        x = self.extract_features(inputs)

        # Pooling and final linear layer
        x = self._avg_pooling(x)
        x = torch.flatten(x, start_dim=1)
        x = self._dropout(x)
        x = self._fc(x)

        return x

    @classmethod
    def from_name(cls, model_name, in_channels=3, **override_params):
        """create an efficientnet model according to name.

        Args:
            model_name (str): Name for efficientnet.
            in_channels (int): Input data's channel number.
            override_params (other key word params):
                Params to override model's global_params.
                Optional key:
                    'width_coefficient', 'depth_coefficient',
                    'image_size', 'dropout_rate',
                    'num_classes', 'batch_norm_momentum',
                    'batch_norm_epsilon', 'drop_connect_rate',
                    'depth_divisor', 'min_depth'

        Returns:
            An efficientnet model.

        Raises:
            ValueError: If ``model_name`` is not a supported model name.
        """
        cls._check_model_name_is_valid(model_name)
        blocks_args, global_params = get_model_params(model_name, override_params)
        model = cls(blocks_args, global_params)
        model._change_in_channels(in_channels)
        return model

    @classmethod
    def from_pretrained(cls, model_name, weights_path=None, advprop=False,
                        in_channels=3, num_classes=1000, **override_params):
        """create an efficientnet model according to name.

        Args:
            model_name (str): Name for efficientnet.
            weights_path (None or str):
                str: path to pretrained weights file on the local disk.
                None: use pretrained weights downloaded from the Internet.
            advprop (bool):
                Whether to load pretrained weights
                trained with advprop (valid when weights_path is None).
            in_channels (int): Input data's channel number.
            num_classes (int):
                Number of categories for classification.
                It controls the output size for final linear layer.
                The fc weights are only loaded when this equals 1000.
            override_params (other key word params):
                Params to override model's global_params.
                Optional key:
                    'width_coefficient', 'depth_coefficient',
                    'image_size', 'dropout_rate',
                    'num_classes', 'batch_norm_momentum',
                    'batch_norm_epsilon', 'drop_connect_rate',
                    'depth_divisor', 'min_depth'

        Returns:
            A pretrained efficientnet model.
        """
        model = cls.from_name(model_name, num_classes = num_classes, **override_params)
        load_pretrained_weights(model, model_name, weights_path=weights_path, load_fc=(num_classes == 1000), advprop=advprop)
        # Replace the stem only after the weights are loaded, when in_channels != 3.
        model._change_in_channels(in_channels)
        return model

    @classmethod
    def get_image_size(cls, model_name):
        """Get the input image size for a given efficientnet model.

        Args:
            model_name (str): Name for efficientnet.

        Returns:
            Input image size (resolution).

        Raises:
            ValueError: If ``model_name`` is not a supported model name.
        """
        cls._check_model_name_is_valid(model_name)
        _, _, res, _ = efficientnet_params(model_name)
        return res

    @classmethod
    def _check_model_name_is_valid(cls, model_name):
        """Validates model name.

        Args:
            model_name (str): Name for efficientnet.

        Raises:
            ValueError: If ``model_name`` is not one of 'efficientnet-b0' ..
                'efficientnet-b8' or 'efficientnet-l2'.
        """
        valid_models = ['efficientnet-b'+str(i) for i in range(9)]

        # Support the construction of 'efficientnet-l2' without pretrained weights
        valid_models += ['efficientnet-l2']

        if model_name not in valid_models:
            raise ValueError('model_name should be one of: ' + ', '.join(valid_models))

    def _change_in_channels(self, in_channels):
        """Adjust model's first convolution layer to in_channels, if in_channels not equals 3.

        The replacement stem convolution is newly constructed, so any weights
        previously held by the stem are discarded.

        Args:
            in_channels (int): Input data's channel number.
        """
        if in_channels != 3:
            Conv2d = get_same_padding_conv2d(image_size = self._global_params.image_size)
            out_channels = round_filters(32, self._global_params)
            self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
|
||||
+7
@@ -0,0 +1,7 @@
|
||||
# Module-wide device id. Initialised to None so that get_value() called before
# set_value() returns None instead of raising NameError.
_npu_id = None


def set_value(value):
    """Store ``value`` as the module-wide device id and print a confirmation."""
    global _npu_id
    _npu_id = value
    print('set device id %s success'%_npu_id)


def get_value():
    """Return the device id stored by ``set_value``, or None if none was stored."""
    return _npu_id
|
||||
+122
@@ -0,0 +1,122 @@
|
||||
import torch
|
||||
from torch.optim import Optimizer
|
||||
|
||||
|
||||
class RMSpropTF(Optimizer):
    """Implements RMSprop algorithm (TensorFlow style epsilon)

    NOTE: This is a direct cut-and-paste of PyTorch RMSprop with eps applied before sqrt
    to closer match Tensorflow for matching hyper-params.

    Proposed by G. Hinton in his
    `course <http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf>`_.

    The centered version first appears in `Generating Sequences
    With Recurrent Neural Networks <https://arxiv.org/pdf/1308.0850v5.pdf>`_.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-2)
        momentum (float, optional): momentum factor (default: 0)
        alpha (float, optional): smoothing (decay) constant (default: 0.9)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-10)
        centered (bool, optional) : if ``True``, compute the centered RMSProp,
            the gradient is normalized by an estimation of its variance
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        decoupled_decay (bool, optional): decoupled weight decay as per https://arxiv.org/abs/1711.05101
        lr_in_momentum (bool, optional): learning rate scaling is included in the momentum buffer
            update as per defaults in Tensorflow

    Raises:
        ValueError: if ``lr``, ``eps``, ``momentum``, ``weight_decay`` or ``alpha`` is negative.
    """

    def __init__(self, params, lr=1e-2, alpha=0.9, eps=1e-10, weight_decay=0, momentum=0., centered=False,
                 decoupled_decay=False, lr_in_momentum=True):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= momentum:
            raise ValueError("Invalid momentum value: {}".format(momentum))
        if not 0.0 <= weight_decay:
            raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
        if not 0.0 <= alpha:
            raise ValueError("Invalid alpha value: {}".format(alpha))

        defaults = dict(lr=lr, momentum=momentum, alpha=alpha, eps=eps, centered=centered, weight_decay=weight_decay,
                        decoupled_decay=decoupled_decay, lr_in_momentum=lr_in_momentum)
        super(RMSpropTF, self).__init__(params, defaults)

    def __setstate__(self, state):
        """Restore optimizer state, filling in 'momentum' and 'centered' if a group lacks them."""
        super(RMSpropTF, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('momentum', 0)
            group.setdefault('centered', False)

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.

        Returns:
            The value returned by ``closure``, or None when no closure is given.

        Raises:
            RuntimeError: if a parameter has a sparse gradient.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError('RMSprop does not support sparse gradients')
                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    state['square_avg'] = torch.ones_like(p.data)  # PyTorch inits to zero
                    if group['momentum'] > 0:
                        state['momentum_buffer'] = torch.zeros_like(p.data)
                    if group['centered']:
                        state['grad_avg'] = torch.zeros_like(p.data)

                square_avg = state['square_avg']
                one_minus_alpha = 1. - group['alpha']

                state['step'] += 1

                # Scalar multipliers are passed by keyword (alpha= / value=); the
                # positional-scalar overloads used previously are deprecated.
                if group['weight_decay'] != 0:
                    if 'decoupled_decay' in group and group['decoupled_decay']:
                        # Decoupled: shrink the weights directly.
                        p.data.add_(p.data, alpha=-group['weight_decay'])
                    else:
                        # L2 penalty: fold the decay term into the gradient.
                        grad = grad.add(p.data, alpha=group['weight_decay'])

                # Tensorflow order of ops for updating squared avg
                square_avg.add_(grad.pow(2) - square_avg, alpha=one_minus_alpha)

                if group['centered']:
                    grad_avg = state['grad_avg']
                    grad_avg.add_(grad - grad_avg, alpha=one_minus_alpha)
                    # eps moved in sqrt
                    avg = square_avg.addcmul(grad_avg, grad_avg, value=-1).add(group['eps']).sqrt_()
                else:
                    avg = square_avg.add(group['eps']).sqrt_()  # eps moved in sqrt

                if group['momentum'] > 0:
                    buf = state['momentum_buffer']
                    # Tensorflow accumulates the LR scaling in the momentum buffer
                    if 'lr_in_momentum' in group and group['lr_in_momentum']:
                        buf.mul_(group['momentum']).addcdiv_(grad, avg, value=group['lr'])
                        p.data.add_(-buf)
                    else:
                        # PyTorch scales the param update by LR
                        buf.mul_(group['momentum']).addcdiv_(grad, avg)
                        p.data.add_(buf, alpha=-group['lr'])
                else:
                    p.data.addcdiv_(grad, avg, value=-group['lr'])

        return loss
|
||||
+624
@@ -0,0 +1,624 @@
|
||||
"""utils.py - Helper functions for building the model and for loading model parameters.
|
||||
These helper functions are built to mirror those in the official TensorFlow implementation.
|
||||
"""
|
||||
|
||||
# Author: lukemelas (github username)
|
||||
# Github repo: https://github.com/lukemelas/EfficientNet-PyTorch
|
||||
# With adjustments and added comments by workingcoder (github username).
|
||||
|
||||
import re
|
||||
import math
|
||||
import collections
|
||||
from functools import partial
|
||||
import numpy as np
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
from torch.utils import model_zoo
|
||||
from . import npu_info
|
||||
|
||||
################################################################################
|
||||
### Helper functions for model architecture
|
||||
################################################################################
|
||||
|
||||
# GlobalParams and BlockArgs: Two namedtuples
|
||||
# Swish and MemoryEfficientSwish: Two implementations of the method
|
||||
# round_filters and round_repeats:
|
||||
# Functions to calculate params for scaling model width and depth ! ! !
|
||||
# get_width_and_height_from_size and calculate_output_image_size
|
||||
# drop_connect: A structural design
|
||||
# get_same_padding_conv2d:
|
||||
# Conv2dDynamicSamePadding
|
||||
# Conv2dStaticSamePadding
|
||||
# get_same_padding_maxPool2d:
|
||||
# MaxPool2dDynamicSamePadding
|
||||
# MaxPool2dStaticSamePadding
|
||||
# It's an additional function, not used in EfficientNet,
|
||||
# but can be used in other model (such as EfficientDet).
|
||||
# Identity: An implementation of identical mapping
|
||||
|
||||
# Parameters for the entire model (stem, all blocks, and head)
|
||||
# Model-wide hyper-parameters (stem, all blocks, and head).
GlobalParams = collections.namedtuple(
    'GlobalParams',
    [
        'width_coefficient',
        'depth_coefficient',
        'image_size',
        'dropout_rate',
        'num_classes',
        'batch_norm_momentum',
        'batch_norm_epsilon',
        'drop_connect_rate',
        'depth_divisor',
        'min_depth',
    ],
)

# Hyper-parameters of one model block.
BlockArgs = collections.namedtuple(
    'BlockArgs',
    [
        'num_repeat',
        'kernel_size',
        'stride',
        'expand_ratio',
        'input_filters',
        'output_filters',
        'se_ratio',
        'id_skip',
    ],
)

# Give every field of both namedtuples a default of None so that instances can
# be created with only some of the fields supplied.
GlobalParams.__new__.__defaults__ = tuple(None for _ in GlobalParams._fields)
BlockArgs.__new__.__defaults__ = tuple(None for _ in BlockArgs._fields)
|
||||
|
||||
|
||||
# An ordinary implementation of Swish function
|
||||
class Swish(nn.Module):
    """Plain Swish activation: ``x * sigmoid(x)``."""

    def forward(self, x):
        gate = torch.sigmoid(x)
        return x * gate
|
||||
|
||||
# A memory-efficient implementation of Swish function
|
||||
class SwishImplementation(torch.autograd.Function):
    """Swish as a custom autograd function.

    Only the input is saved for the backward pass; the sigmoid is recomputed
    there.
    """

    @staticmethod
    def forward(ctx, i):
        ctx.save_for_backward(i)
        return i * torch.sigmoid(i)

    @staticmethod
    def backward(ctx, grad_output):
        (i,) = ctx.saved_tensors
        sigmoid_i = torch.sigmoid(i)
        # d/di [i * sigmoid(i)] = sigmoid(i) * (1 + i * (1 - sigmoid(i)))
        local_grad = sigmoid_i * (1 + i * (1 - sigmoid_i))
        return grad_output * local_grad
|
||||
|
||||
class MemoryEfficientSwish(nn.Module):
    """Swish activation module backed by ``SwishImplementation``."""

    def forward(self, x):
        activated = SwishImplementation.apply(x)
        return activated
|
||||
|
||||
|
||||
def round_filters(filters, global_params):
    """Scale a filter count by the width multiplier and round it to the divisor.

    Reads width_coefficient, depth_divisor and min_depth from global_params.

    Args:
        filters (int): Filters number to be calculated.
        global_params (namedtuple): Global params of the model.

    Returns:
        new_filters: New filters number after calculating. When
        width_coefficient is falsy, ``filters`` is returned unchanged.
    """
    width_mult = global_params.width_coefficient
    if not width_mult:
        return filters

    # NOTE: these are really a width divisor / minimum width, despite the names.
    divisor = global_params.depth_divisor
    # A falsy min_depth falls back to the divisor.
    lower_bound = global_params.min_depth or divisor

    scaled = filters * width_mult
    # Same formula as the official TensorFlow implementation: round to the
    # nearest multiple of divisor, but never below lower_bound.
    rounded = max(lower_bound, int(scaled + divisor / 2) // divisor * divisor)
    if rounded < 0.9 * scaled:
        # Rounding down must not remove more than 10% of the scaled value.
        rounded += divisor
    return int(rounded)
|
||||
|
||||
|
||||
def round_repeats(repeats, global_params):
    """Scale a block's repeat count by the depth multiplier, rounding up.

    Reads depth_coefficient from global_params.

    Args:
        repeats (int): num_repeat to be calculated.
        global_params (namedtuple): Global params of the model.

    Returns:
        new repeat: New repeat number after calculating. When
        depth_coefficient is falsy, ``repeats`` is returned unchanged.
    """
    depth_mult = global_params.depth_coefficient
    if depth_mult:
        # Same formula as the official TensorFlow implementation.
        return int(math.ceil(depth_mult * repeats))
    return repeats
|
||||
|
||||
|
||||
def drop_connect(inputs, p, training):
    """Drop connect: randomly zero whole samples and rescale the survivors.

    Args:
        inputs (tensor: BCWH): Input of this structure.
        p (float: 0.0~1.0): Probability of dropping a sample's connection.
        training (bool): The running mode; nothing is dropped when False.

    Returns:
        output: ``inputs`` itself in eval mode, otherwise ``inputs`` multiplied
            by a per-sample mask that is 0 with probability ``p`` and
            ``1 / (1 - p)`` with probability ``1 - p``.
    """
    assert 0 <= p <= 1, 'p must be in range of [0,1]'

    if not training:
        return inputs

    keep_prob = 1 - p
    if keep_prob == 0:
        # p == 1 drops every sample. The general formula below would divide
        # by zero and yield NaN, so return an all-zero result directly.
        return inputs * 0

    batch_size = inputs.shape[0]

    # generate binary_tensor mask according to probability (p for 0, 1-p for 1):
    # keep_prob + U[0, 1) floors to 1 with probability keep_prob, else to 0.
    random_tensor = keep_prob + torch.rand(
        [batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device)
    binary_tensor = torch.floor(random_tensor) / keep_prob

    return inputs * binary_tensor
|
||||
|
||||
|
||||
def get_width_and_height_from_size(x):
    """Normalise a size specification into a two-element (H, W) value.

    Args:
        x (int, tuple or list): Data size.

    Returns:
        size: ``(x, x)`` for an int; the list/tuple itself otherwise.

    Raises:
        TypeError: If ``x`` is not an int, list or tuple.
    """
    if isinstance(x, int):
        return x, x
    if isinstance(x, (list, tuple)):
        return x
    raise TypeError()
|
||||
|
||||
|
||||
def calculate_output_image_size(input_image_size, stride):
    """Compute the output image size of a Conv2dSamePadding layer with a stride.

    Necessary for static padding. Thanks to mannatsingh for pointing this out.

    Args:
        input_image_size (int, tuple or list): Size of input image.
        stride (int, tuple or list): Conv2d operation's stride. For a
            tuple/list only the first element is used, for both dimensions.

    Returns:
        output_image_size: A list [H, W], or None when the input size is None.
    """
    if input_image_size is None:
        return None

    height, width = get_width_and_height_from_size(input_image_size)
    if not isinstance(stride, int):
        stride = stride[0]
    return [int(math.ceil(extent / stride)) for extent in (height, width)]
|
||||
|
||||
|
||||
# Note:
|
||||
# The following 'SamePadding' functions make output size equal ceil(input size/stride).
|
||||
# Only when stride equals 1, can the output size be the same as input size.
|
||||
# Don't be confused by their function names ! ! !
|
||||
|
||||
def get_same_padding_conv2d(image_size=None):
    """Pick the 'same padding' Conv2d flavour for the given image size.

    Static padding is chosen when an image size is specified, dynamic padding
    otherwise. Static padding is necessary for ONNX exporting of models.

    Args:
        image_size (int or tuple): Size of the image.

    Returns:
        Conv2dDynamicSamePadding, or Conv2dStaticSamePadding with
        ``image_size`` already bound.
    """
    if image_size is not None:
        return partial(Conv2dStaticSamePadding, image_size=image_size)
    return Conv2dDynamicSamePadding
|
||||
|
||||
|
||||
class Conv2dDynamicSamePadding(nn.Conv2d):
    """2D convolution with TensorFlow-like padding for a dynamic image size.

    The padding amount is computed from the actual input in every forward call.
    """

    # Tips for 'SAME' mode padding.
    #     Given the following:
    #         i: width or height
    #         s: stride
    #         k: kernel size
    #         d: dilation
    #         p: padding
    #     Output after Conv2d:
    #         o = floor((i+p-((k-1)*d+1))/s+1)
    # If o equals i, i = floor((i+p-((k-1)*d+1))/s+1),
    # => p = (i-1)*s+((k-1)*d+1)-i

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True):
        # The built-in padding is fixed to 0; forward() pads the input explicitly.
        super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias)
        if len(self.stride) != 2:
            self.stride = [self.stride[0]] * 2

    def forward(self, x):
        in_h, in_w = x.size()[-2:]
        kernel_h, kernel_w = self.weight.size()[-2:]
        stride_h, stride_w = self.stride
        dilation_h, dilation_w = self.dilation

        # The output size follows the stride: ceil(input / stride).
        out_h = math.ceil(in_h / stride_h)
        out_w = math.ceil(in_w / stride_w)

        # Total padding needed along each axis to reach that output size.
        extra_h = max((out_h - 1) * stride_h + (kernel_h - 1) * dilation_h + 1 - in_h, 0)
        extra_w = max((out_w - 1) * stride_w + (kernel_w - 1) * dilation_w + 1 - in_w, 0)

        if extra_h > 0 or extra_w > 0:
            # An odd amount puts the extra pixel on the right/bottom side.
            left, top = extra_w // 2, extra_h // 2
            x = F.pad(x, [left, extra_w - left, top, extra_h - top])
        return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
|
||||
|
||||
|
||||
class Conv2dStaticSamePadding(nn.Conv2d):
    """2D convolution like TensorFlow's 'SAME' mode, for a known input image size.

    The padding is worked out once in the constructor, using the same
    calculation as Conv2dDynamicSamePadding. Note that forward() does not call
    ``static_padding``; when padding is required the constructor also sets a
    symmetric ``self.padding`` and the convolution relies on that instead.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, image_size=None, **kwargs):
        super().__init__(in_channels, out_channels, kernel_size, stride, **kwargs)
        if len(self.stride) != 2:
            self.stride = [self.stride[0]] * 2

        # Calculate padding based on image size and save it
        assert image_size is not None
        if isinstance(image_size, int):
            in_h = in_w = image_size
        else:
            in_h, in_w = image_size
        kernel_h, kernel_w = self.weight.size()[-2:]
        stride_h, stride_w = self.stride
        dilation_h, dilation_w = self.dilation
        out_h = math.ceil(in_h / stride_h)
        out_w = math.ceil(in_w / stride_w)
        extra_h = max((out_h - 1) * stride_h + (kernel_h - 1) * dilation_h + 1 - in_h, 0)
        extra_w = max((out_w - 1) * stride_w + (kernel_w - 1) * dilation_w + 1 - in_w, 0)

        if extra_h <= 0 and extra_w <= 0:
            self.static_padding = Identity()
            return

        left, top = extra_w // 2, extra_h // 2
        self.static_padding = nn.ZeroPad2d((left, extra_w - left, top, extra_h - top))
        # (k - 1) // 2 for odd k and k // 2 for even k are the same value: k // 2.
        self.padding = kernel_h // 2

    def forward(self, x):
        return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
|
||||
|
||||
|
||||
def get_same_padding_maxPool2d(image_size=None):
    """Pick the 'same padding' MaxPool2d flavour for the given image size.

    Static padding is chosen when an image size is specified, dynamic padding
    otherwise. Static padding is necessary for ONNX exporting of models.

    Args:
        image_size (int or tuple): Size of the image.

    Returns:
        MaxPool2dDynamicSamePadding, or MaxPool2dStaticSamePadding with
        ``image_size`` already bound.
    """
    if image_size is not None:
        return partial(MaxPool2dStaticSamePadding, image_size=image_size)
    return MaxPool2dDynamicSamePadding
|
||||
|
||||
|
||||
class MaxPool2dDynamicSamePadding(nn.MaxPool2d):
    """2D max pooling like TensorFlow's 'SAME' mode, for a dynamic image size.

    The padding amount is computed from the actual input in every forward call.
    """

    def __init__(self, kernel_size, stride, padding=0, dilation=1, return_indices=False, ceil_mode=False):
        super().__init__(kernel_size, stride, padding, dilation, return_indices, ceil_mode)
        # Expand scalar hyper-parameters into two-element [h, w] lists.
        for attr in ('stride', 'kernel_size', 'dilation'):
            value = getattr(self, attr)
            if isinstance(value, int):
                setattr(self, attr, [value] * 2)

    def forward(self, x):
        in_h, in_w = x.size()[-2:]
        kernel_h, kernel_w = self.kernel_size
        stride_h, stride_w = self.stride
        dilation_h, dilation_w = self.dilation[0], self.dilation[1]

        out_h = math.ceil(in_h / stride_h)
        out_w = math.ceil(in_w / stride_w)
        extra_h = max((out_h - 1) * stride_h + (kernel_h - 1) * dilation_h + 1 - in_h, 0)
        extra_w = max((out_w - 1) * stride_w + (kernel_w - 1) * dilation_w + 1 - in_w, 0)

        if extra_h > 0 or extra_w > 0:
            # An odd amount puts the extra pixel on the right/bottom side.
            left, top = extra_w // 2, extra_h // 2
            x = F.pad(x, [left, extra_w - left, top, extra_h - top])
        return F.max_pool2d(x, self.kernel_size, self.stride, self.padding,
                            self.dilation, self.ceil_mode, self.return_indices)
|
||||
|
||||
class MaxPool2dStaticSamePadding(nn.MaxPool2d):
    """2D max pooling like TensorFlow's 'SAME' mode, for a known input image size.

    The padding module is built once in the constructor and applied in forward.
    """

    def __init__(self, kernel_size, stride, image_size=None, **kwargs):
        super().__init__(kernel_size, stride, **kwargs)
        # Expand scalar hyper-parameters into two-element [h, w] lists.
        for attr in ('stride', 'kernel_size', 'dilation'):
            value = getattr(self, attr)
            if isinstance(value, int):
                setattr(self, attr, [value] * 2)

        # Calculate padding based on image size and save it
        assert image_size is not None
        if isinstance(image_size, int):
            in_h = in_w = image_size
        else:
            in_h, in_w = image_size
        kernel_h, kernel_w = self.kernel_size
        stride_h, stride_w = self.stride
        dilation_h, dilation_w = self.dilation[0], self.dilation[1]
        out_h = math.ceil(in_h / stride_h)
        out_w = math.ceil(in_w / stride_w)
        extra_h = max((out_h - 1) * stride_h + (kernel_h - 1) * dilation_h + 1 - in_h, 0)
        extra_w = max((out_w - 1) * stride_w + (kernel_w - 1) * dilation_w + 1 - in_w, 0)

        if extra_h <= 0 and extra_w <= 0:
            self.static_padding = Identity()
        else:
            left, top = extra_w // 2, extra_h // 2
            self.static_padding = nn.ZeroPad2d((left, extra_w - left, top, extra_h - top))

    def forward(self, x):
        padded = self.static_padding(x)
        return F.max_pool2d(padded, self.kernel_size, self.stride, self.padding,
                            self.dilation, self.ceil_mode, self.return_indices)
|
||||
|
||||
class Identity(nn.Module):
    """Identity mapping.

    A pass-through module: forward returns its argument unchanged.
    """

    def __init__(self):
        super().__init__()

    def forward(self, input):
        # `input` is kept as the parameter name so keyword callers still work.
        return input
|
||||
|
||||
|
||||
################################################################################
|
||||
### Helper functions for loading model params
|
||||
################################################################################
|
||||
|
||||
# BlockDecoder: A Class for encoding and decoding BlockArgs
|
||||
# efficientnet_params: A function to query compound coefficient
|
||||
# get_model_params and efficientnet:
|
||||
# Functions to get BlockArgs and GlobalParams for efficientnet
|
||||
# url_map and url_map_advprop: Dicts of url_map for pretrained weights
|
||||
# load_pretrained_weights: A function to load pretrained weights
|
||||
|
||||
class BlockDecoder(object):
    """Block Decoder for readability,
       straight from the official TensorFlow repository.

    Converts between the compact string notation of a block
    (e.g. 'r1_k3_s11_e1_i32_o16_se0.25_noskip') and BlockArgs namedtuples.
    """

    @staticmethod
    def _decode_block_string(block_string):
        """Get a block through a string notation of arguments.

        Args:
            block_string (str): A string notation of arguments.
                                Examples: 'r1_k3_s11_e1_i32_o16_se0.25_noskip'.

        Returns:
            BlockArgs: The namedtuple defined at the top of this file.
        """
        assert isinstance(block_string, str)

        # Each '_'-separated token is a letter key followed by a numeric value;
        # tokens without digits (such as 'noskip') produce no option entry.
        options = {}
        for op in block_string.split('_'):
            splits = re.split(r'(\d.*)', op)
            if len(splits) >= 2:
                key, value = splits[:2]
                options[key] = value

        # Check stride: it must be present, and be either one digit or two
        # identical digits (only the first digit is kept below).
        stride_code = options.get('s', '')
        assert (len(stride_code) == 1 or
                (len(stride_code) == 2 and stride_code[0] == stride_code[1])), \
            'invalid or missing stride in block string: %s' % block_string

        return BlockArgs(
            num_repeat=int(options['r']),
            kernel_size=int(options['k']),
            stride=[int(stride_code[0])],
            expand_ratio=int(options['e']),
            input_filters=int(options['i']),
            output_filters=int(options['o']),
            se_ratio=float(options['se']) if 'se' in options else None,
            id_skip=('noskip' not in block_string))

    @staticmethod
    def _encode_block_string(block):
        """Encode a block to a string.

        Args:
            block (namedtuple): A BlockArgs type argument.

        Returns:
            block_string: A String form of BlockArgs.
        """
        # Decoding stores the stride under `stride` as a one-element list, so
        # read that field first; `strides` is still accepted for objects that
        # use the older attribute name.
        stride = getattr(block, 'stride', None)
        if stride is None:
            stride = block.strides
        if isinstance(stride, int):
            stride = [stride]
        stride_h = stride[0]
        stride_w = stride[1] if len(stride) > 1 else stride[0]

        args = [
            'r%d' % block.num_repeat,
            'k%d' % block.kernel_size,
            's%d%d' % (stride_h, stride_w),
            'e%s' % block.expand_ratio,
            'i%d' % block.input_filters,
            'o%d' % block.output_filters,
        ]
        # se_ratio is None for blocks decoded without an 'se' token.
        if block.se_ratio is not None and 0 < block.se_ratio <= 1:
            args.append('se%s' % block.se_ratio)
        if block.id_skip is False:
            args.append('noskip')
        return '_'.join(args)

    @staticmethod
    def decode(string_list):
        """Decode a list of string notations to specify blocks inside the network.

        Args:
            string_list (list[str]): A list of strings, each string is a notation of block.

        Returns:
            blocks_args: A list of BlockArgs namedtuples of block args.
        """
        assert isinstance(string_list, list)
        return [BlockDecoder._decode_block_string(block_string)
                for block_string in string_list]

    @staticmethod
    def encode(blocks_args):
        """Encode a list of BlockArgs to a list of strings.

        Args:
            blocks_args (list[namedtuples]): A list of BlockArgs namedtuples of block args.

        Returns:
            block_strings: A list of strings, each string is a notation of block.
        """
        return [BlockDecoder._encode_block_string(block) for block in blocks_args]
|
||||
|
||||
|
||||
def efficientnet_params(model_name):
    """Look up the scaling coefficients of an EfficientNet variant.

    Args:
        model_name (str): Model name to be queried.

    Returns:
        params_dict[model_name]: A (width,depth,res,dropout) tuple.

    Raises:
        KeyError: If ``model_name`` is not one of the known variants.
    """
    rows = (
        # name, width, depth, res, dropout
        ('efficientnet-b0', 1.0, 1.0, 224, 0.2),
        ('efficientnet-b1', 1.0, 1.1, 240, 0.2),
        ('efficientnet-b2', 1.1, 1.2, 260, 0.3),
        ('efficientnet-b3', 1.2, 1.4, 300, 0.3),
        ('efficientnet-b4', 1.4, 1.8, 380, 0.4),
        ('efficientnet-b5', 1.6, 2.2, 456, 0.4),
        ('efficientnet-b6', 1.8, 2.6, 528, 0.5),
        ('efficientnet-b7', 2.0, 3.1, 600, 0.5),
        ('efficientnet-b8', 2.2, 3.6, 672, 0.5),
        ('efficientnet-l2', 4.3, 5.3, 800, 0.5),
    )
    coefficients = {name: tuple(values) for name, *values in rows}
    return coefficients[model_name]
|
||||
|
||||
|
||||
def efficientnet(width_coefficient=None, depth_coefficient=None, image_size=None,
                 dropout_rate=0.2, drop_connect_rate=0.2, num_classes=1000):
    """Build the BlockArgs list and GlobalParams for an efficientnet model.

    Args:
        width_coefficient (float)
        depth_coefficient (float)
        image_size (int)
        dropout_rate (float)
        drop_connect_rate (float)
        num_classes (int)

        Meaning as the name suggests.

    Returns:
        blocks_args, global_params.
    """
    # Block layout for the whole model (efficientnet-b0 by default).
    # It will be modified in the construction of EfficientNet Class according to model
    block_strings = [
        'r1_k3_s11_e1_i32_o16_se0.25',
        'r2_k3_s22_e6_i16_o24_se0.25',
        'r2_k5_s22_e6_i24_o40_se0.25',
        'r3_k3_s22_e6_i40_o80_se0.25',
        'r3_k5_s11_e6_i80_o112_se0.25',
        'r4_k5_s22_e6_i112_o192_se0.25',
        'r1_k3_s11_e6_i192_o320_se0.25',
    ]
    decoded_blocks = BlockDecoder.decode(block_strings)

    # Caller-supplied values first, then the constants fixed for every model.
    settings = dict(
        width_coefficient=width_coefficient,
        depth_coefficient=depth_coefficient,
        image_size=image_size,
        dropout_rate=dropout_rate,
        num_classes=num_classes,
        batch_norm_momentum=0.99,
        batch_norm_epsilon=1e-3,
        drop_connect_rate=drop_connect_rate,
        depth_divisor=8,
        min_depth=None,
    )
    return decoded_blocks, GlobalParams(**settings)
|
||||
|
||||
|
||||
def get_model_params(model_name, override_params):
    """Return the block args and global params for a given model name.

    Args:
        model_name (str): Model's name.
        override_params (dict): A dict to modify global_params.

    Returns:
        blocks_args, global_params

    Raises:
        NotImplementedError: If ``model_name`` does not start with 'efficientnet'.
    """
    if not model_name.startswith('efficientnet'):
        raise NotImplementedError('model name is not pre-defined: %s' % model_name)

    width, depth, resolution, dropout = efficientnet_params(model_name)
    # note: all models have drop connect rate = 0.2
    blocks_args, global_params = efficientnet(
        width_coefficient=width, depth_coefficient=depth,
        dropout_rate=dropout, image_size=resolution)

    if override_params:
        # ValueError will be raised here if override_params has fields not included in global_params.
        global_params = global_params._replace(**override_params)
    return blocks_args, global_params
|
||||
|
||||
|
||||
# Every pretrained checkpoint is published under the same release URL.
_RELEASE_URL = 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/'

# Weights trained with standard methods.
# Check more details in the paper (EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks).
url_map = {
    name: _RELEASE_URL + name + '-' + digest + '.pth'
    for name, digest in (
        ('efficientnet-b0', '355c32eb'),
        ('efficientnet-b1', 'f1951068'),
        ('efficientnet-b2', '8bb594d6'),
        ('efficientnet-b3', '5fb5a3c3'),
        ('efficientnet-b4', '6ed6700e'),
        ('efficientnet-b5', 'b6417697'),
        ('efficientnet-b6', 'c76e70fd'),
        ('efficientnet-b7', 'dcc49843'),
    )
}

# Weights trained with Adversarial Examples (AdvProp).
# Check more details in the paper (Adversarial Examples Improve Image Recognition).
url_map_advprop = {
    name: _RELEASE_URL + 'adv-' + name + '-' + digest + '.pth'
    for name, digest in (
        ('efficientnet-b0', 'b64d5a18'),
        ('efficientnet-b1', '0f3ce85a'),
        ('efficientnet-b2', '6e9d97e5'),
        ('efficientnet-b3', 'cdd7c0f4'),
        ('efficientnet-b4', '44fb3a87'),
        ('efficientnet-b5', '86493f6b'),
        ('efficientnet-b6', 'ac80338e'),
        ('efficientnet-b7', '4652b6dd'),
        ('efficientnet-b8', '22a8fe65'),
    )
}

# TODO: add the pretrained weights url map of 'efficientnet-l2'
|
||||
|
||||
|
||||
def load_pretrained_weights(model, model_name, weights_path=None, load_fc=True, advprop=False):
    """Loads pretrained weights from weights path or download using url.

    Args:
        model (Module): The whole model of efficientnet.
        model_name (str): Model name of efficientnet.
        weights_path (None or str):
            str: path to pretrained weights file on the local disk.
            None: use pretrained weights downloaded from the Internet.
        load_fc (bool): Whether to load pretrained weights for fc layer at the end of the model.
        advprop (bool): Whether to load pretrained weights
                        trained with advprop (valid when weights_path is None).

    Raises:
        KeyError: If weights must be downloaded and ``model_name`` has no
            entry in the selected url map.
        AssertionError: If the checkpoint is missing keys the model needs or
            contains keys the model does not have.
    """
    if isinstance(weights_path, str):
        state_dict = torch.load(weights_path)
    else:
        # AutoAugment or Advprop (different preprocessing)
        url_map_ = url_map_advprop if advprop else url_map
        state_dict = model_zoo.load_url(url_map_[model_name])

    if load_fc:
        ret = model.load_state_dict(state_dict, strict=False)
        assert not ret.missing_keys, f'Missing keys when loading pretrained weights: {ret.missing_keys}'
    else:
        # Drop the classifier weights; a checkpoint saved without them is fine.
        state_dict.pop('_fc.weight', None)
        state_dict.pop('_fc.bias', None)
        ret = model.load_state_dict(state_dict, strict=False)
        # Only the classifier is allowed to be missing.
        assert set(ret.missing_keys) == {'_fc.weight', '_fc.bias'}, \
            f'Missing keys when loading pretrained weights: {ret.missing_keys}'
    assert not ret.unexpected_keys, f'Unexpected keys when loading pretrained weights: {ret.unexpected_keys}'

    print('Loaded pretrained weights for {}'.format(model_name))
|
||||
+23
@@ -0,0 +1,23 @@
|
||||
### ImageNet
|
||||
|
||||
This is a preliminary directory for evaluating the model on ImageNet. It is adapted from the standard PyTorch ImageNet example script.
|
||||
|
||||
For now, only evaluation is supported, but I am currently building scripts to assist with training new models on Imagenet.
|
||||
|
||||
The evaluation results are slightly different from the original TensorFlow repository, due to differences in data preprocessing. For example, with the current preprocessing, `efficientnet-b3` gives a top-1 accuracy of `80.8`, rather than `81.1` in the paper. I am working on porting the TensorFlow preprocessing into PyTorch to address this issue.
|
||||
|
||||
To run on ImageNet, place your `train` and `val` directories in `data`.
|
||||
|
||||
Example commands:
|
||||
```bash
|
||||
# Evaluate small EfficientNet on CPU
|
||||
python main.py data -e -a 'efficientnet-b0' --pretrained
|
||||
```
|
||||
```bash
|
||||
# Evaluate medium EfficientNet on GPU
|
||||
python main.py data -e -a 'efficientnet-b3' --pretrained --gpu 0 --batch-size 128
|
||||
```
|
||||
```bash
|
||||
# Evaluate ResNet-50 for comparison
|
||||
python main.py data -e -a 'resnet50' --pretrained --gpu 0
|
||||
```
|
||||
+5
@@ -0,0 +1,5 @@
|
||||
### ImageNet
|
||||
|
||||
Download ImageNet and place it into `train` and `val` folders here.
|
||||
|
||||
More details may be found with the official PyTorch ImageNet example [here](https://github.com/pytorch/examples/blob/master/imagenet).
|
||||
+531
@@ -0,0 +1,531 @@
|
||||
"""
|
||||
Evaluate on ImageNet. Note that at the moment, training is not implemented (I am working on it).
|
||||
That being said, evaluation is working.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import random
|
||||
import shutil
|
||||
import time
|
||||
import warnings
|
||||
import PIL
|
||||
import numpy as np
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.parallel
|
||||
import torch.backends.cudnn as cudnn
|
||||
import torch.distributed as dist
|
||||
import torch.optim
|
||||
import torch.multiprocessing as mp
|
||||
import torch.utils.data
|
||||
import torch.utils.data.distributed
|
||||
import torchvision.transforms as transforms
|
||||
import torchvision.datasets as datasets
|
||||
import torchvision.models as models
|
||||
|
||||
from apex import amp
|
||||
|
||||
sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)),'../../'))
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
from efficientnet_pytorch import rand_augment_transform, augment_and_mix_transform, auto_augment_transform
|
||||
from efficientnet_pytorch import RMSpropTF
|
||||
from efficientnet_pytorch import npu_info
|
||||
|
||||
from benchmark_log import hwlog
|
||||
from benchmark_log.basic_utils import get_environment_info
|
||||
from benchmark_log.basic_utils import get_model_parameter
|
||||
|
||||
# Command-line interface of the ImageNet script. Help texts state the
# defaults actually configured below.
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--data', metavar='DIR',
                    help='path to dataset')
parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
                    help='model architecture (default: resnet18)')
parser.add_argument('-j', '--workers', default=128, type=int, metavar='N',
                    help='number of data loading workers (default: 128)')
parser.add_argument('--epochs', default=90, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=256, type=int,
                    metavar='N',
                    help='mini-batch size (default: 256), this is the total '
                         'batch size of all GPUs on the current node when '
                         'using Data Parallel or Distributed Data Parallel')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-5, type=float,
                    metavar='W', help='weight decay (default: 1e-5)',
                    dest='weight_decay')
parser.add_argument('-p', '--print-freq', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                    help='evaluate model on validation set')
parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                    help='use pre-trained model')
parser.add_argument('--world-size', default=-1, type=int,
                    help='number of nodes for distributed training')
parser.add_argument('--rank', default=-1, type=int,
                    help='node rank for distributed training')
parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
                    help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='hccl', type=str,
                    help='distributed backend')
parser.add_argument('--seed', default=None, type=int,
                    help='seed for initializing training. ')
parser.add_argument('--npu', default=None, type=str,
                    help='npu id to use.')
parser.add_argument('--image_size', default=224, type=int,
                    help='image size')
parser.add_argument('--advprop', default=False, action='store_true',
                    help='use advprop or not')
parser.add_argument('--multiprocessing-distributed', action='store_true',
                    help='Use multi-processing distributed training to launch '
                         'N processes per node, which has N GPUs. This is the '
                         'fastest way to use PyTorch for either single node or '
                         'multi node data parallel training')
parser.add_argument('--autoaug', action='store_true', help='use auto augment')
parser.add_argument('--amp', action='store_true', help='use apex')
# No explicit dest: argparse derives it from the first long option, i.e. `pm`.
parser.add_argument('--pm', '--precision-mode', default='O1', type=str,
                    help='precision mode to use for mix precision, only support O1, O2')
parser.add_argument('--loss_scale', default=1024, type=int, help='loss_scale for amp')
# main() exports this value as MASTER_ADDR for multiprocessing runs.
parser.add_argument('--addr', default='127.0.0.1', type=str,
                    help='master node address for distributed training (default: 127.0.0.1)')
parser.add_argument('--nnpus_per_node', default=None, type=int,
                    help='number of npus to use for distributed train on each node')
parser.add_argument('--val_feq', default=10, type=int,
                    help='validation frequency')
parser.add_argument('--device_list', default='0,1,2,3,4,5,6,7', type=str, help='device id list')
|
||||
|
||||
def device_id_to_process_device_map(device_list):
    """Map process indices to device ids.

    Args:
        device_list (str): Comma-separated device ids, e.g. '0,1,2,3'.

    Returns:
        dict: ``{process_index: device_id}`` where the device ids are sorted
        in ascending order and process indices count up from 0.
    """
    ordered_ids = sorted(int(token) for token in device_list.split(","))
    return dict(enumerate(ordered_ids))
|
||||
|
||||
|
||||
def main():
    """Parse the command line and start training in one or several processes."""
    args = parser.parse_args()

    # With env:// initialisation the world size may come from the environment.
    if args.world_size == -1 and args.dist_url == "env://":
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.multiprocessing_distributed or args.world_size > 1

    args.process_device_map = device_id_to_process_device_map(args.device_list)
    nnpus_per_node = len(args.process_device_map)

    if not args.multiprocessing_distributed:
        # Simply call main_worker function
        main_worker(args.npu, nnpus_per_node, args)
        return

    # Since we have nnpus_per_node processes per node, the total world_size
    # needs to be adjusted accordingly
    args.world_size = nnpus_per_node * args.world_size
    # Use torch.multiprocessing.spawn to launch distributed processes: the
    # main_worker process function
    os.environ['MASTER_ADDR'] = args.addr
    os.environ['MASTER_PORT'] = '29688'
    mp.spawn(main_worker, nprocs=nnpus_per_node, args=(nnpus_per_node, args))
|
||||
|
||||
def main_worker(npu, nnpus_per_node, args):
    """Build model, optimizer and data loaders on one NPU, then train or evaluate.

    Args:
        npu: Process index when started through ``mp.spawn``, otherwise the
            value of ``args.npu``. In distributed mode it is translated to a
            device id through ``args.process_device_map``.
        nnpus_per_node: Number of worker processes on this node.
        args: Parsed command-line namespace. It is mutated in place
            (``npu``, ``rank``, ``batch_size``, ``workers``, ``start_epoch``).

    Side effects:
        Sets the module-level ``total_batch_size`` (read by ``train``), writes
        ``res.txt`` in evaluate mode and a checkpoint file after every epoch.
    """
    global total_batch_size

    args.npu = npu
    if args.distributed:
        args.npu = args.process_device_map[npu]

    if args.npu is not None:
        print("Use npu: {} for training".format(args.npu))
        torch.npu.set_device('npu:' + str(args.npu))

    device = 'npu:' + str(args.npu)

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * nnpus_per_node + int(npu)

        dist.init_process_group(backend=args.dist_backend,
                                world_size=args.world_size, rank=args.rank)

    # create model
    if 'efficientnet' in args.arch:
        if args.pretrained:
            model = EfficientNet.from_pretrained(args.arch, advprop=args.advprop)
            print("=> using pre-trained model '{}'".format(args.arch))
        else:
            print("=> creating model '{}'".format(args.arch))
            model = EfficientNet.from_name(args.arch)
    else:
        if args.pretrained:
            print("=> using pre-trained model '{}'".format(args.arch))
            model = models.__dict__[args.arch](pretrained=True)
        else:
            print("=> creating model '{}'".format(args.arch))
            model = models.__dict__[args.arch]()

    criterion = nn.CrossEntropyLoss().to(device)

    # The optimizer is created before the model is moved to the device and
    # before amp.initialize(); this statement order is kept from the original.
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    model = model.to(device)
    if args.amp:
        print("=> use amp...")
        if args.pm not in ['O1', 'O2']:
            print('=>unsupported precision mode!')
            exit()
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.pm,
                                          loss_scale=args.loss_scale)

    # Remember the batch size given on the command line before it is split
    # across the processes of this node; train() uses it to compute FPS.
    total_batch_size = args.batch_size
    if args.distributed:
        args.batch_size = int(args.batch_size / nnpus_per_node)
        args.workers = int(args.workers / nnpus_per_node)
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.npu], broadcast_buffers=False)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch']
            if args.amp:
                amp.load_state_dict(checkpoint['amp'])
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    if args.advprop:
        # advprop checkpoints take inputs rescaled to [-1, 1] instead of
        # mean/std normalisation.
        normalize = transforms.Lambda(lambda img: img * 2.0 - 1.0)
    else:
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

    if 'efficientnet' in args.arch:
        image_size = EfficientNet.get_image_size(args.arch)
    else:
        image_size = args.image_size

    # The two training pipelines differ only in the augmentation step that
    # follows the random resized crop.
    if args.autoaug:
        print("=> use auto augment...")
        augmentation = auto_augment_wrapper(image_size)
    else:
        augmentation = transforms.RandomHorizontalFlip()
    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(image_size),
            augmentation,
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True)

    val_transforms = transforms.Compose([
        transforms.Resize(image_size, interpolation=PIL.Image.BICUBIC),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        normalize,
    ])
    print(device, ' optimizer params:', optimizer)

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, val_transforms),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        # validate() takes the epoch and the per-node process count as well;
        # the previous 4-argument call raised TypeError before evaluating.
        res = validate(val_loader, model, criterion, args,
                       args.start_epoch, nnpus_per_node)
        with open('res.txt', 'w') as f:
            print(res, file=f)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args, nnpus_per_node)

        # evaluate on validation set
        if epoch % args.val_feq == 0 or epoch == args.epochs - 1:
            validate(val_loader, model, criterion, args, epoch, nnpus_per_node)

        # Only the first process of each node writes the checkpoint.
        if (not args.multiprocessing_distributed
                or args.rank % nnpus_per_node == 0):
            state = {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            if args.amp:
                # The amp state is needed to resume mixed-precision training.
                state['amp'] = amp.state_dict()
            save_checkpoint(state)
|
||||
|
||||
|
||||
def train(train_loader, model, criterion, optimizer, epoch, args, nnpus_per_node):
    """Run one training epoch and log throughput and accuracy.

    Args:
        train_loader: Iterable yielding ``(images, target)`` batches.
        model: Network to optimise; switched to train mode here.
        criterion: Loss callable applied to ``(output, target)``.
        optimizer: Optimizer whose learning rate is adjusted every step.
        epoch: Index of the current epoch.
        args: Parsed options (reads ``npu``, ``amp``, ``rank``,
            ``multiprocessing_distributed``).
        nnpus_per_node: Number of worker processes on this node.
    """
    fps_time = AverageMeter('FPS', ':6.1f')
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':6.4f')
    lr = AverageMeter('LR', ':6.4f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    steps_per_epoch = len(train_loader)
    progress = ProgressMeter(steps_per_epoch, fps_time, batch_time, data_time, losses, lr, top1,
                             top5, prefix="Epoch: [{}]".format(epoch))

    device = 'npu:' + str(args.npu)
    # Progress lines are printed by every process unless multiprocessing is
    # on, in which case only the first process of each node prints.
    should_log = (not args.multiprocessing_distributed
                  or args.rank % nnpus_per_node == 0)

    # switch to train mode
    model.train()

    end = time.time()
    for step, (images, target) in enumerate(train_loader):
        adjust_learning_rate_fraction_epoch(optimizer, epoch, step, steps_per_epoch, args)

        # measure data loading time
        data_time.update(time.time() - end)

        optimizer.zero_grad()

        target = target.int()
        images = images.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        num_samples = images.size(0)
        losses.update(loss.item(), num_samples)
        lr.update(optimizer.param_groups[0]['lr'], num_samples)
        top1.update(acc1[0], num_samples)
        top5.update(acc5[0], num_samples)

        # compute gradient and do SGD step
        if args.amp:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        optimizer.step()

        # measure elapsed time; FPS uses the batch size stored by main_worker
        fps_time.update(total_batch_size / (time.time() - end))
        batch_time.update(time.time() - end)
        end = time.time()
        if should_log:
            progress.print(step)

    hwlog.remark_print(key=hwlog.FPS, value=str(fps_time))
|
||||
|
||||
def validate(val_loader, model, criterion, args, epoch=None, nnpus_per_node=1):
    """Evaluate ``model`` on ``val_loader`` and return the average top-1 accuracy.

    Args:
        val_loader: Iterable yielding ``(images, target)`` batches.
        model: Network to evaluate; switched to eval mode here.
        criterion: Loss callable applied to ``(output, target)``.
        args: Parsed options (reads ``npu``, ``rank``,
            ``multiprocessing_distributed``).
        epoch: Accepted for call-site symmetry with ``train``; not used in
            the body. Optional so the evaluate-only call in ``main_worker``,
            which passes four arguments, no longer raises ``TypeError``.
        nnpus_per_node: Number of worker processes on this node. With the
            default of 1 every process logs its results.

    Returns:
        ``top1.avg`` of the accuracy meter (warm-up updates are excluded by
        ``AverageMeter``).
    """
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader), batch_time, losses, top1, top5,
                             prefix='Test: ')

    device = 'npu:' + str(args.npu)
    # Log from every process unless multiprocessing is on, in which case
    # only the first process of each node logs.
    should_log = (not args.multiprocessing_distributed
                  or args.rank % nnpus_per_node == 0)

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            target = target.int()
            images = images.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if should_log:
                progress.print(i)

        # TODO: this should also be done with the ProgressMeter
        if should_log:
            print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
                  .format(top1=top1, top5=top5))
            hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP1, value="{top1.avg:.3f}".format(top1=top1))
            hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP5, value="{top5.avg:.3f}".format(top5=top5))

    return top1.avg
|
||||
|
||||
|
||||
def save_checkpoint(state, filename='checkpoint.pth'):
    """Serialise ``state`` to ``filename`` using ``torch.save``.

    Args:
        state: Object to store (the callers pass a dict of training state).
        filename: Destination path; defaults to ``checkpoint.pth`` in the
            current working directory.
    """
    destination = filename
    torch.save(state, destination)
|
||||
|
||||
|
||||
class AverageMeter(object):
    """Track the most recent value and a running weighted average.

    The first four ``update`` calls after construction or ``reset`` only set
    ``val``; they are excluded from ``sum``, ``count`` and ``avg`` so that
    warm-up iterations do not distort the statistics.
    """

    def __init__(self, name, fmt=':f'):
        # ``fmt`` is a format spec including the leading colon, e.g. ':6.2f'.
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        """Clear the current value, the accumulated statistics and the warm-up counter."""
        self.val = self.avg = self.sum = self.count = self.skip = 0

    def update(self, val, n=1):
        """Record ``val`` observed over ``n`` samples."""
        self.val = val
        self.skip += 1
        # Accumulate only from the fifth call onwards.
        if self.skip >= 5:
            self.sum += val * n
            self.count += n
            self.avg = self.sum / self.count

    def __str__(self):
        template = ''.join(['{name} {val', self.fmt, '} ({avg', self.fmt, '})'])
        return template.format(name=self.name, val=self.val, avg=self.avg)
|
||||
|
||||
|
||||
class ProgressMeter(object):
    """Print one progress line per batch and forward the accuracy values to hwlog."""

    def __init__(self, num_batches, *meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def print(self, batch):
        """Print the meters for ``batch`` and log the current Acc@1 / Acc@5.

        The accuracy values are recovered from the rendered text, so the
        meter group must contain meters named ``Acc@1`` and ``Acc@5``.
        """
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries.extend(str(meter) for meter in self.meters)
        print('\t'.join(entries))

        rendered = str(entries)

        def first_token_after(tag):
            # The first space-delimited token following the tag is the
            # meter's current value as formatted by AverageMeter.__str__.
            return rendered.split(tag)[1].strip().split(" ")[0]

        train_acc1 = first_token_after("Acc@1")
        train_acc5 = first_token_after("Acc@5")
        hwlog.remark_print(key=hwlog.TRAIN_ACCURACY_TOP1, value=train_acc1)
        hwlog.remark_print(key=hwlog.TRAIN_ACCURACY_TOP5, value=train_acc5)

    def _get_batch_fmtstr(self, num_batches):
        """Return a template such as ``'[{:3d}/100]'`` sized to ``num_batches``."""
        width = len(str(num_batches // 1))
        counter = '{:' + str(width) + 'd}'
        return '[{0}/{1}]'.format(counter, counter.format(num_batches))
|
||||
|
||||
|
||||
def adjust_learning_rate(optimizer, epoch, args):
    """Apply step decay: ``args.lr`` scaled by 0.1 for every 30 full epochs.

    The resulting rate is written to the ``'lr'`` entry of every group in
    ``optimizer.param_groups``.
    """
    decayed_lr = args.lr * (0.1 ** (epoch // 30))
    for group in optimizer.param_groups:
        group['lr'] = decayed_lr
|
||||
|
||||
|
||||
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k.

    Args:
        output: Score tensor indexed ``[sample, class]`` (top-k is taken
            along dimension 1).
        target: Tensor holding one class index per sample.
        topk: Iterable of ``k`` values to evaluate.

    Returns:
        list: One single-element tensor per ``k`` with the percentage of
        samples whose target is among the ``k`` highest-scoring classes.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # ``correct`` is derived from the transposed ``pred`` and can be
            # non-contiguous, in which case ``view(-1)`` raises RuntimeError;
            # ``reshape(-1)`` copies when it has to.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
|
||||
|
||||
def auto_augment_wrapper(img_size, auto_augment='original-mstd0.5'):
    """Build the augmentation transform selected by the ``auto_augment`` policy string.

    Args:
        img_size: Training image size; the translation constant is 45% of it.
        auto_augment: Policy string. A ``rand`` prefix selects
            ``rand_augment_transform``, an ``augmix`` prefix selects
            ``augment_and_mix_transform``; anything else goes to
            ``auto_augment_transform``.

    Returns:
        The transform object created by the selected factory.
    """
    imagenet_mean = [0.485, 0.456, 0.406]
    assert isinstance(auto_augment, str)

    # Fill colour for the augmentations: the mean scaled to 0..255.
    fill_colour = tuple(min(255, round(255 * channel)) for channel in imagenet_mean)
    aa_params = {
        'translate_const': int(img_size * 0.45),
        'img_mean': fill_colour,
    }

    if auto_augment.startswith('rand'):
        return rand_augment_transform(auto_augment, aa_params)
    if auto_augment.startswith('augmix'):
        aa_params['translate_pct'] = 0.3
        return augment_and_mix_transform(auto_augment, aa_params)
    return auto_augment_transform(auto_augment, aa_params)
|
||||
|
||||
def adjust_learning_rate_fraction_epoch(optimizer, epoch, step, steps_per_epoch, args):
    """Set the learning rate to ``args.lr`` decayed by 0.97 every 5 epochs of steps.

    The decay is evaluated per step: the global step counter
    (``step + epoch * steps_per_epoch``) is divided by the number of steps
    in five epochs, and the rate is written to every parameter group.
    """
    global_step = step + epoch * steps_per_epoch
    decay_interval = int(steps_per_epoch * 5.0)
    new_lr = args.lr * (0.97 ** (global_step // decay_interval))
    for group in optimizer.param_groups:
        group['lr'] = new_lr
|
||||
|
||||
if __name__ == '__main__':
    # Record the environment and run configuration in the hw log, then train.
    cpu_info, npu_infos, framework_info, os_info, benchmark_version = get_environment_info("pytorch")
    config_info = get_model_parameter("pytorch_config")
    # These labels are literals; they are not read from the parsed arguments.
    initinal_data = {"base_lr": 0.1, "dataset": "imagenet", "optimizer": "SGD", "loss_scale": 1024}

    for _log_key, _log_value in (
            (hwlog.CPU_INFO, cpu_info),
            (hwlog.NPU_INFO, npu_infos),
            (hwlog.OS_INFO, os_info),
            (hwlog.FRAMEWORK_INFO, framework_info),
            (hwlog.BENCHMARK_VERSION, benchmark_version),
            (hwlog.CONFIG_INFO, config_info),
            (hwlog.BASE_LR, initinal_data.get("base_lr")),
            (hwlog.DATASET, initinal_data.get("dataset")),
            (hwlog.OPT_NAME, initinal_data.get("optimizer")),
            (hwlog.LOSS_SCALE, initinal_data.get("loss_scale")),
    ):
        hwlog.remark_print(key=_log_key, value=_log_value)

    main()
|
||||
+177
File diff suppressed because one or more lines are too long
+144
File diff suppressed because one or more lines are too long
+1
File diff suppressed because one or more lines are too long
+43
@@ -0,0 +1,43 @@
|
||||
from efficientnet_pytorch import EfficientNet as _EfficientNet
|
||||
|
||||
dependencies = ['torch']
|
||||
|
||||
|
||||
def _create_model_fn(model_name):
    """Return a hub entry-point function that builds the given EfficientNet variant.

    ``model_name`` uses underscores (``efficientnet_b0``); it is converted to
    the dashed form before being passed to the ``EfficientNet`` constructors.
    """
    def _model_fn(num_classes=1000, in_channels=3, pretrained='imagenet'):
        """Create Efficient Net.

        Described in detail here: https://arxiv.org/abs/1905.11946

        Args:
            num_classes (int, optional): Number of classes, default is 1000.
            in_channels (int, optional): Number of input channels, default
                is 3.
            pretrained (str, optional): One of [None, 'imagenet', 'advprop']
                If None, no pretrained model is loaded.
                If 'imagenet', models trained on imagenet dataset are loaded.
                If 'advprop', models trained using adversarial training called
                advprop are loaded. It is important to note that the
                preprocessing required for the advprop pretrained models is
                slightly different from normal ImageNet preprocessing
        """
        dashed_name = model_name.replace('_', '-')

        if pretrained is None:
            # Randomly initialised network; the input stem is adapted afterwards.
            untrained = _EfficientNet.from_name(
                model_name=dashed_name,
                override_params={'num_classes': num_classes},
            )
            untrained._change_in_channels(in_channels)
            return untrained

        use_advprop = (pretrained == 'advprop')
        return _EfficientNet.from_pretrained(
            model_name=dashed_name,
            advprop=use_advprop,
            num_classes=num_classes,
            in_channels=in_channels)

    return _model_fn
|
||||
|
||||
# Expose efficientnet_b0 ... efficientnet_b8 as module-level entry points.
for model_name in ('efficientnet_b{}'.format(variant) for variant in range(9)):
    locals()[model_name] = _create_model_fn(model_name)
|
||||
@@ -0,0 +1,9 @@
|
||||
# Environment for training with the Ascend NNAE package, followed by the
# single-NPU EfficientNet-B0 training command.
export ASCEND_HOME=/usr/local/Ascend

# Shared libraries: system, NNAE framework, driver, add-ons and Python 3.7.5.
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/:/usr/lib/:/usr/local/Ascend/nnae/latest/fwkacllib/lib64:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/local/python3.7.5/lib/

# Python packages shipped with NNAE, plus the current directory so the
# project's own modules can be imported.
export PYTHONPATH=${PYTHONPATH}:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/hccl:${PWD}

export PATH=${PATH}:/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin
export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp/

export SLOG_PRINT_TO_STDOUT=0
export TASK_QUEUE_ENABLE=1

# Pin the process to CPUs 0-64 and train on NPU 0 with apex amp (O1).
taskset -c 0-64 python3.7 examples/imagenet/main.py \
    --data=/data/imagenet \
    --arch=efficientnet-b0 \
    --batch-size=256 \
    --lr=0.2 \
    --epochs=200 \
    --autoaug \
    --npu=0 \
    --amp \
    --pm=O1 \
    --loss_scale=1024
|
||||
@@ -0,0 +1,123 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Note: To use the 'upload' functionality of this file, you must:
|
||||
# $ pipenv install twine --dev
|
||||
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
from shutil import rmtree
|
||||
|
||||
from setuptools import find_packages, setup, Command
|
||||
|
||||
# Distribution metadata passed to setup() below.
NAME = 'efficientnet_pytorch'
DESCRIPTION = 'EfficientNet implemented in PyTorch.'
URL = 'https://github.com/lukemelas/EfficientNet-PyTorch'
EMAIL = 'lmelaskyriazi@college.harvard.edu'
AUTHOR = 'Luke'
REQUIRES_PYTHON = '>=3.5.0'
VERSION = '0.7.0'

# Packages that must be installed for this module to run.
REQUIRED = ['torch']

# Optional extras, e.g. {'fancy feature': ['django']}; none are defined.
EXTRAS = {}

# Everything below rarely needs editing, apart from the license and the
# trove classifiers, which have to be changed together.

here = os.path.abspath(os.path.dirname(__file__))

# Use README.md as the long description when it is available (it has to be
# listed in MANIFEST.in to be shipped); otherwise use the short description.
readme_path = os.path.join(here, 'README.md')
try:
    with io.open(readme_path, encoding='utf-8') as f:
        long_description = '\n' + f.read()
except FileNotFoundError:
    long_description = DESCRIPTION

# Version lookup: the VERSION constant wins; when it is empty the package's
# __version__.py module is executed into ``about``.
about = {}
if VERSION:
    about['__version__'] = VERSION
else:
    project_slug = NAME.lower().replace("-", "_").replace(" ", "_")
    with open(os.path.join(here, project_slug, '__version__.py')) as f:
        exec(f.read(), about)
|
||||
|
||||
|
||||
class UploadCommand(Command):
    """``setup.py upload``: rebuild the distributions, publish them and tag the release."""

    description = 'Build and publish the package.'
    user_options = []

    @staticmethod
    def status(s):
        """Print ``s`` wrapped in the ANSI escape codes for bold text."""
        print('\033[1m{0}\033[0m'.format(s))

    def initialize_options(self):
        """Required by the Command interface; there are no options to set."""

    def finalize_options(self):
        """Required by the Command interface; there are no options to check."""

    def run(self):
        """Remove old builds, build sdist and wheel, upload with Twine, push a git tag."""
        try:
            self.status('Removing previous builds…')
            rmtree(os.path.join(here, 'dist'))
        except OSError:
            # A missing dist/ directory is expected on a fresh checkout.
            pass

        build_command = '{0} setup.py sdist bdist_wheel --universal'.format(sys.executable)
        steps = (
            ('Building Source and Wheel (universal) distribution…', build_command),
            ('Uploading the package to PyPI via Twine…', 'twine upload dist/*'),
        )
        for message, command in steps:
            self.status(message)
            os.system(command)

        self.status('Pushing git tags…')
        os.system('git tag v{0}'.format(about['__version__']))
        os.system('git push --tags')

        sys.exit()
|
||||
|
||||
|
||||
# Where the magic happens:
|
||||
setup(
    # Identity
    name=NAME,
    version=about['__version__'],
    url=URL,
    author=AUTHOR,
    author_email=EMAIL,
    license='Apache',
    # Descriptions shown on the package index
    description=DESCRIPTION,
    long_description=long_description,
    long_description_content_type='text/markdown',
    # Contents; for a single-module package use py_modules=['model'] instead
    # of 'packages'.
    packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]),
    include_package_data=True,
    # Requirements
    python_requires=REQUIRES_PYTHON,
    install_requires=REQUIRED,
    extras_require=EXTRAS,
    classifiers=[
        # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
        'License :: OSI Approved :: Apache Software License',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.6',
    ],
    # Registers the `setup.py upload` command.
    cmdclass={
        'upload': UploadCommand,
    },
)
|
||||
+124
@@ -0,0 +1,124 @@
|
||||
from collections import OrderedDict
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
|
||||
|
||||
# -- fixtures -------------------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture(scope='module', params=list(range(4)))
def model(request):
    """Model name under test: efficientnet-b0 through efficientnet-b3."""
    variant = request.param
    return 'efficientnet-b{}'.format(variant)
|
||||
|
||||
|
||||
@pytest.fixture(scope='module', params=[True, False])
def pretrained(request):
    """Whether the network is built with pretrained weights."""
    use_pretrained = request.param
    return use_pretrained
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
def net(model, pretrained):
    """Fresh EfficientNet instance for every test function."""
    if pretrained:
        return EfficientNet.from_pretrained(model)
    return EfficientNet.from_name(model)
|
||||
|
||||
|
||||
# -- tests ----------------------------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.parametrize('img_size', [224, 256, 512])
def test_forward(net, img_size):
    """Test `.forward()` runs on a zero image and yields no NaN values"""
    batch = torch.zeros((1, 3, img_size, img_size))
    logits = net(batch)
    has_nan = torch.isnan(logits).any()
    assert not has_nan
|
||||
|
||||
|
||||
def test_dropout_training(net):
    """Test `.train()` on the parent module puts the dropout layer in training mode"""
    net.train()
    dropout_mode = net._dropout.training
    assert dropout_mode == True
|
||||
|
||||
|
||||
def test_dropout_eval(net):
    """Test `.eval()` on the parent module puts the dropout layer in eval mode"""
    net.eval()
    dropout_mode = net._dropout.training
    assert dropout_mode == False
|
||||
|
||||
|
||||
def test_dropout_update(net):
    """Test dropout `.training` follows repeated `.train()` / `.eval()` switches on the parent"""
    # Two full train -> eval round trips.
    for _ in range(2):
        net.train()
        assert net._dropout.training == True
        net.eval()
        assert net._dropout.training == False
|
||||
|
||||
|
||||
@pytest.mark.parametrize('img_size', [224, 256, 512])
def test_modify_dropout(net, img_size):
    """Test the dropout and fc modules of the network can be replaced"""
    feature_dim = net._bn1.num_features
    hidden_dim = 512
    drop_rate = net._global_params.dropout_rate

    # Replacement head: BN -> dropout -> linear -> ReLU -> BN -> dropout.
    head_layers = OrderedDict()
    head_layers['_bn2'] = nn.BatchNorm1d(feature_dim)
    head_layers['_drop1'] = nn.Dropout(p=drop_rate)
    head_layers['_linear1'] = nn.Linear(feature_dim, hidden_dim)
    head_layers['_relu'] = nn.ReLU()
    head_layers['_bn3'] = nn.BatchNorm1d(hidden_dim)
    head_layers['_drop2'] = nn.Dropout(p=drop_rate / 2)
    replacement_dropout = nn.Sequential(head_layers)
    replacement_fc = nn.Linear(hidden_dim, net._global_params.num_classes)

    net._dropout = replacement_dropout
    net._fc = replacement_fc

    batch = torch.zeros((2, 3, img_size, img_size))
    logits = net(batch)
    assert not torch.isnan(logits).any()
|
||||
|
||||
|
||||
@pytest.mark.parametrize('img_size', [224, 256, 512])
def test_modify_pool(net, img_size):
    """Test the pooling module of the network can be replaced"""

    class AdaptiveMaxAvgPool(nn.Module):
        """Concatenate adaptive average and max pooling along the channel axis."""

        def __init__(self):
            super().__init__()
            self.ada_avgpool = nn.AdaptiveAvgPool2d(1)
            self.ada_maxpool = nn.AdaptiveMaxPool2d(1)

        def forward(self, x):
            pooled = (self.ada_avgpool(x), self.ada_maxpool(x))
            return torch.cat(pooled, dim=1)

    # The concatenation doubles the feature count seen by the classifier.
    doubled_features = net._fc.in_features * 2
    replacement_fc = nn.Linear(doubled_features, net._global_params.num_classes)

    net._avg_pooling = AdaptiveMaxAvgPool()
    net._fc = replacement_fc

    batch = torch.zeros((2, 3, img_size, img_size))
    logits = net(batch)
    assert not torch.isnan(logits).any()
|
||||
|
||||
|
||||
@pytest.mark.parametrize('img_size', [224, 256, 512])
def test_extract_endpoints(net, img_size):
    """Test `.extract_endpoints()` returns NaN-free maps with the expected heights"""
    batch = torch.zeros((1, 3, img_size, img_size))
    endpoints = net.extract_endpoints(batch)

    stage_keys = ['reduction_{}'.format(stage) for stage in range(1, 6)]

    for key in stage_keys:
        assert not torch.isnan(endpoints[key]).any()

    # Stage k is expected to have height img_size // 2**k.
    for stage, key in enumerate(stage_keys, start=1):
        assert endpoints[key].size(2) == img_size // (2 ** stage)
|
||||
+31
@@ -0,0 +1,31 @@
|
||||
# Environment variables for running on Ascend NPUs.
# The NNAE installation is used when /usr/local/Ascend/nnae/latest exists;
# otherwise the ascend-toolkit installation is used.

############## toolkit situation ################
#export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
#export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/
#export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
#export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
#export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH

############## nnae situation ################

if [ -d /usr/local/Ascend/nnae/latest ];then
    # The multiarch library directory is aarch64-linux-gnu; the previous
    # spelling "aarch64_64-linux-gnu" named a directory that does not exist.
    export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH
    export PATH=$PATH:/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/nnae/latest/toolkit/tools/ide_daemon/bin/
    export ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp/
    export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
    export PYTHONPATH=/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
else
    export LD_LIBRARY_PATH=/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH
    export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/
    export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
    export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
    export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
fi

# ln -s /usr/local/Ascend/ascend-toolkit/latest/toolkit/bin/adc /usr/local/bin/

export SLOG_PRINT_TO_STDOUT=0
#su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 0"

export TASK_QUEUE_ENABLE=1
|
||||
@@ -0,0 +1,62 @@
|
||||
#!/bin/bash
# Start EfficientNet (PyTorch) training on this host, or through mpirun when
# CLUSTER is "True".
#
# Arguments:
#   $1  rank_size   number of devices
#   $2  yamlPath    yaml configuration file
#   $3  toolsPath   directory containing get_params_for_yaml.sh
#   $4  CLUSTER     only read inside a docker container
#   $5  all node IPs, only read inside a docker container

rank_size=$1
yamlPath=$2
toolsPath=$3

currentDir=$(cd "$(dirname "$0")/.."; pwd)
model_name=$(cd $currentDir/..;basename $(pwd))
if [ -f /.dockerenv ];then
    MPIRUN_ALL_IP="$5"
    export CLUSTER=$4
fi

# Read the configuration from the yaml file.
eval $(${toolsPath}/get_params_for_yaml.sh ${yamlPath} "pytorch_config")

# Remove old logs.
rm -rf /var/log/npu/slog/host-0/*
rm -rf ${currentDir}/result/*.log

# Create the directory for this training job.
currtime=$(date +%Y%m%d%H%M%S)
export train_job_dir=${currentDir%train*}/train/result/pt_efficientnet/training_job_${currtime}/
mkdir -p ${train_job_dir}
echo "[$(date +%Y%m%d-%H:%M:%S)] [INFO] ${train_job_dir}"

# Device list; when none is configured for this rank_size (or rank_size is
# 8 or more) the devices are chosen sequentially from 0.
eval device_group=\$device_group_${rank_size}p
if [ -z "${device_group}" ] || [ ${rank_size} -ge 8 ];then
    device_group="$(seq 0 "$(expr $rank_size - 1)")"
fi

# First device id in device_group; the hw log is linked from the directory
# named after it.
device_group_str=$(echo ${device_group} | sed 's/ //g')
first_device_id=$(echo ${device_group_str:0:1})

if [ "${CLUSTER}" == "True" ];then
    this_ip=$(hostname -I |awk '{print $1}')
    ln -snf ${train_job_dir}0/hw_efficientnet.log ${train_job_dir}
    # Copy the configuration files to every other node.
    for ip in $MPIRUN_ALL_IP;do
        if [ "$ip" != "$this_ip" ];then
            scp $yamlPath root@$ip:$yamlPath
            scp ${jsonFilePath} root@$ip:${jsonFilePath}
        fi
    done
    export PATH=$PATH:/usr/local/mpirun4.0/bin
    mpirun -H ${mpirun_ip} \
        --bind-to none -map-by slot \
        --allow-run-as-root \
        --mca btl_tcp_if_exclude lo,docker0,endvnic,virbr0,vethf40501b,docker_gwbridge,br-f42ac38052b4 \
        --prefix /usr/local/mpirun4.0/ \
        ${currentDir}/scripts/train.sh 0 $rank_size $yamlPath $currtime ${toolsPath} ${CLUSTER}
else
    # A single train.sh is started in the background with rank id 0.
    rank_id=0
    ln -snf ${train_job_dir}${first_device_id}/hw_efficientnet.log ${train_job_dir}
    ${currentDir}/scripts/train.sh 0 $rank_size $yamlPath $currtime ${toolsPath} $rank_id &
fi
wait
|
||||
|
||||
|
||||
+132
@@ -0,0 +1,132 @@
|
||||
#!/usr/bin/env bash
#
# Run one EfficientNet-b0 (PyTorch) training process for the benchmark, then
# append the result and the elapsed time to the job's log files.
#
# Arguments:
#   $1  device_id   device to use (replaced by the detected id in cluster mode)
#   $2  rank_size   number of devices; 1 = single card, 2..8 = one host
#   $3  yamlPath    yaml file whose "pytorch_config" section is loaded
#   $4  currtime    timestamp naming the job directory
#   $5  toolsPath   directory containing get_params_for_yaml.sh
#   $6  rank id for a single-host run, or the literal "True" for a cluster run

if [ $# -lt 5 ]; then
    echo "usage: $0 <device_id> <rank_size> <yaml_path> <currtime> <tools_path> [rank_id|True]" >&2
    exit 1
fi

device_id=$1
rank_size=$2
yamlPath=$3
currtime=$4
toolsPath=$5

# Parent directory of the directory that holds this script.
currentDir=$(cd "$(dirname "$0")/.."; pwd)

export YAML_PATH=$3
mkdir -p "${currentDir%train*}/train/result/pt_efficientnet/training_job_${currtime}/"
# Exported with a trailing slash, as before.
export train_job_dir="${currentDir%train*}/train/result/pt_efficientnet/training_job_${currtime}/"

# Load the settings of the "pytorch_config" section of the yaml file as shell
# variables. The command substitution is deliberately left unquoted so the
# helper's output is evaluated exactly as before.
eval $("${toolsPath}/get_params_for_yaml.sh" "${yamlPath}" "pytorch_config")

# Name of the marker (hw) log file; must be "hw_" followed by the lower-case
# model name.
export REMARK_LOG_FILE=hw_efficientnet.log
benchmark_log_path="${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils"
export PYTHONPATH=$PYTHONPATH:${benchmark_log_path}

source "${currentDir}/config/set_env_b023.sh"

# User environment.
export HCCL_CONNECT_TIMEOUT=600
export JOB_ID=9999001
export HCCL_RANK_TABLE_PATH="${currentDir}/config/${rank_size}p.json"
export RANK_SIZE=${rank_size}
export SLOG_PRINT_TO_STDOUT=0
export DEVICE_ID=${device_id}
# RANK_INDEX is not assigned in this script; when it is unset the arithmetic
# treats it as 0.
DEVICE_INDEX=$(( DEVICE_ID + RANK_INDEX * 8 ))
export DEVICE_INDEX

cd "${train_job_dir}" || exit 1

# Make the atlasboost package importable for the rank lookup below.
curd_dir="${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils/atlasboost"
export PYTHONPATH=$PYTHONPATH:${curd_dir}

if [ "$6" != "True" ]; then
    # Single host: the caller supplies the rank id directly.
    rank_id=$6
    export RANK_ID=$6
else
    # Cluster: ask atlasboost for this process's rank and device. The parsing
    # below expects the captured output to contain "deviceid = <id> phyid=..."
    # and to end with the rank printed by the last statement.
    device_id_mo=$(python3.7 -c "import src.tensorflow.mpi_ops as atlasboost;atlasboost.init(); \
        device_id = atlasboost.local_rank();cluster_device_id = str(device_id); \
        atlasboost.set_device_id(device_id);print(atlasboost.rank())")
    # Unquoted on purpose: collapses the output onto one space-separated line.
    device_id_mo=$(echo $device_id_mo)
    rank_id=${device_id_mo##* }
    export RANK_ID=${rank_id}
    device=${device_id_mo##*deviceid = }
    device_id=${device%% phyid=*}
    export DEVICE_ID=${device_id}
    # Install the json found in the job directory as this run's rank table.
    # NOTE(review): assumes exactly one *.json exists there - confirm.
    cp "${train_job_dir}"/*.json "${currentDir}/config/${rank_size}p.json"
fi

# Create the per-device working directory and run from inside it.
mkdir -p "${train_job_dir}/${device_id}"
cd "${train_job_dir}/${device_id}" || exit 1

startTime_s=$(date +%s)

if [ "$6" == "True" ]; then
    # Multi-host, multi-card run.
    export CLUSTER=True
fi

# Run main.py with the options shared by every training mode.
#   $1    learning rate
#   $2..  mode-specific options, appended after the shared ones
# All output goes to the job's train_<rank_size>p.log.
run_main() {
    local lr_value=$1
    shift
    taskset -c 0-128 python3.7 "${currentDir}/code/examples/imagenet/main.py" \
        --data="${data_url}" \
        --arch=efficientnet-b0 \
        --batch-size="${batch_size}" \
        --lr="${lr_value}" \
        --momentum=0 \
        --epochs="${epoches}" \
        --autoaug \
        --amp \
        --pm=O1 \
        --loss_scale=128 \
        --val_feq=10 \
        "$@" > "${train_job_dir}/train_${rank_size}p.log" 2>&1
}

# Exit status of the training command; stays 0 when no branch below runs.
# NOTE(review): a rank_size above 8 matches no branch, so nothing is trained
# and success is still reported - confirm whether that is intended.
train_status=0

if [ "${mode}" == "evaluate" ]; then
    # Evaluation is not implemented. The former placeholder was the Python
    # keyword "pass", which bash cannot execute (status 127), so this mode
    # was always reported as failed; keep that outcome but state the reason.
    echo "evaluate mode is not supported by this script" >&2
    train_status=1
elif [ "${rank_size}" == "1" ]; then
    # Single card. "device" is only assigned in the cluster branch above, so
    # fall back to device_id when it is empty.
    run_main 0.2 --npu="${device:-${device_id}}"
    train_status=$?
elif [ "${rank_size}" -le 8 ]; then
    # Several cards on one host. device_group is left unquoted so it is
    # passed exactly as before.
    run_main "${lr}" \
        --addr="$(hostname -I | awk '{print $1}')" \
        --dist-backend=hccl \
        --multiprocessing-distributed \
        --world-size 1 \
        --rank 0 \
        --device_list ${device_group}
    train_status=$?
fi

# Report the outcome on stdout, in the training log and in the hw log.
if [ "${train_status}" -eq 0 ]; then
    result_line=":::ABK 1.0.0 efficientnet train success"
else
    result_line=":::ABK 1.0.0 efficientnet train failed"
fi
echo "${result_line}"
echo "${result_line}" >> "${train_job_dir}/train_${rank_size}p.log"
echo "${result_line}" >> ./hw_efficientnet.log

# Elapsed wall-clock time, split into hours, minutes and seconds.
endTime_s=$(date +%s)
sumTime=$(( endTime_s - startTime_s ))
hour=$(( sumTime / 3600 ))
min=$(( (sumTime - hour * 3600) / 60 ))
sec=$(( sumTime - hour * 3600 - min * 60 ))
echo ":::ABK 1.0.0 efficientnet train total time: ${hour}:${min}:${sec}" >> "${train_job_dir}/${device_id}/hw_efficientnet.log"
|
||||
@@ -0,0 +1 @@
|
||||
#!/bin/bash
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user