[add]上传训练benchmark by z00560161
This commit is contained in:
@@ -0,0 +1,25 @@
|
||||
# EfficientNet_pytorch训练说明
|
||||
|
||||
### 1. 模型训练参数配置
|
||||
|
||||
在train/yaml/EfficientNet.yaml中修改相应配置, 配置项含义:
|
||||
|
||||
```
|
||||
pytorch_config:
|
||||
data_url: 数据集路径
|
||||
epoches: 跑多少个epoch
|
||||
batch_size: 1p 参数为256 2p 512 4p 1024 8p为2048
|
||||
seed: 49
|
||||
lr: 默认参数1p 0.2 2p 0.4 4p 0.8 8p 1.6
|
||||
docker_image: docker 镜像名称:版本号
|
||||
```
|
||||
|
||||
------
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,202 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
+253
@@ -0,0 +1,253 @@
|
||||
# EfficientNet PyTorch
|
||||
|
||||
### Quickstart
|
||||
|
||||
Install with `pip install efficientnet_pytorch` and load a pretrained EfficientNet with:
|
||||
```python
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
model = EfficientNet.from_pretrained('efficientnet-b0')
|
||||
```
|
||||
|
||||
### Updates
|
||||
|
||||
#### Update (May 14, 2020)
|
||||
|
||||
This update adds comprehensive comments and documentation (thanks to @workingcoder).
|
||||
|
||||
#### Update (January 23, 2020)
|
||||
|
||||
This update adds a new category of pre-trained model based on adversarial training, called _advprop_. It is important to note that the preprocessing required for the advprop pretrained models is slightly different from normal ImageNet preprocessing. As a result, by default, advprop models are not used. To load a model with advprop, use:
|
||||
```
|
||||
model = EfficientNet.from_pretrained("efficientnet-b0", advprop=True)
|
||||
```
|
||||
There is also a new, large `efficientnet-b8` pretrained model that is only available in advprop form. When using these models, replace ImageNet preprocessing code as follows:
|
||||
```
|
||||
if advprop: # for models using advprop pretrained weights
|
||||
normalize = transforms.Lambda(lambda img: img * 2.0 - 1.0)
|
||||
else:
|
||||
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
|
||||
std=[0.229, 0.224, 0.225])
|
||||
|
||||
```
|
||||
This update also addresses multiple other issues ([#115](https://github.com/lukemelas/EfficientNet-PyTorch/issues/115), [#128](https://github.com/lukemelas/EfficientNet-PyTorch/issues/128)).
|
||||
|
||||
#### Update (October 15, 2019)
|
||||
|
||||
This update allows you to choose whether to use a memory-efficient Swish activation. The memory-efficient version is chosen by default, but it cannot be used when exporting using PyTorch JIT. For this purpose, we have also included a standard (export-friendly) swish activation function. To switch to the export-friendly version, simply call `model.set_swish(memory_efficient=False)` after loading your desired model. This update addresses issues [#88](https://github.com/lukemelas/EfficientNet-PyTorch/pull/88) and [#89](https://github.com/lukemelas/EfficientNet-PyTorch/pull/89).
|
||||
|
||||
#### Update (October 12, 2019)
|
||||
|
||||
This update makes the Swish activation function more memory-efficient. It also addresses pull requests [#72](https://github.com/lukemelas/EfficientNet-PyTorch/pull/72), [#73](https://github.com/lukemelas/EfficientNet-PyTorch/pull/73), [#85](https://github.com/lukemelas/EfficientNet-PyTorch/pull/85), and [#86](https://github.com/lukemelas/EfficientNet-PyTorch/pull/86). Thanks to the authors of all the pull requests!
|
||||
|
||||
#### Update (July 31, 2019)
|
||||
|
||||
_Upgrade the pip package with_ `pip install --upgrade efficientnet-pytorch`
|
||||
|
||||
The B6 and B7 models are now available. Additionally, _all_ pretrained models have been updated to use AutoAugment preprocessing, which translates to better performance across the board. Usage is the same as before:
|
||||
```python
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
model = EfficientNet.from_pretrained('efficientnet-b7')
|
||||
```
|
||||
|
||||
#### Update (June 29, 2019)
|
||||
|
||||
This update adds easy model exporting ([#20](https://github.com/lukemelas/EfficientNet-PyTorch/issues/20)) and feature extraction ([#38](https://github.com/lukemelas/EfficientNet-PyTorch/issues/38)).
|
||||
|
||||
* [Example: Export to ONNX](#example-export)
|
||||
* [Example: Extract features](#example-feature-extraction)
|
||||
* Also: fixed a CUDA/CPU bug ([#32](https://github.com/lukemelas/EfficientNet-PyTorch/issues/32))
|
||||
|
||||
It is also now incredibly simple to load a pretrained model with a new number of classes for transfer learning:
|
||||
```python
|
||||
model = EfficientNet.from_pretrained('efficientnet-b1', num_classes=23)
|
||||
```
|
||||
|
||||
|
||||
#### Update (June 23, 2019)
|
||||
|
||||
The B4 and B5 models are now available. Their usage is identical to the other models:
|
||||
```python
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
model = EfficientNet.from_pretrained('efficientnet-b4')
|
||||
```
|
||||
|
||||
### Overview
|
||||
This repository contains an op-for-op PyTorch reimplementation of [EfficientNet](https://arxiv.org/abs/1905.11946), along with pre-trained models and examples.
|
||||
|
||||
The goal of this implementation is to be simple, highly extensible, and easy to integrate into your own projects. This implementation is a work in progress -- new features are currently being implemented.
|
||||
|
||||
At the moment, you can easily:
|
||||
* Load pretrained EfficientNet models
|
||||
* Use EfficientNet models for classification or feature extraction
|
||||
* Evaluate EfficientNet models on ImageNet or your own images
|
||||
|
||||
_Upcoming features_: In the next few days, you will be able to:
|
||||
* Train new models from scratch on ImageNet with a simple command
|
||||
* Quickly finetune an EfficientNet on your own dataset
|
||||
* Export EfficientNet models for production
|
||||
|
||||
### Table of contents
|
||||
1. [About EfficientNet](#about-efficientnet)
|
||||
2. [About EfficientNet-PyTorch](#about-efficientnet-pytorch)
|
||||
3. [Installation](#installation)
|
||||
4. [Usage](#usage)
|
||||
* [Load pretrained models](#loading-pretrained-models)
|
||||
* [Example: Classify](#example-classification)
|
||||
* [Example: Extract features](#example-feature-extraction)
|
||||
* [Example: Export to ONNX](#example-export)
|
||||
5. [Contributing](#contributing)
|
||||
|
||||
### About EfficientNet
|
||||
|
||||
If you're new to EfficientNets, here is an explanation straight from the official TensorFlow implementation:
|
||||
|
||||
EfficientNets are a family of image classification models, which achieve state-of-the-art accuracy, yet being an order-of-magnitude smaller and faster than previous models. We develop EfficientNets based on AutoML and Compound Scaling. In particular, we first use [AutoML Mobile framework](https://ai.googleblog.com/2018/08/mnasnet-towards-automating-design-of.html) to develop a mobile-size baseline network, named as EfficientNet-B0; Then, we use the compound scaling method to scale up this baseline to obtain EfficientNet-B1 to B7.
|
||||
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td>
|
||||
<img src="https://raw.githubusercontent.com/tensorflow/tpu/master/models/official/efficientnet/g3doc/params.png" width="100%" />
|
||||
</td>
|
||||
<td>
|
||||
<img src="https://raw.githubusercontent.com/tensorflow/tpu/master/models/official/efficientnet/g3doc/flops.png" width="90%" />
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
EfficientNets achieve state-of-the-art accuracy on ImageNet with an order of magnitude better efficiency:
|
||||
|
||||
|
||||
* In high-accuracy regime, our EfficientNet-B7 achieves state-of-the-art 84.4% top-1 / 97.1% top-5 accuracy on ImageNet with 66M parameters and 37B FLOPS, being 8.4x smaller and 6.1x faster on CPU inference than previous best [Gpipe](https://arxiv.org/abs/1811.06965).
|
||||
|
||||
* In middle-accuracy regime, our EfficientNet-B1 is 7.6x smaller and 5.7x faster on CPU inference than [ResNet-152](https://arxiv.org/abs/1512.03385), with similar ImageNet accuracy.
|
||||
|
||||
* Compared with the widely used [ResNet-50](https://arxiv.org/abs/1512.03385), our EfficientNet-B4 improves the top-1 accuracy from 76.3% of ResNet-50 to 82.6% (+6.3%), under similar FLOPS constraint.
|
||||
|
||||
### About EfficientNet PyTorch
|
||||
|
||||
EfficientNet PyTorch is a PyTorch re-implementation of EfficientNet. It is consistent with the [original TensorFlow implementation](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet), such that it is easy to load weights from a TensorFlow checkpoint. At the same time, we aim to make our PyTorch implementation as simple, flexible, and extensible as possible.
|
||||
|
||||
If you have any feature requests or questions, feel free to leave them as GitHub issues!
|
||||
|
||||
### Installation
|
||||
|
||||
Install via pip:
|
||||
```bash
|
||||
pip install efficientnet_pytorch
|
||||
```
|
||||
|
||||
Or install from source:
|
||||
```bash
|
||||
git clone https://github.com/lukemelas/EfficientNet-PyTorch
|
||||
cd EfficientNet-PyTorch
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
### Usage
|
||||
|
||||
#### Loading pretrained models
|
||||
|
||||
Load an EfficientNet:
|
||||
```python
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
model = EfficientNet.from_name('efficientnet-b0')
|
||||
```
|
||||
|
||||
Load a pretrained EfficientNet:
|
||||
```python
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
model = EfficientNet.from_pretrained('efficientnet-b0')
|
||||
```
|
||||
|
||||
Note that pretrained models have been released for `N=0,1,2,3,4,5,6,7`, so `.from_pretrained` supports `'efficientnet-b{N}'` for `N=0,1,2,3,4,5,6,7`.
|
||||
|
||||
Details about the models are below:
|
||||
|
||||
| *Name* |*# Params*|*Top-1 Acc.*|*Pretrained?*|
|
||||
|:-----------------:|:--------:|:----------:|:-----------:|
|
||||
| `efficientnet-b0` | 5.3M | 76.3 | ✓ |
|
||||
| `efficientnet-b1` | 7.8M | 78.8 | ✓ |
|
||||
| `efficientnet-b2` | 9.2M | 79.8 | ✓ |
|
||||
| `efficientnet-b3` | 12M | 81.1 | ✓ |
|
||||
| `efficientnet-b4` | 19M | 82.6 | ✓ |
|
||||
| `efficientnet-b5` | 30M | 83.3 | ✓ |
|
||||
| `efficientnet-b6` | 43M | 84.0 | ✓ |
|
||||
| `efficientnet-b7` | 66M | 84.4 | ✓ |
|
||||
|
||||
|
||||
#### Example: Classification
|
||||
|
||||
Below is a simple, complete example. It may also be found as a jupyter notebook in `examples/simple` or as a [Colab Notebook](https://colab.research.google.com/drive/1Jw28xZ1NJq4Cja4jLe6tJ6_F5lCzElb4).
|
||||
|
||||
We assume that in your current directory, there is a `img.jpg` file and a `labels_map.txt` file (ImageNet class names). These are both included in `examples/simple`.
|
||||
|
||||
```python
|
||||
import json
|
||||
from PIL import Image
|
||||
import torch
|
||||
from torchvision import transforms
|
||||
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
model = EfficientNet.from_pretrained('efficientnet-b0')
|
||||
|
||||
# Preprocess image
|
||||
tfms = transforms.Compose([transforms.Resize(224), transforms.ToTensor(),
|
||||
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),])
|
||||
img = tfms(Image.open('img.jpg')).unsqueeze(0)
|
||||
print(img.shape) # torch.Size([1, 3, 224, 224])
|
||||
|
||||
# Load ImageNet class names
|
||||
labels_map = json.load(open('labels_map.txt'))
|
||||
labels_map = [labels_map[str(i)] for i in range(1000)]
|
||||
|
||||
# Classify
|
||||
model.eval()
|
||||
with torch.no_grad():
|
||||
outputs = model(img)
|
||||
|
||||
# Print predictions
|
||||
print('-----')
|
||||
for idx in torch.topk(outputs, k=5).indices.squeeze(0).tolist():
|
||||
prob = torch.softmax(outputs, dim=1)[0, idx].item()
|
||||
print('{label:<75} ({p:.2f}%)'.format(label=labels_map[idx], p=prob*100))
|
||||
```
|
||||
|
||||
#### Example: Feature Extraction
|
||||
|
||||
You can easily extract features with `model.extract_features`:
|
||||
```python
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
model = EfficientNet.from_pretrained('efficientnet-b0')
|
||||
|
||||
# ... image preprocessing as in the classification example ...
|
||||
print(img.shape) # torch.Size([1, 3, 224, 224])
|
||||
|
||||
features = model.extract_features(img)
|
||||
print(features.shape) # torch.Size([1, 1280, 7, 7])
|
||||
```
|
||||
|
||||
#### Example: Export to ONNX
|
||||
|
||||
Exporting to ONNX for deploying to production is now simple:
|
||||
```python
|
||||
import torch
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
|
||||
model = EfficientNet.from_pretrained('efficientnet-b1')
|
||||
dummy_input = torch.randn(10, 3, 240, 240)
|
||||
|
||||
torch.onnx.export(model, dummy_input, "test-b1.onnx", verbose=True)
|
||||
```
|
||||
|
||||
[Here](https://colab.research.google.com/drive/1rOAEXeXHaA8uo3aG2YcFDHItlRJMV0VP) is a Colab example.
|
||||
|
||||
|
||||
#### ImageNet
|
||||
|
||||
See `examples/imagenet` for details about evaluating on ImageNet.
|
||||
|
||||
### Contributing
|
||||
|
||||
If you find a bug, create a GitHub issue, or even better, submit a pull request. Similarly, if you have questions, simply post them as GitHub issues.
|
||||
|
||||
I look forward to seeing what the community does with these models!
|
||||
+45
@@ -0,0 +1,45 @@
|
||||
# EfficientNet PyTorch
|
||||
|
||||
## About EfficientNet
|
||||
|
||||
If you're new to EfficientNets, here is an explanation straight from the official TensorFlow implementation:
|
||||
|
||||
EfficientNets are a family of image classification models, which achieve state-of-the-art accuracy, yet being an order-of-magnitude smaller and faster than previous models. We develop EfficientNets based on AutoML and Compound Scaling. In particular, we first use [AutoML Mobile framework](https://ai.googleblog.com/2018/08/mnasnet-towards-automating-design-of.html) to develop a mobile-size baseline network, named as EfficientNet-B0; Then, we use the compound scaling method to scale up this baseline to obtain EfficientNet-B1 to B7.
|
||||
|
||||
EfficientNets achieve state-of-the-art accuracy on ImageNet with an order of magnitude better efficiency:
|
||||
|
||||
|
||||
* In high-accuracy regime, our EfficientNet-B7 achieves state-of-the-art 84.4% top-1 / 97.1% top-5 accuracy on ImageNet with 66M parameters and 37B FLOPS, being 8.4x smaller and 6.1x faster on CPU inference than previous best [Gpipe](https://arxiv.org/abs/1811.06965).
|
||||
|
||||
* In middle-accuracy regime, our EfficientNet-B1 is 7.6x smaller and 5.7x faster on CPU inference than [ResNet-152](https://arxiv.org/abs/1512.03385), with similar ImageNet accuracy.
|
||||
|
||||
* Compared with the widely used [ResNet-50](https://arxiv.org/abs/1512.03385), our EfficientNet-B4 improves the top-1 accuracy from 76.3% of ResNet-50 to 82.6% (+6.3%), under similar FLOPS constraint.
|
||||
|
||||
## About EfficientNet PyTorch NPU
|
||||
|
||||
The source code is based on the open-source project https://github.com/lukemelas/EfficientNet-PyTorch, with as few modifications as possible.
|
||||
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Train on 1 NPU:
|
||||
|
||||
(1) modify the last line in npu_1p.sh with the particular params:
|
||||
|
||||
* fp32: taskset -c 0-64 python3.7 examples/imagenet/main.py --data=/data/imagenet --arch=efficientnet-b0 --batch-size=256 --lr=0.2 --epochs=200 --autoaug --npu=0
|
||||
* O1: taskset -c 0-64 python3.7 examples/imagenet/main.py --data=/data/imagenet --arch=efficientnet-b0 --batch-size=256 --lr=0.2 --epochs=200 --autoaug --npu=0 --amp --pm=O1 --loss_scale=1024
|
||||
* O2: taskset -c 0-64 python3.7 examples/imagenet/main.py --data=/data/imagenet --arch=efficientnet-b0 --batch-size=256 --lr=0.2 --epochs=200 --autoaug --npu=0 --amp --pm=O2 --loss_scale=128
|
||||
|
||||
(2) Execute run.sh. All training logs will be recorded in nohup.out.
|
||||
|
||||
## Known issues:
|
||||
|
||||
* Distributed training is NOT available.
|
||||
* top1/top5 accuracy is about 2% lower than on GPU in the same setting (dropout).
|
||||
* O2 performance is about 50 fps lower than on GPU in the same setting (dropout, depthwiseconv2d).
|
||||
* torch.rand is replaced with numpy implementation due to the lack of AICPU operator (aicpu).
|
||||
* momentum has to be set to 0 due to logsoftmax precision (logsoftmax).
|
||||
|
||||
|
||||
|
||||
|
||||
+12
@@ -0,0 +1,12 @@
|
||||
__version__ = "0.7.0"
|
||||
from .model import EfficientNet
|
||||
from .utils import (
|
||||
GlobalParams,
|
||||
BlockArgs,
|
||||
BlockDecoder,
|
||||
efficientnet,
|
||||
get_model_params,
|
||||
)
|
||||
from .auto_augment import rand_augment_transform, augment_and_mix_transform, auto_augment_transform
|
||||
from .rmsprop_tf import RMSpropTF
|
||||
|
||||
+817
@@ -0,0 +1,817 @@
|
||||
""" AutoAugment, RandAugment, and AugMix for PyTorch
|
||||
|
||||
This code implements the searched ImageNet policies with various tweaks and improvements and
|
||||
does not include any of the search code.
|
||||
|
||||
AA and RA Implementation adapted from:
|
||||
https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/autoaugment.py
|
||||
|
||||
AugMix adapted from:
|
||||
https://github.com/google-research/augmix
|
||||
|
||||
Papers:
|
||||
AutoAugment: Learning Augmentation Policies from Data - https://arxiv.org/abs/1805.09501
|
||||
Learning Data Augmentation Strategies for Object Detection - https://arxiv.org/abs/1906.11172
|
||||
RandAugment: Practical automated data augmentation... - https://arxiv.org/abs/1909.13719
|
||||
AugMix: A Simple Data Processing Method to Improve Robustness and Uncertainty - https://arxiv.org/abs/1912.02781
|
||||
|
||||
Hacked together by Ross Wightman
|
||||
"""
|
||||
import random
|
||||
import math
|
||||
import re
|
||||
from PIL import Image, ImageOps, ImageEnhance, ImageChops
|
||||
import PIL
|
||||
import numpy as np
|
||||
|
||||
|
||||
# (major, minor) of the installed PIL/Pillow, compared against by the
# version-dependent code paths in this module.
_PIL_VER = tuple(int(part) for part in PIL.__version__.split('.')[:2])

# Grey RGB triple; also used as the default `img_mean` hyper-parameter below.
_FILL = (128, 128, 128)

# This signifies the max integer that the controller RNN could predict for the
# augmentation scheme.
_MAX_LEVEL = 10.

# Default augmentation hyper-parameters.
_HPARAMS_DEFAULT = {
    'translate_const': 250,
    'img_mean': _FILL,
}

# Pair of PIL resampling filters (bilinear, bicubic).
_RANDOM_INTERPOLATION = (Image.BILINEAR, Image.BICUBIC)
|
||||
|
||||
|
||||
def _interpolation(kwargs):
    """Pop 'resample' from ``kwargs`` and resolve it to a single filter.

    When the key is absent, ``Image.BILINEAR`` is used. A list or tuple value
    is treated as a pool of candidates and one is drawn with
    ``random.choice``; any other value is returned unchanged.

    Note: ``kwargs`` is mutated (the 'resample' key is removed).
    """
    candidate = kwargs.pop('resample', Image.BILINEAR)
    if not isinstance(candidate, (list, tuple)):
        return candidate
    return random.choice(candidate)
|
||||
|
||||
|
||||
def _check_args_tf(kwargs):
    """Normalise transform keyword arguments in place.

    On PIL versions older than 5.0 any 'fillcolor' entry is discarded, and
    'resample' is always replaced by the single filter chosen by
    ``_interpolation``.
    """
    if _PIL_VER < (5, 0):
        # Discard the key if present; a missing key is not an error.
        kwargs.pop('fillcolor', None)
    kwargs['resample'] = _interpolation(kwargs)
|
||||
|
||||
|
||||
def shear_x(img, factor, **kwargs):
    """Apply an affine transform with coefficients (1, factor, 0, 0, 1, 0).

    The output has the same size as ``img``. ``kwargs`` are normalised by
    ``_check_args_tf`` and forwarded to ``img.transform``.
    """
    _check_args_tf(kwargs)
    coeffs = (1, factor, 0, 0, 1, 0)
    return img.transform(img.size, Image.AFFINE, coeffs, **kwargs)
|
||||
|
||||
|
||||
def shear_y(img, factor, **kwargs):
    """Apply an affine transform with coefficients (1, 0, 0, factor, 1, 0).

    The output has the same size as ``img``. ``kwargs`` are normalised by
    ``_check_args_tf`` and forwarded to ``img.transform``.
    """
    _check_args_tf(kwargs)
    coeffs = (1, 0, 0, factor, 1, 0)
    return img.transform(img.size, Image.AFFINE, coeffs, **kwargs)
|
||||
|
||||
|
||||
def translate_x_rel(img, pct, **kwargs):
    """Translate ``img`` horizontally by ``pct`` times its width.

    The offset is placed in the x-translation slot of the affine
    coefficients; the output has the same size as ``img``.
    """
    offset = pct * img.size[0]
    _check_args_tf(kwargs)
    coeffs = (1, 0, offset, 0, 1, 0)
    return img.transform(img.size, Image.AFFINE, coeffs, **kwargs)
|
||||
|
||||
|
||||
def translate_y_rel(img, pct, **kwargs):
    """Translate ``img`` vertically by ``pct`` times its height.

    The offset is placed in the y-translation slot of the affine
    coefficients; the output has the same size as ``img``.
    """
    offset = pct * img.size[1]
    _check_args_tf(kwargs)
    coeffs = (1, 0, 0, 0, 1, offset)
    return img.transform(img.size, Image.AFFINE, coeffs, **kwargs)
|
||||
|
||||
|
||||
def translate_x_abs(img, pixels, **kwargs):
    """Translate ``img`` horizontally by an absolute ``pixels`` offset.

    The output has the same size as ``img``; ``kwargs`` are normalised by
    ``_check_args_tf`` before being forwarded to ``img.transform``.
    """
    _check_args_tf(kwargs)
    coeffs = (1, 0, pixels, 0, 1, 0)
    return img.transform(img.size, Image.AFFINE, coeffs, **kwargs)
|
||||
|
||||
|
||||
def translate_y_abs(img, pixels, **kwargs):
    """Translate ``img`` vertically by an absolute ``pixels`` offset.

    The output has the same size as ``img``; ``kwargs`` are normalised by
    ``_check_args_tf`` before being forwarded to ``img.transform``.
    """
    _check_args_tf(kwargs)
    coeffs = (1, 0, 0, 0, 1, pixels)
    return img.transform(img.size, Image.AFFINE, coeffs, **kwargs)
|
||||
|
||||
|
||||
def rotate(img, degrees, **kwargs):
    """Rotate ``img`` by ``degrees``, choosing a code path by PIL version.

    * PIL >= 5.2: delegate to ``img.rotate`` with all normalised kwargs.
    * PIL 5.0 / 5.1: build the affine coefficients for a rotation about the
      image centre by hand and apply them with ``img.transform``.
    * PIL < 5.0: call ``img.rotate`` passing only the resample filter.
    """
    _check_args_tf(kwargs)

    if _PIL_VER >= (5, 2):
        return img.rotate(degrees, **kwargs)
    if _PIL_VER < (5, 0):
        return img.rotate(degrees, resample=kwargs['resample'])

    # PIL 5.0 / 5.1 path: rotation about the centre expressed as an affine map.
    width, height = img.size
    center_x, center_y = width / 2.0, height / 2.0
    theta = -math.radians(degrees)

    cos_t = round(math.cos(theta), 15)
    sin_t = round(math.sin(theta), 15)
    neg_sin_t = round(-math.sin(theta), 15)

    # Push the negated centre through the pure rotation (zero translation),
    # then shift back by the centre to obtain the translation terms.
    shift_x = cos_t * -center_x + sin_t * -center_y + 0.0
    shift_y = neg_sin_t * -center_x + cos_t * -center_y + 0.0

    coeffs = [
        cos_t,
        sin_t,
        shift_x + center_x,
        neg_sin_t,
        cos_t,
        shift_y + center_y,
    ]
    return img.transform(img.size, Image.AFFINE, coeffs, **kwargs)
|
||||
|
||||
|
||||
def auto_contrast(img, **__):
    """Return ``ImageOps.autocontrast(img)``; extra keyword arguments are ignored."""
    result = ImageOps.autocontrast(img)
    return result
|
||||
|
||||
|
||||
def invert(img, **__):
    """Return ``ImageOps.invert(img)``; extra keyword arguments are ignored."""
    result = ImageOps.invert(img)
    return result
|
||||
|
||||
|
||||
def equalize(img, **__):
    """Return ``ImageOps.equalize(img)``; extra keyword arguments are ignored."""
    result = ImageOps.equalize(img)
    return result
|
||||
|
||||
|
||||
def solarize(img, thresh, **__):
    """Return ``ImageOps.solarize(img, thresh)``; extra keyword arguments are ignored."""
    result = ImageOps.solarize(img, thresh)
    return result
|
||||
|
||||
|
||||
def solarize_add(img, add, thresh=128, **__):
    """Add ``add`` (capped at 255) to every pixel value below ``thresh``.

    Only images whose ``mode`` is "L" or "RGB" are modified, via ``img.point``
    with a 256-entry lookup table (repeated three times for "RGB"). Any other
    mode returns ``img`` unchanged.
    """
    table = [min(255, value + add) if value < thresh else value
             for value in range(256)]
    if img.mode not in ("L", "RGB"):
        return img
    if img.mode == "RGB":
        table = table * 3
    return img.point(table)
|
||||
|
||||
|
||||
def posterize(img, bits_to_keep, **__):
    """Posterize ``img`` via ``ImageOps.posterize``; 8 or more bits returns ``img`` as is."""
    return img if bits_to_keep >= 8 else ImageOps.posterize(img, bits_to_keep)
|
||||
|
||||
|
||||
def contrast(img, factor, **__):
    """Return ``ImageEnhance.Contrast(img).enhance(factor)``; extra keyword arguments are ignored."""
    enhancer = ImageEnhance.Contrast(img)
    return enhancer.enhance(factor)
|
||||
|
||||
|
||||
def color(img, factor, **__):
    """Return ``ImageEnhance.Color(img).enhance(factor)``; extra keyword arguments are ignored."""
    enhancer = ImageEnhance.Color(img)
    return enhancer.enhance(factor)
|
||||
|
||||
|
||||
def brightness(img, factor, **__):
    """Return ``ImageEnhance.Brightness(img).enhance(factor)``; extra keyword arguments are ignored."""
    enhancer = ImageEnhance.Brightness(img)
    return enhancer.enhance(factor)
|
||||
|
||||
|
||||
def sharpness(img, factor, **__):
    """Return ``ImageEnhance.Sharpness(img).enhance(factor)``; extra keyword arguments are ignored."""
    enhancer = ImageEnhance.Sharpness(img)
    return enhancer.enhance(factor)
|
||||
|
||||
|
||||
def _randomly_negate(v):
|
||||
"""With 50% prob, negate the value"""
|
||||
return -v if random.random() > 0.5 else v
|
||||
|
||||
|
||||
def _rotate_level_to_arg(level, _hparams):
    """Scale ``level`` to a rotation magnitude in [0, 30], randomly negate it, return a 1-tuple."""
    degrees = _randomly_negate((level / _MAX_LEVEL) * 30.)
    return (degrees,)
|
||||
|
||||
|
||||
def _enhance_level_to_arg(level, _hparams):
    """Scale ``level`` linearly to an enhancement factor in [0.1, 1.9], returned as a 1-tuple."""
    factor = (level / _MAX_LEVEL) * 1.8 + 0.1
    return (factor,)
|
||||
|
||||
|
||||
def _enhance_increasing_level_to_arg(level, _hparams):
    """Map ``level`` to an enhancement factor that moves away from 1.0 as the level grows.

    1.0 is the 'no change' factor; the offset (up to 0.9) gets a random sign,
    so the result lies in [0.1, 1.9]. Returned as a 1-tuple.
    """
    offset = (level / _MAX_LEVEL) * .9
    return (1.0 + _randomly_negate(offset),)
|
||||
|
||||
|
||||
def _shear_level_to_arg(level, _hparams):
    """Scale ``level`` to a shear magnitude in [0, 0.3], randomly negate it, return a 1-tuple."""
    shear = _randomly_negate((level / _MAX_LEVEL) * 0.3)
    return (shear,)
|
||||
|
||||
|
||||
def _translate_abs_level_to_arg(level, hparams):
    """Scale ``level`` by ``hparams['translate_const']``, randomly negate it, return a 1-tuple."""
    max_shift = float(hparams['translate_const'])
    shift = _randomly_negate((level / _MAX_LEVEL) * max_shift)
    return (shift,)
|
||||
|
||||
|
||||
def _translate_rel_level_to_arg(level, hparams):
    """Scale ``level`` by ``hparams['translate_pct']`` (0.45 if absent), randomly negate it.

    With the default this gives a fraction in [-0.45, 0.45]. Returned as a 1-tuple.
    """
    max_fraction = hparams.get('translate_pct', 0.45)
    fraction = _randomly_negate((level / _MAX_LEVEL) * max_fraction)
    return (fraction,)
|
||||
|
||||
|
||||
def _posterize_level_to_arg(level, _hparams):
    """Map ``level`` to a bit count in [0, 4], returned as a 1-tuple.

    Follows the TensorFlow TPU EfficientNet implementation: keep 0 up to 4 MSB
    of the original image, so augmentation severity decreases with level.
    """
    bits = int((level / _MAX_LEVEL) * 4)
    return (bits,)
|
||||
|
||||
|
||||
def _posterize_increasing_level_to_arg(level, hparams):
    """Map ``level`` to a bit count running from 4 down to 0, returned as a 1-tuple.

    Follows the TensorFlow models research / UDA implementations: keep 4 down
    to 0 MSB of the original image, so severity increases with level.
    """
    (decreasing_bits,) = _posterize_level_to_arg(level, hparams)
    return (4 - decreasing_bits,)
|
||||
|
||||
|
||||
def _posterize_original_level_to_arg(level, _hparams):
    """Map ``level`` to a bit count in [4, 8], returned as a 1-tuple.

    Follows the original AutoAugment paper description: keep 4 up to 8 MSB of
    the image, so severity decreases with level.
    """
    bits = 4 + int((level / _MAX_LEVEL) * 4)
    return (bits,)
|
||||
|
||||
|
||||
def _solarize_level_to_arg(level, _hparams):
    """Map ``level`` to a solarize threshold in [0, 256], returned as a 1-tuple.

    Severity of the augmentation decreases with level.
    """
    threshold = int((level / _MAX_LEVEL) * 256)
    return (threshold,)
|
||||
|
||||
|
||||
def _solarize_increasing_level_to_arg(level, _hparams):
    """Map ``level`` to a solarize threshold running from 256 down to 0, returned as a 1-tuple.

    Severity of the augmentation increases with level.
    """
    (decreasing_threshold,) = _solarize_level_to_arg(level, _hparams)
    return (256 - decreasing_threshold,)
|
||||
|
||||
|
||||
def _solarize_add_level_to_arg(level, _hparams):
    """Map ``level`` to an additive amount in [0, 110], returned as a 1-tuple."""
    amount = int((level / _MAX_LEVEL) * 110)
    return (amount,)
|
||||
|
||||
|
||||
# Maps each augmentation name to the function that converts a magnitude level
# (plus hparams) into the positional-argument tuple for that op. ``None`` means
# the op receives no level-derived arguments (see AugmentOp.__call__).
LEVEL_TO_ARG = {
    'AutoContrast': None,
    'Equalize': None,
    'Invert': None,
    'Rotate': _rotate_level_to_arg,
    # There are several variations of the posterize level scaling in various Tensorflow/Google repositories/papers
    'Posterize': _posterize_level_to_arg,
    'PosterizeIncreasing': _posterize_increasing_level_to_arg,
    'PosterizeOriginal': _posterize_original_level_to_arg,
    'Solarize': _solarize_level_to_arg,
    'SolarizeIncreasing': _solarize_increasing_level_to_arg,
    'SolarizeAdd': _solarize_add_level_to_arg,
    'Color': _enhance_level_to_arg,
    'ColorIncreasing': _enhance_increasing_level_to_arg,
    'Contrast': _enhance_level_to_arg,
    'ContrastIncreasing': _enhance_increasing_level_to_arg,
    'Brightness': _enhance_level_to_arg,
    'BrightnessIncreasing': _enhance_increasing_level_to_arg,
    'Sharpness': _enhance_level_to_arg,
    'SharpnessIncreasing': _enhance_increasing_level_to_arg,
    'ShearX': _shear_level_to_arg,
    'ShearY': _shear_level_to_arg,
    'TranslateX': _translate_abs_level_to_arg,
    'TranslateY': _translate_abs_level_to_arg,
    'TranslateXRel': _translate_rel_level_to_arg,
    'TranslateYRel': _translate_rel_level_to_arg,
}
|
||||
|
||||
|
||||
# Maps each augmentation name to the function that applies it to an image.
# The '...Increasing' / '...Original' variants reuse the same op as the base
# name; they differ only in their LEVEL_TO_ARG entry.
NAME_TO_OP = {
    'AutoContrast': auto_contrast,
    'Equalize': equalize,
    'Invert': invert,
    'Rotate': rotate,
    'Posterize': posterize,
    'PosterizeIncreasing': posterize,
    'PosterizeOriginal': posterize,
    'Solarize': solarize,
    'SolarizeIncreasing': solarize,
    'SolarizeAdd': solarize_add,
    'Color': color,
    'ColorIncreasing': color,
    'Contrast': contrast,
    'ContrastIncreasing': contrast,
    'Brightness': brightness,
    'BrightnessIncreasing': brightness,
    'Sharpness': sharpness,
    'SharpnessIncreasing': sharpness,
    'ShearX': shear_x,
    'ShearY': shear_y,
    'TranslateX': translate_x_abs,
    'TranslateY': translate_y_abs,
    'TranslateXRel': translate_x_rel,
    'TranslateYRel': translate_y_rel,
}
|
||||
|
||||
|
||||
class AugmentOp:
    """One named augmentation applied with a probability and a magnitude.

    The op function comes from ``NAME_TO_OP`` and the magnitude-to-argument
    converter from ``LEVEL_TO_ARG``.
    """

    def __init__(self, name, prob=0.5, magnitude=10, hparams=None):
        if not hparams:
            hparams = _HPARAMS_DEFAULT
        self.aug_fn = NAME_TO_OP[name]
        self.level_fn = LEVEL_TO_ARG[name]
        self.prob = prob
        self.magnitude = magnitude
        self.hparams = hparams.copy()
        # Keyword arguments forwarded to every op call.
        self.kwargs = {
            'fillcolor': hparams.get('img_mean', _FILL),
            'resample': hparams.get('interpolation', _RANDOM_INTERPOLATION),
        }

        # If magnitude_std is > 0, the otherwise fixed magnitude is sampled from
        # a normal distribution with mean `magnitude` and std-dev `magnitude_std`.
        # NOTE (original author): own hack, being tested, not in papers or
        # reference implementations.
        self.magnitude_std = self.hparams.get('magnitude_std', 0)

    def __call__(self, img):
        """Apply the op to ``img``, or return ``img`` untouched when the op is randomly skipped."""
        skipped = self.prob < 1.0 and random.random() > self.prob
        if skipped:
            return img

        level = self.magnitude
        if self.magnitude_std and self.magnitude_std > 0:
            level = random.gauss(level, self.magnitude_std)
        # clip to valid range
        level = min(_MAX_LEVEL, max(0, level))

        if self.level_fn is None:
            op_args = tuple()
        else:
            op_args = self.level_fn(level, self.hparams)
        return self.aug_fn(img, *op_args, **self.kwargs)
|
||||
|
||||
|
||||
def auto_augment_policy_v0(hparams):
    """Build the ImageNet 'v0' policy as a list of sub-policies of ``AugmentOp``.

    The policy is taken from the TPU EfficientNet implementation; the original
    author could not find a paper reference. Each entry is
    ``(op name, probability, magnitude)``.
    """
    sub_policies = [
        [('Equalize', 0.8, 1), ('ShearY', 0.8, 4)],
        [('Color', 0.4, 9), ('Equalize', 0.6, 3)],
        [('Color', 0.4, 1), ('Rotate', 0.6, 8)],
        [('Solarize', 0.8, 3), ('Equalize', 0.4, 7)],
        [('Solarize', 0.4, 2), ('Solarize', 0.6, 2)],
        [('Color', 0.2, 0), ('Equalize', 0.8, 8)],
        [('Equalize', 0.4, 8), ('SolarizeAdd', 0.8, 3)],
        [('ShearX', 0.2, 9), ('Rotate', 0.6, 8)],
        [('Color', 0.6, 1), ('Equalize', 1.0, 2)],
        [('Invert', 0.4, 9), ('Rotate', 0.6, 0)],
        [('Equalize', 1.0, 9), ('ShearY', 0.6, 3)],
        [('Color', 0.4, 7), ('Equalize', 0.6, 0)],
        [('Posterize', 0.4, 6), ('AutoContrast', 0.4, 7)],
        [('Solarize', 0.6, 8), ('Color', 0.6, 9)],
        [('Solarize', 0.2, 4), ('Rotate', 0.8, 9)],
        [('Rotate', 1.0, 7), ('TranslateYRel', 0.8, 9)],
        [('ShearX', 0.0, 0), ('Solarize', 0.8, 4)],
        [('ShearY', 0.8, 0), ('Color', 0.6, 4)],
        [('Color', 1.0, 0), ('Rotate', 0.6, 2)],
        [('Equalize', 0.8, 4), ('Equalize', 0.0, 8)],
        [('Equalize', 1.0, 4), ('AutoContrast', 0.6, 2)],
        [('ShearY', 0.4, 7), ('SolarizeAdd', 0.6, 7)],
        [('Posterize', 0.8, 2), ('Solarize', 0.6, 10)],  # This results in black image with Tpu posterize
        [('Solarize', 0.6, 8), ('Equalize', 0.6, 1)],
        [('Color', 0.8, 6), ('Rotate', 0.4, 5)],
    ]
    return [
        [AugmentOp(*op_spec, hparams=hparams) for op_spec in sub_policy]
        for sub_policy in sub_policies
    ]
|
||||
|
||||
|
||||
def auto_augment_policy_v0r(hparams):
    """Build the ImageNet 'v0r' policy as a list of sub-policies of ``AugmentOp``.

    Same as the TPU EfficientNet 'v0' policy but using the Posterize variation
    from the Google research implementation (number of bits discarded
    increases with magnitude). Each entry is ``(op name, probability, magnitude)``.
    """
    sub_policies = [
        [('Equalize', 0.8, 1), ('ShearY', 0.8, 4)],
        [('Color', 0.4, 9), ('Equalize', 0.6, 3)],
        [('Color', 0.4, 1), ('Rotate', 0.6, 8)],
        [('Solarize', 0.8, 3), ('Equalize', 0.4, 7)],
        [('Solarize', 0.4, 2), ('Solarize', 0.6, 2)],
        [('Color', 0.2, 0), ('Equalize', 0.8, 8)],
        [('Equalize', 0.4, 8), ('SolarizeAdd', 0.8, 3)],
        [('ShearX', 0.2, 9), ('Rotate', 0.6, 8)],
        [('Color', 0.6, 1), ('Equalize', 1.0, 2)],
        [('Invert', 0.4, 9), ('Rotate', 0.6, 0)],
        [('Equalize', 1.0, 9), ('ShearY', 0.6, 3)],
        [('Color', 0.4, 7), ('Equalize', 0.6, 0)],
        [('PosterizeIncreasing', 0.4, 6), ('AutoContrast', 0.4, 7)],
        [('Solarize', 0.6, 8), ('Color', 0.6, 9)],
        [('Solarize', 0.2, 4), ('Rotate', 0.8, 9)],
        [('Rotate', 1.0, 7), ('TranslateYRel', 0.8, 9)],
        [('ShearX', 0.0, 0), ('Solarize', 0.8, 4)],
        [('ShearY', 0.8, 0), ('Color', 0.6, 4)],
        [('Color', 1.0, 0), ('Rotate', 0.6, 2)],
        [('Equalize', 0.8, 4), ('Equalize', 0.0, 8)],
        [('Equalize', 1.0, 4), ('AutoContrast', 0.6, 2)],
        [('ShearY', 0.4, 7), ('SolarizeAdd', 0.6, 7)],
        [('PosterizeIncreasing', 0.8, 2), ('Solarize', 0.6, 10)],
        [('Solarize', 0.6, 8), ('Equalize', 0.6, 1)],
        [('Color', 0.8, 6), ('Rotate', 0.4, 5)],
    ]
    return [
        [AugmentOp(*op_spec, hparams=hparams) for op_spec in sub_policy]
        for sub_policy in sub_policies
    ]
|
||||
|
||||
|
||||
def auto_augment_policy_original(hparams):
    """Build the ImageNet policy from https://arxiv.org/abs/1805.09501.

    Returns a list of sub-policies, each a list of ``AugmentOp`` built from
    ``(op name, probability, magnitude)`` entries.
    """
    sub_policies = [
        [('PosterizeOriginal', 0.4, 8), ('Rotate', 0.6, 9)],
        [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)],
        [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)],
        [('PosterizeOriginal', 0.6, 7), ('PosterizeOriginal', 0.6, 6)],
        [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)],
        [('Equalize', 0.4, 4), ('Rotate', 0.8, 8)],
        [('Solarize', 0.6, 3), ('Equalize', 0.6, 7)],
        [('PosterizeOriginal', 0.8, 5), ('Equalize', 1.0, 2)],
        [('Rotate', 0.2, 3), ('Solarize', 0.6, 8)],
        [('Equalize', 0.6, 8), ('PosterizeOriginal', 0.4, 6)],
        [('Rotate', 0.8, 8), ('Color', 0.4, 0)],
        [('Rotate', 0.4, 9), ('Equalize', 0.6, 2)],
        [('Equalize', 0.0, 7), ('Equalize', 0.8, 8)],
        [('Invert', 0.6, 4), ('Equalize', 1.0, 8)],
        [('Color', 0.6, 4), ('Contrast', 1.0, 8)],
        [('Rotate', 0.8, 8), ('Color', 1.0, 2)],
        [('Color', 0.8, 8), ('Solarize', 0.8, 7)],
        [('Sharpness', 0.4, 7), ('Invert', 0.6, 8)],
        [('ShearX', 0.6, 5), ('Equalize', 1.0, 9)],
        [('Color', 0.4, 0), ('Equalize', 0.6, 3)],
        [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)],
        [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)],
        [('Invert', 0.6, 4), ('Equalize', 1.0, 8)],
        [('Color', 0.6, 4), ('Contrast', 1.0, 8)],
        [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)],
    ]
    return [
        [AugmentOp(*op_spec, hparams=hparams) for op_spec in sub_policy]
        for sub_policy in sub_policies
    ]
|
||||
|
||||
|
||||
def auto_augment_policy_originalr(hparams):
    """Build the ImageNet policy from https://arxiv.org/abs/1805.09501 with the research posterize variation.

    Returns a list of sub-policies, each a list of ``AugmentOp`` built from
    ``(op name, probability, magnitude)`` entries.
    """
    sub_policies = [
        [('PosterizeIncreasing', 0.4, 8), ('Rotate', 0.6, 9)],
        [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)],
        [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)],
        [('PosterizeIncreasing', 0.6, 7), ('PosterizeIncreasing', 0.6, 6)],
        [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)],
        [('Equalize', 0.4, 4), ('Rotate', 0.8, 8)],
        [('Solarize', 0.6, 3), ('Equalize', 0.6, 7)],
        [('PosterizeIncreasing', 0.8, 5), ('Equalize', 1.0, 2)],
        [('Rotate', 0.2, 3), ('Solarize', 0.6, 8)],
        [('Equalize', 0.6, 8), ('PosterizeIncreasing', 0.4, 6)],
        [('Rotate', 0.8, 8), ('Color', 0.4, 0)],
        [('Rotate', 0.4, 9), ('Equalize', 0.6, 2)],
        [('Equalize', 0.0, 7), ('Equalize', 0.8, 8)],
        [('Invert', 0.6, 4), ('Equalize', 1.0, 8)],
        [('Color', 0.6, 4), ('Contrast', 1.0, 8)],
        [('Rotate', 0.8, 8), ('Color', 1.0, 2)],
        [('Color', 0.8, 8), ('Solarize', 0.8, 7)],
        [('Sharpness', 0.4, 7), ('Invert', 0.6, 8)],
        [('ShearX', 0.6, 5), ('Equalize', 1.0, 9)],
        [('Color', 0.4, 0), ('Equalize', 0.6, 3)],
        [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)],
        [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)],
        [('Invert', 0.6, 4), ('Equalize', 1.0, 8)],
        [('Color', 0.6, 4), ('Contrast', 1.0, 8)],
        [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)],
    ]
    return [
        [AugmentOp(*op_spec, hparams=hparams) for op_spec in sub_policy]
        for sub_policy in sub_policies
    ]
|
||||
|
||||
|
||||
def auto_augment_policy(name='v0', hparams=None):
    """Return the AutoAugment policy registered under ``name``.

    :param name: One of 'original', 'originalr', 'v0', 'v0r'.
    :param hparams: Hyper-parameters forwarded to the policy builder;
        ``_HPARAMS_DEFAULT`` is used when this is None or empty.
    :return: The list of sub-policies produced by the matching builder.
    :raises AssertionError: If ``name`` is not a known policy.
    """
    hparams = hparams or _HPARAMS_DEFAULT
    if name == 'original':
        return auto_augment_policy_original(hparams)
    if name == 'originalr':
        return auto_augment_policy_originalr(hparams)
    if name == 'v0':
        return auto_augment_policy_v0(hparams)
    if name == 'v0r':
        return auto_augment_policy_v0r(hparams)
    # Raised explicitly rather than via `assert False`: assert statements are
    # removed under `python -O`, which would make this silently return None.
    raise AssertionError('Unknown AA policy (%s)' % name)
|
||||
|
||||
|
||||
class AutoAugment:
    """Callable that applies one randomly chosen sub-policy to an image."""

    def __init__(self, policy):
        # Sequence of sub-policies; each sub-policy is an iterable of callables.
        self.policy = policy

    def __call__(self, img):
        """Pick a sub-policy with ``random.choice`` and run its ops in order on ``img``."""
        chosen = random.choice(self.policy)
        result = img
        for transform in chosen:
            result = transform(result)
        return result
|
||||
|
||||
|
||||
def auto_augment_transform(config_str, hparams):
    """
    Create an AutoAugment transform

    :param config_str: String defining configuration of auto augmentation. Consists of multiple sections separated by
    dashes ('-'). The first section defines the AutoAugment policy (one of 'v0', 'v0r', 'original', 'originalr').
    The remaining sections, in no specific order, determine
        'mstd' -  float std deviation of magnitude noise applied
    Ex 'original-mstd0.5' results in AutoAugment with original policy, magnitude_std 0.5

    :param hparams: Other hparams (kwargs) for the AutoAugmentation scheme. Note that 'magnitude_std' is written
    into this dict (via ``setdefault``) when an 'mstd' section is present.

    :return: A PyTorch compatible Transform

    :raises AssertionError: If a section has an unknown key.
    """
    sections = config_str.split('-')
    policy_name = sections[0]
    for section in sections[1:]:
        # Split 'key<number>' at the first digit; sections without a digit are skipped.
        parts = re.split(r'(\d.*)', section)
        if len(parts) < 2:
            continue
        key, val = parts[:2]
        if key == 'mstd':
            # noise param injected via hparams for now
            hparams.setdefault('magnitude_std', float(val))
        else:
            # Explicit raise: `assert False` would be stripped under `python -O`
            # and the unknown section silently ignored.
            raise AssertionError('Unknown AutoAugment config section')
    aa_policy = auto_augment_policy(policy_name, hparams=hparams)
    return AutoAugment(aa_policy)
|
||||
|
||||
|
||||
# Default op names for RandAugment (used by rand_augment_ops and
# rand_augment_transform when no other list is selected).
_RAND_TRANSFORMS = [
    'AutoContrast',
    'Equalize',
    'Invert',
    'Rotate',
    'Posterize',
    'Solarize',
    'SolarizeAdd',
    'Color',
    'Contrast',
    'Brightness',
    'Sharpness',
    'ShearX',
    'ShearY',
    'TranslateXRel',
    'TranslateYRel',
    #'Cutout'  # NOTE Cutout is implemented separately as random erasing
]
|
||||
|
||||
|
||||
# Op names selected by rand_augment_transform when the 'inc' config section is
# enabled; uses the '...Increasing' variants where they exist.
_RAND_INCREASING_TRANSFORMS = [
    'AutoContrast',
    'Equalize',
    'Invert',
    'Rotate',
    'PosterizeIncreasing',
    'SolarizeIncreasing',
    'SolarizeAdd',
    'ColorIncreasing',
    'ContrastIncreasing',
    'BrightnessIncreasing',
    'SharpnessIncreasing',
    'ShearX',
    'ShearY',
    'TranslateXRel',
    'TranslateYRel',
    #'Cutout'  # NOTE Cutout is implemented separately as random erasing
]
|
||||
|
||||
|
||||
|
||||
# These experimental weights are based loosely on the relative improvements mentioned in the paper.
# They may not result in increased performance, but could likely be tuned to do so.
# Keyed by op name; _select_rand_weights normalises the selected values to sum to 1.
_RAND_CHOICE_WEIGHTS_0 = {
    'Rotate': 0.3,
    'ShearX': 0.2,
    'ShearY': 0.2,
    'TranslateXRel': 0.1,
    'TranslateYRel': 0.1,
    'Color': .025,
    'Sharpness': 0.025,
    'AutoContrast': 0.025,
    'Solarize': .005,
    'SolarizeAdd': .005,
    'Contrast': .005,
    'Brightness': .005,
    'Equalize': .005,
    'Posterize': 0,
    'Invert': 0,
}
|
||||
|
||||
|
||||
def _select_rand_weights(weight_idx=0, transforms=None):
    """Return normalised selection probabilities for ``transforms``.

    Weights are looked up in ``_RAND_CHOICE_WEIGHTS_0`` (the only weight set,
    hence ``weight_idx`` must be 0) and divided by their sum. ``transforms``
    defaults to ``_RAND_TRANSFORMS``.
    """
    if not transforms:
        transforms = _RAND_TRANSFORMS
    assert weight_idx == 0  # only one set of weights currently
    table = _RAND_CHOICE_WEIGHTS_0
    weights = np.array([table[name] for name in transforms])
    return weights / np.sum(weights)
|
||||
|
||||
|
||||
def rand_augment_ops(magnitude=10, hparams=None, transforms=None):
    """Create one ``AugmentOp`` (probability 0.5) per name in ``transforms``.

    ``hparams`` defaults to ``_HPARAMS_DEFAULT`` and ``transforms`` to
    ``_RAND_TRANSFORMS`` when None or empty.
    """
    if not hparams:
        hparams = _HPARAMS_DEFAULT
    if not transforms:
        transforms = _RAND_TRANSFORMS
    ops = []
    for op_name in transforms:
        ops.append(AugmentOp(op_name, prob=0.5, magnitude=magnitude, hparams=hparams))
    return ops
|
||||
|
||||
|
||||
class RandAugment:
    """Callable that applies ``num_layers`` randomly selected ops to an image."""

    def __init__(self, ops, num_layers=2, choice_weights=None):
        self.ops, self.num_layers, self.choice_weights = ops, num_layers, choice_weights

    def __call__(self, img):
        """Draw ``num_layers`` ops with ``np.random.choice`` and apply them in order."""
        weights = self.choice_weights
        # no replacement when using weighted choice
        with_replacement = weights is None
        selected = np.random.choice(
            self.ops, self.num_layers, replace=with_replacement, p=weights)
        result = img
        for transform in selected:
            result = transform(result)
        return result
|
||||
|
||||
|
||||
def rand_augment_transform(config_str, hparams):
    """
    Create a RandAugment transform

    :param config_str: String defining configuration of random augmentation. Consists of multiple sections separated by
    dashes ('-'). The first section defines the specific variant of rand augment (currently only 'rand'). The remaining
    sections, in no specific order, determine
        'm' - integer magnitude of rand augment
        'n' - integer num layers (number of transform ops selected per image)
        'w' - integer probability weight index (index of a set of weights to influence choice of op)
        'mstd' -  float std deviation of magnitude noise applied
        'inc' - integer (bool), use augmentations that increase in severity with magnitude (default: 0)
    Ex 'rand-m9-n3-mstd0.5' results in RandAugment with magnitude 9, num_layers 3, magnitude_std 0.5
    'rand-mstd1-w0' results in magnitude_std 1.0, weights 0, default magnitude of 10 and num_layers 2

    :param hparams: Other hparams (kwargs) for the RandAugmentation scheme. Note that 'magnitude_std' is written
    into this dict (via ``setdefault``) when an 'mstd' section is present.

    :return: A PyTorch compatible Transform

    :raises AssertionError: If the first section is not 'rand' or a section has an unknown key.
    """
    magnitude = _MAX_LEVEL  # default to _MAX_LEVEL for magnitude (currently 10)
    num_layers = 2  # default to 2 ops per image
    weight_idx = None  # default to no probability weights for op choice
    transforms = _RAND_TRANSFORMS
    config = config_str.split('-')
    # Explicit raises instead of assert statements, which `python -O` strips.
    if config[0] != 'rand':
        raise AssertionError("RandAugment config must start with 'rand'")
    for c in config[1:]:
        # Split 'key<number>' at the first digit; sections without a digit are skipped.
        cs = re.split(r'(\d.*)', c)
        if len(cs) < 2:
            continue
        key, val = cs[:2]
        if key == 'mstd':
            # noise param injected via hparams for now
            hparams.setdefault('magnitude_std', float(val))
        elif key == 'inc':
            # `val` is a string, so bool(val) is True even for '0';
            # parse it as the documented integer flag instead.
            if int(val):
                transforms = _RAND_INCREASING_TRANSFORMS
        elif key == 'm':
            magnitude = int(val)
        elif key == 'n':
            num_layers = int(val)
        elif key == 'w':
            weight_idx = int(val)
        else:
            raise AssertionError('Unknown RandAugment config section')
    ra_ops = rand_augment_ops(magnitude=magnitude, hparams=hparams, transforms=transforms)
    choice_weights = None if weight_idx is None else _select_rand_weights(weight_idx)
    return RandAugment(ra_ops, num_layers, choice_weights=choice_weights)
|
||||
|
||||
|
||||
# Default op names for AugMix (used by augmix_ops when no list is given).
_AUGMIX_TRANSFORMS = [
    'AutoContrast',
    'ColorIncreasing',  # not in paper
    'ContrastIncreasing',  # not in paper
    'BrightnessIncreasing',  # not in paper
    'SharpnessIncreasing',  # not in paper
    'Equalize',
    'Rotate',
    'PosterizeIncreasing',
    'SolarizeIncreasing',
    'ShearX',
    'ShearY',
    'TranslateXRel',
    'TranslateYRel',
]
|
||||
|
||||
|
||||
def augmix_ops(magnitude=10, hparams=None, transforms=None):
    """Create one always-applied ``AugmentOp`` (probability 1.0) per name in ``transforms``.

    ``hparams`` defaults to ``_HPARAMS_DEFAULT`` and ``transforms`` to
    ``_AUGMIX_TRANSFORMS`` when None or empty.
    """
    if not hparams:
        hparams = _HPARAMS_DEFAULT
    if not transforms:
        transforms = _AUGMIX_TRANSFORMS
    ops = []
    for op_name in transforms:
        ops.append(AugmentOp(op_name, prob=1.0, magnitude=magnitude, hparams=hparams))
    return ops
|
||||
|
||||
|
||||
class AugMixAugment:
    """ AugMix Transform
    Adapted and improved from impl here: https://github.com/google-research/augmix/blob/master/imagenet.py
    From paper: 'AugMix: A Simple Data Processing Method to Improve Robustness and Uncertainty' -
    https://arxiv.org/abs/1912.02781

    Args:
        ops: Sequence of callables mapping an image to an image.
        alpha: Parameter of the Dirichlet (chain weights) and Beta (final mix) draws.
        width: Number of augmentation chains mixed together.
        depth: Ops per chain; a value <= 0 draws the depth from ``np.random.randint(1, 4)``.
        blended: Use the per-chain PIL blend path instead of the NumPy accumulation path.
    """
    def __init__(self, ops, alpha=1., width=3, depth=-1, blended=False):
        self.ops = ops
        self.alpha = alpha
        self.width = width
        self.depth = depth
        self.blended = blended  # blended mode is faster but not well tested

    def _calc_blended_weights(self, ws, m):
        """Convert scaled mixing weights into successive per-chain blend factors."""
        ws = ws * m
        cump = 1.
        rws = []
        # Walk the weights back to front, expressing each as a fraction of
        # what is still left after the later blends.
        for w in ws[::-1]:
            alpha = w / cump
            cump *= (1 - alpha)
            rws.append(alpha)
        return np.array(rws[::-1], dtype=np.float32)

    def _augment_chain(self, img):
        """Apply one randomly sampled chain of ops to ``img`` and return the result."""
        depth = self.depth if self.depth > 0 else np.random.randint(1, 4)
        ops = np.random.choice(self.ops, depth, replace=True)
        img_aug = img  # no ops are in-place, deep copy not necessary
        for op in ops:
            img_aug = op(img_aug)
        return img_aug

    def _apply_blended(self, img, mixing_weights, m):
        # A first attempt at a slightly faster mixed augmentation. Instead of
        # accumulating the mix for each chain in a Numpy array and then blending with original,
        # it recomputes the blending coefficients and applies one PIL image blend per chain.
        # TODO the results appear in the right ballpark but they differ by more than rounding.
        img_orig = img.copy()
        ws = self._calc_blended_weights(mixing_weights, m)
        for w in ws:
            img_aug = self._augment_chain(img_orig)
            img = Image.blend(img, img_aug, w)
        return img

    def _apply_basic(self, img, mixing_weights, m):
        # This is a literal adaptation of the paper/official implementation without normalizations and
        # PIL <-> Numpy conversions between every op. It is still quite CPU compute heavy compared to the
        # typical augmentation transforms, could use a GPU / Kornia implementation.
        #
        # The accumulator takes its shape from the array view of the image.
        # Building it from img.size gives (width, height, bands), which does not
        # match np.asarray(img) for non-square or single-band images.
        mixed = np.zeros(np.asarray(img).shape, dtype=np.float32)
        for mw in mixing_weights:
            img_aug = self._augment_chain(img)
            mixed += mw * np.asarray(img_aug, dtype=np.float32)
        np.clip(mixed, 0, 255., out=mixed)
        mixed = Image.fromarray(mixed.astype(np.uint8))
        return Image.blend(img, mixed, m)

    def __call__(self, img):
        """Sample chain weights and the final mix factor, then return the mixed image."""
        mixing_weights = np.float32(np.random.dirichlet([self.alpha] * self.width))
        m = np.float32(np.random.beta(self.alpha, self.alpha))
        if self.blended:
            mixed = self._apply_blended(img, mixing_weights, m)
        else:
            mixed = self._apply_basic(img, mixing_weights, m)
        return mixed
|
||||
|
||||
|
||||
def augment_and_mix_transform(config_str, hparams):
    """ Create AugMix PyTorch transform

    :param config_str: String defining configuration of the augmentation mix. Consists of multiple sections separated by
    dashes ('-'). The first section must be 'augmix'. The remaining sections, in no specific order, determine
        'm' - integer magnitude (severity) of augmentation mix (default: 3)
        'w' - integer width of augmentation chain (default: 3)
        'd' - integer depth of augmentation chain (-1 is random [1, 3], default: -1)
        'a' - float alpha passed to AugMixAugment (default: 1.0)
        'b' - integer (bool), blend each branch of chain into end result without a final blend, less CPU (default: 0)
        'mstd' -  float std deviation of magnitude noise applied (default: 0)
    Ex 'augmix-m5-w4-d2' results in AugMix with severity 5, chain width 4, chain depth 2

    :param hparams: Other hparams (kwargs) for the Augmentation transforms. Note that 'magnitude_std' is written
    into this dict (via ``setdefault``) when an 'mstd' section is present.

    :return: A PyTorch compatible Transform

    :raises AssertionError: If the first section is not 'augmix' or a section has an unknown key.
    """
    magnitude = 3
    width = 3
    depth = -1
    alpha = 1.
    blended = False
    config = config_str.split('-')
    # Explicit raises instead of assert statements, which `python -O` strips.
    if config[0] != 'augmix':
        raise AssertionError("AugMix config must start with 'augmix'")
    for c in config[1:]:
        # Split 'key<number>' at the first digit; sections without a digit are skipped.
        cs = re.split(r'(\d.*)', c)
        if len(cs) < 2:
            continue
        key, val = cs[:2]
        if key == 'mstd':
            # noise param injected via hparams for now
            hparams.setdefault('magnitude_std', float(val))
        elif key == 'm':
            magnitude = int(val)
        elif key == 'w':
            width = int(val)
        elif key == 'd':
            depth = int(val)
        elif key == 'a':
            alpha = float(val)
        elif key == 'b':
            # `val` is a string, so bool(val) is True even for '0';
            # parse it as the documented integer flag instead.
            blended = bool(int(val))
        else:
            raise AssertionError('Unknown AugMix config section')
    ops = augmix_ops(magnitude=magnitude, hparams=hparams)
    return AugMixAugment(ops, alpha=alpha, width=width, depth=depth, blended=blended)
|
||||
+432
@@ -0,0 +1,432 @@
|
||||
"""model.py - Model and module class for EfficientNet.
|
||||
They are built to mirror those in the official TensorFlow implementation.
|
||||
"""
|
||||
|
||||
# Author: lukemelas (github username)
|
||||
# Github repo: https://github.com/lukemelas/EfficientNet-PyTorch
|
||||
# With adjustments and added comments by workingcoder (github username).
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
from .utils import (
|
||||
round_filters,
|
||||
round_repeats,
|
||||
drop_connect,
|
||||
get_same_padding_conv2d,
|
||||
get_model_params,
|
||||
efficientnet_params,
|
||||
load_pretrained_weights,
|
||||
Swish,
|
||||
MemoryEfficientSwish,
|
||||
calculate_output_image_size
|
||||
)
|
||||
|
||||
class MBConvBlock(nn.Module):
|
||||
"""Mobile Inverted Residual Bottleneck Block.
|
||||
|
||||
Args:
|
||||
block_args (namedtuple): BlockArgs, defined in utils.py.
|
||||
global_params (namedtuple): GlobalParam, defined in utils.py.
|
||||
image_size (tuple or list): [image_height, image_width].
|
||||
|
||||
References:
|
||||
[1] https://arxiv.org/abs/1704.04861 (MobileNet v1)
|
||||
[2] https://arxiv.org/abs/1801.04381 (MobileNet v2)
|
||||
[3] https://arxiv.org/abs/1905.02244 (MobileNet v3)
|
||||
"""
|
||||
|
||||
def __init__(self, block_args, global_params, image_size=None):
    """Create the layers of one MBConv block.

    Args:
        block_args: Block configuration. This constructor reads ``se_ratio``,
            ``id_skip``, ``input_filters``, ``expand_ratio``, ``kernel_size``,
            ``stride`` and ``output_filters`` from it.
        global_params: Global configuration. ``batch_norm_momentum`` and
            ``batch_norm_epsilon`` are read from it.
        image_size: Passed to ``get_same_padding_conv2d`` for the expansion and
            depthwise convolutions; the value returned by
            ``calculate_output_image_size(image_size, stride)`` is used for the
            projection convolution.
    """
    super().__init__()
    self._block_args = block_args
    self._bn_mom = 1 - global_params.batch_norm_momentum # pytorch's difference from tensorflow
    self._bn_eps = global_params.batch_norm_epsilon
    # Squeeze-and-excitation is enabled only for a ratio in (0, 1].
    self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1)
    self.id_skip = block_args.id_skip  # whether to use skip connection and drop connect

    # Expansion phase (Inverted Bottleneck)
    inp = self._block_args.input_filters  # number of input channels
    oup = self._block_args.input_filters * self._block_args.expand_ratio  # number of output channels
    # The expansion conv and its batch norm only exist when expand_ratio != 1.
    if self._block_args.expand_ratio != 1:
        Conv2d = get_same_padding_conv2d(image_size=image_size)
        self._expand_conv = Conv2d(in_channels=inp, out_channels=oup, kernel_size=1, bias=False)
        self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)
        # image_size = calculate_output_image_size(image_size, 1) <-- this wouldn't modify image_size

    # Depthwise convolution phase
    k = self._block_args.kernel_size
    s = self._block_args.stride
    Conv2d = get_same_padding_conv2d(image_size=image_size)
    self._depthwise_conv = Conv2d(
        in_channels=oup, out_channels=oup, groups=oup,  # groups makes it depthwise
        kernel_size=k, stride=s, bias=False)
    self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)
    # Later layers are built for the image size after the strided depthwise conv.
    image_size = calculate_output_image_size(image_size, s)

    # Squeeze and Excitation layer, if desired
    if self.has_se:
        # SE convs are built for a 1x1 input; forward() feeds them the output of adaptive_avg_pool2d(x, 1).
        Conv2d = get_same_padding_conv2d(image_size=(1,1))
        # Squeezed width is derived from the block's input filters, with a floor of 1.
        num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio))
        self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1)
        self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1)
        # self._se_relu = torch.nn.ReLU()
        # self._se_sigmoid = torch.nn.Sigmoid()

    # Pointwise convolution phase
    final_oup = self._block_args.output_filters
    Conv2d = get_same_padding_conv2d(image_size=image_size)
    self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False)
    self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps)
    self._swish = MemoryEfficientSwish()
|
||||
|
||||
def forward(self, inputs, drop_connect_rate=None):
    """Run the block: optional expansion, depthwise conv, optional SE, projection.

    Args:
        inputs (tensor): Input tensor.
        drop_connect_rate (float, optional): Drop-connect probability passed to
            ``drop_connect``; a falsy value (``None`` or ``0``) disables it.

    Returns:
        Output of this block after processing.
    """
    args = self._block_args

    # Expansion phase is skipped entirely when expand_ratio == 1.
    if args.expand_ratio != 1:
        features = self._swish(self._bn0(self._expand_conv(inputs)))
    else:
        features = inputs

    # Depthwise convolution phase.
    features = self._swish(self._bn1(self._depthwise_conv(features)))

    # Squeeze-and-excitation: pool to 1x1, reduce, activate, expand, then gate.
    if self.has_se:
        gate = F.adaptive_avg_pool2d(features, 1)
        gate = self._se_expand(self._swish(self._se_reduce(gate)))
        features = torch.sigmoid(gate) * features

    # Pointwise projection (no activation afterwards).
    features = self._bn2(self._project_conv(features))

    # The residual path is only taken when the block keeps stride and width.
    in_filters, out_filters = args.input_filters, args.output_filters
    use_residual = self.id_skip and args.stride == 1 and in_filters == out_filters
    if not use_residual:
        return features

    # Skip connection combined with drop connect gives stochastic depth.
    if drop_connect_rate:
        features = drop_connect(features, p=drop_connect_rate, training=self.training)
    return features + inputs
|
||||
|
||||
def set_swish(self, memory_efficient=True):
    """Select which swish implementation this block uses.

    Args:
        memory_efficient (bool): If true, install ``MemoryEfficientSwish``
            (for training); otherwise install the plain ``Swish`` (for export).
    """
    if memory_efficient:
        self._swish = MemoryEfficientSwish()
    else:
        self._swish = Swish()
|
||||
|
||||
|
||||
class EfficientNet(nn.Module):
    """EfficientNet model.

    Most easily loaded with the .from_name or .from_pretrained methods.

    Args:
        blocks_args (list[namedtuple]): A list of BlockArgs to construct blocks.
        global_params (namedtuple): A set of GlobalParams shared between blocks.

    References:
        [1] https://arxiv.org/abs/1905.11946 (EfficientNet)

    Example:
        >>> import torch
        >>> from efficientnet.model import EfficientNet
        >>> inputs = torch.rand(1, 3, 224, 224)
        >>> model = EfficientNet.from_pretrained('efficientnet-b0')
        >>> model.eval()
        >>> outputs = model(inputs)
    """

    def __init__(self, blocks_args=None, global_params=None):
        super().__init__()
        assert isinstance(blocks_args, list), 'blocks_args should be a list'
        assert len(blocks_args) > 0, 'block args must be greater than 0'
        self._global_params = global_params
        self._blocks_args = blocks_args

        # Batch norm parameters. The stored momentum is complemented before it
        # is handed to nn.BatchNorm2d.
        bn_mom = 1 - self._global_params.batch_norm_momentum
        bn_eps = self._global_params.batch_norm_epsilon

        # Get stem static or dynamic convolution depending on image size
        image_size = global_params.image_size
        Conv2d = get_same_padding_conv2d(image_size=image_size)

        # Stem
        in_channels = 3  # rgb
        out_channels = round_filters(32, self._global_params)  # number of output channels
        self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
        self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
        image_size = calculate_output_image_size(image_size, 2)

        # Build blocks
        self._blocks = nn.ModuleList([])
        for block_args in self._blocks_args:

            # Update block input and output filters based on depth multiplier.
            block_args = block_args._replace(
                input_filters=round_filters(block_args.input_filters, self._global_params),
                output_filters=round_filters(block_args.output_filters, self._global_params),
                num_repeat=round_repeats(block_args.num_repeat, self._global_params)
            )

            # The first block needs to take care of stride and filter size increase.
            self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size))
            image_size = calculate_output_image_size(image_size, block_args.stride)
            if block_args.num_repeat > 1:  # modify block_args to keep same output size
                block_args = block_args._replace(input_filters=block_args.output_filters, stride=1)
            # Repeated blocks use stride 1, so image_size does not change here.
            for _ in range(block_args.num_repeat - 1):
                self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size))

        # Head
        in_channels = block_args.output_filters  # output of final block
        out_channels = round_filters(1280, self._global_params)
        Conv2d = get_same_padding_conv2d(image_size=image_size)
        self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)

        # Final linear layer
        self._avg_pooling = nn.AdaptiveAvgPool2d(1)
        self._dropout = nn.Dropout(self._global_params.dropout_rate)
        self._fc = nn.Linear(out_channels, self._global_params.num_classes)
        self._swish = MemoryEfficientSwish()

    def set_swish(self, memory_efficient=True):
        """Sets swish function as memory efficient (for training) or standard (for export).

        The choice is applied to this module and propagated to every block.

        Args:
            memory_efficient (bool): Whether to use memory-efficient version of swish.
        """
        self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
        for block in self._blocks:
            block.set_swish(memory_efficient)

    def extract_endpoints(self, inputs):
        """Use convolution layers to extract features at each reduction level.

        A feature map is recorded every time a block reduces the spatial size
        (dimension 2) of its input, plus the output of the head convolution.

        Args:
            inputs (tensor): Input tensor.

        Returns:
            Dictionary of last intermediate features keyed
            ``'reduction_1'``, ``'reduction_2'``, ... in order of appearance.

        Example:
            >>> import torch
            >>> from efficientnet.model import EfficientNet
            >>> inputs = torch.rand(1, 3, 224, 224)
            >>> model = EfficientNet.from_pretrained('efficientnet-b0')
            >>> endpoints = model.extract_endpoints(inputs)
            >>> print(endpoints['reduction_1'].shape)  # torch.Size([1, 16, 112, 112])
            >>> print(endpoints['reduction_2'].shape)  # torch.Size([1, 24, 56, 56])
            >>> print(endpoints['reduction_3'].shape)  # torch.Size([1, 40, 28, 28])
            >>> print(endpoints['reduction_4'].shape)  # torch.Size([1, 112, 14, 14])
            >>> print(endpoints['reduction_5'].shape)  # torch.Size([1, 1280, 7, 7])
        """
        endpoints = dict()

        # Stem
        x = self._swish(self._bn0(self._conv_stem(inputs)))
        prev_x = x

        # Blocks
        for idx, block in enumerate(self._blocks):
            drop_connect_rate = self._global_params.drop_connect_rate
            if drop_connect_rate:
                drop_connect_rate *= float(idx) / len(self._blocks)  # scale drop connect_rate
            x = block(x, drop_connect_rate=drop_connect_rate)
            # A shrink in spatial size means prev_x was the last map at its resolution.
            if prev_x.size(2) > x.size(2):
                endpoints[f'reduction_{len(endpoints)+1}'] = prev_x
            prev_x = x

        # Head
        x = self._swish(self._bn1(self._conv_head(x)))
        endpoints[f'reduction_{len(endpoints)+1}'] = x

        return endpoints

    def extract_features(self, inputs):
        """Use convolution layers to extract features.

        Args:
            inputs (tensor): Input tensor.

        Returns:
            Output of the final convolution layer in the efficientnet model.
        """
        # Stem
        x = self._swish(self._bn0(self._conv_stem(inputs)))

        # Blocks
        for idx, block in enumerate(self._blocks):
            drop_connect_rate = self._global_params.drop_connect_rate
            if drop_connect_rate:
                drop_connect_rate *= float(idx) / len(self._blocks)  # scale drop connect_rate
            x = block(x, drop_connect_rate=drop_connect_rate)

        # Head
        x = self._swish(self._bn1(self._conv_head(x)))

        return x

    def forward(self, inputs):
        """EfficientNet's forward function.

        Calls extract_features to extract features, applies final linear layer, and returns logits.

        Args:
            inputs (tensor): Input tensor.

        Returns:
            Output of this model after processing.
        """
        # Convolution layers
        x = self.extract_features(inputs)

        # Pooling and final linear layer
        x = self._avg_pooling(x)
        x = torch.flatten(x, start_dim=1)
        x = self._dropout(x)
        x = self._fc(x)

        return x

    @classmethod
    def from_name(cls, model_name, in_channels=3, **override_params):
        """Create an efficientnet model according to name.

        Args:
            model_name (str): Name for efficientnet.
            in_channels (int): Input data's channel number.
            override_params (other key word params):
                Params to override model's global_params.
                Optional key:
                    'width_coefficient', 'depth_coefficient',
                    'image_size', 'dropout_rate',
                    'num_classes', 'batch_norm_momentum',
                    'batch_norm_epsilon', 'drop_connect_rate',
                    'depth_divisor', 'min_depth'

        Returns:
            An efficientnet model.

        Raises:
            ValueError: If ``model_name`` is not a supported model name.
        """
        cls._check_model_name_is_valid(model_name)
        blocks_args, global_params = get_model_params(model_name, override_params)
        model = cls(blocks_args, global_params)
        model._change_in_channels(in_channels)
        return model

    @classmethod
    def from_pretrained(cls, model_name, weights_path=None, advprop=False,
                        in_channels=3, num_classes=1000, **override_params):
        """Create an efficientnet model according to name and load pretrained weights.

        Args:
            model_name (str): Name for efficientnet.
            weights_path (None or str):
                str: path to pretrained weights file on the local disk.
                None: use pretrained weights downloaded from the Internet.
            advprop (bool):
                Whether to load pretrained weights
                trained with advprop (valid when weights_path is None).
            in_channels (int): Input data's channel number.
            num_classes (int):
                Number of categories for classification.
                It controls the output size for final linear layer.
                The final linear layer's weights are only loaded when this is 1000.
            override_params (other key word params):
                Params to override model's global_params.
                Optional key:
                    'width_coefficient', 'depth_coefficient',
                    'image_size', 'dropout_rate',
                    'num_classes', 'batch_norm_momentum',
                    'batch_norm_epsilon', 'drop_connect_rate',
                    'depth_divisor', 'min_depth'

        Returns:
            A pretrained efficientnet model.
        """
        model = cls.from_name(model_name, num_classes = num_classes, **override_params)
        load_pretrained_weights(model, model_name, weights_path=weights_path, load_fc=(num_classes == 1000), advprop=advprop)
        # The stem is swapped after loading so non-RGB inputs get a fresh first conv.
        model._change_in_channels(in_channels)
        return model

    @classmethod
    def get_image_size(cls, model_name):
        """Get the input image size for a given efficientnet model.

        Args:
            model_name (str): Name for efficientnet.

        Returns:
            Input image size (resolution).

        Raises:
            ValueError: If ``model_name`` is not a supported model name.
        """
        cls._check_model_name_is_valid(model_name)
        _, _, res, _ = efficientnet_params(model_name)
        return res

    @classmethod
    def _check_model_name_is_valid(cls, model_name):
        """Validate a model name.

        Args:
            model_name (str): Name for efficientnet.

        Returns:
            None. The method returns normally when the name is valid.

        Raises:
            ValueError: If ``model_name`` is not one of 'efficientnet-b0' ..
                'efficientnet-b8' or 'efficientnet-l2'.
        """
        valid_models = ['efficientnet-b'+str(i) for i in range(9)]

        # Support the construction of 'efficientnet-l2' without pretrained weights
        valid_models += ['efficientnet-l2']

        if model_name not in valid_models:
            raise ValueError('model_name should be one of: ' + ', '.join(valid_models))

    def _change_in_channels(self, in_channels):
        """Adjust model's first convolution layer to in_channels, if in_channels not equals 3.

        The stem convolution is replaced by a newly constructed layer, so any
        weights previously held by the stem are discarded.

        Args:
            in_channels (int): Input data's channel number.
        """
        if in_channels != 3:
            Conv2d = get_same_padding_conv2d(image_size = self._global_params.image_size)
            out_channels = round_filters(32, self._global_params)
            self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
|
||||
+7
@@ -0,0 +1,7 @@
|
||||
def set_value(value):
    """Store ``value`` as the module-level NPU device id and report it on stdout.

    Args:
        value: Device id to remember; it is stored as given.
    """
    global _npu_id
    _npu_id = value
    # The stored id equals ``value`` at this point, so print it directly.
    print('set device id %s success' % value)
|
||||
|
||||
def get_value():
    """Return the NPU device id most recently stored by ``set_value``.

    Returns:
        The stored device id, exactly as it was passed to ``set_value``.

    Raises:
        NameError: If no device id has been stored yet. The exception type is
            the one Python raised before; only the message is more explicit.
    """
    try:
        return _npu_id
    except NameError:
        raise NameError('_npu_id is not set; call set_value() before get_value()') from None
|
||||
+122
@@ -0,0 +1,122 @@
|
||||
import torch
|
||||
from torch.optim import Optimizer
|
||||
|
||||
|
||||
class RMSpropTF(Optimizer):
    """Implements RMSprop algorithm (TensorFlow style epsilon)

    NOTE: This is a direct cut-and-paste of PyTorch RMSprop with eps applied before sqrt
    to closer match Tensorflow for matching hyper-params.

    Proposed by G. Hinton in his
    `course <http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf>`_.

    The centered version first appears in `Generating Sequences
    With Recurrent Neural Networks <https://arxiv.org/pdf/1308.0850v5.pdf>`_.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-2)
        momentum (float, optional): momentum factor (default: 0)
        alpha (float, optional): smoothing (decay) constant (default: 0.9)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-10)
        centered (bool, optional) : if ``True``, compute the centered RMSProp,
            the gradient is normalized by an estimation of its variance
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        decoupled_decay (bool, optional): decoupled weight decay as per https://arxiv.org/abs/1711.05101
        lr_in_momentum (bool, optional): learning rate scaling is included in the momentum buffer
            update as per defaults in Tensorflow

    Raises:
        ValueError: If ``lr``, ``eps``, ``momentum``, ``weight_decay`` or ``alpha`` is negative.
    """

    def __init__(self, params, lr=1e-2, alpha=0.9, eps=1e-10, weight_decay=0, momentum=0., centered=False,
                 decoupled_decay=False, lr_in_momentum=True):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= momentum:
            raise ValueError("Invalid momentum value: {}".format(momentum))
        if not 0.0 <= weight_decay:
            raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
        if not 0.0 <= alpha:
            raise ValueError("Invalid alpha value: {}".format(alpha))

        defaults = dict(lr=lr, momentum=momentum, alpha=alpha, eps=eps, centered=centered, weight_decay=weight_decay,
                        decoupled_decay=decoupled_decay, lr_in_momentum=lr_in_momentum)
        super(RMSpropTF, self).__init__(params, defaults)

    def __setstate__(self, state):
        """Restore optimizer state, filling in options missing from older checkpoints."""
        super(RMSpropTF, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('momentum', 0)
            group.setdefault('centered', False)

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.

        Returns:
            The value returned by ``closure``, or ``None`` when no closure is given.

        Raises:
            RuntimeError: If a parameter has a sparse gradient.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError('RMSprop does not support sparse gradients')
                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    state['square_avg'] = torch.ones_like(p.data)  # PyTorch inits to zero
                    if group['momentum'] > 0:
                        state['momentum_buffer'] = torch.zeros_like(p.data)
                    if group['centered']:
                        state['grad_avg'] = torch.zeros_like(p.data)

                square_avg = state['square_avg']
                one_minus_alpha = 1. - group['alpha']

                state['step'] += 1

                # The scalar multiplier is passed by keyword (alpha= / value=)
                # throughout; the positional-scalar overloads are deprecated.
                if group['weight_decay'] != 0:
                    if 'decoupled_decay' in group and group['decoupled_decay']:
                        # Decoupled decay shrinks the weights directly.
                        p.data.add_(p.data, alpha=-group['weight_decay'])
                    else:
                        # Classic L2 penalty is folded into the gradient.
                        grad = grad.add(p.data, alpha=group['weight_decay'])

                # Tensorflow order of ops for updating squared avg
                square_avg.add_(grad.pow(2) - square_avg, alpha=one_minus_alpha)

                if group['centered']:
                    grad_avg = state['grad_avg']
                    grad_avg.add_(grad - grad_avg, alpha=one_minus_alpha)
                    # eps moved in sqrt
                    avg = square_avg.addcmul(grad_avg, grad_avg, value=-1).add(group['eps']).sqrt_()
                else:
                    avg = square_avg.add(group['eps']).sqrt_()  # eps moved in sqrt

                if group['momentum'] > 0:
                    buf = state['momentum_buffer']
                    # Tensorflow accumulates the LR scaling in the momentum buffer
                    if 'lr_in_momentum' in group and group['lr_in_momentum']:
                        buf.mul_(group['momentum']).addcdiv_(grad, avg, value=group['lr'])
                        p.data.add_(-buf)
                    else:
                        # PyTorch scales the param update by LR
                        buf.mul_(group['momentum']).addcdiv_(grad, avg)
                        p.data.add_(buf, alpha=-group['lr'])
                else:
                    p.data.addcdiv_(grad, avg, value=-group['lr'])

        return loss
|
||||
+624
@@ -0,0 +1,624 @@
|
||||
"""utils.py - Helper functions for building the model and for loading model parameters.
|
||||
These helper functions are built to mirror those in the official TensorFlow implementation.
|
||||
"""
|
||||
|
||||
# Author: lukemelas (github username)
|
||||
# Github repo: https://github.com/lukemelas/EfficientNet-PyTorch
|
||||
# With adjustments and added comments by workingcoder (github username).
|
||||
|
||||
import re
|
||||
import math
|
||||
import collections
|
||||
from functools import partial
|
||||
import numpy as np
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
from torch.utils import model_zoo
|
||||
from . import npu_info
|
||||
|
||||
################################################################################
|
||||
### Helper functions for model architecture
|
||||
################################################################################
|
||||
|
||||
# GlobalParams and BlockArgs: Two namedtuples
|
||||
# Swish and MemoryEfficientSwish: Two implementations of the method
|
||||
# round_filters and round_repeats:
|
||||
# Functions to calculate params for scaling model width and depth ! ! !
|
||||
# get_width_and_height_from_size and calculate_output_image_size
|
||||
# drop_connect: A structural design
|
||||
# get_same_padding_conv2d:
|
||||
# Conv2dDynamicSamePadding
|
||||
# Conv2dStaticSamePadding
|
||||
# get_same_padding_maxPool2d:
|
||||
# MaxPool2dDynamicSamePadding
|
||||
# MaxPool2dStaticSamePadding
|
||||
# It's an additional function, not used in EfficientNet,
|
||||
# but can be used in other model (such as EfficientDet).
|
||||
# Identity: An implementation of identical mapping
|
||||
|
||||
# Parameters for the entire model (stem, all blocks, and head)
|
||||
GlobalParams = collections.namedtuple(
    'GlobalParams',
    'width_coefficient depth_coefficient image_size dropout_rate '
    'num_classes batch_norm_momentum batch_norm_epsilon '
    'drop_connect_rate depth_divisor min_depth')

# Parameters for an individual model block
BlockArgs = collections.namedtuple(
    'BlockArgs',
    'num_repeat kernel_size stride expand_ratio '
    'input_filters output_filters se_ratio id_skip')

# Every field of both tuples defaults to None, so any subset may be supplied.
GlobalParams.__new__.__defaults__ = tuple(None for _ in GlobalParams._fields)
BlockArgs.__new__.__defaults__ = tuple(None for _ in BlockArgs._fields)
|
||||
|
||||
|
||||
# An ordinary implementation of Swish function
|
||||
class Swish(nn.Module):
    """Plain swish activation: the input multiplied by its own sigmoid."""

    def forward(self, x):
        gate = torch.sigmoid(x)
        return x * gate
|
||||
|
||||
# A memory-efficient implementation of Swish function
|
||||
class SwishImplementation(torch.autograd.Function):
    """Swish as a custom autograd function.

    Only the input is saved for the backward pass; the sigmoid is recomputed
    there.
    """

    @staticmethod
    def forward(ctx, i):
        ctx.save_for_backward(i)
        return i * torch.sigmoid(i)

    @staticmethod
    def backward(ctx, grad_output):
        (saved_input,) = ctx.saved_tensors
        gate = torch.sigmoid(saved_input)
        # d/dx [x * s(x)] = s(x) * (1 + x * (1 - s(x)))
        return grad_output * (gate * (1 + saved_input * (1 - gate)))
|
||||
|
||||
class MemoryEfficientSwish(nn.Module):
    """Module wrapper that applies ``SwishImplementation`` to its input."""

    def forward(self, x):
        activated = SwishImplementation.apply(x)
        return activated
|
||||
|
||||
|
||||
def round_filters(filters, global_params):
    """Scale a filter count by the width multiplier and snap it to the divisor.

    Reads width_coefficient, depth_divisor and min_depth from global_params.

    Args:
        filters (int): Filters number to be calculated.
        global_params (namedtuple): Global params of the model.

    Returns:
        new_filters: New filters number after calculating. When
        width_coefficient is falsy, ``filters`` is returned unchanged.
    """
    width_mult = global_params.width_coefficient
    if not width_mult:
        return filters

    # TODO: modify the params names.
    #     maybe the names (width_divisor,min_width)
    #     are more suitable than (depth_divisor,min_depth).
    step = global_params.depth_divisor
    # A falsy min_depth falls back to the divisor itself.
    lower_bound = global_params.min_depth or step

    scaled = filters * width_mult
    # Round to the nearest multiple of `step`, following the formula
    # transferred from the official TensorFlow implementation.
    snapped = int(scaled + step / 2) // step * step
    snapped = max(lower_bound, snapped)
    if snapped < 0.9 * scaled:  # prevent rounding by more than 10%
        snapped += step
    return int(snapped)
|
||||
|
||||
|
||||
def round_repeats(repeats, global_params):
    """Scale a block's repeat count by the depth multiplier, rounding up.

    Reads depth_coefficient from global_params.

    Args:
        repeats (int): num_repeat to be calculated.
        global_params (namedtuple): Global params of the model.

    Returns:
        new repeat: New repeat number after calculating. When
        depth_coefficient is falsy, ``repeats`` is returned unchanged.
    """
    depth_mult = global_params.depth_coefficient
    if depth_mult:
        # follow the formula transferred from official TensorFlow implementation
        return int(math.ceil(depth_mult * repeats))
    return repeats
|
||||
|
||||
|
||||
def drop_connect(inputs, p, training):
    """Drop connect.

    During training each sample in the batch is either zeroed out completely
    or kept and rescaled by ``1 / (1 - p)``.

    Args:
        inputs (tensor): Input of this structure. The mask has shape
            ``[batch_size, 1, 1, 1]``, so a 4-D input is expected.
        p (float: 0.0~1.0): Probability of drop connection.
        training (bool): The running mode.

    Returns:
        output: Output after drop connection. ``inputs`` itself is returned
        when ``training`` is false.

    Raises:
        AssertionError: If ``p`` is outside ``[0, 1]``.
    """
    assert p >= 0 and p <= 1, 'p must be in range of [0,1]'

    if not training:
        return inputs

    batch_size = inputs.shape[0]
    keep_prob = 1 - p

    # p == 1 drops every sample. Without this guard the rescale below would
    # divide 0 by 0 and fill the output with NaN.
    if keep_prob == 0:
        return torch.zeros_like(inputs)

    # generate binary_tensor mask according to probability (p for 0, 1-p for 1)
    random_tensor = keep_prob
    random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device)
    binary_tensor = torch.floor(random_tensor) / keep_prob

    output = inputs * binary_tensor
    return output
|
||||
|
||||
|
||||
def get_width_and_height_from_size(x):
    """Normalize a size specification into a pair.

    Args:
        x (int, tuple or list): Data size.

    Returns:
        size: ``(x, x)`` when ``x`` is an int; otherwise the list or tuple
        itself, returned unchanged (its length is not checked here).

    Raises:
        TypeError: If ``x`` is not an int, list or tuple.
    """
    if isinstance(x, int):
        return x, x
    if isinstance(x, (list, tuple)):
        return x
    raise TypeError()
|
||||
|
||||
|
||||
def calculate_output_image_size(input_image_size, stride):
    """Compute the output size of a 'same'-padded convolution with a stride.

    Necessary for static padding. Thanks to mannatsingh for pointing this out.

    Args:
        input_image_size (int, tuple or list): Size of input image.
        stride (int, tuple or list): Conv2d operation's stride. For a
            sequence only the first element is used, for both dimensions.

    Returns:
        output_image_size: A list [H,W], each ``ceil(size / stride)``, or
        ``None`` when ``input_image_size`` is ``None``.
    """
    if input_image_size is None:
        return None
    height, width = get_width_and_height_from_size(input_image_size)
    step = stride if isinstance(stride, int) else stride[0]
    return [int(math.ceil(extent / step)) for extent in (height, width)]
|
||||
|
||||
|
||||
# Note:
|
||||
# The following 'SamePadding' functions make output size equal ceil(input size/stride).
|
||||
# Only when stride equals 1, can the output size be the same as input size.
|
||||
# Don't be confused by their function names ! ! !
|
||||
|
||||
def get_same_padding_conv2d(image_size=None):
    """Pick the convolution class matching whether an image size is known.

    Static padding is necessary for ONNX exporting of models.

    Args:
        image_size (int or tuple): Size of the image.

    Returns:
        ``Conv2dDynamicSamePadding`` when ``image_size`` is ``None``;
        otherwise a ``functools.partial`` of ``Conv2dStaticSamePadding`` with
        ``image_size`` already bound.
    """
    if image_size is not None:
        return partial(Conv2dStaticSamePadding, image_size=image_size)
    return Conv2dDynamicSamePadding
|
||||
|
||||
|
||||
class Conv2dDynamicSamePadding(nn.Conv2d):
    """2D convolution with TensorFlow-like 'SAME' padding for a dynamic image size.

    The padding is computed from the actual input size on every forward call.
    """

    # Padding needed along one axis, with
    #     i: input size, s: stride, k: kernel size, d: dilation
    # and target output o = ceil(i / s):
    #     p = max((o - 1) * s + (k - 1) * d + 1 - i, 0)
    # An odd total puts the extra cell on the right / bottom side.

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True):
        # The parent is built with padding 0; padding is applied in forward().
        super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias)
        if len(self.stride) != 2:
            self.stride = [self.stride[0]] * 2

    def forward(self, x):
        in_h, in_w = x.size()[-2:]
        k_h, k_w = self.weight.size()[-2:]
        s_h, s_w = self.stride
        # The output size follows the stride: ceil(input / stride).
        out_h = math.ceil(in_h / s_h)
        out_w = math.ceil(in_w / s_w)
        total_h = max((out_h - 1) * s_h + (k_h - 1) * self.dilation[0] + 1 - in_h, 0)
        total_w = max((out_w - 1) * s_w + (k_w - 1) * self.dilation[1] + 1 - in_w, 0)
        if total_h > 0 or total_w > 0:
            left, top = total_w // 2, total_h // 2
            x = F.pad(x, [left, total_w - left, top, total_h - top])
        return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
|
||||
|
||||
|
||||
class Conv2dStaticSamePadding(nn.Conv2d):
    """2D convolution whose padding is fixed at construction from a given image size.

    The constructor computes the TensorFlow-style 'SAME' padding for
    ``image_size`` and stores it as ``self.static_padding``. ``forward`` does
    not apply that module. When padding is required, the constructor also
    sets ``self.padding`` to ``kernel_height // 2``, and ``forward`` passes
    that symmetric padding to ``F.conv2d``.

    NOTE(review): symmetric padding lines up differently from 'SAME' padding
    when the total padding is odd; presumably a deliberate choice of this
    port. Confirm before changing.
    """

    # Same pad-size formula as Conv2dDynamicSamePadding.

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, image_size=None, **kwargs):
        super().__init__(in_channels, out_channels, kernel_size, stride, **kwargs)
        if len(self.stride) != 2:
            self.stride = [self.stride[0]] * 2

        # Calculate padding based on image size and save it
        assert image_size is not None
        if isinstance(image_size, int):
            in_h = in_w = image_size
        else:
            in_h, in_w = image_size
        k_h, k_w = self.weight.size()[-2:]
        s_h, s_w = self.stride
        out_h = math.ceil(in_h / s_h)
        out_w = math.ceil(in_w / s_w)
        total_h = max((out_h - 1) * s_h + (k_h - 1) * self.dilation[0] + 1 - in_h, 0)
        total_w = max((out_w - 1) * s_w + (k_w - 1) * self.dilation[1] + 1 - in_w, 0)

        if total_h <= 0 and total_w <= 0:
            self.static_padding = Identity()
            return

        left, top = total_w // 2, total_h // 2
        self.static_padding = nn.ZeroPad2d((left, total_w - left, top, total_h - top))
        # (k - 1) // 2 for odd k and k // 2 for even k both equal k // 2.
        # Only the kernel height is consulted, so a square kernel is
        # presumably assumed.
        self.padding = k_h // 2

    def forward(self, x):
        return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
|
||||
|
||||
|
||||
def get_same_padding_maxPool2d(image_size=None):
    """Pick the max-pool class matching whether an image size is known.

    Static padding is necessary for ONNX exporting of models.

    Args:
        image_size (int or tuple): Size of the image.

    Returns:
        ``MaxPool2dDynamicSamePadding`` when ``image_size`` is ``None``;
        otherwise a ``functools.partial`` of ``MaxPool2dStaticSamePadding``
        with ``image_size`` already bound.
    """
    if image_size is not None:
        return partial(MaxPool2dStaticSamePadding, image_size=image_size)
    return MaxPool2dDynamicSamePadding
|
||||
|
||||
|
||||
class MaxPool2dDynamicSamePadding(nn.MaxPool2d):
    """2D max pooling with TensorFlow-like 'SAME' padding for a dynamic image size.

    The padding is computed from the actual input size on every forward call
    and applied with ``F.pad`` (zero fill) before pooling.
    """

    def __init__(self, kernel_size, stride, padding=0, dilation=1, return_indices=False, ceil_mode=False):
        super().__init__(kernel_size, stride, padding, dilation, return_indices, ceil_mode)
        # Expand int settings to two-element lists so they can be unpacked per axis.
        if isinstance(self.stride, int):
            self.stride = [self.stride] * 2
        if isinstance(self.kernel_size, int):
            self.kernel_size = [self.kernel_size] * 2
        if isinstance(self.dilation, int):
            self.dilation = [self.dilation] * 2

    def forward(self, x):
        in_h, in_w = x.size()[-2:]
        k_h, k_w = self.kernel_size
        s_h, s_w = self.stride
        out_h = math.ceil(in_h / s_h)
        out_w = math.ceil(in_w / s_w)
        total_h = max((out_h - 1) * s_h + (k_h - 1) * self.dilation[0] + 1 - in_h, 0)
        total_w = max((out_w - 1) * s_w + (k_w - 1) * self.dilation[1] + 1 - in_w, 0)
        if total_h > 0 or total_w > 0:
            left, top = total_w // 2, total_h // 2
            x = F.pad(x, [left, total_w - left, top, total_h - top])
        return F.max_pool2d(x, self.kernel_size, self.stride, self.padding,
                            self.dilation, self.ceil_mode, self.return_indices)
|
||||
|
||||
class MaxPool2dStaticSamePadding(nn.MaxPool2d):
    """2D max pooling with TensorFlow-like 'SAME' padding for a given input image size.

    The padding module is built once in the constructor from ``image_size``
    and applied in ``forward`` before pooling.
    """

    def __init__(self, kernel_size, stride, image_size=None, **kwargs):
        super().__init__(kernel_size, stride, **kwargs)
        # Expand int settings to two-element lists so they can be unpacked per axis.
        if isinstance(self.stride, int):
            self.stride = [self.stride] * 2
        if isinstance(self.kernel_size, int):
            self.kernel_size = [self.kernel_size] * 2
        if isinstance(self.dilation, int):
            self.dilation = [self.dilation] * 2

        # Calculate padding based on image size and save it
        assert image_size is not None
        if isinstance(image_size, int):
            in_h = in_w = image_size
        else:
            in_h, in_w = image_size
        k_h, k_w = self.kernel_size
        s_h, s_w = self.stride
        out_h = math.ceil(in_h / s_h)
        out_w = math.ceil(in_w / s_w)
        total_h = max((out_h - 1) * s_h + (k_h - 1) * self.dilation[0] + 1 - in_h, 0)
        total_w = max((out_w - 1) * s_w + (k_w - 1) * self.dilation[1] + 1 - in_w, 0)
        if total_h > 0 or total_w > 0:
            left, top = total_w // 2, total_h // 2
            self.static_padding = nn.ZeroPad2d((left, total_w - left, top, total_h - top))
        else:
            self.static_padding = Identity()

    def forward(self, x):
        padded = self.static_padding(x)
        return F.max_pool2d(padded, self.kernel_size, self.stride, self.padding,
                            self.dilation, self.ceil_mode, self.return_indices)
|
||||
|
||||
class Identity(nn.Module):
    """No-op module: ``forward`` hands back exactly the object it was given."""

    def __init__(self):
        super().__init__()

    def forward(self, input):
        # Nothing is computed or copied; the argument itself is the result.
        return input
|
||||
|
||||
|
||||
################################################################################
|
||||
### Helper functions for loading model params
|
||||
################################################################################
|
||||
|
||||
# BlockDecoder: A Class for encoding and decoding BlockArgs
|
||||
# efficientnet_params: A function to query compound coefficient
|
||||
# get_model_params and efficientnet:
|
||||
# Functions to get BlockArgs and GlobalParams for efficientnet
|
||||
# url_map and url_map_advprop: Dicts of url_map for pretrained weights
|
||||
# load_pretrained_weights: A function to load pretrained weights
|
||||
|
||||
class BlockDecoder(object):
    """Convert between the compact block string notation and ``BlockArgs``.

    The notation (e.g. ``'r1_k3_s11_e1_i32_o16_se0.25_noskip'``) is taken
    straight from the official TensorFlow repository.
    """

    @staticmethod
    def _decode_block_string(block_string):
        """Get a block through a string notation of arguments.

        Args:
            block_string (str): A string notation of arguments.
                Examples: 'r1_k3_s11_e1_i32_o16_se0.25_noskip'.

        Returns:
            BlockArgs: The namedtuple defined at the top of this file.

        Raises:
            AssertionError: If ``block_string`` is not a str, or the stride
                option is missing or is not one digit / two equal digits.
        """
        assert isinstance(block_string, str)

        options = {}
        for op in block_string.split('_'):
            # Split each token at its first digit: 'se0.25' -> key 'se', value '0.25'.
            # Tokens without a digit (e.g. 'noskip') produce a single piece and are skipped.
            splits = re.split(r'(\d.*)', op)
            if len(splits) >= 2:
                key, value = splits[:2]
                options[key] = value

        # Check stride: a missing 's' now fails the assertion instead of
        # surfacing as a KeyError from inside the condition.
        stride = options.get('s')
        assert stride is not None, "block string %r has no stride ('s') option" % block_string
        assert len(stride) == 1 or (len(stride) == 2 and stride[0] == stride[1])

        return BlockArgs(
            num_repeat=int(options['r']),
            kernel_size=int(options['k']),
            stride=[int(stride[0])],
            expand_ratio=int(options['e']),
            input_filters=int(options['i']),
            output_filters=int(options['o']),
            se_ratio=float(options['se']) if 'se' in options else None,
            id_skip=('noskip' not in block_string))

    @staticmethod
    def _encode_block_string(block):
        """Encode a block to a string.

        Args:
            block (namedtuple): A BlockArgs type argument.

        Returns:
            block_string: A String form of BlockArgs.
        """
        # _decode_block_string stores the stride in the ``stride`` field as a
        # one-element list; the previous code read a non-existent ``strides``
        # field and indexed two elements. Objects that only expose
        # ``strides`` are still accepted.
        stride = block.stride if hasattr(block, 'stride') else block.strides
        stride = [stride] if isinstance(stride, int) else list(stride)
        if len(stride) == 1:
            stride = stride * 2

        args = [
            'r%d' % block.num_repeat,
            'k%d' % block.kernel_size,
            's%d%d' % (stride[0], stride[1]),
            'e%s' % block.expand_ratio,
            'i%d' % block.input_filters,
            'o%d' % block.output_filters
        ]
        # se_ratio is None when the block string had no 'se' option.
        if block.se_ratio is not None and 0 < block.se_ratio <= 1:
            args.append('se%s' % block.se_ratio)
        if block.id_skip is False:
            args.append('noskip')
        return '_'.join(args)

    @staticmethod
    def decode(string_list):
        """Decode a list of string notations to specify blocks inside the network.

        Args:
            string_list (list[str]): A list of strings, each string is a notation of block.

        Returns:
            blocks_args: A list of BlockArgs namedtuples of block args.
        """
        assert isinstance(string_list, list)
        return [BlockDecoder._decode_block_string(block_string) for block_string in string_list]

    @staticmethod
    def encode(blocks_args):
        """Encode a list of BlockArgs to a list of strings.

        Args:
            blocks_args (list[namedtuples]): A list of BlockArgs namedtuples of block args.

        Returns:
            block_strings: A list of strings, each string is a notation of block.
        """
        return [BlockDecoder._encode_block_string(block) for block in blocks_args]
|
||||
|
||||
|
||||
def efficientnet_params(model_name):
    """Look up the scaling coefficients of a named EfficientNet variant.

    Args:
        model_name (str): Model name to be queried, e.g. 'efficientnet-b0'.

    Returns:
        tuple: ``(width, depth, res, dropout)`` for ``model_name``.

    Raises:
        KeyError: If ``model_name`` is not one of the known variants.
    """
    coefficient_rows = (
        # suffix, width, depth, res, dropout
        ('b0', 1.0, 1.0, 224, 0.2),
        ('b1', 1.0, 1.1, 240, 0.2),
        ('b2', 1.1, 1.2, 260, 0.3),
        ('b3', 1.2, 1.4, 300, 0.3),
        ('b4', 1.4, 1.8, 380, 0.4),
        ('b5', 1.6, 2.2, 456, 0.4),
        ('b6', 1.8, 2.6, 528, 0.5),
        ('b7', 2.0, 3.1, 600, 0.5),
        ('b8', 2.2, 3.6, 672, 0.5),
        ('l2', 4.3, 5.3, 800, 0.5),
    )
    params_dict = {'efficientnet-' + row[0]: row[1:] for row in coefficient_rows}
    return params_dict[model_name]
|
||||
|
||||
|
||||
def efficientnet(width_coefficient=None, depth_coefficient=None, image_size=None,
                 dropout_rate=0.2, drop_connect_rate=0.2, num_classes=1000):
    """Build the decoded block list and the GlobalParams of an EfficientNet.

    Args:
        width_coefficient (float)
        depth_coefficient (float)
        image_size (int)
        dropout_rate (float)
        drop_connect_rate (float)
        num_classes (int)

        Each argument is stored unchanged in the GlobalParams field of the
        same name.

    Returns:
        blocks_args, global_params.
    """
    # Stage layout of the baseline model (efficientnet-b0 by default).
    # Per the original note, it is modified in the construction of the
    # EfficientNet class according to the model.
    baseline_stages = [
        'r1_k3_s11_e1_i32_o16_se0.25',
        'r2_k3_s22_e6_i16_o24_se0.25',
        'r2_k5_s22_e6_i24_o40_se0.25',
        'r3_k3_s22_e6_i40_o80_se0.25',
        'r3_k5_s11_e6_i80_o112_se0.25',
        'r4_k5_s22_e6_i112_o192_se0.25',
        'r1_k3_s11_e6_i192_o320_se0.25',
    ]
    decoded_blocks = BlockDecoder.decode(baseline_stages)

    settings = dict(
        width_coefficient=width_coefficient,
        depth_coefficient=depth_coefficient,
        image_size=image_size,
        dropout_rate=dropout_rate,
        num_classes=num_classes,
        batch_norm_momentum=0.99,
        batch_norm_epsilon=1e-3,
        drop_connect_rate=drop_connect_rate,
        depth_divisor=8,
        min_depth=None,
    )
    return decoded_blocks, GlobalParams(**settings)
|
||||
|
||||
|
||||
def get_model_params(model_name, override_params):
    """Return the block args and global params for a given model name.

    Args:
        model_name (str): Model's name; must start with 'efficientnet'.
        override_params (dict): Fields of global_params to replace; a falsy
            value leaves global_params untouched.

    Returns:
        blocks_args, global_params

    Raises:
        NotImplementedError: If ``model_name`` does not start with 'efficientnet'.
    """
    if not model_name.startswith('efficientnet'):
        raise NotImplementedError('model name is not pre-defined: %s' % model_name)

    width, depth, resolution, dropout = efficientnet_params(model_name)
    # note: all models have drop connect rate = 0.2
    blocks_args, global_params = efficientnet(
        width_coefficient=width,
        depth_coefficient=depth,
        dropout_rate=dropout,
        image_size=resolution)

    if override_params:
        # ValueError will be raised here if override_params has fields not included in global_params.
        global_params = global_params._replace(**override_params)

    return blocks_args, global_params
|
||||
|
||||
|
||||
# Every pretrained checkpoint lives under the same GitHub release.
_RELEASE_URL = 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/'

# Weights trained with standard methods.
# See the paper "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks".
url_map = {
    'efficientnet-' + variant: _RELEASE_URL + 'efficientnet-' + variant + '-' + digest + '.pth'
    for variant, digest in (
        ('b0', '355c32eb'),
        ('b1', 'f1951068'),
        ('b2', '8bb594d6'),
        ('b3', '5fb5a3c3'),
        ('b4', '6ed6700e'),
        ('b5', 'b6417697'),
        ('b6', 'c76e70fd'),
        ('b7', 'dcc49843'),
    )
}

# Weights trained with adversarial examples (AdvProp).
# See the paper "Adversarial Examples Improve Image Recognition".
url_map_advprop = {
    'efficientnet-' + variant: _RELEASE_URL + 'adv-efficientnet-' + variant + '-' + digest + '.pth'
    for variant, digest in (
        ('b0', 'b64d5a18'),
        ('b1', '0f3ce85a'),
        ('b2', '6e9d97e5'),
        ('b3', 'cdd7c0f4'),
        ('b4', '44fb3a87'),
        ('b5', '86493f6b'),
        ('b6', 'ac80338e'),
        ('b7', '4652b6dd'),
        ('b8', '22a8fe65'),
    )
}

# TODO: add the pretrained weights url map of 'efficientnet-l2'
|
||||
|
||||
|
||||
def load_pretrained_weights(model, model_name, weights_path=None, load_fc=True, advprop=False):
    """Loads pretrained weights from weights path or download using url.

    Args:
        model (Module): The whole model of efficientnet.
        model_name (str): Model name of efficientnet.
        weights_path (None or str):
            str: path to pretrained weights file on the local disk.
            None: use pretrained weights downloaded from the Internet.
        load_fc (bool): Whether to load pretrained weights for fc layer at the end of the model.
        advprop (bool): Whether to load pretrained weights
                        trained with advprop (valid when weights_path is None).

    Raises:
        KeyError: If no local path is given and ``model_name`` has no entry
            in the selected url map.
        AssertionError: If the model is left with missing keys (other than
            the fc layer when ``load_fc`` is False) or the checkpoint holds
            keys the model does not know.
    """
    if isinstance(weights_path, str):
        state_dict = torch.load(weights_path)
    else:
        # AutoAugment or Advprop (different preprocessing)
        url_map_ = url_map_advprop if advprop else url_map
        state_dict = model_zoo.load_url(url_map_[model_name])

    if load_fc:
        ret = model.load_state_dict(state_dict, strict=False)
        assert not ret.missing_keys, f'Missing keys when loading pretrained weights: {ret.missing_keys}'
    else:
        # Drop the classifier weights; tolerate checkpoints that were saved
        # without them instead of failing with a KeyError.
        state_dict.pop('_fc.weight', None)
        state_dict.pop('_fc.bias', None)
        ret = model.load_state_dict(state_dict, strict=False)
        assert set(ret.missing_keys) == set(
            ['_fc.weight', '_fc.bias']), f'Missing keys when loading pretrained weights: {ret.missing_keys}'
    # This check is about keys the model does not have, so the message says
    # "Unexpected" (it previously claimed they were missing).
    assert not ret.unexpected_keys, f'Unexpected keys when loading pretrained weights: {ret.unexpected_keys}'

    print('Loaded pretrained weights for {}'.format(model_name))
|
||||
+23
@@ -0,0 +1,23 @@
|
||||
### Imagenet
|
||||
|
||||
This is a preliminary directory for evaluating the model on ImageNet. It is adapted from the standard PyTorch Imagenet script.
|
||||
|
||||
For now, only evaluation is supported, but I am currently building scripts to assist with training new models on Imagenet.
|
||||
|
||||
The evaluation results are slightly different from the original TensorFlow repository, due to differences in data preprocessing. For example, with the current preprocessing, `efficientnet-b3` gives a top-1 accuracy of `80.8`, rather than `81.1` in the paper. I am working on porting the TensorFlow preprocessing into PyTorch to address this issue.
|
||||
|
||||
To run on Imagenet, place your `train` and `val` directories in `data`.
|
||||
|
||||
Example commands:
|
||||
```bash
|
||||
# Evaluate small EfficientNet on CPU
|
||||
python main.py --data data -e -a 'efficientnet-b0' --pretrained
|
||||
```
|
||||
```bash
|
||||
# Evaluate medium EfficientNet on GPU
|
||||
python main.py --data data -e -a 'efficientnet-b3' --pretrained --npu 0 --batch-size 128
|
||||
```
|
||||
```bash
|
||||
# Evaluate ResNet-50 for comparison
|
||||
python main.py --data data -e -a 'resnet50' --pretrained --npu 0
|
||||
```
|
||||
+5
@@ -0,0 +1,5 @@
|
||||
### ImageNet
|
||||
|
||||
Download ImageNet and place it into `train` and `val` folders here.
|
||||
|
||||
More details may be found with the official PyTorch ImageNet example [here](https://github.com/pytorch/examples/blob/master/imagenet).
|
||||
+531
@@ -0,0 +1,531 @@
|
||||
"""
|
||||
Train and evaluate image classification models on ImageNet using NPU devices.
Adapted from the standard PyTorch ImageNet example script.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import random
|
||||
import shutil
|
||||
import time
|
||||
import warnings
|
||||
import PIL
|
||||
import numpy as np
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.parallel
|
||||
import torch.backends.cudnn as cudnn
|
||||
import torch.distributed as dist
|
||||
import torch.optim
|
||||
import torch.multiprocessing as mp
|
||||
import torch.utils.data
|
||||
import torch.utils.data.distributed
|
||||
import torchvision.transforms as transforms
|
||||
import torchvision.datasets as datasets
|
||||
import torchvision.models as models
|
||||
|
||||
from apex import amp
|
||||
|
||||
sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)),'../../'))
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
from efficientnet_pytorch import rand_augment_transform, augment_and_mix_transform, auto_augment_transform
|
||||
from efficientnet_pytorch import RMSpropTF
|
||||
from efficientnet_pytorch import npu_info
|
||||
|
||||
from benchmark_log import hwlog
|
||||
from benchmark_log.basic_utils import get_environment_info
|
||||
from benchmark_log.basic_utils import get_model_parameter
|
||||
|
||||
# Command line interface of the ImageNet training / evaluation script.
# Help texts state the defaults that are actually configured below.
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--data', metavar='DIR',
                    help='path to dataset')
parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
                    help='model architecture (default: resnet18)')
parser.add_argument('-j', '--workers', default=128, type=int, metavar='N',
                    help='number of data loading workers (default: 128)')
parser.add_argument('--epochs', default=90, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=256, type=int,
                    metavar='N',
                    help='mini-batch size (default: 256), this is the total '
                         'batch size of all GPUs on the current node when '
                         'using Data Parallel or Distributed Data Parallel')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-5, type=float,
                    metavar='W', help='weight decay (default: 1e-5)',
                    dest='weight_decay')
parser.add_argument('-p', '--print-freq', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                    help='evaluate model on validation set')
parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                    help='use pre-trained model')
parser.add_argument('--world-size', default=-1, type=int,
                    help='number of nodes for distributed training')
parser.add_argument('--rank', default=-1, type=int,
                    help='node rank for distributed training')
parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
                    help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='hccl', type=str,
                    help='distributed backend')
parser.add_argument('--seed', default=None, type=int,
                    help='seed for initializing training. ')
parser.add_argument('--npu', default=None, type=str,
                    help='npu id to use.')
parser.add_argument('--image_size', default=224, type=int,
                    help='image size')
parser.add_argument('--advprop', default=False, action='store_true',
                    help='use advprop or not')
parser.add_argument('--multiprocessing-distributed', action='store_true',
                    help='Use multi-processing distributed training to launch '
                         'N processes per node, which has N GPUs. This is the '
                         'fastest way to use PyTorch for either single node or '
                         'multi node data parallel training')
parser.add_argument('--autoaug', action='store_true', help='use auto augment')
parser.add_argument('--amp', action='store_true', help='use apex')
parser.add_argument('--pm', '--precision-mode', default='O1', type=str,
                    help='precision mode to use for mix precision, only support O1, O2')
parser.add_argument('--loss_scale', default=1024, type=int, help='loss_scale for amp')
# main() exports this value as MASTER_ADDR for multiprocessing distributed runs.
parser.add_argument('--addr', default='127.0.0.1', type=str,
                    help='master node address for distributed training (default: 127.0.0.1)')
parser.add_argument('--nnpus_per_node', default=None, type=int,
                    help='number of npus to use for distributed train on each node')
parser.add_argument('--val_feq', default=10, type=int,
                    help='validation frequency')
parser.add_argument('--device_list', default='0,1,2,3,4,5,6,7', type=str, help='device id list')
|
||||
|
||||
def device_id_to_process_device_map(device_list):
    """Map process indices 0..N-1 to device ids in ascending order.

    Args:
        device_list (str): Comma separated device ids, e.g. '0,1,2,3'.

    Returns:
        dict: ``{process_index: device_id}`` where the device ids are the
        integers parsed from ``device_list``, sorted ascending.
    """
    ordered_ids = sorted(int(token) for token in device_list.split(","))
    return dict(enumerate(ordered_ids))
|
||||
|
||||
|
||||
def main():
    """Parse the command line and start one worker or one worker per device."""
    args = parser.parse_args()

    # With env:// initialisation the world size may come from the environment.
    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    args.process_device_map = device_id_to_process_device_map(args.device_list)
    device_count = len(args.process_device_map)

    if not args.multiprocessing_distributed:
        # Single process: run the worker directly on the requested device.
        main_worker(args.npu, device_count, args)
        return

    # One process per device on this node, so the world size counts
    # processes rather than nodes.
    args.world_size = device_count * args.world_size
    os.environ['MASTER_ADDR'] = args.addr
    os.environ['MASTER_PORT'] = '29688'
    # mp.spawn passes the process index as the first argument of main_worker.
    mp.spawn(main_worker, nprocs=device_count, args=(device_count, args))
|
||||
|
||||
def main_worker(npu, nnpus_per_node, args):
    """Set up model, optimizer and data on one NPU, then evaluate or train.

    Args:
        npu: In distributed mode, the process index that is mapped to a
            device id through ``args.process_device_map``; otherwise the
            device id itself (value of ``--npu``).
        nnpus_per_node (int): Number of devices used on this node.
        args (argparse.Namespace): Parsed command line options. ``npu``,
            ``rank``, ``batch_size``, ``workers`` and ``start_epoch`` may be
            updated in place.
    """
    # Read by train() to compute throughput.
    global total_batch_size

    args.npu = npu

    if args.distributed:
        args.npu = args.process_device_map[npu]

    if args.npu is not None:
        print("Use npu: {} for training".format(args.npu))
        torch.npu.set_device('npu:' + str(args.npu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * nnpus_per_node + int(npu)

        dist.init_process_group(backend=args.dist_backend,
                                world_size=args.world_size, rank=args.rank)

    # create model
    if 'efficientnet' in args.arch:
        if args.pretrained:
            model = EfficientNet.from_pretrained(args.arch, advprop=args.advprop)
            print("=> using pre-trained model '{}'".format(args.arch))
        else:
            print("=> creating model '{}'".format(args.arch))
            model = EfficientNet.from_name(args.arch)
    else:
        if args.pretrained:
            print("=> using pre-trained model '{}'".format(args.arch))
            model = models.__dict__[args.arch](pretrained=True)
        else:
            print("=> creating model '{}'".format(args.arch))
            model = models.__dict__[args.arch]()

    criterion = nn.CrossEntropyLoss().to('npu:' + str(args.npu))

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    model = model.to('npu:' + str(args.npu))

    if args.amp:
        print("=> use amp...")
        if args.pm not in ['O1', 'O2']:
            print('=>unsupported precision mode!')
            # Exit with a failure status; the bare exit() used before
            # reported success (status 0) for this configuration error.
            sys.exit(1)
        opt_level = args.pm
        model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level, loss_scale=args.loss_scale)

    total_batch_size = args.batch_size
    if args.distributed:
        # --batch-size and --workers are per-node totals; split them
        # between the processes of this node.
        args.batch_size = int(args.batch_size / nnpus_per_node)
        args.workers = int(args.workers / nnpus_per_node)
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.npu], broadcast_buffers=False)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location='npu:' + str(args.npu))
            args.start_epoch = checkpoint['epoch']
            if args.amp:
                amp.load_state_dict(checkpoint['amp'])
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    if args.advprop:
        # AdvProp weights use inputs rescaled to [-1, 1] instead of mean/std normalisation.
        normalize = transforms.Lambda(lambda img: img * 2.0 - 1.0)
    else:
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

    if 'efficientnet' in args.arch:
        image_size = EfficientNet.get_image_size(args.arch)
    else:
        image_size = args.image_size

    if args.autoaug:
        print("=> use auto augment...")
        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(image_size),
                auto_augment_wrapper(image_size),
                transforms.ToTensor(),
                normalize,
            ]))
    else:
        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(image_size),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True)

    val_transforms = transforms.Compose([
        transforms.Resize(image_size, interpolation=PIL.Image.BICUBIC),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        normalize,
    ])
    print('npu:' + str(args.npu), ' optimizer params:', optimizer)

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, val_transforms),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        # validate() takes six positional arguments; the evaluation-only
        # path used to pass four and failed with a TypeError.
        res = validate(val_loader, model, criterion, args, args.start_epoch, nnpus_per_node)
        with open('res.txt', 'w') as f:
            print(res, file=f)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args, nnpus_per_node)

        # evaluate on validation set
        if epoch % args.val_feq == 0 or epoch == args.epochs - 1:
            validate(val_loader, model, criterion, args, epoch, nnpus_per_node)

        # Only one process per node writes the checkpoint.
        if not args.multiprocessing_distributed or args.rank % nnpus_per_node == 0:
            state = {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            if args.amp:
                # Keep the amp state so a resumed run can restore it.
                state['amp'] = amp.state_dict()
            save_checkpoint(state)
|
||||
|
||||
|
||||
def train(train_loader, model, criterion, optimizer, epoch, args, nnpus_per_node):
    """Run one training epoch and report progress and throughput.

    Args:
        train_loader: Iterable yielding ``(images, target)`` batches.
        model: Network to train; switched to train mode here.
        criterion: Loss applied to ``(output, target)``.
        optimizer: Optimizer stepped once per batch.
        epoch (int): Current epoch, used for the LR schedule and log prefix.
        args: Parsed command line options (``npu``, ``amp``, ``rank``,
            ``multiprocessing_distributed`` are read).
        nnpus_per_node (int): Number of devices on this node.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':6.4f')
    lr = AverageMeter('LR', ':6.4f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    fps_time = AverageMeter('FPS', ':6.1f')
    progress = ProgressMeter(
        len(train_loader),
        fps_time, batch_time, data_time, losses, lr, top1, top5,
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    device = 'npu:' + str(args.npu)
    num_batches = len(train_loader)
    end = time.time()
    for step, (images, target) in enumerate(train_loader):
        adjust_learning_rate_fraction_epoch(optimizer, epoch, step, num_batches, args)

        # time spent waiting for the batch
        data_time.update(time.time() - end)

        optimizer.zero_grad()

        target = target.int()
        images = images.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)

        # forward pass and loss
        output = model(images)
        loss = criterion(output, target)

        # record accuracy, loss and the learning rate in use
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        batch = images.size(0)
        losses.update(loss.item(), batch)
        lr.update(optimizer.param_groups[0]['lr'], batch)
        top1.update(acc1[0], batch)
        top5.update(acc5[0], batch)

        # backward pass (loss scaled through apex when amp is enabled) and step
        if args.amp:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        optimizer.step()

        # throughput over the total batch size, then plain elapsed time
        fps_time.update(total_batch_size / (time.time() - end))
        batch_time.update(time.time() - end)
        end = time.time()

        if not args.multiprocessing_distributed or args.rank % nnpus_per_node == 0:
            progress.print(step)

    hwlog.remark_print(key=hwlog.FPS, value=('{}'.format(fps_time)))
|
||||
|
||||
def validate(val_loader, model, criterion, args, epoch, nnpus_per_node):
    """Evaluate ``model`` on the validation set and return top-1 accuracy.

    Args:
        val_loader: Iterable yielding ``(images, target)`` batches.
        model: Network to evaluate; switched to eval mode here.
        criterion: Loss applied to ``(output, target)``.
        args: Parsed command line options (``npu``, ``rank``,
            ``multiprocessing_distributed`` are read).
        epoch: Not used by this function.
        nnpus_per_node (int): Number of devices on this node.

    Returns:
        The running average of the top-1 accuracy meter.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader), batch_time, losses, top1, top5,
                             prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    device = 'npu:' + str(args.npu)
    with torch.no_grad():
        end = time.time()
        for step, (images, target) in enumerate(val_loader):
            target = target.int()
            images = images.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)

            # forward pass and loss
            output = model(images)
            loss = criterion(output, target)

            # record accuracy and loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            batch = images.size(0)
            losses.update(loss.item(), batch)
            top1.update(acc1[0], batch)
            top5.update(acc5[0], batch)

            # elapsed time for this batch
            batch_time.update(time.time() - end)
            end = time.time()

            if not args.multiprocessing_distributed or args.rank % nnpus_per_node == 0:
                progress.print(step)

        # TODO: this should also be done with the ProgressMeter
        if not args.multiprocessing_distributed or args.rank % nnpus_per_node == 0:
            print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
                  .format(top1=top1, top5=top5))
            hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP1, value="{top1.avg:.3f}".format(top1=top1))
            hwlog.remark_print(key=hwlog.EVAL_ACCURACY_TOP5, value="{top5.avg:.3f}".format(top5=top5))

    return top1.avg
|
||||
|
||||
|
||||
def save_checkpoint(state, filename='checkpoint.pth'):
    """Serialize ``state`` to ``filename`` with ``torch.save``."""
    torch.save(obj=state, f=filename)
|
||||
|
||||
|
||||
class AverageMeter(object):
    """Tracks the latest value and a running average of a metric.

    The first four updates only set ``val``; they are left out of the
    running average.
    """

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()
        self.skip = 0

    def reset(self):
        """Clear the latest value, the statistics and the update counter."""
        self.val = self.avg = self.sum = self.count = 0
        self.skip = 0

    def update(self, val, n=1):
        """Record ``val`` (weighted by ``n``) as the newest observation."""
        self.val = val
        self.skip += 1
        # Updates 1-4 are excluded from the average; accumulation starts at the fifth.
        if self.skip >= 5:
            self.sum += val * n
            self.count += n
            self.avg = self.sum / self.count

    def __str__(self):
        template = '{name} {val%s} ({avg%s})' % (self.fmt, self.fmt)
        return template.format(**vars(self))
|
||||
|
||||
|
||||
class ProgressMeter(object):
    """Prints one progress line per call and forwards accuracy to ``hwlog``.

    Args:
        num_batches: Total number of batches, used to size the batch counter.
        *meters: Objects whose ``str()`` is appended to the progress line.
        prefix: Text placed in front of the batch counter.
    """

    def __init__(self, num_batches, *meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def print(self, batch):
        """Print the tab-separated progress line for ``batch``.

        When an entry labelled ``Acc@1`` / ``Acc@5`` is present, the value
        printed right after that label is also sent to ``hwlog.remark_print``.
        A missing label is skipped instead of raising ``IndexError``.
        """
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

        train_acc1 = self._value_after_label(entries, "Acc@1")
        if train_acc1 is not None:
            hwlog.remark_print(key=hwlog.TRAIN_ACCURACY_TOP1, value=train_acc1)
        train_acc5 = self._value_after_label(entries, "Acc@5")
        if train_acc5 is not None:
            hwlog.remark_print(key=hwlog.TRAIN_ACCURACY_TOP5, value=train_acc5)

    @staticmethod
    def _value_after_label(entries, label):
        """Return the first whitespace-delimited token following ``label``, or None."""
        for text in entries:
            _, found, rest = text.partition(label)
            if found:
                tokens = rest.split()
                return tokens[0] if tokens else None
        return None

    def _get_batch_fmtstr(self, num_batches):
        """Build a ``[{:Nd}/<num_batches>]`` template, N being the digit count."""
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'
|
||||
|
||||
|
||||
def adjust_learning_rate(optimizer, epoch, args):
    """Step schedule: ``args.lr`` multiplied by 0.1 for every 30 completed epochs.

    The resulting rate is written to the ``'lr'`` entry of every group in
    ``optimizer.param_groups``.
    """
    decay_steps = epoch // 30
    new_lr = args.lr * (0.1 ** decay_steps)
    for group in optimizer.param_groups:
        group['lr'] = new_lr
|
||||
|
||||
|
||||
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k.

    Args:
        output: 2-D score tensor; the top-k search runs along dimension 1 and
            dimension 0 is matched against ``target``.
        target: 1-D tensor of class indices; its length is the batch size.
        topk: Iterable of k values to evaluate.

    Returns:
        A list with one single-element tensor per entry of ``topk`` holding
        the percentage (0-100) of samples whose target is among the k
        highest-scoring classes.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # pred becomes (maxk, batch) after the transpose: row i holds the
        # i-th best class of every sample.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # reshape() instead of view(): the slice of the transposed
            # comparison is not guaranteed to be contiguous, and view()
            # raises a RuntimeError on such tensors.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
|
||||
|
||||
def auto_augment_wrapper(img_size, auto_augment='original-mstd0.5'):
    """Build the augmentation transform selected by the ``auto_augment`` string.

    A policy starting with ``rand`` goes to ``rand_augment_transform``, one
    starting with ``augmix`` goes to ``augment_and_mix_transform`` (with
    ``translate_pct`` set to 0.3), and anything else goes to
    ``auto_augment_transform``. All receive ``translate_const`` (45% of
    ``img_size``, truncated) and ``img_mean`` (the ImageNet mean scaled to 0-255).
    """
    assert isinstance(auto_augment, str)

    imagenet_mean = (0.485, 0.456, 0.406)
    params = {
        'translate_const': int(img_size * 0.45),
        'img_mean': tuple(min(255, round(255 * channel)) for channel in imagenet_mean),
    }

    if auto_augment.startswith('rand'):
        return rand_augment_transform(auto_augment, params)
    if auto_augment.startswith('augmix'):
        params['translate_pct'] = 0.3
        return augment_and_mix_transform(auto_augment, params)
    return auto_augment_transform(auto_augment, params)
|
||||
|
||||
def adjust_learning_rate_fraction_epoch(optimizer, epoch, step, steps_per_epoch, args):
    """Set the LR to ``args.lr`` decayed by 0.97 once per 5 epochs' worth of steps.

    The global step is ``step + epoch * steps_per_epoch``; the decay exponent
    is that value floor-divided by ``int(steps_per_epoch * 5.0)``. The result
    is written to the ``'lr'`` entry of every group in ``optimizer.param_groups``.
    """
    global_step = step + epoch * steps_per_epoch
    steps_per_decay = int(steps_per_epoch * 5.0)
    new_lr = args.lr * (0.97 ** (global_step // steps_per_decay))
    for group in optimizer.param_groups:
        group['lr'] = new_lr
|
||||
|
||||
if __name__ == '__main__':
    # Gather environment facts and the "pytorch_config" model settings, write
    # them to the benchmark remark log, then start training.
    cpu_info, npu_infos, framework_info, os_info, benchmark_version = get_environment_info("pytorch")
    config_info = get_model_parameter("pytorch_config")
    # Module-level name kept as-is (including its spelling) so any other code
    # in this file that reads it keeps working.
    initinal_data = {"base_lr": 0.1, "dataset": "imagenet", "optimizer": "SGD", "loss_scale": 1024}

    for remark_key, remark_value in (
            (hwlog.CPU_INFO, cpu_info),
            (hwlog.NPU_INFO, npu_infos),
            (hwlog.OS_INFO, os_info),
            (hwlog.FRAMEWORK_INFO, framework_info),
            (hwlog.BENCHMARK_VERSION, benchmark_version),
            (hwlog.CONFIG_INFO, config_info),
            (hwlog.BASE_LR, initinal_data.get("base_lr")),
            (hwlog.DATASET, initinal_data.get("dataset")),
            (hwlog.OPT_NAME, initinal_data.get("optimizer")),
            (hwlog.LOSS_SCALE, initinal_data.get("loss_scale")),
    ):
        hwlog.remark_print(key=remark_key, value=remark_value)

    main()
|
||||
+177
File diff suppressed because one or more lines are too long
+144
File diff suppressed because one or more lines are too long
+1
File diff suppressed because one or more lines are too long
+43
@@ -0,0 +1,43 @@
|
||||
from efficientnet_pytorch import EfficientNet as _EfficientNet
|
||||
|
||||
dependencies = ['torch']
|
||||
|
||||
|
||||
def _create_model_fn(model_name):
    """Return a factory function for the EfficientNet variant ``model_name``.

    Underscores in ``model_name`` are replaced by dashes before the name is
    handed to ``_EfficientNet``.
    """
    def _model_fn(num_classes=1000, in_channels=3, pretrained='imagenet'):
        """Create Efficient Net.

        Described in detail here: https://arxiv.org/abs/1905.11946

        Args:
            num_classes (int, optional): Number of classes, default is 1000.
            in_channels (int, optional): Number of input channels, default
                is 3.
            pretrained (str, optional): One of [None, 'imagenet', 'advprop']
                If None, no pretrained model is loaded.
                If 'imagenet', models trained on imagenet dataset are loaded.
                If 'advprop', models trained using adversarial training called
                advprop are loaded. It is important to note that the
                preprocessing required for the advprop pretrained models is
                slightly different from normal ImageNet preprocessing
        """
        dashed_name = model_name.replace('_', '-')

        if pretrained is None:
            # Randomly initialised network; the input stem is adapted afterwards.
            model = _EfficientNet.from_name(
                model_name=dashed_name,
                override_params={'num_classes': num_classes},
            )
            model._change_in_channels(in_channels)
            return model

        return _EfficientNet.from_pretrained(
            model_name=dashed_name,
            advprop=(pretrained == 'advprop'),
            num_classes=num_classes,
            in_channels=in_channels)

    return _model_fn
|
||||
|
||||
# Expose efficientnet_b0 ... efficientnet_b8 as module-level factory functions.
for model_name in ('efficientnet_b%d' % variant for variant in range(9)):
    globals()[model_name] = _create_model_fn(model_name)
|
||||
@@ -0,0 +1,9 @@
|
||||
# Configure the Ascend NNAE runtime environment, then train EfficientNet-B0
# on NPU 0 with the ImageNet data under /data/imagenet.
export ASCEND_HOME=/usr/local/Ascend

# Helper paths derived from ASCEND_HOME so the install prefix is spelled once.
nnae_home="${ASCEND_HOME}/nnae/latest"
fwk_home="${nnae_home}/fwkacllib"
site_packages="${fwk_home}/python/site-packages"

# "${VAR:+${VAR}:}" keeps inherited entries but avoids a leading empty path
# element (which the loader treats as the current directory) when VAR is unset.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/lib/:/usr/lib/:${fwk_home}/lib64:${ASCEND_HOME}/driver/lib64/common/:${ASCEND_HOME}/driver/lib64/driver/:${ASCEND_HOME}/add-ons/:/usr/local/python3.7.5/lib/"
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${site_packages}/:${site_packages}/auto_tune.egg/auto_tune:${site_packages}/schedule_search.egg:${nnae_home}/opp/op_impl/built-in/ai_core/tbe:${site_packages}/hccl:${PWD}"
export PATH="${PATH}:${fwk_home}/ccec_compiler/bin"
export ASCEND_OPP_PATH="${nnae_home}/opp/"
export SLOG_PRINT_TO_STDOUT=0
export TASK_QUEUE_ENABLE=1

# The relative script path means this must be started from the code root.
# taskset pins the process to CPUs 0-64.
taskset -c 0-64 python3.7 examples/imagenet/main.py \
  --data=/data/imagenet \
  --arch=efficientnet-b0 \
  --batch-size=256 \
  --lr=0.2 \
  --epochs=200 \
  --autoaug \
  --npu=0 \
  --amp \
  --pm=O1 \
  --loss_scale=1024
|
||||
@@ -0,0 +1,123 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Note: To use the 'upload' functionality of this file, you must:
|
||||
# $ pipenv install twine --dev
|
||||
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
from shutil import rmtree
|
||||
|
||||
from setuptools import find_packages, setup, Command
|
||||
|
||||
# Package meta-data.
NAME = 'efficientnet_pytorch'
DESCRIPTION = 'EfficientNet implemented in PyTorch.'
URL = 'https://github.com/lukemelas/EfficientNet-PyTorch'
EMAIL = 'lmelaskyriazi@college.harvard.edu'
AUTHOR = 'Luke'
REQUIRES_PYTHON = '>=3.5.0'
VERSION = '0.7.0'

# Packages required for this module to be executed.
REQUIRED = ['torch']

# Optional packages, keyed by extra name (none at the moment).
EXTRAS = {}

# Directory containing this setup script.
here = os.path.abspath(os.path.dirname(__file__))

# Use README.md as the long description when it is available next to this
# file (it must be listed in MANIFEST.in to be shipped); otherwise fall back
# to the short description.
try:
    with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as readme_file:
        long_description = '\n' + readme_file.read()
except FileNotFoundError:
    long_description = DESCRIPTION

# Version information: taken from VERSION when set, otherwise loaded by
# executing <package>/__version__.py into the ``about`` dictionary.
about = {}
if VERSION:
    about['__version__'] = VERSION
else:
    project_slug = NAME.lower().replace("-", "_").replace(" ", "_")
    with open(os.path.join(here, project_slug, '__version__.py')) as version_file:
        exec(version_file.read(), about)
|
||||
|
||||
|
||||
class UploadCommand(Command):
    """Support setup.py upload."""

    description = 'Build and publish the package.'
    user_options = []

    @staticmethod
    def status(s):
        """Prints things in bold."""
        print('\033[1m{0}\033[0m'.format(s))

    @staticmethod
    def _run_or_abort(command):
        """Run ``command`` through the shell; stop the upload if it fails.

        Without this check a failed build would still be followed by the
        upload, the git tag and the push.
        """
        if os.system(command) != 0:
            sys.exit('Command failed: {0}'.format(command))

    def initialize_options(self):
        """No options to initialise (required by the Command interface)."""
        pass

    def finalize_options(self):
        """No options to finalise (required by the Command interface)."""
        pass

    def run(self):
        """Rebuild the distributions, upload them with twine and push a git tag."""
        try:
            self.status('Removing previous builds…')
            rmtree(os.path.join(here, 'dist'))
        except OSError:
            # Nothing to clean up, e.g. no previous 'dist' directory.
            pass

        self.status('Building Source and Wheel (universal) distribution…')
        self._run_or_abort('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable))

        self.status('Uploading the package to PyPI via Twine…')
        self._run_or_abort('twine upload dist/*')

        self.status('Pushing git tags…')
        self._run_or_abort('git tag v{0}'.format(about['__version__']))
        self._run_or_abort('git push --tags')

        sys.exit()
|
||||
|
||||
|
||||
# Where the magic happens:
|
||||
# Everything handed to setuptools, collected in one place.
setup_options = dict(
    name=NAME,
    version=about['__version__'],
    description=DESCRIPTION,
    long_description=long_description,
    long_description_content_type='text/markdown',
    author=AUTHOR,
    author_email=EMAIL,
    python_requires=REQUIRES_PYTHON,
    url=URL,
    packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]),
    # py_modules=['model'],  # for a single-module package, use this instead of 'packages'
    install_requires=REQUIRED,
    extras_require=EXTRAS,
    include_package_data=True,
    license='Apache',
    classifiers=[
        # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
        'License :: OSI Approved :: Apache Software License',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.6',
    ],
    # Registers the "upload" command: $ setup.py upload
    cmdclass={
        'upload': UploadCommand,
    },
)

setup(**setup_options)
|
||||
+124
@@ -0,0 +1,124 @@
|
||||
from collections import OrderedDict
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from efficientnet_pytorch import EfficientNet
|
||||
|
||||
|
||||
# -- fixtures -------------------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture(scope='module', params=list(range(4)))
def model(request):
    """Model name under test: efficientnet-b0 through efficientnet-b3."""
    return 'efficientnet-b%d' % request.param
|
||||
|
||||
|
||||
@pytest.fixture(scope='module', params=(True, False))
def pretrained(request):
    """Whether the network fixture loads pretrained weights."""
    use_pretrained = request.param
    return use_pretrained
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
def net(model, pretrained):
    """Fresh network per test, built with or without pretrained weights."""
    if pretrained:
        return EfficientNet.from_pretrained(model)
    return EfficientNet.from_name(model)
|
||||
|
||||
|
||||
# -- tests ----------------------------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.parametrize('img_size', [224, 256, 512])
def test_forward(net, img_size):
    """Test `.forward()` doesn't throw an error"""
    batch = torch.zeros((1, 3, img_size, img_size))
    logits = net(batch)
    assert not torch.isnan(logits).any()
|
||||
|
||||
|
||||
def test_dropout_training(net):
    """Test dropout `.training` is set by `.train()` on parent `nn.module`"""
    net.train()
    dropout_layer = net._dropout
    assert dropout_layer.training is True
|
||||
|
||||
|
||||
def test_dropout_eval(net):
    """Test dropout `.training` is set by `.eval()` on parent `nn.module`"""
    net.eval()
    dropout_layer = net._dropout
    assert dropout_layer.training is False
|
||||
|
||||
|
||||
def test_dropout_update(net):
    """Test dropout `.training` is updated by `.train()` and `.eval()` on parent `nn.module`"""
    # Toggle train -> eval twice and check the dropout flag after each switch.
    for _ in range(2):
        net.train()
        assert net._dropout.training is True
        net.eval()
        assert net._dropout.training is False
|
||||
|
||||
|
||||
@pytest.mark.parametrize('img_size', [224, 256, 512])
def test_modify_dropout(net, img_size):
    """Test ability to modify dropout and fc modules of network"""
    feature_dim = net._bn1.num_features
    drop_rate = net._global_params.dropout_rate

    # Replacement head: BN -> dropout -> 512-unit linear -> ReLU -> BN -> dropout.
    head_layers = OrderedDict()
    head_layers['_bn2'] = nn.BatchNorm1d(feature_dim)
    head_layers['_drop1'] = nn.Dropout(p=drop_rate)
    head_layers['_linear1'] = nn.Linear(feature_dim, 512)
    head_layers['_relu'] = nn.ReLU()
    head_layers['_bn3'] = nn.BatchNorm1d(512)
    head_layers['_drop2'] = nn.Dropout(p=drop_rate / 2)
    dropout = nn.Sequential(head_layers)
    fc = nn.Linear(512, net._global_params.num_classes)

    net._dropout = dropout
    net._fc = fc

    batch = torch.zeros((2, 3, img_size, img_size))
    logits = net(batch)
    assert not torch.isnan(logits).any()
|
||||
|
||||
|
||||
@pytest.mark.parametrize('img_size', [224, 256, 512])
def test_modify_pool(net, img_size):
    """Test ability to modify pooling module of network"""

    class AdaptiveMaxAvgPool(nn.Module):
        """Concatenates global average and global max pooling along dim 1."""

        def __init__(self):
            super().__init__()
            self.ada_avgpool = nn.AdaptiveAvgPool2d(1)
            self.ada_maxpool = nn.AdaptiveMaxPool2d(1)

        def forward(self, x):
            pooled = (self.ada_avgpool(x), self.ada_maxpool(x))
            return torch.cat(pooled, dim=1)

    # The pooled feature width doubles, so the classifier input must double too.
    doubled_fc = nn.Linear(net._fc.in_features * 2, net._global_params.num_classes)

    net._avg_pooling = AdaptiveMaxAvgPool()
    net._fc = doubled_fc

    batch = torch.zeros((2, 3, img_size, img_size))
    logits = net(batch)
    assert not torch.isnan(logits).any()
|
||||
|
||||
|
||||
@pytest.mark.parametrize('img_size', [224, 256, 512])
def test_extract_endpoints(net, img_size):
    """Test `.extract_endpoints()` doesn't throw an error"""
    batch = torch.zeros((1, 3, img_size, img_size))
    endpoints = net.extract_endpoints(batch)

    levels = range(1, 6)
    # Every reduction level must be free of NaNs ...
    for level in levels:
        assert not torch.isnan(endpoints['reduction_%d' % level]).any()
    # ... and level i must have its size along dim 2 divided by 2**i.
    for level in levels:
        assert endpoints['reduction_%d' % level].size(2) == img_size // (2 ** level)
|
||||
+31
@@ -0,0 +1,31 @@
|
||||
############## toolkit situation ################
|
||||
#export LD_LIBRARY_PATH=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
|
||||
#export PATH=$PATH:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:/usr/local/Ascend/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/
|
||||
#export ASCEND_OPP_PATH=/usr/local/Ascend/ascend-toolkit/latest/opp/
|
||||
#export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
|
||||
#export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
|
||||
|
||||
|
||||
############## nnae situation ################
|
||||
|
||||
|
||||
# Pick the Ascend package root: NNAE when it is installed, ascend-toolkit
# otherwise. The two setups also order the host library directories
# differently, so that order is kept per branch.
if [ -d /usr/local/Ascend/nnae/latest ]; then
  _ascend_pkg=/usr/local/Ascend/nnae/latest
  _host_libs=/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/
else
  _ascend_pkg=/usr/local/Ascend/ascend-toolkit/latest
  _host_libs=/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib
fi
_fwk="${_ascend_pkg}/fwkacllib"
_opskernel="${_fwk}/lib64/plugin/opskernel"
_site_packages="${_fwk}/python/site-packages"

# The multiarch directory is /usr/lib/aarch64-linux-gnu in both setups (the
# NNAE branch previously pointed at a non-existent "aarch64_64-linux-gnu").
# "${VAR:+:${VAR}}" appends inherited entries without leaving an empty path
# element when VAR is unset.
export LD_LIBRARY_PATH="${_host_libs}:${_fwk}/lib64/:/usr/local/Ascend/driver/lib64/common/:/usr/local/Ascend/driver/lib64/driver/:/usr/local/Ascend/add-ons/:/usr/lib/aarch64-linux-gnu${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export PATH="${PATH}:${_fwk}/ccec_compiler/bin/:${_ascend_pkg}/toolkit/tools/ide_daemon/bin/"
export ASCEND_OPP_PATH="${_ascend_pkg}/opp/"
export OPTION_EXEC_EXTERN_PLUGIN_PATH="${_opskernel}/libfe.so:${_opskernel}/libaicpu_engine.so:${_opskernel}/libge_local_engine.so"
export PYTHONPATH="${_site_packages}/:${_site_packages}/auto_tune.egg/auto_tune:${_site_packages}/schedule_search.egg${PYTHONPATH:+:${PYTHONPATH}}"

# This file is meant to be sourced; do not leak the helper variables.
unset _ascend_pkg _host_libs _fwk _opskernel _site_packages

# ln -s /usr/local/Ascend/ascend-toolkit/latest/toolkit/bin/adc /usr/local/bin/

export SLOG_PRINT_TO_STDOUT=0
#su HwHiAiUser -c "adc --host 0.0.0.0:22118 --log \"SetLogLevel(0)[error]\" --device 0"

export TASK_QUEUE_ENABLE=1
|
||||
@@ -0,0 +1,62 @@
|
||||
#!/bin/bash
# Start an EfficientNet (PyTorch) training job: directly on this host, or
# through mpirun when running in a container with CLUSTER=True.
#
# Usage: <this script> RANK_SIZE YAML_PATH TOOLS_PATH [CLUSTER MPIRUN_ALL_IP]
#   RANK_SIZE      number of devices to train on
#   YAML_PATH      model YAML; its "pytorch_config" section is loaded below
#   TOOLS_PATH     directory that contains get_params_for_yaml.sh
#   CLUSTER        "True" selects the mpirun path (read only if /.dockerenv exists)
#   MPIRUN_ALL_IP  whitespace-separated host IPs (read only if /.dockerenv exists)

usage="usage: $0 RANK_SIZE YAML_PATH TOOLS_PATH [CLUSTER MPIRUN_ALL_IP]"
rank_size=${1:?${usage}}
yamlPath=${2:?${usage}}
toolsPath=${3:?${usage}}

currentDir=$(cd "$(dirname "$0")/.." && pwd)
# Not referenced again in this script; kept as in the original.
model_name=$(cd "${currentDir}/.." && basename "$(pwd)")
if [ -f /.dockerenv ]; then
  CLUSTER=$4
  MPIRUN_ALL_IP="$5"
  export CLUSTER
fi

# Load the settings from the YAML file as shell variables.
# The expansion is left unquoted as before because the helper's output format
# is not visible from this script.
# shellcheck disable=SC2046
eval $("${toolsPath}/get_params_for_yaml.sh" "${yamlPath}" "pytorch_config")

# Remove old logs. ":?" aborts instead of expanding to /result when empty.
rm -rf /var/log/npu/slog/host-0/*
rm -rf "${currentDir:?}"/result/*.log

# Create the directory of this training job.
currtime=$(date +%Y%m%d%H%M%S)
job_dir="${currentDir%train*}/train/result/pt_efficientnet/training_job_${currtime}"
mkdir -p "${job_dir}/"
export train_job_dir="${job_dir}/"
echo "[$(date +%Y%m%d-%H:%M:%S)] [INFO] ${train_job_dir}"

# Device list: taken from device_group_<rank_size>p when that variable is
# set; otherwise (or for 8 and more devices) devices 0..rank_size-1 are used.
device_group_var="device_group_${rank_size}p"
device_group=${!device_group_var}
if [ -z "${device_group}" ] || [ "${rank_size}" -ge 8 ]; then
  device_group=$(seq 0 $(( rank_size - 1 )))
fi

# First device id of the group: its directory holds the remark log that is
# linked into the job directory below. The leading run of digits is taken so
# ids with more than one digit are not truncated.
# shellcheck disable=SC2086,SC2116
device_group_str=$(echo ${device_group})
first_device_id=${device_group_str%%[!0-9]*}

if [ "${CLUSTER}" == "True" ]; then
  this_ip=$(hostname -I | awk '{print $1}')
  ln -snf "${job_dir}/0/hw_efficientnet.log" "${job_dir}/"
  # Copy the configuration to every other host of the cluster.
  for ip in ${MPIRUN_ALL_IP}; do
    if [ "${ip}" != "${this_ip}" ]; then
      scp "${yamlPath}" "root@${ip}:${yamlPath}"
      # NOTE(review): jsonFilePath is not assigned in this script; presumably
      # it comes from the evaluated YAML settings. Confirm.
      if [ -n "${jsonFilePath}" ]; then
        scp "${jsonFilePath}" "root@${ip}:${jsonFilePath}"
      fi
    fi
  done
  export PATH=$PATH:/usr/local/mpirun4.0/bin
  # NOTE(review): mpirun_ip is not assigned in this script either; presumably
  # it also comes from the evaluated YAML settings. Confirm.
  mpirun -H "${mpirun_ip}" \
    --bind-to none -map-by slot \
    --allow-run-as-root \
    --mca btl_tcp_if_exclude lo,docker0,endvnic,virbr0,vethf40501b,docker_gwbridge,br-f42ac38052b4 \
    --prefix /usr/local/mpirun4.0/ \
    "${currentDir}/scripts/train.sh" 0 "${rank_size}" "${yamlPath}" "${currtime}" "${toolsPath}" "${CLUSTER}"
else
  # A single train.sh process is started in the background; the per-device
  # loop that used to wrap this call is disabled.
  rank_id=0
  ln -snf "${job_dir}/${first_device_id}/hw_efficientnet.log" "${job_dir}/"
  "${currentDir}/scripts/train.sh" 0 "${rank_size}" "${yamlPath}" "${currtime}" "${toolsPath}" "${rank_id}" &
fi
wait
|
||||
|
||||
|
||||
+132
@@ -0,0 +1,132 @@
|
||||
#!/usr/bin/env bash
# Run one EfficientNet-B0 training process and append the result and the
# elapsed time to the remark log.
#
# Usage: train.sh DEVICE_ID RANK_SIZE YAML_PATH TIMESTAMP TOOLS_PATH RANK_ID|True
#   DEVICE_ID   device to use (replaced by the atlasboost value in cluster mode)
#   RANK_SIZE   number of devices taking part in the job
#   YAML_PATH   model YAML; its "pytorch_config" section is loaded below
#   TIMESTAMP   suffix of the training_job_<TIMESTAMP> directory
#   TOOLS_PATH  directory that contains get_params_for_yaml.sh
#   $6          "True" selects cluster mode, anything else is used as RANK_ID

device_id=$1
rank_size=$2
yamlPath=$3

currentDir=$(cd "$(dirname "$0")/.." && pwd)
currtime=$4
toolsPath=$5
export YAML_PATH=$3
mkdir -p "${currentDir%train*}/train/result/pt_efficientnet/training_job_${currtime}/"
export train_job_dir="${currentDir%train*}/train/result/pt_efficientnet/training_job_${currtime}/"

# Load the settings from the YAML file as shell variables.
# The expansion is left unquoted as before because the helper's output format
# is not visible from this script.
# shellcheck disable=SC2046
eval $("${toolsPath}/get_params_for_yaml.sh" "${yamlPath}" "pytorch_config")

# Name of the remark log file; must be "hw_" followed by the lowercase model name.
export REMARK_LOG_FILE=hw_efficientnet.log
benchmark_log_path="${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils"
export PYTHONPATH=$PYTHONPATH:${benchmark_log_path}

#source ${currentDir}/config/npu_set_env.sh
# shellcheck disable=SC1091
source "${currentDir}/config/set_env_b023.sh"
# user env
export HCCL_CONNECT_TIMEOUT=600
export JOB_ID=9999001
export HCCL_RANK_TABLE_PATH="${currentDir}/config/${rank_size}p.json"
export RANK_SIZE=${rank_size}
export SLOG_PRINT_TO_STDOUT=0
export DEVICE_ID=${device_id}
# RANK_INDEX is not set in this script; an unset name counts as 0 here.
DEVICE_INDEX=$(( DEVICE_ID + RANK_INDEX * 8 ))
export DEVICE_INDEX

cd "${train_job_dir}" || exit 1
curd_dir="${currentDir%atlas_benchmark-master*}/atlas_benchmark-master/utils/atlasboost"
export PYTHONPATH=$PYTHONPATH:${curd_dir}

if [ "$6" != "True" ]; then
  rank_id=$6
  export RANK_ID=$6
else
  # Cluster mode: ask atlasboost for this process' rank and device.
  device_id_mo=$(python3.7 -c "import src.tensorflow.mpi_ops as atlasboost;atlasboost.init(); \
device_id = atlasboost.local_rank();cluster_device_id = str(device_id); \
atlasboost.set_device_id(device_id);print(atlasboost.rank())")
  # Collapse the captured output to one line; the parsing below takes the
  # last word as the rank and the text between "deviceid = " and " phyid="
  # as the device id.
  # shellcheck disable=SC2086,SC2116
  device_id_mo=$(echo $device_id_mo)
  rank_id=${device_id_mo##* }
  export RANK_ID=${rank_id}
  device=${device_id_mo##*deviceid = }
  device_id=${device%% phyid=*}
  export DEVICE_ID=${device_id}
  # Glob, expanded by cp: the JSON file(s) found in the job directory become
  # the rank table for this rank size.
  hccljson=${train_job_dir}/*.json
  # shellcheck disable=SC2086
  cp ${hccljson} "${currentDir}/config/${rank_size}p.json"
fi

# Create and enter the execution directory of this device.
mkdir -p "${train_job_dir}/${device_id}"
cd "${train_job_dir}/${device_id}" || exit 1

startTime=$(date +%Y%m%d-%H:%M:%S)
startTime_s=$(date +%s)

# Multi-host, multi-device run.
if [ "$6" == "True" ]; then
  export CLUSTER=True
fi

# Choose the call parameters by mode and device count. The exit status of the
# training command is captured right away so the report below reflects it.
train_log="${train_job_dir}/train_${rank_size}p.log"
train_status=0
if [ "${mode}" == "evaluate" ]; then
  # The original placeholder here was the Python keyword "pass", which is not
  # a shell command; keep reporting a failure, but say why.
  echo "evaluate mode is not implemented in this script" >&2
  train_status=1

elif [ "${rank_size}" == "1" ]; then
  # Single device. --npu falls back to DEVICE_ID because ${device} is only
  # assigned in the cluster branch above.
  taskset -c 0-128 python3.7 "${currentDir}/code/examples/imagenet/main.py" \
    "--data=${data_url}" \
    --arch=efficientnet-b0 \
    "--batch-size=${batch_size}" \
    --lr=0.2 \
    --momentum=0 \
    "--epochs=${epoches}" \
    --autoaug \
    --amp \
    --pm=O1 \
    --loss_scale=128 \
    --val_feq=10 \
    "--npu=${device:-${device_id}}" > "${train_log}" 2>&1
  train_status=$?

elif [ "${rank_size}" -le 8 ]; then
  # Single host, several devices.
  # NOTE(review): device_group is not assigned in this script; presumably it
  # comes from the environment or the evaluated YAML settings. Left unquoted
  # as before because its expected format cannot be confirmed from here.
  # shellcheck disable=SC2086
  taskset -c 0-128 python3.7 "${currentDir}/code/examples/imagenet/main.py" \
    "--data=${data_url}" \
    --arch=efficientnet-b0 \
    "--batch-size=${batch_size}" \
    "--lr=${lr}" \
    --momentum=0 \
    "--epochs=${epoches}" \
    --autoaug \
    --amp \
    --pm=O1 \
    --loss_scale=128 \
    --val_feq=10 \
    "--addr=$(hostname -I | awk '{print $1}')" \
    --dist-backend=hccl \
    --multiprocessing-distributed \
    --world-size 1 \
    --rank 0 \
    --device_list ${device_group} > "${train_log}" 2>&1
  train_status=$?

else
  # No launch command exists for more than 8 devices; do not report success
  # for a run that never started.
  echo "rank_size ${rank_size} is not supported by this script" >&2
  train_status=1
fi

#taskset -c 0-20 python3.7 ${currentDir}/code/efficientnet.py > ./train.log 2>&1

if [ "${train_status}" -eq 0 ]; then
  result_line=":::ABK 1.0.0 efficientnet train success"
else
  result_line=":::ABK 1.0.0 efficientnet train failed"
fi
echo "${result_line}"
echo "${result_line}" >> "${train_log}"
echo "${result_line}" >> ./hw_efficientnet.log

# Elapsed wall-clock time as hours:minutes:seconds.
endTime=$(date +%Y%m%d-%H:%M:%S)
endTime_s=$(date +%s)
sumTime=$(( endTime_s - startTime_s ))
hour=$(( sumTime / 3600 ))
min=$(( (sumTime - hour * 3600) / 60 ))
sec=$(( sumTime - hour * 3600 - min * 60 ))
echo ":::ABK 1.0.0 efficientnet train total time: ${hour}:${min}:${sec}" >> "${train_job_dir}/${device_id}/hw_efficientnet.log"
|
||||
Reference in New Issue
Block a user