Coding style changes

This commit is contained in:
Heiko J Schick
2020-10-20 23:01:28 +02:00
parent f0d1b50727
commit 65c7cd2178
+324 -308
View File
@@ -3,179 +3,198 @@ YOLO v3 object detection with Keras
Source: https://towardsdatascience.com/yolo-v3-object-detection-with-keras-461d2cfccef6 Source: https://towardsdatascience.com/yolo-v3-object-detection-with-keras-461d2cfccef6
""" """
# import os
# import scipy.io
# import scipy.misc
import numpy as np
# import pandas as pd
# import PIL
import struct import struct
# import cv2 import glob
import numpy as np
from numpy import expand_dims from numpy import expand_dims
# import tensorflow as tf from keras.layers import Input, Conv2D, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D
# from skimage.transform import resize from keras.models import Model
from keras import backend as K
from keras.layers import Input, Lambda, Conv2D, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D
from keras.models import load_model, Model
from keras.layers.merge import add, concatenate from keras.layers.merge import add, concatenate
from keras.preprocessing.image import load_img from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array from keras.preprocessing.image import img_to_array
from matplotlib import pyplot from matplotlib import pyplot
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from matplotlib.patches import Rectangle from matplotlib.patches import Rectangle
# %matplotlib inline
"""**Step 1:** `WeightReader` class is used to parse the "yolov3.weights" file and load the model weights into memory in a format that we can set into keras model"""
# Step 1:
# WeightReader class is used to parse the "yolov3.weights" file and load the model weights into
# memory in a format that we can set into keras model
class WeightReader:
    """Read a binary weight file and copy its values into a Keras model.

    The file is expected to start with three native int32 header fields
    (major, minor, revision) followed by a 4- or 8-byte field that is skipped
    (presumably Darknet's "seen" counter -- TODO confirm). Everything after
    the header is interpreted as one flat float32 array that is consumed
    sequentially through :meth:`read_bytes`.
    """

    def __init__(self, weight_file):
        """Load the whole weight file into memory.

        Args:
            weight_file: path of the binary weight file.
        """
        with open(weight_file, 'rb') as w_f:
            major, minor, _revision = struct.unpack('iii', w_f.read(12))
            # The width of the next header field depends on the file version.
            if (major * 10 + minor) >= 2 and major < 1000 and minor < 1000:
                w_f.read(8)
            else:
                w_f.read(4)
            binary = w_f.read()
        self.offset = 0
        self.all_weights = np.frombuffer(binary, dtype='float32')

    def read_bytes(self, size):
        """Return the next ``size`` float32 values and advance the cursor.

        Despite the name, ``size`` counts array elements, not bytes.

        Raises:
            ValueError: if fewer than ``size`` values remain, which means the
                weight file is truncated or does not match the model.
        """
        size = int(size)
        end = self.offset + size
        if end > len(self.all_weights):
            raise ValueError(
                "weight file exhausted: requested %d values at offset %d, "
                "but only %d values are available"
                % (size, self.offset, len(self.all_weights)))
        chunk = self.all_weights[self.offset:end]
        self.offset = end
        return chunk

    def load_weights(self, model):
        """Copy the weights into the ``conv_<i>`` / ``bnorm_<i>`` layers of ``model``.

        Layer indices 0..105 are visited in order. Indices for which the
        model has no ``conv_<i>`` layer are reported and skipped without
        consuming any weights.

        Args:
            model: Keras model whose layers follow the ``conv_<i>`` /
                ``bnorm_<i>`` naming scheme.
        """
        for i in range(106):
            # Only the layer lookup is guarded: a ValueError raised later
            # (truncated file, shape mismatch) must not be reported as a
            # missing convolution.
            try:
                conv_layer = model.get_layer('conv_' + str(i))
            except ValueError:
                print("no convolution #" + str(i))
                continue
            print("loading weights of convolution #" + str(i))

            # Layers 81, 93 and 105 have no batch normalization.
            if i not in [81, 93, 105]:
                norm_layer = model.get_layer('bnorm_' + str(i))
                size = np.prod(norm_layer.get_weights()[0].shape)
                # File order is beta, gamma, mean, variance ...
                beta = self.read_bytes(size)   # bias
                gamma = self.read_bytes(size)  # scale
                mean = self.read_bytes(size)   # mean
                var = self.read_bytes(size)    # variance
                # ... while the layer is given gamma first.
                norm_layer.set_weights([gamma, beta, mean, var])

            conv_weights = conv_layer.get_weights()
            # When the layer has a bias it is stored before the kernel.
            bias = None
            if len(conv_weights) > 1:
                bias = self.read_bytes(np.prod(conv_weights[1].shape))

            kernel_shape = conv_weights[0].shape
            kernel = self.read_bytes(np.prod(kernel_shape))
            # The flat data is laid out with the axes in the reverse order of
            # the Keras kernel shape; reshape, then permute back.
            kernel = kernel.reshape(list(reversed(kernel_shape)))
            kernel = kernel.transpose([2, 3, 1, 0])
            if bias is None:
                conv_layer.set_weights([kernel])
            else:
                conv_layer.set_weights([kernel, bias])

    def reset(self):
        """Rewind the read cursor to the first weight value."""
        self.offset = 0
# Step 2:
# _cb(inp, convs, skip=True) creates a block of convolutional layers
def _cb(inp, convs, skip=True):
    """Stack the convolutions described by ``convs`` on top of ``inp``.

    Args:
        inp: input tensor.
        convs: list of dicts with the keys 'filter', 'kernel', 'stride',
            'bnorm', 'leaky' and 'layer_idx', one dict per convolution.
        skip: when true, the tensor that enters the second-to-last
            convolution is added to the block output (residual connection).

    Returns:
        The output tensor of the block.

    Raises:
        ValueError: if ``skip`` is true but ``convs`` has fewer than two
            entries, so there is no tensor to take the residual from.
    """
    if skip and len(convs) < 2:
        raise ValueError(
            "a residual block (skip=True) needs at least two convolutions")

    residual_at = len(convs) - 2
    skip_connection = None
    x = inp
    for position, conv in enumerate(convs):
        if skip and position == residual_at:
            skip_connection = x

        strided = conv['stride'] > 1
        if strided:
            # peculiar padding as darknet prefer left and top
            x = ZeroPadding2D(((1, 0), (1, 0)))(x)
        x = Conv2D(conv['filter'],
                   conv['kernel'],
                   strides=conv['stride'],
                   # explicit padding was added above for strided convolutions
                   padding='valid' if strided else 'same',
                   name='conv_' + str(conv['layer_idx']),
                   # the convolution carries a bias only without batch norm
                   use_bias=not conv['bnorm'])(x)
        if conv['bnorm']:
            x = BatchNormalization(epsilon=0.001,
                                   name='bnorm_' + str(conv['layer_idx']))(x)
        if conv['leaky']:
            x = LeakyReLU(alpha=0.1,
                          name='leaky_' + str(conv['layer_idx']))(x)

    return add([skip_connection, x]) if skip else x
# make_yolov3_model() creates the convolutional layers and stacks them together into the
# complete YOLOv3 model
def make_yolov3_model():
    """Build the YOLOv3 network as a Keras ``Model``.

    The model takes an image tensor of shape (None, None, 3) and returns the
    three detection outputs produced at layers 82, 94 and 106.
    """
    def spec(filters, kernel, stride, idx, activated=True):
        # One convolution description as consumed by _cb(). ``activated``
        # switches batch normalization and leaky ReLU on or off together.
        return {'filter': filters, 'kernel': kernel, 'stride': stride,
                'bnorm': activated, 'leaky': activated, 'layer_idx': idx}

    def residual_pairs(tensor, narrow, wide, first_idx, repeats):
        # ``repeats`` residual blocks of a 1x1 and a 3x3 convolution; the
        # layer index advances by three per block.
        for k in range(repeats):
            tensor = _cb(tensor, [spec(narrow, 1, 1, first_idx + k * 3),
                                  spec(wide, 3, 1, first_idx + 1 + k * 3)])
        return tensor

    input_image = Input(shape=(None, None, 3))

    # Layer 0 => 4
    x = _cb(input_image, [spec(32, 3, 1, 0),
                          spec(64, 3, 2, 1),
                          spec(32, 1, 1, 2),
                          spec(64, 3, 1, 3)])

    # Layer 5 => 8
    x = _cb(x, [spec(128, 3, 2, 5),
                spec(64, 1, 1, 6),
                spec(128, 3, 1, 7)])

    # Layer 9 => 11
    x = _cb(x, [spec(64, 1, 1, 9),
                spec(128, 3, 1, 10)])

    # Layer 12 => 15
    x = _cb(x, [spec(256, 3, 2, 12),
                spec(128, 1, 1, 13),
                spec(256, 3, 1, 14)])

    # Layer 16 => 36
    x = residual_pairs(x, 128, 256, 16, 7)
    skip_36 = x

    # Layer 37 => 40
    x = _cb(x, [spec(512, 3, 2, 37),
                spec(256, 1, 1, 38),
                spec(512, 3, 1, 39)])

    # Layer 41 => 61
    x = residual_pairs(x, 256, 512, 41, 7)
    skip_61 = x

    # Layer 62 => 65
    x = _cb(x, [spec(1024, 3, 2, 62),
                spec(512, 1, 1, 63),
                spec(1024, 3, 1, 64)])

    # Layer 66 => 74
    x = residual_pairs(x, 512, 1024, 66, 3)

    # Layer 75 => 79
    x = _cb(x, [spec(512, 1, 1, 75),
                spec(1024, 3, 1, 76),
                spec(512, 1, 1, 77),
                spec(1024, 3, 1, 78),
                spec(512, 1, 1, 79)],
            skip=False)

    # Layer 80 => 82
    yolo_82 = _cb(x, [spec(1024, 3, 1, 80),
                      spec(255, 1, 1, 81, activated=False)],
                  skip=False)

    # Layer 83 => 86
    x = _cb(x, [spec(256, 1, 1, 84)], skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_61])

    # Layer 87 => 91
    x = _cb(x, [spec(256, 1, 1, 87),
                spec(512, 3, 1, 88),
                spec(256, 1, 1, 89),
                spec(512, 3, 1, 90),
                spec(256, 1, 1, 91)],
            skip=False)

    # Layer 92 => 94
    yolo_94 = _cb(x, [spec(512, 3, 1, 92),
                      spec(255, 1, 1, 93, activated=False)],
                  skip=False)

    # Layer 95 => 98
    x = _cb(x, [spec(128, 1, 1, 96)], skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_36])

    # Layer 99 => 106
    yolo_106 = _cb(x, [spec(128, 1, 1, 99),
                       spec(256, 3, 1, 100),
                       spec(128, 1, 1, 101),
                       spec(256, 3, 1, 102),
                       spec(128, 1, 1, 103),
                       spec(256, 3, 1, 104),
                       spec(255, 1, 1, 105, activated=False)],
                   skip=False)

    return Model(input_image, [yolo_82, yolo_94, yolo_106])
"""**Step 3:** """**Step 3:**
- define the model - define the model
@@ -200,21 +219,21 @@ yolov3.save('model.h5')
""" """
def load_image_pixels(filename, shape):
    """Load an image as a normalized single-sample batch.

    Args:
        filename: path of the image file.
        shape: value forwarded to ``load_img`` as ``target_size``.

    Returns:
        A tuple ``(image, width, height)``: ``image`` is the resized picture
        as a float32 array scaled by 1/255 with a leading sample axis;
        ``width`` and ``height`` are the size of the image before resizing.
    """
    # The first load is only needed to learn the original size.
    width, height = load_img(filename).size

    # The second load resizes the picture to what the network expects.
    resized = load_img(filename, target_size=shape)
    pixels = img_to_array(resized).astype('float32')
    # scale the pixel values by 1/255
    pixels /= 255.0

    # add a dimension so that we have one sample
    return expand_dims(pixels, 0), width, height
"""**Step 4:** Decode the prediction output to rectangle coordinates """**Step 4:** Decode the prediction output to rectangle coordinates
- `BoundBox` class is used to return object bounding box coordinates, object name and threshold score - `BoundBox` class is used to return object bounding box coordinates, object name and threshold score
@@ -247,127 +266,127 @@ def _sigmoid(x):
return 1. /(1. + np.exp(-x)) return 1. /(1. + np.exp(-x))
def decode_netout(netout, anchors, obj_thresh, net_h, net_w):
    """Turn one raw network output into a list of candidate bounding boxes.

    Args:
        netout: output array for one image at one scale; its first two axes
            are the grid rows and columns, the rest is reshaped into three
            boxes per cell.
        anchors: flat sequence of anchor sizes, ``[w0, h0, w1, h1, w2, h2]``.
        obj_thresh: boxes whose objectness is not above this value are
            dropped, and class scores not above it are zeroed.
        net_h: network input height, used to normalize the box height.
        net_w: network input width, used to normalize the box width.

    Returns:
        A list of ``BoundBox`` objects with coordinates expressed as
        fractions of the image width and height.
    """
    grid_h, grid_w = netout.shape[:2]
    nb_box = 3
    # Work on a copy so the caller's prediction array is left untouched.
    netout = netout.reshape((grid_h, grid_w, nb_box, -1)).copy()

    netout[..., :2] = _sigmoid(netout[..., :2])
    netout[..., 4:] = _sigmoid(netout[..., 4:])
    # Class scores become objectness * class probability ...
    netout[..., 5:] = netout[..., 4][..., np.newaxis] * netout[..., 5:]
    # ... and scores that do not exceed the threshold are zeroed.
    netout[..., 5:] *= netout[..., 5:] > obj_thresh

    boxes = []
    # Integer row/column indices: a fractional row would shift the y centre.
    for row in range(grid_h):
        for col in range(grid_w):
            for b in range(nb_box):
                cell = netout[row, col, b]
                # 4th element is objectness score
                objectness = cell[4]
                if objectness <= obj_thresh:
                    continue

                # first 4 elements are x, y, w, and h
                x, y, w, h = cell[:4]
                x = (col + x) / grid_w  # center position, unit: image width
                y = (row + y) / grid_h  # center position, unit: image height
                w = anchors[2 * b + 0] * np.exp(w) / net_w  # unit: image width
                h = anchors[2 * b + 1] * np.exp(h) / net_h  # unit: image height

                # last elements are class probabilities
                classes = cell[5:]
                boxes.append(BoundBox(x - w / 2, y - h / 2,
                                      x + w / 2, y + h / 2,
                                      objectness, classes))
    return boxes
"""**Step 5:** strech the box to be fit to the image normal shape""" """**Step 5:** strech the box to be fit to the image normal shape"""
def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
    """Rescale box coordinates in place to pixel units of the original image.

    Args:
        boxes: objects with ``xmin``, ``ymin``, ``xmax`` and ``ymax``
            attributes; the attributes are overwritten with integers.
        image_h: height of the original image in pixels.
        image_w: width of the original image in pixels.
        net_h: network input height.
        net_w: network input width.
    """
    if not boxes:
        return

    # The image is assumed to fill the whole network input, so the offsets
    # evaluate to 0 and the scales to 1. They do not depend on the box and
    # are therefore computed once.
    new_w, new_h = net_w, net_h
    x_offset, x_scale = (net_w - new_w) / 2. / net_w, float(new_w) / net_w
    y_offset, y_scale = (net_h - new_h) / 2. / net_h, float(new_h) / net_h

    for box in boxes:
        box.xmin = int((box.xmin - x_offset) / x_scale * image_w)
        box.xmax = int((box.xmax - x_offset) / x_scale * image_w)
        box.ymin = int((box.ymin - y_offset) / y_scale * image_h)
        box.ymax = int((box.ymax - y_offset) / y_scale * image_h)
"""**Step 6:** implementing IOU""" """**Step 6:** implementing IOU"""
def _interval_overlap(interval_a, interval_b):
    """Return the length of the overlap of two 1-D intervals.

    Args:
        interval_a: pair ``(start, end)``.
        interval_b: pair ``(start, end)``.

    Returns:
        The overlap length, or 0 when the intervals are disjoint or only
        touch. The result is never negative.
    """
    x1, x2 = interval_a
    x3, x4 = interval_b
    # The overlap runs from the larger start to the smaller end; a negative
    # difference means the intervals do not intersect.
    return max(0, min(x2, x4) - max(x1, x3))
def bbox_iou(box1, box2):
    """Return the intersection-over-union of two boxes.

    Args:
        box1: object with ``xmin``, ``xmax``, ``ymin`` and ``ymax``.
        box2: object with ``xmin``, ``xmax``, ``ymin`` and ``ymax``.

    Returns:
        The IoU as a float; 0.0 when the union has no area.
    """
    intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
    intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
    intersect = intersect_w * intersect_h

    w1, h1 = box1.xmax - box1.xmin, box1.ymax - box1.ymin
    w2, h2 = box2.xmax - box2.xmin, box2.ymax - box2.ymin
    union = w1 * h1 + w2 * h2 - intersect

    # Two zero-area boxes (possible once coordinates have been truncated to
    # integers) would otherwise divide by zero.
    if union <= 0:
        return 0.0
    return float(intersect) / union
def do_nms(boxes, nms_thresh):
    """Apply per-class non-maximum suppression to ``boxes`` in place.

    For every class the boxes are ranked by descending score; each box that
    still has a non-zero score zeroes the score of every lower-ranked box
    whose IoU with it is at least ``nms_thresh``.

    Args:
        boxes: list of boxes exposing a mutable ``classes`` sequence.
        nms_thresh: IoU value at or above which a lower-ranked box is
            suppressed.
    """
    if len(boxes) == 0:
        return
    nb_class = len(boxes[0].classes)

    for c in range(nb_class):
        # negated scores, so that argsort yields descending order
        ranking = np.argsort([-candidate.classes[c] for candidate in boxes])

        for rank, keeper_idx in enumerate(ranking):
            keeper = boxes[keeper_idx]
            # already suppressed, or never scored for this class
            if keeper.classes[c] == 0:
                continue

            for other_idx in ranking[rank + 1:]:
                other = boxes[other_idx]
                if bbox_iou(keeper, other) >= nms_thresh:
                    other.classes[c] = 0
# get all of the results above a threshold # get all of the results above a threshold
def get_boxes(boxes, labels, thresh):
    """Collect every (box, label) pair whose class score exceeds ``thresh``.

    Args:
        boxes: objects exposing a ``classes`` sequence of scores.
        labels: class names, index-aligned with ``classes``.
        thresh: a score must be strictly greater than this to be reported.

    Returns:
        Three parallel lists ``(v_boxes, v_labels, v_scores)``; the scores
        are multiplied by 100. A box appears once per label that exceeds
        the threshold.
    """
    v_boxes, v_labels, v_scores = [], [], []
    for box in boxes:
        # zip stops at the shorter sequence, so a box carrying fewer scores
        # than there are labels no longer raises IndexError
        for label, score in zip(labels, box.classes):
            if score > thresh:
                v_boxes.append(box)
                v_labels.append(label)
                v_scores.append(score * 100)
                # don't break, many labels may trigger for one box
    return v_boxes, v_labels, v_scores
# draw all results # draw all results
def draw_boxes(filename, v_boxes, v_labels, v_scores):
    """Show the image with the detected boxes and their labels drawn on it.

    Args:
        filename: path of the image to display.
        v_boxes: boxes exposing ``xmin``, ``ymin``, ``xmax`` and ``ymax``.
        v_labels: label text for each box.
        v_scores: score for each box, printed with three decimals.
    """
    # load and plot the image
    data = pyplot.imread(filename)
    pyplot.imshow(data)
    # get the context for drawing boxes
    ax = pyplot.gca()

    for box, label, score in zip(v_boxes, v_labels, v_scores):
        x1, y1 = box.xmin, box.ymin
        # calculate width and height of the box
        width, height = box.xmax - x1, box.ymax - y1
        # linewidth is a number, not a string
        rect = Rectangle((x1, y1), width, height,
                         fill=False, color='red', linewidth=2)
        ax.add_patch(rect)
        # draw text and score in top left corner
        pyplot.text(x1, y1, "%s (%.3f)" % (label, score), color='red')

    # show the plot
    pyplot.show()
"""**step 7:** declare several configuration""" """**step 7:** declare several configuration"""
@@ -379,59 +398,56 @@ class_threshold = 0.6
# define the labels # define the labels
# Class names, index-aligned with the class scores of each box.
labels = [
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train",
    "truck", "boat", "traffic light", "fire hydrant", "stop sign",
    "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag",
    "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
    "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
    "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant",
    "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote",
    "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush",
]
"""**Step 8:** Make Prediction""" """**Step 8:** Make Prediction"""
# from google.colab import files # from google.colab import files
# upload = files.upload() # upload = files.upload()
import glob
# define the expected input shape for the model (same for every image)
input_w, input_h = 416, 416

for photo_filename in glob.glob("images/test/dog/*"):
    # load_img expects target_size as (height, width)
    image, image_w, image_h = load_image_pixels(photo_filename, (input_h, input_w))

    # make prediction
    yhat = yolov3.predict(image)
    # summarize the shape of the list of arrays
    print([a.shape for a in yhat])

    # decode the output of the network, one output scale at a time
    boxes = list()
    for scale_output, scale_anchors in zip(yhat, anchors):
        boxes += decode_netout(scale_output[0], scale_anchors, class_threshold,
                               input_h, input_w)

    # correct the sizes of the bounding boxes for the shape of the image
    correct_yolo_boxes(boxes, image_h, image_w, input_h, input_w)

    # suppress non-maximal boxes
    do_nms(boxes, 0.5)

    # get the details of the detected objects
    v_boxes, v_labels, v_scores = get_boxes(boxes, labels, class_threshold)

    # summarize what we found
    for found_label, found_score in zip(v_labels, v_scores):
        print(found_label, found_score)

    # draw what we found
    draw_boxes(photo_filename, v_boxes, v_labels, v_scores)
print([a.shape for a in yhat])
print([a.shape for a in yhat])