499 lines
20 KiB
Python
499 lines
20 KiB
Python
"""
|
|
YOLO v3 object detection with Keras
|
|
|
|
Source: https://towardsdatascience.com/yolo-v3-object-detection-with-keras-461d2cfccef6
|
|
"""
|
|
import struct
|
|
import glob
|
|
import numpy as np
|
|
from numpy import expand_dims
|
|
from keras.layers import Input, Conv2D, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D
|
|
from keras.models import Model
|
|
from keras.layers.merge import add, concatenate
|
|
from keras.preprocessing.image import load_img
|
|
from keras.preprocessing.image import img_to_array
|
|
from matplotlib import pyplot
|
|
from matplotlib.patches import Rectangle
|
|
|
|
# Step 1:
|
|
# Define WeightReader class
|
|
|
|
class WeightReader:
    """
    Parse a Darknet "yolov3.weights" file and copy its values into a Keras model.

    The whole weight payload is held in memory as one flat float32 array
    (``all_weights``); ``offset`` is the read cursor into that array, counted
    in float32 elements (not bytes).
    """

    def __init__(self, weight_file):
        """
        Read the file header and load the remaining payload as float32 values.

        Args:
            weight_file: path of the binary weights file.
        """
        with open(weight_file, 'rb') as w_f:
            major, = struct.unpack('i', w_f.read(4))
            minor, = struct.unpack('i', w_f.read(4))
            w_f.read(4)  # ignore revision

            # The next header field is 8 bytes wide for version >= 0.2 and
            # 4 bytes wide otherwise (presumably Darknet's "images seen"
            # counter -- confirm against the Darknet format); skip it.
            if (major * 10 + minor) >= 2 and major < 1000 and minor < 1000:
                w_f.read(8)
            else:
                w_f.read(4)

            binary = w_f.read()

        self.offset = 0
        self.all_weights = np.frombuffer(binary, dtype='float32')

    def read_bytes(self, size):
        """
        Return the next ``size`` float32 values and advance the read cursor.

        Despite the name, ``size`` counts array elements, not bytes.

        Raises:
            ValueError: if fewer than ``size`` values remain, which means the
                weights file is truncated or does not match the model.
        """
        size = int(size)
        end = self.offset + size
        if end > len(self.all_weights):
            raise ValueError(
                "weights file exhausted: requested %d values at offset %d, "
                "only %d available" % (size, self.offset, len(self.all_weights)))

        self.offset = end
        return self.all_weights[end - size:end]

    def load_weights(self, model):
        """
        Load weights into created model.

        Layers are looked up by name (``conv_<i>`` / ``bnorm_<i>``) for
        i in 0..105. Indices without a ``conv_<i>`` layer are reported and
        skipped. Any other failure (missing batch-norm layer, truncated file,
        shape mismatch) propagates to the caller instead of being reported as
        a missing layer.

        Args:
            model: object exposing ``get_layer(name)``, whose layers expose
                ``get_weights()`` / ``set_weights()``.
        """
        for i in range(106):
            # Only the lookup is guarded: get_layer signals a missing layer
            # with ValueError, and nothing else should be silenced here.
            try:
                conv_layer = model.get_layer('conv_' + str(i))
            except ValueError:
                print("no convolution #" + str(i))
                continue

            print("loading weights of convolution #" + str(i))

            # Convolutions 81, 93 and 105 are the detection outputs, which
            # have no batch-normalisation layer.
            if i not in [81, 93, 105]:
                norm_layer = model.get_layer('bnorm_' + str(i))
                size = np.prod(norm_layer.get_weights()[0].shape)
                beta = self.read_bytes(size)   # bias
                gamma = self.read_bytes(size)  # scale
                mean = self.read_bytes(size)   # mean
                var = self.read_bytes(size)    # variance
                # File order is beta, gamma, mean, var; the layer is given
                # gamma first.
                norm_layer.set_weights([gamma, beta, mean, var])

            conv_weights = conv_layer.get_weights()
            has_bias = len(conv_weights) > 1
            if has_bias:
                # The bias precedes the kernel in the file.
                bias = self.read_bytes(np.prod(conv_weights[1].shape))

            kernel_shape = conv_weights[0].shape
            kernel = self.read_bytes(np.prod(kernel_shape))
            # The file stores the kernel with its axes in reverse order of the
            # layer's kernel shape; restore the layer's axis order.
            kernel = kernel.reshape(list(reversed(kernel_shape)))
            kernel = kernel.transpose([2, 3, 1, 0])

            if has_bias:
                conv_layer.set_weights([kernel, bias])
            else:
                conv_layer.set_weights([kernel])

    def reset(self):
        """
        Resets offset to restart loading weights.
        """
        self.offset = 0
|
|
|
|
# Step 2:
|
|
# _conv_block(input_layer, convs, skip=True) builds a chain of convolutional layers, optionally with a skip connection
|
|
def _conv_block(input_layer, convs, skip=True):
    """
    Chain the convolutions described by ``convs`` onto ``input_layer``.

    Each entry of ``convs`` is a dict with the keys 'filter', 'kernel',
    'stride', 'bnorm', 'leaky' and 'layer_idx'. Layers are named
    ``conv_<idx>``, ``bnorm_<idx>`` and ``leaky_<idx>`` after 'layer_idx'.

    When ``skip`` is true, the tensor entering the second-to-last convolution
    is added to the block's output.
    NOTE(review): with ``skip=True`` and fewer than two entries in ``convs``
    no skip tensor is ever captured and the final ``add`` fails.

    Returns:
        The output tensor of the block.
    """
    tensor = input_layer
    skip_position = len(convs) - 2

    for position, spec in enumerate(convs):
        if skip and position == skip_position:
            skip_connection = tensor

        downsampling = spec['stride'] > 1

        # Peculiar padding as darknet prefer left and top: pad explicitly and
        # then run the strided convolution with 'valid' padding.
        if downsampling:
            tensor = ZeroPadding2D(((1, 0), (1, 0)))(tensor)

        convolution = Conv2D(
            spec['filter'],
            spec['kernel'],
            strides=spec['stride'],
            padding='valid' if downsampling else 'same',
            name='conv_' + str(spec['layer_idx']),
            # Batch normalisation supplies its own offset, so the convolution
            # only carries a bias when it is not followed by one.
            use_bias=not spec['bnorm'])
        tensor = convolution(tensor)

        if spec['bnorm']:
            normalisation = BatchNormalization(
                epsilon=0.001, name='bnorm_' + str(spec['layer_idx']))
            tensor = normalisation(tensor)

        if spec['leaky']:
            activation = LeakyReLU(
                alpha=0.1, name='leaky_' + str(spec['layer_idx']))
            tensor = activation(tensor)

    if skip:
        return add([skip_connection, tensor])
    return tensor
|
|
|
|
# make_yolov3_model() creates the convolutional layers and stacks them together
# into the whole YOLO model
|
|
def make_yolov3_model():
    """
    Assemble the YOLOv3 network as a Keras ``Model``.

    The model takes an image tensor with three channels and unspecified
    height/width, and returns three detection tensors (from convolutions
    81, 93 and 105), each with 255 channels.
    """

    def layer(filters, kernel, idx, stride=1, bnorm=True, leaky=True):
        # One convolution description in the dict format _conv_block expects.
        return {'filter': filters, 'kernel': kernel, 'stride': stride,
                'bnorm': bnorm, 'leaky': leaky, 'layer_idx': idx}

    def output_layer(idx):
        # Detection head: 1x1 convolution, no batch norm, no activation.
        return layer(255, 1, idx, bnorm=False, leaky=False)

    input_image = Input(shape=(None, None, 3))

    # Layer 0 => 4
    x = _conv_block(input_image, [
        layer(32, 3, 0),
        layer(64, 3, 1, stride=2),
        layer(32, 1, 2),
        layer(64, 3, 3),
    ])

    # Layer 5 => 8
    x = _conv_block(x, [
        layer(128, 3, 5, stride=2),
        layer(64, 1, 6),
        layer(128, 3, 7),
    ])

    # Layer 9 => 11
    x = _conv_block(x, [
        layer(64, 1, 9),
        layer(128, 3, 10),
    ])

    # Layer 12 => 15
    x = _conv_block(x, [
        layer(256, 3, 12, stride=2),
        layer(128, 1, 13),
        layer(256, 3, 14),
    ])

    # Layer 16 => 36 (seven residual blocks starting at 16, 19, ..., 34)
    for first_idx in range(16, 37, 3):
        x = _conv_block(x, [
            layer(128, 1, first_idx),
            layer(256, 3, first_idx + 1),
        ])
    skip_36 = x

    # Layer 37 => 40
    x = _conv_block(x, [
        layer(512, 3, 37, stride=2),
        layer(256, 1, 38),
        layer(512, 3, 39),
    ])

    # Layer 41 => 61 (seven residual blocks starting at 41, 44, ..., 59)
    for first_idx in range(41, 62, 3):
        x = _conv_block(x, [
            layer(256, 1, first_idx),
            layer(512, 3, first_idx + 1),
        ])
    skip_61 = x

    # Layer 62 => 65
    x = _conv_block(x, [
        layer(1024, 3, 62, stride=2),
        layer(512, 1, 63),
        layer(1024, 3, 64),
    ])

    # Layer 66 => 74 (three residual blocks starting at 66, 69, 72)
    for first_idx in range(66, 75, 3):
        x = _conv_block(x, [
            layer(512, 1, first_idx),
            layer(1024, 3, first_idx + 1),
        ])

    # Layer 75 => 79
    x = _conv_block(x, [
        layer(512, 1, 75),
        layer(1024, 3, 76),
        layer(512, 1, 77),
        layer(1024, 3, 78),
        layer(512, 1, 79),
    ], skip=False)

    # Layer 80 => 82
    yolo_82 = _conv_block(x, [
        layer(1024, 3, 80),
        output_layer(81),
    ], skip=False)

    # Layer 83 => 86
    x = _conv_block(x, [layer(256, 1, 84)], skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_61])

    # Layer 87 => 91
    x = _conv_block(x, [
        layer(256, 1, 87),
        layer(512, 3, 88),
        layer(256, 1, 89),
        layer(512, 3, 90),
        layer(256, 1, 91),
    ], skip=False)

    # Layer 92 => 94
    yolo_94 = _conv_block(x, [
        layer(512, 3, 92),
        output_layer(93),
    ], skip=False)

    # Layer 95 => 98
    x = _conv_block(x, [layer(128, 1, 96)], skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_36])

    # Layer 99 => 106
    yolo_106 = _conv_block(x, [
        layer(128, 1, 99),
        layer(256, 3, 100),
        layer(128, 1, 101),
        layer(256, 3, 102),
        layer(128, 1, 103),
        layer(256, 3, 104),
        output_layer(105),
    ], skip=False)

    return Model(input_image, [yolo_82, yolo_94, yolo_106])
|
|
|
|
"""**step 4:** Prediction
|
|
by loading the image to model and make prediction
|
|
"""
|
|
|
|
def load_image_pixels(filename, shape):
    """
    Load an image file as a single-sample, [0, 1]-scaled float32 batch.

    Args:
        filename: path of the image to load.
        shape: size the image is resized to; it is passed straight through as
            ``target_size`` to ``load_img``.
            NOTE(review): Keras documents ``target_size`` as (height, width)
            -- confirm callers pass it in that order.

    Returns:
        Tuple ``(image, width, height)``: the resized pixel array with a
        leading batch axis of length one, plus the width and height of the
        image as stored on disk.
    """
    # First load at native resolution, only to learn the original size.
    width, height = load_img(filename).size

    # Second load resizes to the requested shape.
    resized = load_img(filename, target_size=shape)
    pixels = img_to_array(resized).astype('float32')

    # Scale pixel values from [0, 255] down to [0, 1].
    pixels /= 255.0

    # Add a leading axis so the array represents a batch of one sample.
    batch = expand_dims(pixels, 0)
    return batch, width, height
|
|
|
|
"""**Step 4:** Decode the prediction output to rectangle coordinates
|
|
- `BoundBox` class is used to return object bounding box coordinates, object name and threshold score
|
|
- `decode_netout` function is used to decode the prediction output to rectangle coordinates
|
|
"""
|
|
|
|
class BoundBox:
    """
    A detected bounding box with its objectness and per-class scores.

    ``label`` and ``score`` are computed lazily from ``classes`` on first
    request; -1 marks "not computed yet".
    """

    def __init__(self, xmin, ymin, xmax, ymax, objness=None, classes=None):
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        self.objness = objness
        self.classes = classes
        self.label = -1
        self.score = -1

    def get_label(self):
        """Return the index of the highest-scoring class (cached)."""
        if self.label == -1:
            self.label = np.argmax(self.classes)

        return self.label

    def get_score(self):
        """Return the score of the highest-scoring class (cached)."""
        if self.score == -1:
            self.score = self.classes[self.get_label()]

        # Return the cached value itself, not the bound method.
        return self.score
|
|
|
|
def _sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x))."""
    return 1. / (1. + np.exp(-x))


def decode_netout(netout, anchors, obj_thresh, net_h, net_w):
    """
    Decode one raw network output grid into candidate bounding boxes.

    Args:
        netout: array whose first two axes are the grid rows and columns; the
            last axis is split into 3 boxes, each holding x, y, w, h,
            objectness and then the class scores. The array is not modified.
        anchors: flat sequence of anchor sizes, read as (width, height) pairs,
            one pair per box.
        obj_thresh: boxes whose objectness is at or below this value are
            dropped; class scores at or below it are zeroed.
        net_h, net_w: network input height and width, used to normalise the
            anchor sizes.

    Returns:
        List of ``BoundBox`` whose coordinates are fractions of the image
        width/height.
    """
    grid_h, grid_w = netout.shape[:2]
    nb_box = 3

    # reshape() may return a view, so copy before the in-place activations
    # below; otherwise the caller's prediction array would be overwritten.
    netout = netout.reshape((grid_h, grid_w, nb_box, -1)).copy()
    boxes = []

    netout[..., :2] = _sigmoid(netout[..., :2])   # x, y offsets inside a cell
    netout[..., 4:] = _sigmoid(netout[..., 4:])   # objectness and class scores
    # Class score = objectness * class probability, zeroed below the threshold.
    netout[..., 5:] = netout[..., 4][..., np.newaxis] * netout[..., 5:]
    netout[..., 5:] *= netout[..., 5:] > obj_thresh

    # Integer row/col indices: deriving the row with float division would let
    # a fractional row value leak into the y-centre computation.
    for row in range(grid_h):
        for col in range(grid_w):
            for b in range(nb_box):
                # 4th element is objectness score
                objectness = netout[row, col, b, 4]
                if objectness <= obj_thresh:
                    continue

                # first 4 elements are x, y, w, and h
                x, y, w, h = netout[row, col, b, :4]
                x = (col + x) / grid_w  # center position, unit: image width
                y = (row + y) / grid_h  # center position, unit: image height
                w = anchors[2 * b + 0] * np.exp(w) / net_w  # unit: image width
                h = anchors[2 * b + 1] * np.exp(h) / net_h  # unit: image height

                # last elements are class probabilities
                classes = netout[row, col, b, 5:]
                boxes.append(BoundBox(x - w / 2, y - h / 2, x + w / 2, y + h / 2,
                                      objectness, classes))

    return boxes
|
|
|
|
"""**Step 5:** stretch the boxes back to the shape of the original image"""
|
|
|
|
def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
    """
    Rescale box coordinates in place to integer pixel positions.

    Each box's xmin/xmax are multiplied by ``image_w`` and ymin/ymax by
    ``image_h`` and truncated with ``int``. The "new" size equals the network
    size here, so the offset is 0 and the scale is 1.

    Returns:
        None; ``boxes`` is modified in place.
    """
    new_w, new_h = net_w, net_h

    for box in boxes:
        x_scale = float(new_w) / net_w
        y_scale = float(new_h) / net_h
        x_offset = (net_w - new_w) / 2. / net_w
        y_offset = (net_h - new_h) / 2. / net_h

        box.xmin = int((box.xmin - x_offset) / x_scale * image_w)
        box.xmax = int((box.xmax - x_offset) / x_scale * image_w)
        box.ymin = int((box.ymin - y_offset) / y_scale * image_h)
        box.ymax = int((box.ymax - y_offset) / y_scale * image_h)
|
|
|
|
"""**Step 6:** implementing IOU"""
|
|
|
|
def _interval_overlap(interval_a, interval_b):
    """
    Return the length of the overlap of two 1-D intervals.

    Each interval is a ``(start, end)`` pair. Returns 0 when the intervals
    are disjoint.
    """
    x1, x2 = interval_a
    x3, x4 = interval_b

    if x3 < x1:
        # b starts first: they overlap only if b reaches a's start.
        if x4 < x1:
            return 0
        return min(x2, x4) - x1

    # a starts first (or together): they overlap only if a reaches b's start.
    if x2 < x3:
        return 0
    return min(x2, x4) - x3


def bbox_iou(box1, box2):
    """
    Return the intersection-over-union of two boxes.

    Both arguments expose ``xmin``, ``xmax``, ``ymin`` and ``ymax``.
    Returns 0.0 when the union area is zero (both boxes are degenerate),
    instead of dividing by zero.
    """
    intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
    intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
    intersect = intersect_w * intersect_h

    w1, h1 = box1.xmax - box1.xmin, box1.ymax - box1.ymin
    w2, h2 = box2.xmax - box2.xmin, box2.ymax - box2.ymin
    union = w1 * h1 + w2 * h2 - intersect

    # Boxes truncated to integer pixels can collapse to zero area; two such
    # boxes have an empty union and share nothing.
    if union == 0:
        return 0.0

    return float(intersect) / union
|
|
|
|
def do_nms(boxes, nms_thresh):
    """
    Apply per-class non-maximum suppression to ``boxes`` in place.

    For every class, boxes are visited in descending order of that class's
    score; each visited box with a non-zero score zeroes the same class score
    of every lower-ranked box whose IoU with it is at least ``nms_thresh``.

    Returns:
        None; suppressed scores are set to 0 inside each box's ``classes``.
    """
    if len(boxes) == 0:
        return

    class_count = len(boxes[0].classes)

    for cls in range(class_count):
        # Negating the scores makes argsort produce a descending ranking.
        ranking = np.argsort([-candidate.classes[cls] for candidate in boxes])

        for rank, keeper_idx in enumerate(ranking):
            keeper = boxes[keeper_idx]

            # Already suppressed (or never scored) for this class.
            if keeper.classes[cls] == 0:
                continue

            for other_idx in ranking[rank + 1:]:
                if bbox_iou(keeper, boxes[other_idx]) >= nms_thresh:
                    boxes[other_idx].classes[cls] = 0
|
|
|
|
# get all of the results above a threshold
|
|
def get_boxes(boxes, labels, thresh):
    """
    Collect every (box, label) pair whose class score exceeds ``thresh``.

    A box appears once for each label that passes, so the three returned
    lists are parallel and may contain the same box several times.

    Returns:
        Tuple ``(v_boxes, v_labels, v_scores)``; scores are the class scores
        multiplied by 100.
    """
    kept_boxes = []
    kept_labels = []
    kept_scores = []

    for candidate in boxes:
        for class_idx, class_name in enumerate(labels):
            if not candidate.classes[class_idx] > thresh:
                continue
            # No break on a hit: several labels may trigger for one box.
            kept_boxes.append(candidate)
            kept_labels.append(class_name)
            kept_scores.append(candidate.classes[class_idx] * 100)

    return kept_boxes, kept_labels, kept_scores
|
|
|
|
# draw all results
|
|
def draw_boxes(filename, v_boxes, v_labels, v_scores):
    """
    Show the image with each detected box outlined and labelled in red.

    ``v_boxes``, ``v_labels`` and ``v_scores`` are parallel sequences; the
    label text drawn at each box's top-left corner is "<label> (<score>)".
    """
    # Load and display the picture, then grab the axes to draw on.
    pixels = pyplot.imread(filename)
    pyplot.imshow(pixels)
    axes = pyplot.gca()

    for idx, box in enumerate(v_boxes):
        left, top = box.xmin, box.ymin
        box_width = box.xmax - left
        box_height = box.ymax - top

        # Outline of the detection.
        outline = Rectangle((left, top), box_width, box_height,
                            fill=False, color='red', linewidth='2')
        axes.add_patch(outline)

        # Label and score in the top-left corner.
        caption = "%s (%.3f)" % (v_labels[idx], v_scores[idx])
        pyplot.text(left, top, caption, color='red')

    pyplot.show()
|
|
|
|
"""**step 7:** declare several configuration"""
|
|
|
|
# define the anchors
# One flat list per network output, in the order the model returns them
# (main() pairs anchors[i] with output i). Each list is read by
# decode_netout() as three (width, height) pairs, one per box.
anchors = [[116,90, 156,198, 373,326], [30,61, 62,45, 59,119], [10,13, 16,30, 33,23]]

# define the probability threshold for detected objects
# Used both as the objectness threshold when decoding the network output and
# as the class-score threshold when selecting the final boxes.
class_threshold = 0.6

# define the labels
# Class names indexed by class id: position i names class-score entry i of a box.
labels = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
    "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana",
    "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]
|
|
|
|
|
|
def main():
    """
    Defined starting point of source code.

    Builds the YOLOv3 model, loads 'yolov3.weights' into it, saves it as
    'yolov3.h5', then runs detection on every file matching
    "images/test/dog/*" and prints and plots the detections.
    """

    # Step 3:
    # (1) Define the model
    # (2) Load the weight
    # (3) Save the model

    # Define the YOLO v3 model
    yolov3 = make_yolov3_model()
    # summary() prints the table itself and returns None, so it is not
    # wrapped in print() (that would emit a stray "None").
    yolov3.summary()

    # Load the weights
    # Source: https://pjreddie.com/media/files/yolov3.weights
    weight_reader = WeightReader('yolov3.weights')

    # Set the weights
    weight_reader.load_weights(yolov3)

    # Save the model to file
    yolov3.save('yolov3.h5')

    # Step 8:
    # Make Prediction

    # define the expected input shape for the model
    input_w, input_h = 416, 416

    for photo_filename in glob.glob("images/test/dog/*"):
        image, image_w, image_h = load_image_pixels(photo_filename, (input_w, input_h))

        # make prediction
        yhat = yolov3.predict(image)
        # summarize the shape of the list of arrays
        print([a.shape for a in yhat])

        boxes = list()
        for output, output_anchors in zip(yhat, anchors):
            # decode the output of the network (first and only sample of the batch)
            boxes += decode_netout(output[0], output_anchors, class_threshold, input_h, input_w)

        # correct the sizes of the bounding boxes for the shape of the image
        correct_yolo_boxes(boxes, image_h, image_w, input_h, input_w)

        # suppress non-maximal boxes
        do_nms(boxes, 0.5)

        # get the details of the detected objects
        v_boxes, v_labels, v_scores = get_boxes(boxes, labels, class_threshold)

        # summarize what we found
        for label, score in zip(v_labels, v_scores):
            print(label, score)

        # draw what we found
        draw_boxes(photo_filename, v_boxes, v_labels, v_scores)

    # The output shapes are already printed once per image above; no further
    # print after the loop, where yhat is undefined if no image matched.


if __name__ == "__main__":
    main()
|