add pt2tf tool
This commit is contained in:
@@ -0,0 +1,702 @@
|
||||
from __future__ import division
|
||||
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
|
||||
from onnx_tf.common import pooling_helper
|
||||
from onnx_tf.common.tf_helper import tf_shape
|
||||
from onnx_tf.common.tf_helper import tf_product
|
||||
|
||||
|
||||
class DilatedPooling(object):
|
||||
"""
|
||||
This class implements two main methods:
|
||||
dilated_pool:
|
||||
calculates a max or average pool over the input
|
||||
|
||||
dilated_maxpool_with_argmax:
|
||||
calculates a maxpool over the input and returns the
|
||||
indices/argmax of the selected values
|
||||
|
||||
In addition to the standard features of pooling operations in
|
||||
Tensorflow, these methods support dilations, ceil mode, SAME_LOWER and
|
||||
explicit padding.
|
||||
|
||||
Dilations are partly supported in Tensorflow in `tf.nn.pool` and
|
||||
`tf.nn.dilation2d`. The code will try to use the Tensoflow build-in
|
||||
functions as much as poosible.
|
||||
|
||||
In cases, not supported by Tensorflow there is a custom algorith of
|
||||
dilated pooling `_remove_dilations`.
|
||||
|
||||
The idea behind `_remove_dilations` is to transform the input N-D data
|
||||
into a supported input for the standard tf.nn.pool operation.
|
||||
This is achieved by calculating N-D indicies for the values which will
|
||||
be selected from the input when applying the dilations and
|
||||
then extracting the values using tf.gather_nd. Next step is to execute
|
||||
`tf.nn.pool` on this new input data with **strides=kernel_shape** and
|
||||
no dilations. The resulting pool will be the result we are looking for.
|
||||
|
||||
In case of `deilated_maxpool_with_argmax` an additional step is needed
|
||||
to recalculated the resulting indices back into the original
|
||||
data indices. It is done with `_calc_orig_argmax`
|
||||
|
||||
Here is a simple example of how the algorithm works:
|
||||
|
||||
kernel_shape = [3]
|
||||
strides = [2]
|
||||
dilations = [3]
|
||||
|
||||
Input 1D data:
|
||||
|
||||
x-----x-----x-----x-----x-----x-----x-----x-----x-----x-----x
|
||||
| * | | ** | * | | ** | * | | ** | |
|
||||
| 10 | 9 | 30 | 7 | 6 | 15 | 16 | 17 | 18 | 19 |
|
||||
x-----x-----x-----x-----x-----x-----x-----x-----x-----x-----x
|
||||
(0) (1) (2) (3) (4) (5) (6) (7) (8) (9)
|
||||
|
||||
where * represents the values selected during the first sliding window
|
||||
step and ** during the second sliding window step
|
||||
|
||||
the resulting indices will be:
|
||||
|
||||
[0, 3, 6, 2, 5, 8]
|
||||
| | | |
|
||||
First Second
|
||||
step step
|
||||
|
||||
after tf.gather_nd operation we get a new input data with
|
||||
removed dilations:
|
||||
|
||||
[10, 7, 16, 30, 15, 18]
|
||||
|
||||
and apllying tf.nn.maxpool (or avgpool) with strides = kernel_shape = 3
|
||||
will result into:
|
||||
|
||||
[16, 30]
|
||||
|
||||
which is the result of the dilated maxpooling.
|
||||
|
||||
Here is pseudo code of the algorithm with comments:
|
||||
|
||||
FUNCTION _remove_dilations:
|
||||
/* Calculate N-D index of the values to be selected by the
|
||||
dilations and strides */
|
||||
|
||||
/* Do a loop over the input spatial dimensions starting from the
|
||||
last (most internal) going up to the first dimension
|
||||
|
||||
On every step of the loop calculate the input indices and
|
||||
"combine" them with the already calculated indices from the
|
||||
previous dimensions using cartesian product.
|
||||
*/
|
||||
LOOP with **dimension** from **dimensions_count** to **0**:
|
||||
|
||||
// Initialize empty gather_nd index
|
||||
gather_ind = []
|
||||
|
||||
// Calculate the output size for the current dimension
|
||||
dim_filter_size = (dim_kernel_size - 1) * dim_dilations
|
||||
dim_output_size = (((dim_input_size - dim_filter_size) //
|
||||
dim_strides) + 1) * dim_kernel_size)
|
||||
|
||||
/* For every output index, calculate the corresponding index
|
||||
into the input data */
|
||||
dim_input_indices = range(0, dim_output_size)
|
||||
dim_input_indices = calculate_input_indicies(dim_input_indices)
|
||||
|
||||
/* combine the calculated indices with the previous dimensions
|
||||
*/
|
||||
gather_ind = cartesian_product(dim_input_indices, gather_ind)
|
||||
END LOOP
|
||||
|
||||
/* For example for 2D input the resulting gather_ind will
|
||||
look like this:
|
||||
|
||||
[[y1, x1], [y2, x2], ..., [yn, xm]]
|
||||
|
||||
where:
|
||||
n is the height
|
||||
m is the width and
|
||||
[xi, yi] are the 2D indices in the input data
|
||||
*/
|
||||
|
||||
new_data = tf.gather_nd(input, gather_ind)
|
||||
|
||||
reshape new_data to the correct output shape
|
||||
|
||||
RETURN new_data
|
||||
|
||||
|
||||
Before executing _remove_dilations the code will apply paddings to the
|
||||
input data if needed. Padding is done using tf.pad with -inf values.
|
||||
Check `_remove_dilations` code for more details explanation of the
|
||||
implementation
|
||||
|
||||
In case of dilated_maxpool_with_argmax the returned indices from
|
||||
tf.nn.max_pool_with_argmax will point into our "no dilations" data.
|
||||
That is why they need to be mapped back to the original input data.
|
||||
It is done with `_calc_orig_argmax` function which will apply the same
|
||||
calculations, that are used in _remove_dilations when calculating the
|
||||
input data indices from output indices (check `_calc_orig_argmax` for
|
||||
detailed inline comments explaining the calculations)
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
input,
|
||||
kernel_shape,
|
||||
strides,
|
||||
dilations,
|
||||
padding="VALID",
|
||||
ceil_mode=False,
|
||||
count_include_pad=False,
|
||||
pooling_type="MAX",
|
||||
p=2):
|
||||
self.input = tf.convert_to_tensor(input)
|
||||
|
||||
self.kernel_shape = kernel_shape
|
||||
self.strides = strides
|
||||
self.dilations = dilations
|
||||
self.padding = padding
|
||||
self.is_explicit_padding = type(padding) is list
|
||||
self.ceil_mode = ceil_mode
|
||||
self.count_include_pad = count_include_pad
|
||||
self.pooling_type = pooling_type.upper()
|
||||
self.p = p
|
||||
|
||||
self.is_known_shape = self.input.shape.is_fully_defined()
|
||||
self.spatial_size = len(kernel_shape)
|
||||
self.input_rank = self.spatial_size + 2
|
||||
|
||||
# if the rank is not defined, set it to the calculated input_rank
|
||||
# rank should be known for ops like tf.gather_nd
|
||||
if not input.shape.rank:
|
||||
input.set_shape([None] * self.input_rank)
|
||||
self.orig_input_shape = tf_shape(input)
|
||||
self.input_shape = self.orig_input_shape
|
||||
|
||||
if pooling_type.startswith("MAX"):
|
||||
self.padding_constant = input.dtype.min
|
||||
else:
|
||||
self.padding_constant = 0
|
||||
|
||||
def _calc_input_ind(self, output_ind, kernel, dilation, stride):
|
||||
"""
|
||||
This function maps index from the output of _remove_dilations
|
||||
to index from the original input along single axis. It calculates
|
||||
the index inside the input data from the index of the output.
|
||||
It is used to generate the correct indexes of the values to be
|
||||
extracted by gather_nd.
|
||||
|
||||
Args:
|
||||
output_ind: vector with indices from the output to be mapped
|
||||
kernel: kernel size along the axis
|
||||
dilation: dilations along the axis
|
||||
stride: strides along the axis
|
||||
Return:
|
||||
input_ind: calculated indices
|
||||
|
||||
The formula is:
|
||||
input_ind = (output_ind // kernel) * stride +
|
||||
(output_ind % kernel) * dilation
|
||||
|
||||
Example:
|
||||
If we have following 2D input to _remove_dilations:
|
||||
[[ 0, 1, 2, 3],
|
||||
[ 4, 5, 6, 7],
|
||||
[ 8, 9, 10, 11],
|
||||
[ 12, 13, 14, 15]]
|
||||
and Kernel = [2, 2], Dilations: [2, 2], Strides: [1, 1]
|
||||
|
||||
the output of _remove_dilations will have shape [4, 4] and
|
||||
_calc_input_ind will be called twice for the two axis 0 (along
|
||||
height) and axis 1 (along width) with
|
||||
|
||||
output_ind = [0, 1, 2, 3]
|
||||
|
||||
which will result in:
|
||||
|
||||
input_ind = [0, 2, 1, 3]
|
||||
"""
|
||||
return (output_ind // kernel) * (stride - kernel * dilation) + \
|
||||
output_ind * dilation
|
||||
|
||||
def _calc_orig_argmax(self, ind):
|
||||
"""
|
||||
Map result argxmax to the original input indices
|
||||
|
||||
Maps indices generated by maxpool_with_argmax on top of the
|
||||
dilation reduced input to the orignal input indices
|
||||
"""
|
||||
|
||||
in_width = self.orig_input_shape[2]
|
||||
num_channels = self.orig_input_shape[3]
|
||||
output_width = self.output_shape[2]
|
||||
|
||||
# mod_floor op is not implemented on GPU
|
||||
# implement it using: a % b = a - (a // b) * b
|
||||
|
||||
# inRow = (ind // num_channels) // output_width
|
||||
# inCol = (ind // num_channels) % output_width
|
||||
# ind_channel = ind % num_channels
|
||||
|
||||
ind_nhw = ind // num_channels
|
||||
|
||||
inRow = ind_nhw // output_width
|
||||
inCol = ind_nhw - (ind_nhw // output_width) * output_width
|
||||
|
||||
ind_channel = ind - ind_nhw * num_channels
|
||||
|
||||
row = self._calc_input_ind(inRow, self.kernel_shape[0], self.dilations[0],
|
||||
self.strides[0]) - self.pads[0]
|
||||
col = self._calc_input_ind(inCol, self.kernel_shape[1], self.dilations[1],
|
||||
self.strides[1]) - self.pads[2]
|
||||
|
||||
new_ind = num_channels * (row * in_width + col) + ind_channel
|
||||
return new_ind
|
||||
|
||||
def _remove_dilations(self):
|
||||
"""
|
||||
This method removes the dilations by extracting the values from
|
||||
the input for every sliding window according to the dilations,
|
||||
strides and kernel size and generates output that can be used by
|
||||
pooling operations with strides = kernel_shape to accomplish
|
||||
dilated pooling
|
||||
|
||||
Example:
|
||||
Input: [[ 0, 1, 2, 3],
|
||||
[ 4, 5, 6, 7],
|
||||
[ 8, 9, 10, 11],
|
||||
[ 12, 13, 14, 15]]
|
||||
|
||||
Kernel: [2, 2]
|
||||
Dilations: [2, 2]
|
||||
Strides: [1, 1]
|
||||
|
||||
Will return:
|
||||
[[ 0, 2, 1, 3],
|
||||
[ 8, 10, 9, 11],
|
||||
[ 4, 6, 5, 7],
|
||||
[ 12, 14, 13, 15]]
|
||||
|
||||
After max_pool2d with kernel_shape = strides = [2, 2]
|
||||
the result is:
|
||||
[[ 10, 11],
|
||||
[ 14, 15]]
|
||||
"""
|
||||
|
||||
input_shape = tf_shape(self.input)
|
||||
in_spatial_shape = input_shape[1:self.spatial_size + 1]
|
||||
|
||||
channels_count = input_shape[self.spatial_size + 1]
|
||||
# Initialize gather_ind with the range of channels
|
||||
# e.g. [0 1]
|
||||
gather_ind = tf.range(channels_count, dtype=tf.int64)
|
||||
# convert the vector to column vector
|
||||
# in the following logic we use column vectors
|
||||
gather_ind = tf.expand_dims(gather_ind, 1)
|
||||
|
||||
# initilize the output_shape with zeros
|
||||
# self.output_shape will contain the shape of the
|
||||
# output tensor after the loop below is executed
|
||||
self.output_shape = [0] * (self.spatial_size + 2)
|
||||
self.output_shape[0] = input_shape[0]
|
||||
"""
|
||||
Loop over the input spatial dimensions starting from the
|
||||
last (most internal) going up to the first dimension
|
||||
|
||||
On every step of the loop calculate the output indices and
|
||||
map them to the input indices using `_calc_input_ind`,
|
||||
then "combine" with the already calculated indices from the
|
||||
previous dimensions using cartesian product.
|
||||
|
||||
For the following example input:
|
||||
|
||||
Input: [[ 0, 1, 2, 3],
|
||||
[ 4, 5, 6, 7],
|
||||
[ 8, 9, 10, 11],
|
||||
[ 12, 13, 14, 15]]
|
||||
|
||||
Kernel: [2, 2]
|
||||
Dilations: [2, 2]
|
||||
Strides: [1, 1]
|
||||
|
||||
these are the steps that will be executed:
|
||||
|
||||
1. Initilize gather_ind = [[0]] # we have only 1 channel
|
||||
|
||||
2. Loop step 0 (axis 1):
|
||||
filter_size = 3
|
||||
output_size = 4
|
||||
dim_ind = [[0]
|
||||
[2]
|
||||
[1]
|
||||
[3]]
|
||||
|
||||
gather_ind = [[0 0]
|
||||
[2 0]
|
||||
[1 0]
|
||||
[3 0]]
|
||||
|
||||
3. Loop step 1 (axis 0):
|
||||
filter_size = 3
|
||||
output_size = 4
|
||||
dim_ind = [[0]
|
||||
[2]
|
||||
[1]
|
||||
[3]]
|
||||
|
||||
gather_ind = [[0 0 0]
|
||||
[0 2 0]
|
||||
[0 1 0]
|
||||
[0 3 0]
|
||||
[2 0 0]
|
||||
[2 2 0]
|
||||
[2 1 0]
|
||||
[2 3 0]
|
||||
[1 0 0]
|
||||
[1 2 0]
|
||||
[1 1 0]
|
||||
[1 3 0]
|
||||
[3 0 0]
|
||||
[3 2 0]
|
||||
[3 1 0]
|
||||
[3 3 0]]
|
||||
|
||||
These are the indices used for gather_nd operation to collect
|
||||
the values from the input data.
|
||||
"""
|
||||
|
||||
for dim in range(self.spatial_size - 1, -1, -1):
|
||||
filter_size = (self.kernel_shape[dim] - 1) * \
|
||||
self.dilations[dim] + 1
|
||||
output_size = ((
|
||||
(in_spatial_shape[dim] - filter_size) // self.strides[dim]) + 1
|
||||
) * self.kernel_shape[dim]
|
||||
self.output_shape[dim + 1] = output_size
|
||||
|
||||
# initialize the output dimension index with the range of the
|
||||
# dimension output size (e.g. 4): [0, 1, 2, 3]
|
||||
dim_ind = tf.range(output_size)
|
||||
|
||||
# calculate the matching indices in the input data
|
||||
# [0, 1, 2, 3] will calculate to [0, 2, 1, 3]
|
||||
# from the above example
|
||||
dim_ind = self._calc_input_ind(dim_ind, self.kernel_shape[dim],
|
||||
self.dilations[dim], self.strides[dim])
|
||||
# convert to column vector
|
||||
dim_ind = tf.expand_dims(dim_ind, 1)
|
||||
|
||||
# "combine" current dimension indices with the previous dimensions
|
||||
# using cartesian product
|
||||
gather_ind = tf_product(dim_ind, gather_ind)
|
||||
|
||||
# The result from the above loop for 2D data will be:
|
||||
# [[y1, x1, c], [y2, x2, c], ..., [yn, xm, c]] where n is the height,
|
||||
# m is the width and c is the channel number.
|
||||
|
||||
# set the channels count in the output_shape
|
||||
self.output_shape[self.spatial_size + 1] = channels_count
|
||||
|
||||
# expand the dimensions to match the input dimensions + 1
|
||||
for x in range(self.spatial_size):
|
||||
gather_ind = tf.expand_dims(gather_ind, 0)
|
||||
# dublicate the indices for every batch
|
||||
gather_ind = tf.tile(gather_ind,
|
||||
[input_shape[0]] + [1] * (self.spatial_size + 1))
|
||||
|
||||
# extract the selected values from the input
|
||||
output = tf.gather_nd(self.input, gather_ind, batch_dims=1)
|
||||
# reshape the output to the correct shape calculated earlier
|
||||
output = tf.reshape(output, self.output_shape)
|
||||
|
||||
return output
|
||||
|
||||
def _calc_pads_same(self, in_spatial_shape):
|
||||
"""
|
||||
Calculate SAME_* paddings.
|
||||
"""
|
||||
|
||||
pad_ops = pooling_helper.pad_numpy_ops if self.is_known_shape else \
|
||||
pooling_helper.pad_tf_ops
|
||||
|
||||
return pooling_helper.calc_pads_same(in_spatial_shape, self.kernel_shape,
|
||||
self.strides, self.dilations,
|
||||
self.padding, pad_ops, 2)
|
||||
|
||||
def _calc_pads_explicit(self):
|
||||
"""
|
||||
Calculate explicit padding
|
||||
"""
|
||||
assert type(self.padding) is list
|
||||
|
||||
pads = []
|
||||
for i in range(self.spatial_size):
|
||||
pads += [self.padding[i], self.padding[i + self.spatial_size]]
|
||||
return pads
|
||||
|
||||
def _calc_pads_ceil_mode(self, in_spatial_shape):
|
||||
"""
|
||||
Calculate padding in ceil_mode
|
||||
"""
|
||||
|
||||
pads = []
|
||||
for i in range(self.spatial_size):
|
||||
dim_size = in_spatial_shape[i]
|
||||
filter_size = (self.kernel_shape[i] - 1) * self.dilations[i] + 1
|
||||
out_size = (dim_size - filter_size) / self.strides[i]
|
||||
if self.is_known_shape:
|
||||
pad_size = (np.ceil(out_size) - np.floor(out_size)).astype(np.int64)
|
||||
else:
|
||||
pad_size = tf.cast(
|
||||
tf.math.ceil(out_size) - tf.math.floor(out_size), tf.int64)
|
||||
|
||||
pads += [0, pad_size * self.strides[i]]
|
||||
return pads
|
||||
|
||||
def _calc_pads(self, in_spatial_shape):
|
||||
if self.is_known_shape:
|
||||
pads = np.zeros([self.spatial_size * 2], np.int64)
|
||||
else:
|
||||
pads = tf.zeros([self.spatial_size * 2], tf.int64)
|
||||
|
||||
# check for explicit padding
|
||||
if type(self.padding) is list:
|
||||
pads += self._calc_pads_explicit()
|
||||
elif self.padding.lower().startswith("same"):
|
||||
pads += self._calc_pads_same(in_spatial_shape)
|
||||
|
||||
# when padding is set to SAME, ceil_mode will not do anything
|
||||
# because output sizes will be multiple of the strides
|
||||
if self.ceil_mode and (type(self.padding) is list or
|
||||
not self.padding.lower().startswith("same")):
|
||||
new_spatial_shape = [
|
||||
in_spatial_shape[i] + pads[i * 2] + pads[i * 2 + 1]
|
||||
for i in range(self.spatial_size)
|
||||
]
|
||||
pads += self._calc_pads_ceil_mode(new_spatial_shape)
|
||||
return pads
|
||||
|
||||
def _pad_input(self):
|
||||
"""
|
||||
Pad the input according to the parameters
|
||||
"""
|
||||
# check if we need to do any padding at all
|
||||
if not self.ceil_mode and ((type(self.padding) is list and
|
||||
self.padding == [0] * self.spatial_size * 2) or
|
||||
self.padding == "VALID"):
|
||||
self.pads = np.array([0] * self.spatial_size * 2)
|
||||
return (self.input, self.pads)
|
||||
|
||||
in_spatial_shape = self.input_shape[1:self.spatial_size + 1]
|
||||
pads = self._calc_pads(in_spatial_shape)
|
||||
|
||||
if self.is_known_shape and np.count_nonzero(pads) == 0:
|
||||
self.pads = pads
|
||||
return (self.input, pads)
|
||||
|
||||
tf_paddings = [[0, 0]]
|
||||
for i in range(self.spatial_size):
|
||||
tf_paddings += [[pads[i * 2], pads[i * 2 + 1]]]
|
||||
tf_paddings += [[0, 0]]
|
||||
|
||||
self.input = tf.pad(
|
||||
self.input,
|
||||
tf_paddings,
|
||||
mode='CONSTANT',
|
||||
constant_values=self.padding_constant)
|
||||
# update input shape and pads values
|
||||
self.input_shape = tf_shape(self.input)
|
||||
self.pads = pads
|
||||
|
||||
def _calc_argmax_without_padding(self, ind):
|
||||
"""
|
||||
Calculate the original indices as they would be without padding
|
||||
"""
|
||||
in_width = self.orig_input_shape[2]
|
||||
padded_width = self.input_shape[2]
|
||||
num_channels = self.input_shape[3]
|
||||
|
||||
# mod_floor op is not implemented on GPU
|
||||
# implement it using: a % b = a - (a // b) * b
|
||||
|
||||
# ind_nhw = ind // num_channels
|
||||
# ind_channel = ind % num_channels
|
||||
|
||||
ind_nhw = ind // num_channels
|
||||
ind_channel = ind - ind_nhw * num_channels
|
||||
|
||||
new_ind = (ind_nhw // padded_width) * (self.pads[2] + self.pads[3])
|
||||
new_ind = ind_nhw - new_ind - self.pads[0] * in_width - self.pads[2]
|
||||
new_ind = num_channels * new_ind + ind_channel
|
||||
return new_ind
|
||||
|
||||
def dilated_maxpool_with_argmax(self, force_custom_impl=False):
|
||||
"""
|
||||
Do a dilated maxpool and return indices/argmax
|
||||
"""
|
||||
# Tensorflow does not support maxpool_with_argmax on
|
||||
# spatial_size != 2
|
||||
assert self.spatial_size == 2
|
||||
|
||||
if list(self.dilations) != [1] * self.spatial_size or \
|
||||
force_custom_impl:
|
||||
# pad the input
|
||||
self._pad_input()
|
||||
|
||||
new_input = self._remove_dilations()
|
||||
kernel_shape = [1] + list(self.kernel_shape) + [1]
|
||||
pooled, new_ind = tf.nn.max_pool_with_argmax(
|
||||
new_input, ksize=kernel_shape, strides=kernel_shape, padding="VALID")
|
||||
new_ind = self._calc_orig_argmax(new_ind)
|
||||
else:
|
||||
self.pads = np.array([0] * self.spatial_size * 2)
|
||||
if type(self.padding) is list or \
|
||||
self.padding.lower() == "same_lower":
|
||||
# pad the input
|
||||
self._pad_input()
|
||||
|
||||
padding_ = "VALID"
|
||||
elif self.padding.lower() == "same_upper":
|
||||
padding_ = "SAME"
|
||||
else:
|
||||
padding_ = self.padding
|
||||
|
||||
strides = [1] + list(self.strides) + [1]
|
||||
kernel_shape = [1] + list(self.kernel_shape) + [1]
|
||||
pooled, new_ind = tf.nn.max_pool_with_argmax(
|
||||
self.input, ksize=kernel_shape, strides=strides, padding=padding_)
|
||||
# if there was padding, recalculate the returned index
|
||||
# to exclude the padding
|
||||
if np.count_nonzero(self.pads) != 0:
|
||||
new_ind = self._calc_argmax_without_padding(new_ind)
|
||||
|
||||
return (pooled, new_ind)
|
||||
|
||||
def _lp_pool(self, input, ksize, strides, padding):
|
||||
window_size = np.prod(ksize)
|
||||
|
||||
input = tf.math.pow(tf.math.abs(input), self.p) * window_size
|
||||
pooled = tf.nn.avg_pool_v2(input, ksize=ksize, strides=strides,
|
||||
padding=padding)
|
||||
pooled = tf.math.pow(pooled, 1.0 / self.p)
|
||||
|
||||
return pooled
|
||||
|
||||
def dilated_pool(self, force_custom_impl=False):
|
||||
"""
|
||||
Does N-D dilated max/avg pooling. Pads the input if explicit or
|
||||
SAME_* padding is provided or ceil_mode is True
|
||||
"""
|
||||
|
||||
assert self.is_supported()
|
||||
|
||||
if self.is_explicit_padding or self.padding.lower() == "same_lower" \
|
||||
or (self.padding.lower() == "same_upper" and
|
||||
self.count_include_pad) or self.pooling_type.upper() == "LP":
|
||||
# pad the input
|
||||
self._pad_input()
|
||||
|
||||
padding_ = "VALID"
|
||||
elif self.padding.lower() == "same_upper":
|
||||
padding_ = "SAME"
|
||||
else:
|
||||
padding_ = self.padding
|
||||
|
||||
# if maxpool op with dialtions != 1 and spatial_size == 2
|
||||
# we can use tf.nn.dilation2d directly
|
||||
if self.spatial_size == 2 and self.pooling_type.startswith("MAX") \
|
||||
and self.dilations != [1] * self.spatial_size and \
|
||||
not force_custom_impl:
|
||||
strides = [1] + list(self.strides) + [1]
|
||||
dilations = [1] + list(self.dilations) + [1]
|
||||
|
||||
filter = tf.zeros(
|
||||
[self.kernel_shape[0], self.kernel_shape[1], self.input_shape[3]],
|
||||
self.input.dtype)
|
||||
pooled = tf.nn.dilation2d(
|
||||
input=self.input,
|
||||
filters=filter,
|
||||
strides=strides,
|
||||
dilations=dilations,
|
||||
padding=padding_)
|
||||
# if spatial_size < 4 and strides == 1 or dilation == 1 use tf.nn.pool
|
||||
elif self.spatial_size < 4 and (self.strides == [1] * self.spatial_size or
|
||||
self.dilations == [1] * self.spatial_size) and \
|
||||
not force_custom_impl:
|
||||
# if strides == 1 and not LpPool use tf.nn.pool directly
|
||||
if self.strides == [1] * self.spatial_size and self.pooling_type != "LP":
|
||||
pooled = tf.nn.pool(
|
||||
self.input,
|
||||
window_shape=self.kernel_shape,
|
||||
dilations=self.dilations,
|
||||
strides=self.strides,
|
||||
padding=padding_,
|
||||
pooling_type=self.pooling_type)
|
||||
else:
|
||||
# othwerwise check the pooling_type and use the correct op
|
||||
if self.pooling_type.startswith("MAX"):
|
||||
op = tf.nn.max_pool_v2
|
||||
elif self.pooling_type == "AVG":
|
||||
op = tf.nn.avg_pool_v2
|
||||
elif self.pooling_type == "LP":
|
||||
op = self._lp_pool
|
||||
else:
|
||||
raise ValueError("%d-D %s pooling is not supported." %
|
||||
(self.spatial_size, self.pooling_type))
|
||||
pooled = op(self.input, ksize=self.kernel_shape, strides=self.strides,
|
||||
padding=padding_)
|
||||
# in any other case we use custom implementation _remove_dilations
|
||||
# to reduce atrous/dilated pooling into regular pooling and selecting
|
||||
# only the values of the input that should have been selected by
|
||||
# applying the strides and dilations. Then use tf.nn.pool with
|
||||
# strides = kernel_shape and no dilations
|
||||
else:
|
||||
if padding_ == "SAME":
|
||||
# pad the input
|
||||
self._pad_input()
|
||||
input_ = self._remove_dilations()
|
||||
if self.pooling_type=="LP":
|
||||
pooled = self._lp_pool(
|
||||
input_,
|
||||
ksize=self.kernel_shape,
|
||||
strides=self.kernel_shape,
|
||||
padding="VALID")
|
||||
|
||||
else:
|
||||
pooled = tf.nn.pool(
|
||||
input_,
|
||||
window_shape=self.kernel_shape,
|
||||
strides=self.kernel_shape,
|
||||
padding="VALID",
|
||||
pooling_type=self.pooling_type)
|
||||
return pooled
|
||||
|
||||
def is_supported(self):
|
||||
"""
|
||||
Function to check if the current set of arguments are
|
||||
supported for average pool
|
||||
"""
|
||||
# check for maxpool
|
||||
if self.pooling_type.startswith("MAX") or \
|
||||
self.pooling_type=="LP":
|
||||
return True
|
||||
else:
|
||||
# if count_include_pad is true it is fully supported
|
||||
if self.count_include_pad:
|
||||
return True
|
||||
# ceil mode is not supported
|
||||
elif self.ceil_mode:
|
||||
return False
|
||||
# explicit padding with padding values set to 0 is supported
|
||||
elif (self.is_explicit_padding and
|
||||
self.padding == [0] * self.spatial_size * 2):
|
||||
return True
|
||||
# "valid" and "same_upper" auto padding is supported
|
||||
elif (not self.is_explicit_padding and
|
||||
self.padding.lower() in ["valid", "same_upper"]):
|
||||
return True
|
||||
# any other case is not supported
|
||||
else:
|
||||
return False
|
||||
Reference in New Issue
Block a user