Files
mixed-precision-neural-netw…/mpq/configure_ibex.py
T
2024-07-26 20:16:18 +03:00

1455 lines
60 KiB
Python

import numpy as np
import torch.nn as nn
import torch
from torch.autograd import Variable
import shutil
def quantize_multiplier(real_multiplier):
    """Approximate a positive real multiplier by an integer and a right shift.

    The multiplier is doubled until it is at least 0.5, counting the number
    of doublings, and is then scaled by ``2**7`` and rounded.  The result
    satisfies ``real_multiplier ~= quantized_multiplier * 2**-(7 + right_shift)``.

    Args:
        real_multiplier: Positive real scale factor.

    Returns:
        Tuple ``(quantized_multiplier, right_shift)`` of Python ints.

    Raises:
        ValueError: If ``real_multiplier`` is not strictly positive.  Such a
            value can never reach 0.5 by doubling, so the normalisation loop
            would otherwise never terminate.
    """
    if not real_multiplier > 0:
        raise ValueError(
            "real_multiplier must be strictly positive, got {0}".format(real_multiplier))
    s = 0
    while real_multiplier < 0.5:
        real_multiplier *= 2.0
        s += 1
    q = int(round(real_multiplier * (1 << 7)))
    # Handle the special case when the real multiplier was so close to 1
    # that its fixed-point approximation was indistinguishable from 1.
    # We handle this by dividing it by two, and remembering to decrement
    # the right shift amount.
    if q == (1 << 7):
        q //= 2
        s -= 1
    quantized_multiplier = int(q)
    right_shift = s
    return quantized_multiplier, right_shift
def get_int_params(quant_net):
    """Collect integer weights, biases and requantisation constants of a network.

    The module tree of ``quant_net`` is walked depth-first.  Every child with a
    non-None ``weight`` contributes ``int_weight()``, ``int_bias()`` and
    ``quant_bias_scale()``; every child with a non-None ``quant_act_scale``
    contributes ``quant_act_scale()``.

    Returns:
        ``(int_weights, int_biases, f_int_biases, shift_biases, mul_vals,
        shift_vals)`` where, per layer, ``f_int_biases`` is the bias shifted
        right by ``shift_biases`` and ``int_biases`` is that value shifted
        back left.  ``mul_vals`` / ``shift_vals`` come from
        ``quantize_multiplier`` applied to the first element of
        ``in_scale[i] / act_scale[i + 1]``.
    """
    int_weights = []
    raw_biases = []
    in_scales = []
    act_scales = []

    def _as_numpy(tensor):
        return tensor.cpu().detach().numpy()

    def _walk(parent):
        for _, child in parent.named_children():
            # Layers carrying weights expose their integer parameters.
            if getattr(child, 'weight', None) is not None:
                int_weights.append(_as_numpy(child.int_weight()))
                raw_biases.append(_as_numpy(child.int_bias()))
                in_scales.append(_as_numpy(child.quant_bias_scale()))
            # Activation quantisers expose their output scale.
            if getattr(child, 'quant_act_scale', None) is not None:
                act_scales.append(_as_numpy(child.quant_act_scale()))
            # Descend into nested containers.
            _walk(child)

    _walk(quant_net)

    mul_vals = []
    shift_vals = []
    for idx, next_act_scale in enumerate(act_scales[1:]):
        ratio = in_scales[idx] / next_act_scale
        multiplier, shift = quantize_multiplier(ratio[0])
        mul_vals.append(multiplier)
        shift_vals.append(shift)

    int_biases = []
    f_int_biases = []
    shift_biases = []
    for bias in raw_biases:
        # The small epsilon keeps log2 finite for zero-valued biases.
        magnitude_bits = np.log2(abs(bias + 1e-10)).astype(np.int32)
        shift_bias = np.clip(magnitude_bits - 6, a_min = 0, a_max = None)
        reduced = np.right_shift(bias, shift_bias)
        restored = np.left_shift(reduced, shift_bias)
        f_int_biases.append(reduced)
        shift_biases.append(shift_bias)
        int_biases.append(restored)

    return int_weights, int_biases, f_int_biases, shift_biases, mul_vals, shift_vals
def decide_mode(network, weight_bit_width, input_uint8 = True):
    """Derive the per-layer execution mode and layer type of a network.

    The mode of a layer is ``4 * signed_input + k`` where ``k`` is 1 for a
    depthwise convolution and otherwise 0, 2 or 3 for 8-, 4- or 2-bit weights.
    A layer's input is treated as unsigned when it is the network input and
    ``input_uint8`` is true, or when a ReLU/Sigmoid was encountered for that
    position while walking ``network.named_modules()``.

    Args:
        network: ``torch.nn.Module`` whose standard ``torch.nn`` layers are
            inspected (matching is done on the exact class name).
        weight_bit_width: Per-layer weight bit widths; each must be 2, 4 or 8.
        input_uint8: Whether the network input is unsigned.

    Returns:
        ``(mode_per_layer, layer_type)`` lists.

    Raises:
        ValueError: If a bit width other than 2, 4 or 8 is requested.
    """
    VALID_BIT_WIDTH_VALS = {2, 4, 8}
    # Checking the values of the arrays
    for bit_width in np.unique(weight_bit_width):
        if bit_width not in VALID_BIT_WIDTH_VALS:
            raise ValueError("Wrong bit width selected {0}. Please choose values 2, 4 or 8".format(bit_width))

    input_sign = [int(not input_uint8)] + (len(weight_bit_width)-1)*[1]
    ins = 1
    mode_per_layer = []
    layer_type = []
    layer_types_py = tuple(cls for name, cls in nn.__dict__.items() if isinstance(cls, type) and issubclass(cls, nn.Module))

    for name, module in network.named_modules():
        if not isinstance(module, layer_types_py):
            continue
        layer_type_name = module.__class__.__name__
        if layer_type_name == 'Linear':
            layer_type.append(layer_type_name)
        if layer_type_name == 'Conv2d':
            if module.groups == module.in_channels:
                layer_type.append('DepthwiseConv2d')
            else:
                layer_type.append(layer_type_name)
        elif layer_type_name in ('ReLU', 'Sigmoid'):
            # An activation after the last weighted layer feeds no further
            # layer, so there is no input-sign slot to clear for it; writing
            # unconditionally used to raise IndexError in that case.
            if ins < len(input_sign):
                input_sign[ins] = 0
            ins += 1

    for i in range(len(weight_bit_width)):
        signed_input = 4 * input_sign[i]
        if layer_type[i] == 'DepthwiseConv2d':
            mode_per_layer.append(signed_input + 1)
        elif weight_bit_width[i] == 2:
            mode_per_layer.append(signed_input + 3)
        elif weight_bit_width[i] == 4:
            mode_per_layer.append(signed_input + 2)
        else:
            mode_per_layer.append(signed_input)
    return mode_per_layer, layer_type
def pad_inputs_weights(quant_net, test_loader, mode_per_layer,
                       int_weights, int_biases, shift_biases,
                       mul_vals, shift_vals):
    """Zero-pad one quantised input sample and all layer parameters to multiples of 4.

    Args:
        quant_net: Network exposing ``quant_inp.quant_act_scale()``; the input
            batch is divided by that scale and rounded.
        test_loader: Iterable of ``(images, labels)`` batches.  The first
            sample of the last batch yielded is used.
            NOTE(review): the original indentation was lost; this assumes only
            the quantisation statement sits inside the loader loop - confirm.
        mode_per_layer: Per-layer modes; modes 1 and 5 mark 4-D weights whose
            channel axis is forced to size 1 and then squeezed away.
        int_weights: Per-layer 2-D or 4-D integer weight arrays.
        int_biases: Per-layer 1-D bias arrays.
        shift_biases: Per-layer 1-D bias shift arrays.
        mul_vals: Per-layer multipliers (scalars are passed through unchanged).
        shift_vals: Per-layer shifts (scalars are passed through unchanged).

    Returns:
        ``(t, padded_input, padded_int_weights, padded_int_biases,
        padded_shift_biases, padded_mul_vals, padded_shift_vals)`` where ``t``
        is the unpadded int16 sample with a leading axis of size 1.

    Raises:
        ValueError: If ``test_loader`` yields no batch, or a weight array is
            neither 2-D nor 4-D.
    """

    def _round_up_to_4(size):
        # Smallest multiple of 4 that is >= size.
        return ((size + 3) // 4) * 4

    def _pad_vector(vec):
        # Copy a 1-D array into a zero-filled int8 array of padded length.
        padded = np.zeros(_round_up_to_4(vec.shape[0])).astype(np.int8)
        padded[:vec.shape[0]] = vec
        return padded

    def _pad_scalar_or_vector(value):
        # Scalars are kept as they are; arrays are padded like biases.
        arr = np.array(value)
        if len(np.shape(arr)) > 0:
            return _pad_vector(arr)
        return value

    t = None
    for test_imgs, _ in test_loader:
        t = (torch.round(Variable(test_imgs).float()/quant_net.quant_inp.quant_act_scale().cpu()))
    if t is None:
        raise ValueError("test_loader did not yield any batch")
    t = t.detach().cpu().numpy().astype(np.int16)[0]

    # Pad the leading axis of the sample.
    new_shape = (_round_up_to_4(t.shape[0]),) + tuple(np.shape(t)[1:])
    padded_input = np.zeros(shape = new_shape).astype(np.int16)
    padded_input[:t.shape[0], ...] = t

    padded_int_weights = []
    for i, w in enumerate(int_weights):
        new_size_0 = _round_up_to_4(w.shape[0])
        if len(w.shape) == 2:
            # Only the first layer has its column count matched to the padded input.
            new_size_1 = padded_input.shape[0] if i == 0 else w.shape[1]
            new_w = np.zeros((new_size_0, new_size_1)).astype(np.int8)
            new_w[:w.shape[0], :w.shape[1]] = w
        elif len(w.shape) == 4:
            single_channel = mode_per_layer[i] in (1, 5)
            new_size_1 = 1 if single_channel else _round_up_to_4(w.shape[1])
            new_w = np.zeros((new_size_0, new_size_1, w.shape[2], w.shape[3])).astype(np.int8)
            new_w[:w.shape[0], :w.shape[1], :, :] = w
            if single_channel:
                new_w = np.squeeze(new_w, axis = 1)
        else:
            # Previously the array of the preceding layer was silently re-used.
            raise ValueError(
                "Unsupported weight rank {0} for layer {1}".format(len(w.shape), i))
        padded_int_weights.append(new_w)

    padded_int_biases = [_pad_vector(b) for b in int_biases]
    padded_shift_biases = [_pad_vector(b) for b in shift_biases]
    padded_mul_vals = [_pad_scalar_or_vector(m) for m in mul_vals]
    # Fix: the padded shift array used to be filled from the last multiplier
    # array instead of from the shift values themselves.
    padded_shift_vals = [_pad_scalar_or_vector(s) for s in shift_vals]

    t = np.expand_dims(t, axis = 0)
    return t, padded_input, padded_int_weights, padded_int_biases, padded_shift_biases, padded_mul_vals, padded_shift_vals
def combine_values(vec):
    """Pack 1, 2, 4, 8 or 16 integers into one 32-bit word, first value highest.

    Each value is truncated to ``32 / len(vec)`` bits.  Inputs of shape
    (2, 2), (2, 4), (4, 2) or (4, 4) are first flattened column by column.

    Raises:
        ValueError: If the (flattened) input does not hold 1, 2, 4, 8 or 16 values.
    """
    shape = np.shape(vec)
    if shape in ((2, 2), (2, 4), (4, 2), (4, 4)):
        n_rows, n_cols = shape
        # Column-major order: walk down each column before moving right.
        vec = [vec[row][col] for col in range(n_cols) for row in range(n_rows)]

    count = len(vec)
    if count not in [1, 2, 4, 8, 16]:
        raise ValueError("The input vector 'a' must have 1, 2, 4, 8 or 16 values")

    field_bits = int(32/count)
    field_mask = (1 << field_bits) - 1
    word = 0
    for item in vec:
        word = (word << field_bits) | (int(item) & field_mask)
    return word
def concat_inputs_weights(mode_per_layer, padded_input, padded_int_weights, padded_int_biases,
                          padded_shift_biases, padded_mul_vals, padded_shift_vals):
    """Pack padded inputs and layer parameters into 32-bit words.

    Inputs, biases, 3-D weights and weights of layers in modes 0/4 are packed
    four values per word along the first axis.  For 2-D and 4-D weights in
    modes 2/6 and 3/7, blocks of 4 x 2 and 4 x 4 values (first two axes) are
    packed per word.  Packing of values is delegated to ``combine_values``.

    Bias shifts and per-channel shift values are placed at bit offsets 27, 20,
    13 and 6; the layer mode is added to the bias-shift word, and 1 is added to
    the shift word when the layer is the last one or the next layer's mode is
    below 4.  Multipliers are placed at bit offsets 24, 16, 8 and 0.

    All bit fields are converted to Python ``int`` before shifting: the padded
    arrays hold ``np.int8`` elements, and under NumPy 2 promotion rules
    ``np.int8 << 27`` stays ``int8`` and yields 0.

    Returns:
        ``(combined_input_data, new_int_weights, new_int_biases, shift_biases,
        mul_vals, shift_vals)``.

    Raises:
        ValueError: If a weight array has a rank/mode combination for which no
            packing is defined (previously the preceding layer's result was
            silently re-used).
    """

    def _pack_axis0(arr):
        # Pack consecutive groups of four entries along axis 0 into one word.
        arr = np.asarray(arr)
        packed = np.zeros((arr.shape[0] // 4,) + tuple(arr.shape[1:]), dtype = np.int64)
        for idx in np.ndindex(*packed.shape):
            group = arr[(slice(4 * idx[0], 4 * (idx[0] + 1)),) + idx[1:]]
            packed[idx] = combine_values(group)
        return packed

    def _pack_blocks(arr, width):
        # Pack 4 x width blocks spanning the first two axes into one word.
        shape = (arr.shape[0] // 4, arr.shape[1] // width) + tuple(arr.shape[2:])
        packed = np.zeros(shape, dtype = np.int64)
        for idx in np.ndindex(*shape):
            rows = slice(4 * idx[0], 4 * (idx[0] + 1))
            cols = slice(width * idx[1], width * (idx[1] + 1))
            packed[idx] = combine_values(arr[(rows, cols) + idx[2:]])
        return packed

    def _shift_word(a, b, c, d):
        # Python ints avoid fixed-width overflow of NumPy scalar shifts.
        return (int(a) << 27) | (int(b) << 20) | (int(c) << 13) | (int(d) << 6)

    padded_input = np.expand_dims(padded_input, axis = 0)
    combined_input_data = [_pack_axis0(data) for data in padded_input]

    # Number of columns packed together per word for each supported mode.
    block_width = {0: 1, 4: 1, 2: 2, 6: 2, 3: 4, 7: 4}
    new_int_weights = []
    for layer, layer_weight in enumerate(padded_int_weights):
        rank = len(layer_weight.shape)
        if rank == 3:
            new_mat = _pack_axis0(layer_weight)
        elif rank in (2, 4) and mode_per_layer[layer] in block_width:
            width = block_width[mode_per_layer[layer]]
            if width == 1:
                new_mat = _pack_axis0(layer_weight)
            else:
                new_mat = _pack_blocks(layer_weight, width)
        else:
            raise ValueError(
                "No packing defined for layer {0} (rank {1})".format(layer, rank))
        new_int_weights.append(new_mat)

    new_int_biases = [_pack_axis0(layer_biases) for layer_biases in padded_int_biases]

    shift_biases = []
    for layer, layer_shift_biases in enumerate(padded_shift_biases):
        n_words = int(np.shape(layer_shift_biases)[0] / 4)
        new_mat = np.zeros(n_words, dtype = np.int64)
        for j in range(n_words):
            word = _shift_word(*layer_shift_biases[4 * j : 4 * (j + 1)])
            new_mat[j] = word + mode_per_layer[layer]
        shift_biases.append(new_mat)

    mul_vals = []
    for layer_muls in padded_mul_vals:
        if len(np.shape(layer_muls)) > 0:
            n_words = int(np.shape(layer_muls)[0] / 4)
            new_mat = np.zeros(n_words, dtype = np.int64)
            for j in range(n_words):
                m1, m2, m3, m4 = (int(v) for v in layer_muls[4 * j : 4 * (j + 1)])
                new_mat[j] = (m1 << 24) | (m2 << 16) | (m3 << 8) | m4
        else:
            # A single multiplier for the whole layer is replicated four times.
            new_mat = combine_values([layer_muls, layer_muls, layer_muls, layer_muls])
        mul_vals.append(new_mat)

    shift_vals = []
    n_layers = len(padded_shift_vals)
    for layer, layer_shifts in enumerate(padded_shift_vals):
        # Low bit is set for the last layer or when the next layer's mode is < 4.
        if layer + 1 == n_layers:
            flag = 1
        else:
            flag = 1 if mode_per_layer[layer + 1] < 4 else 0
        if len(np.shape(layer_shifts)) > 0:
            n_words = int(np.shape(layer_shifts)[0] / 4)
            new_mat = np.zeros(n_words, dtype = np.int64)
            for j in range(n_words):
                fields = [int(v) + 7 for v in layer_shifts[4 * j : 4 * (j + 1)]]
                new_mat[j] = _shift_word(*fields) + flag
        else:
            sh1 = int(layer_shifts) + 7
            new_mat = _shift_word(sh1, sh1, sh1, sh1) + flag
        shift_vals.append(new_mat)

    return combined_input_data, new_int_weights, new_int_biases, shift_biases, mul_vals, shift_vals
def save_1d_inputs(path, input):
    """Write a 2-D array of packed inputs to ``<path>/ibex_inputs.h`` as a C array.

    The header declares ``static const int input[rows][cols]`` with one
    brace-enclosed row per line.

    Args:
        path: Directory in which ``ibex_inputs.h`` is created.
        input: 2-D indexable of values (shape is taken with ``np.shape``).
    """
    dims = np.shape(input)
    with open(path + '/ibex_inputs.h', 'w') as f:
        f.write('#ifndef MLP_INPUTS_H\n#define MLP_INPUTS_H\n\n')
        f.write('static const int input[' + str(dims[0]) + '][' + str(dims[1]) + '] = {\n')
        for n in range(dims[0]):
            # join() also covers single-column rows, which used to fail because
            # the last element was addressed through a leaked loop variable.
            row = ', '.join(str(input[n][m]) for m in range(dims[1]))
            f.write('\t{' + row + '}')
            if n != dims[0] - 1:
                f.write(',')
            f.write('\n')
        f.write('};\n\n')
        # The closing comment now names the guard that is actually defined.
        f.write('#endif /* MLP_INPUTS_H */')
    return
def save_2d_inputs(path, input):
    """Write a 4-D input array to ``<path>/ibex_inputs.h`` as a nested C array.

    The array is transposed with axes ``(2, 3, 1, 0)`` before being written,
    and the header declares ``static const int input[d0][d1][d2][d3]`` using
    the transposed shape.

    Args:
        path: Directory in which ``ibex_inputs.h`` is created.
        input: 4-D array.
    """

    def _braces(block, depth):
        # Render one nesting level; the innermost rows go on a single line.
        pad = '\t' * depth
        if np.ndim(block) == 1:
            return pad + '{' + ', '.join(str(v) for v in block) + '}'
        children = [_braces(sub, depth + 1) for sub in block]
        body = ',\n'.join(children) + ('\n' if children else '')
        return pad + '{\n' + body + pad + '}'

    with open(path + '/ibex_inputs.h', 'w') as f:
        f.write('#ifndef IBEX_INPUTS_H\n#define IBEX_INPUTS_H\n\n')
        test_batch_X_cnn_new = np.transpose(input, (2, 3, 1, 0))
        dims = np.shape(test_batch_X_cnn_new)
        st = 'static const int input[' + str(dims[0]) + '][' + str(dims[1]) + '][' + str(dims[2]) + ']['
        st += str(dims[3]) + '] = {\n'
        f.write(st)
        planes = [_braces(plane, 1) for plane in test_batch_X_cnn_new]
        f.write(',\n'.join(planes) + ('\n' if planes else ''))
        f.write('};\n\n\n')
        f.write('#endif /* IBEX_INPUTS_H */')
    return
def save_mlp_net_params(path, int_weights, int_biases, mul_vals, shift_vals, shift_biases = None):
    """Write MLP weights, biases and requantisation parameters as C headers.

    ``<path>/mlp_weights.h`` receives one 2-D array ``W<k>`` per weight matrix
    and one 1-D array ``B<k>`` per bias vector.  ``<path>/ibex_mlp_params.h``
    receives ``MV<k>`` / ``SV<k>`` defines.  When ``path`` contains the text
    ``'original'`` the shift defines are written as ``shift + 7`` and every
    ``SB<k>`` is defined as 0; otherwise the shifts are written unchanged and
    ``shift_biases`` is emitted as ``SB<k>`` arrays.

    Args:
        path: Output directory.
        int_weights: Sequence of 2-D arrays.
        int_biases: Sequence of 1-D arrays.
        mul_vals: Sequence of multipliers.
        shift_vals: Sequence of shifts.
        shift_biases: Optional sequence of 1-D arrays.  ``None`` (the default)
            is treated as "no arrays"; it used to raise ``TypeError`` outside
            of 'original' mode.
    """

    def _vector_decl(label, vec):
        # One-line 1-D C array initialiser.
        count = np.shape(vec)[0]
        values = ', '.join(str(vec[n]) for n in range(count))
        return 'static const int ' + label + '[' + str(count) + '] = {\n\t' + values + '\n};\n\n'

    def _matrix_decl(label, mat):
        # 2-D C array initialiser with one brace-enclosed row per line.
        dims = np.shape(mat)
        text = 'static const int ' + label + '[' + str(dims[0]) + ']' + '[' + str(dims[1]) + '] = {\n'
        for n in range(dims[0]):
            text += '\t{' + ', '.join(str(mat[n][m]) for m in range(dims[1])) + '}'
            if n != dims[0] - 1:
                text += ','
            text += '\n'
        return text + '};\n\n'

    with open(path + '/mlp_weights.h', 'w') as f:
        f.write('#ifndef MLP_WEIGHTS_H\n#define MLP_WEIGHTS_H\n\n')
        for k, mat in enumerate(int_weights, start = 1):
            f.write(_matrix_decl('W' + str(k), mat))
        for k, vec in enumerate(int_biases, start = 1):
            f.write(_vector_decl('B' + str(k), vec))
        f.write('\n')
        f.write('#endif /* MLP_WEIGHTS_H */')

    original_mode = 'original' in path
    with open(path + '/ibex_mlp_params.h', 'w') as f:
        f.write('#ifndef IBEX_MLP_PARAMS_H\n#define IBEX_MLP_PARAMS_H\n\n')
        for i, mul_v in enumerate(mul_vals):
            f.write('#define MV' + str(i+1) + ' ' + str(mul_v) + '\n')
        f.write('\n')
        for i, shift_v in enumerate(shift_vals):
            shown = shift_v + 7 if original_mode else shift_v
            f.write('#define SV' + str(i+1) + ' ' + str(shown) + '\n')
        f.write('\n')
        if original_mode:
            # One zero bias-shift define per multiplier.
            for i, _ in enumerate(mul_vals):
                f.write('#define SB' + str(i+1) + ' ' + str(0) + '\n')
            f.write('\n#endif /* IBEX_MLP_PARAMS_H */')
        else:
            for k, vec in enumerate(shift_biases if shift_biases is not None else [], start = 1):
                f.write(_vector_decl('SB' + str(k), vec))
            f.write('#endif /* IBEX_MLP_PARAMS_H */')
    return
def save_cnn_net_params(path, int_weights, int_biases, mul_vals, shift_vals, shift_biases = None):
    """Write CNN weights, biases and requantisation parameters as C headers.

    ``<path>/cnn_weights.h`` receives, per weight array:

    * 2-D arrays as ``W<k>[d0][d1]``;
    * 4-D arrays whose last two axes have size 1 as ``F<k>[d0][d1]`` (those
      axes are squeezed away);
    * 3-D arrays as ``F<k>[d0][d1][d2]``;
    * other 4-D arrays as ``F<k>[...]`` after transposing axes to
      ``(0, 2, 3, 1)``.

    Arrays of any other rank are not written.  Bias vectors follow as
    ``B<k>``.  ``<path>/ibex_cnn_params.h`` receives ``MV<k>`` / ``SV<k>``
    defines; when ``path`` contains ``'original'`` the shifts are written as
    ``shift + 7`` and every ``SB<k>`` is defined as 0, otherwise
    ``shift_biases`` is emitted as ``SB<k>`` arrays.

    Args:
        path: Output directory.
        int_weights: Sequence of weight arrays.
        int_biases: Sequence of 1-D arrays.
        mul_vals: Sequence of multipliers.
        shift_vals: Sequence of shifts.
        shift_biases: Optional sequence of 1-D arrays.  ``None`` (the default)
            is treated as "no arrays"; it used to raise ``TypeError`` outside
            of 'original' mode.
    """

    def _braces(block, depth):
        # Render one nesting level; the innermost rows go on a single line.
        pad = '\t' * depth
        if np.ndim(block) == 1:
            return pad + '{' + ', '.join(str(v) for v in block) + '}'
        children = [_braces(sub, depth + 1) for sub in block]
        body = ',\n'.join(children) + ('\n' if children else '')
        return pad + '{\n' + body + pad + '}'

    def _array_decl(label, mat):
        # Nested C array initialiser for an array of rank >= 2.
        mat = np.asarray(mat)
        extents = ''.join('[' + str(d) + ']' for d in mat.shape)
        rows = [_braces(sub, 1) for sub in mat]
        body = ',\n'.join(rows) + ('\n' if rows else '')
        return 'static const int ' + label + extents + ' = {\n' + body + '};\n\n'

    def _vector_decl(label, vec):
        # One-line 1-D C array initialiser.
        count = np.shape(vec)[0]
        values = ', '.join(str(vec[n]) for n in range(count))
        return 'static const int ' + label + '[' + str(count) + '] = {\n\t' + values + '\n};\n\n'

    wi = 0
    fi = 0
    with open(path + '/cnn_weights.h', 'w') as f:
        f.write('#ifndef CNN_WEIGHTS_H\n#define CNN_WEIGHTS_H\n\n')
        for mat in int_weights:
            dims = np.shape(mat)
            if len(dims) == 2:
                wi += 1
                f.write(_array_decl('W' + str(wi), mat))
            elif len(dims) == 4 and dims[2] == dims[3] == 1:
                # 1x1 kernels are stored as a plain matrix.
                fi += 1
                f.write(_array_decl('F' + str(fi), np.squeeze(mat, axis = (2, 3))))
            elif len(dims) == 3:
                fi += 1
                f.write(_array_decl('F' + str(fi), mat))
            elif len(dims) == 4:
                fi += 1
                f.write(_array_decl('F' + str(fi), np.transpose(mat, (0, 2, 3, 1))))
        for k, vec in enumerate(int_biases, start = 1):
            f.write(_vector_decl('B' + str(k), vec))
        f.write('\n')
        f.write('#endif /* CNN_WEIGHTS_H */')

    original_mode = 'original' in path
    with open(path + '/ibex_cnn_params.h', 'w') as f:
        f.write('#ifndef IBEX_CNN_PARAMS_H\n#define IBEX_CNN_PARAMS_H\n\n')
        for i, mul_v in enumerate(mul_vals):
            f.write('#define MV' + str(i+1) + ' ' + str(mul_v) + '\n')
        f.write('\n')
        for i, shift_v in enumerate(shift_vals):
            shown = shift_v + 7 if original_mode else shift_v
            f.write('#define SV' + str(i+1) + ' ' + str(shown) + '\n')
        f.write('\n')
        if original_mode:
            # One zero bias-shift define per multiplier.
            for i, _ in enumerate(mul_vals):
                f.write('#define SB' + str(i+1) + ' ' + str(0) + '\n')
            f.write('\n#endif /* IBEX_CNN_PARAMS_H */')
        else:
            for k, vec in enumerate(shift_biases if shift_biases is not None else [], start = 1):
                f.write(_vector_decl('SB' + str(k), vec))
            f.write('#endif /* IBEX_CNN_PARAMS_H */')
    return
def generate_Makefile(path, name):
    """Write ``<path>/Makefile`` for program ``name`` and copy it to ``<path>/../optimized``."""
    makefile_path = path + '/Makefile'
    pieces = (
        '# Copyright lowRISC contributors.\n',
        '# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n',
        '# SPDX-License-Identifier: Apache-2.0\n',
        '#\n# Generate a baremetal application\n\n',
        '# Name of the program $(PROGRAM).c will be added as a source file\n\n',
        'PROGRAM = ' + name + '\n',
        'PROGRAM_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))\n',
        '# Any extra source files to include in the build. Use the upper case .S\n',
        '# extension for assembly files\nEXTRA_SRCS :=\n\n',
        'include ${PROGRAM_DIR}/../../common/common.mk',
    )
    with open(makefile_path, 'w') as f:
        for piece in pieces:
            f.write(piece)
    # Copy only after the file has been closed so the full content is on disk.
    shutil.copy(makefile_path, path + '/../optimized')
    return
def generate_og_c_code_mlp(path, name, int_weights, optimal_config, type_of_layer):
    """Generate ``<path>/<name>.c`` running an MLP through ``mlp_layer`` calls.

    Layer sizes are taken from the shapes in ``int_weights``; a call is
    emitted for the first, each intermediate and the last layer whenever the
    matching entry of ``type_of_layer`` is ``'Linear'``.  ``optimal_config``
    only determines how many intermediate layers are visited.
    """
    call_tail = ', W{0}, B{0}, SB{0}, MV{0}, SV{0});\n'
    with open(path + '/' + name + '.c', 'w') as f:
        emit = f.write
        for header in ('simple_system_common.h', 'fully_connected.h',
                       'ibex_mlp_params.h', 'mlp_weights.h'):
            emit('#include "{0}"\n'.format(header))
        emit('#include "ibex_inputs.h"\n\n')

        # Dimension macros.
        emit('#define IN_DIM {0}'.format(int_weights[0].shape[1]))
        n_layers = len(int_weights)
        for idx in range(1, n_layers):
            emit('\n#define HIDDEN_DIM{0} {1}'.format(idx, int_weights[idx].shape[1]))
        emit('\n#define OUT_DIM {0}'.format(int_weights[-1].shape[0]))
        emit('\n#define SAMPLES 1\n\n')
        emit('int outs[SAMPLES][OUT_DIM];\n\n')

        # Network function: buffers, input copy, layer calls, result dump.
        emit('void ' + name + '() {\n\n')
        emit('\tint inp[IN_DIM];\n')
        for idx in range(1, n_layers):
            emit('\tint y{0}[HIDDEN_DIM{0}];\n'.format(idx))
        emit('\tint out[OUT_DIM];\n')
        emit('\n\tfor (int iter = 0; iter < SAMPLES; iter ++){\n')
        emit('\t\tfor(int i = 0; i < IN_DIM; i++) inp[i] = input[iter][i];\n\n')
        emit('\t\tpcount_enable(1);\n\n')

        if type_of_layer[0] == 'Linear':
            emit('\t\tmlp_layer(inp, y1, IN_DIM, HIDDEN_DIM1, W1, B1, SB1, MV1, SV1);\n')
        for idx, _ in enumerate(optimal_config[1:-1], start = 1):
            if type_of_layer[idx] != 'Linear':
                continue
            emit('\t\tmlp_layer(y{0}, y{1}, HIDDEN_DIM{0}, HIDDEN_DIM{1}'.format(idx, idx + 1)
                 + call_tail.format(idx + 1))
        if type_of_layer[-1] == 'Linear':
            last = len(int_weights)
            emit('\t\tmlp_layer(y{0}, out, HIDDEN_DIM{0}, OUT_DIM'.format(last - 1)
                 + call_tail.format(last) + '\n')

        emit('\t\tpcount_enable(0);\n\n')
        emit('\t\tputs("Output Layer Values:\\n");\n')
        emit('\t\tfor(int i = 0; i < OUT_DIM; i++) {\n')
        emit('\t\t\tputhex(out[i]);\n')
        emit('\t\t\tputs("\\n");\n')
        emit('\t\t}\n')
        emit('\t}\n')
        emit('}\n\n')

        # Entry point.
        emit('int main(void) {\n\n')
        emit('\tpcount_enable(0);\n\n')
        emit('\t' + name + '();\n\n')
        emit('\treturn 0;\n}')
    return
def generate_opt_c_code_mlp(path, name, int_weights, optimal_config, type_of_layer):
    """Generate ``<path>/<name>.c`` running an MLP through ``mlp_layer_<N>bits`` calls.

    ``IN_DIM`` is ``(8 // optimal_config[0])`` times the column count of the
    first weight array; hidden and output sizes are four times the row count
    of the corresponding weight array.  The generated code indexes buffers
    with the sizes shifted right by two.  The bit width in each call name
    comes from ``optimal_config``; a call is emitted only where
    ``type_of_layer`` is ``'Linear'``.
    """
    call_tail = ', W{0}, B{0}, SB{0}, MV{0}, SV{0});\n'
    with open(path + '/' + name + '.c', 'w') as f:
        emit = f.write
        for header in ('simple_system_common.h', 'fully_connected_opt.h',
                       'ibex_mlp_params.h', 'mlp_weights.h'):
            emit('#include "{0}"\n'.format(header))
        emit('#include "ibex_inputs.h"\n\n')

        # Dimension macros.
        emit('#define IN_DIM {0}'.format((8//optimal_config[0]) * int_weights[0].shape[1]))
        n_layers = len(int_weights)
        for idx in range(1, n_layers):
            emit('\n#define HIDDEN_DIM{0} {1}'.format(idx, 4 * int_weights[idx-1].shape[0]))
        emit('\n#define OUT_DIM {0}'.format(4 * int_weights[-1].shape[0]))
        emit('\n#define SAMPLES 1\n\n')
        emit('int outs[SAMPLES][OUT_DIM >> 2];\n\n')

        # Network function: buffers, input copy, layer calls, result dump.
        emit('void ' + name + '() {\n\n')
        emit('\tint inp[IN_DIM >> 2];\n')
        for idx in range(1, n_layers):
            emit('\tint y{0}[HIDDEN_DIM{0} >> 2];\n'.format(idx))
        emit('\tint out[OUT_DIM >> 2];\n')
        emit('\n\tfor (int iter = 0; iter < SAMPLES; iter ++){\n')
        emit('\t\tfor(int i = 0; i < IN_DIM >> 2; i++) inp[i] = input[iter][i];\n\n')
        emit('\t\tpcount_enable(1);\n\n')

        if type_of_layer[0] == 'Linear':
            emit('\t\tmlp_layer_{0}bits(inp, y1, IN_DIM >> 2, HIDDEN_DIM1 >> 2, W1, B1, SB1, MV1, SV1);\n'
                 .format(optimal_config[0]))
        for idx, bits in enumerate(optimal_config[1:-1], start = 1):
            if type_of_layer[idx] != 'Linear':
                continue
            emit('\t\tmlp_layer_{0}bits(y{1}, y{2}, HIDDEN_DIM{1} >> 2, HIDDEN_DIM{2} >> 2'
                 .format(bits, idx, idx + 1) + call_tail.format(idx + 1))
        if type_of_layer[-1] == 'Linear':
            last = len(int_weights)
            emit('\t\tmlp_layer_{0}bits(y{1}, out, HIDDEN_DIM{1} >> 2, OUT_DIM >> 2'
                 .format(optimal_config[-1], last - 1) + call_tail.format(last) + '\n')

        emit('\t\tpcount_enable(0);\n\n')
        emit('\t\tputs("Output Layer Values:\\n");\n')
        emit('\t\tfor(int i = 0; i < OUT_DIM >> 2; i++) {\n')
        # Each output word is printed as four separate bytes, highest first.
        emit('\t\t\tputhex((out[i] & 0xFF000000) >> 24);\n')
        emit('\t\t\tputs(" ");\n')
        emit('\t\t\tputhex((out[i] & 0xFF0000) >> 16);\n')
        emit('\t\t\tputs(" ");\n')
        emit('\t\t\tputhex((out[i] & 0xFF00) >> 8);\n')
        emit('\t\t\tputs(" ");\n')
        emit('\t\t\tputhex(out[i] & 0xFF);\n')
        emit('\t\t\tputs("\\n");\n')
        emit('\t\t}\n')
        emit('\t}\n')
        emit('}\n\n')

        # Entry point.
        emit('int main(void) {\n\n')
        emit('\tpcount_enable(0);\n\n')
        emit('\t' + name + '();\n\n')
        emit('\treturn 0;\n}')
def get_cnn_details(module, details = None):
    """Collect a description of every Conv2d, pooling and Linear layer in ``module``.

    The module tree is traversed depth-first in child order.  Each matching
    layer is appended to ``details`` as a dict with a ``"layer_type"`` key
    followed by the layer's relevant attributes.

    Args:
        module: ``torch.nn.Module`` to inspect.
        details: Optional list to append to; a new list is created when None.

    Returns:
        The list of layer dicts (the same object as ``details`` if given).
    """
    if details is None:
        details = []

    # (class, recorded label, attributes copied from the layer), checked in order.
    recorded_layers = (
        (nn.Conv2d, "Conv2d",
         ("in_channels", "out_channels", "kernel_size", "stride", "padding", "groups")),
        (nn.MaxPool2d, "MaxPool2d", ("kernel_size", "stride", "padding")),
        (nn.AvgPool2d, "AvgPool2d", ("kernel_size", "stride", "padding")),
        (nn.Linear, "Linear", ("in_features", "out_features")),
    )

    for layer in module.children():
        for layer_cls, label, attributes in recorded_layers:
            if isinstance(layer, layer_cls):
                entry = {"layer_type": label}
                for attribute in attributes:
                    entry[attribute] = getattr(layer, attribute)
                details.append(entry)
                break
        # Recursively apply to children modules
        get_cnn_details(layer, details)
    return details
def generate_og_c_code_cnn(path, name, input, cnn_details, int_weights):
    # Generate `<path>/<name>.c`: a C program that runs the CNN described by
    # `cnn_details` layer by layer (conv2 / pw_conv / dw_conv, maxpool2 /
    # avgpool2, flatten, mlp_layer) and prints the output values.
    #
    #   path         -- output directory
    #   name         -- base name of the .c file and of the generated function
    #   input        -- array whose shape[2] becomes IMG_SZ and shape[1] NUM_FIL0
    #   cnn_details  -- list of layer dicts with a "layer_type" key plus the
    #                   layer attributes read below
    #   int_weights  -- per-layer weight arrays; 4-D entries are treated as
    #                   convolution filters, 2-D entries as dense layers
    #
    # NOTE(review): the indentation of this function was lost in the source it
    # was recovered from; the nesting below is reconstructed from the counter
    # logic and should be confirmed against the original file.
    with open(path + '/' + name + '.c', 'w') as f:
        f.write('#include "simple_system_common.h"\n')
        f.write('#include "cnn_weights.h"\n')
        f.write('#include "fully_connected.h"\n')
        f.write('#include "ibex_cnn_params.h"\n')
        f.write('#include "ibex_inputs.h"\n')
        f.write('#include "conv2d.h"\n')
        # Include the depthwise header once if any layer but the last is a
        # convolution with in_channels == out_channels == groups != 1.
        for detail in cnn_details[:-1]:
            if detail["layer_type"] == "Conv2d":
                if(detail["in_channels"] == detail["out_channels"] == detail["groups"] != 1):
                    f.write('#include "dws_conv.h"\n')
                    break
        f.write('\n')
        f.write('#define IMG_SZ ' + str(np.shape(input)[2]) + '\n')
        f.write('#define NUM_FIL0 ' + str(np.shape(input)[1]) + '\n\n')
        # FILTER<k>: kernel size (axis 2) of the k-th 4-D weight array.
        i = 1
        for w in int_weights:
            if(len(np.shape(w)) == 4):
                f.write('#define FILTER' + str(i) + ' ' + str(w.shape[2]) + '\n')
                i += 1
        f.write('\n')
        # NUM_FIL<k>: filter count (axis 0) of the k-th 4-D weight array.
        i = 1
        for w in int_weights:
            if(len(np.shape(w)) == 4):
                f.write('#define NUM_FIL' + str(i) + ' ' + str(w.shape[0]) + '\n')
                i += 1
        f.write('\n')
        # STRIDE<k>: first stride component of the k-th convolution.
        i = 1
        for detail in cnn_details:
            if detail["layer_type"] == "Conv2d":
                f.write('#define STRIDE' + str(i) + ' ' + str(detail["stride"][0]) + '\n')
                i += 1
        f.write('\n')
        # PAD_TB<k> / PAD_LR<k>: 'same' maps to (kernel - 1) // 2, 'valid' to 0,
        # anything else uses the first component of the padding value.
        i = 1
        for detail in cnn_details:
            if detail["layer_type"] == "Conv2d":
                if(detail["padding"] == 'same'):
                    f.write('#define PAD_TB' + str(i) + ' ' + str((detail["kernel_size"][0] - 1)//2) + '\n')
                    f.write('#define PAD_LR' + str(i) + ' ' + str((detail["kernel_size"][0] - 1)//2) + '\n')
                elif(detail["padding"] == 'valid'):
                    f.write('#define PAD_TB' + str(i) + ' 0\n')
                    f.write('#define PAD_LR' + str(i) + ' 0\n')
                else:
                    f.write('#define PAD_TB' + str(i) + ' ' + str(detail["padding"][0]) + '\n')
                    f.write('#define PAD_LR' + str(i) + ' ' + str(detail["padding"][0]) + '\n')
                f.write('\n')
                i += 1
        # POOL_STRIDE<k> / POOL_SIZE<k> for every pooling layer; the values are
        # written with str(), so they are assumed to be plain integers here.
        i = 1
        for detail in cnn_details:
            if ((detail["layer_type"] == "MaxPool2d") or (detail["layer_type"] == "AvgPool2d")):
                f.write('#define POOL_STRIDE' + str(i) + ' ' + str(detail["stride"]) + '\n')
                f.write('#define POOL_SIZE' + str(i) + ' ' + str(detail["kernel_size"]) + '\n')
                f.write('\n')
                i += 1
        # DENSE_DIM<k>: row count of every 2-D weight array except the last,
        # whose row count becomes OUT_DIM.
        i = 1
        for w in int_weights[:-1]:
            if(len(np.shape(w)) == 2):
                f.write('#define DENSE_DIM' + str(i) + ' ' + str(w.shape[0]) + '\n')
                i += 1
        f.write('#define OUT_DIM ' + str(int_weights[-1].shape[0]) + '\n\n')
        f.write('#define SAMPLES 1\nint outs[SAMPLES][OUT_DIM];\n\n')
        f.write('void ' + name + '() {\n\n')
        # Pass 1: emit dout/hout/wout for every conv and pooling layer.
        # i counts layers, fi convolutions, st pooling layers.  The pass stops
        # at the first Linear layer, where flatten_dim is emitted.
        i = 1
        fi = 1
        st = 1
        flatten = 0
        for detail in cnn_details:
            if detail["layer_type"] == "Conv2d":
                f.write('\tint dout' + str(i) + ' = NUM_FIL' + str(fi) + ';\n')
                if(i == 1):
                    f.write('\tint hout' + str(i) + ' = ((IMG_SZ - FILTER1 + 2 * PAD_TB1)/STRIDE1) + 1;\n')
                    f.write('\tint wout' + str(i) + ' = ((IMG_SZ - FILTER1 + 2 * PAD_LR1)/STRIDE1) + 1;\n')
                else:
                    f.write('\tint hout' + str(i) + ' = ((hout' + str(i-1) + ' - FILTER' + str(fi))
                    f.write('+ 2 * PAD_TB' + str(fi) + ')/STRIDE' + str(fi) + ')+1;\n')
                    f.write('\tint wout' + str(i) + ' = ((wout' + str(i-1) + ' - FILTER' + str(fi))
                    f.write('+ 2 * PAD_LR' + str(fi) + ')/STRIDE' + str(fi) + ')+1;\n')
                fi += 1
            elif ((detail["layer_type"] == "MaxPool2d") or (detail["layer_type"] == "AvgPool2d")):
                f.write('\tint dout' + str(i) + ' = dout' + str(i-1) + ';\n')
                f.write('\tint hout' + str(i) + ' = hout' + str(i-1) + '/POOL_STRIDE' + str(st) + ';\n')
                f.write('\tint wout' + str(i) + ' = wout' + str(i-1) + '/POOL_STRIDE' + str(st) + ';\n')
                st += 1
            elif detail["layer_type"] == "Linear":
                if flatten == 0:
                    f.write('\tint flatten_dim = dout' + str(i-1) + ' * hout' + str(i-1) + ' * wout' + str(i-1) + ';\n')
                    flatten = 1
                break
            f.write('\n')
            i += 1
        f.write('\n')
        # Pass 2: declare the buffers.  The first Linear layer declares the
        # flatten buffer; each later Linear layer declares a DENSE_DIM<dn>
        # buffer, so dn only advances in that branch.
        i = 1
        fi = 1
        dn = 1
        flatten = 0
        f.write('\tint in[IMG_SZ][IMG_SZ][NUM_FIL0];\n')
        f.write('\tint inp_dim[3] = {IMG_SZ, IMG_SZ, NUM_FIL0};\n\n')
        for detail in cnn_details:
            if detail["layer_type"] == "Conv2d":
                f.write('\tint out' + str(i) + '[hout' + str(i) + '][wout' + str(i) + '][dout' + str(i) + '];\n')
                f.write('\tint pad_' + str(i) + '[4] = {PAD_TB' + str(fi) + ', PAD_TB' + str(fi))
                f.write(', PAD_LR' + str(fi) + ', PAD_LR' + str(fi) + '};\n')
                f.write('\tint outp_dim' + str(i) + '[3] = {hout' + str(i) + ', wout' + str(i))
                f.write(', dout' + str(i) + '};\n')
                f.write('\tint f_dim' + str(i) + '[4] = {NUM_FIL' + str(fi) + ', FILTER' + str(fi))
                f.write(', FILTER' + str(fi) + ', NUM_FIL' + str(fi-1) + '};\n')
                fi += 1
            elif ((detail["layer_type"] == "MaxPool2d") or (detail["layer_type"] == "AvgPool2d")):
                f.write('\tint out' + str(i) + '[hout' + str(i) + '][wout' + str(i) + '][dout' + str(i) + '];\n')
                f.write('\tint outp_dim' + str(i) + '[3] = {hout' + str(i) + ', wout' + str(i))
                f.write(', dout' + str(i) + '};\n')
            elif detail["layer_type"] == "Linear":
                if flatten == 0:
                    f.write('\tint out' + str(i) + '[flatten_dim];\n')
                    flatten = 1
                else:
                    f.write('\tint out' + str(i) + '[DENSE_DIM' + str(dn) + '];')
                    dn += 1
            f.write('\n')
            i += 1
        f.write('\n\tint out[OUT_DIM];\n\n\tfor (int iter = 0; iter < SAMPLES; iter++){\n\n')
        f.write('\t\tfor(int i = 0; i < IMG_SZ; i++){\n')
        f.write('\t\t\tfor(int j = 0; j < IMG_SZ; j++){\n')
        f.write('\t\t\t\tfor(int k = 0; k < NUM_FIL0; k++){\n')
        f.write('\t\t\t\t\tin[i][j][k] = input[i][j][k][iter];\n')
        f.write('\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tpcount_enable(1);\n\n')
        # Pass 3: emit one call per layer except the last one, which is
        # handled after the loop because its output buffer is `out`.
        i = 1
        fi = 1
        st = 1
        dn = 1
        flatten = 0
        for detail in cnn_details[:-1]:
            if detail["layer_type"] == "Conv2d":
                # Select the convolution routine from the layer attributes.
                if(detail["in_channels"] == detail["out_channels"] == detail["groups"] != 1):
                    conv_type = 'dw_conv'
                elif(detail["kernel_size"][0] == 1):
                    conv_type = 'pw_conv'
                else:
                    conv_type = "conv2"
                if(i == 1):
                    f.write('\t\t' + conv_type + '(inp_dim, f_dim1, outp_dim1, in, F1, B1, ')
                    f.write('out1, STRIDE1, pad_1, SB1, MV1, SV1);')
                else:
                    f.write('\t\t' + conv_type + '(outp_dim' + str(i-1) + ', f_dim' + str(i) + ', outp_dim' + str(i))
                    f.write(', out' + str(i-1) + ', F' + str(fi) + ', B' + str(fi) + ', out' + str(i))
                    f.write(', STRIDE' + str(fi) + ', pad_' + str(i) + ', SB' + str(fi))
                    f.write(', MV' + str(fi) + ', SV' + str(fi) + ');')
                fi += 1
            elif detail["layer_type"] == "MaxPool2d":
                f.write('\t\tmaxpool2(outp_dim' + str(i-1) + ', outp_dim' + str(i))
                f.write(', out' + str(i-1) + ', out' + str(i) + ', POOL_SIZE' + str(st) + ', POOL_STRIDE')
                f.write(str(st) + ');\n')
                st += 1
            elif(detail["layer_type"] == "AvgPool2d"):
                f.write('\t\tavgpool2(outp_dim' + str(i-1) + ', outp_dim' + str(i))
                f.write(', out' + str(i-1) + ', out' + str(i) + ', POOL_SIZE' + str(st) + ', POOL_STRIDE')
                f.write(str(st) + ');\n')
                st += 1
            elif detail["layer_type"] == "Linear":
                if flatten == 0:
                    # The first dense layer is preceded by a flatten call and
                    # therefore consumes two buffer indices.
                    f.write('\t\tflatten(outp_dim' + str(i-1) + ', out' + str(i-1) + ', out' + str(i) + ');\n\n')
                    i += 1
                    f.write('\t\tmlp_layer(out' + str(i-1) + ', out' + str(i) + ', flatten_dim, DENSE_DIM1')
                    f.write(', W1, B' + str(fi + dn - 1) + ', SB' + str(fi + dn - 1) + ', MV' + str(fi + dn - 1))
                    f.write(', SV' + str(fi + dn - 1) + ');')
                    dn += 1
                    flatten = 1
                else:
                    f.write('\t\tmlp_layer(out' + str(i-1) + ', out' + str(i) + ', DENSE_DIM' + str(dn-1))
                    f.write(', DENSE_DIM' + str(dn) + ', W' + str(dn) + ', B' + str(fi + dn - 1))
                    f.write(', SB' + str(fi + dn - 1) + ', MV' + str(fi + dn - 1))
                    f.write(', SV' + str(fi + dn - 1) + ');')
                    dn += 1
            f.write('\n')
            i += 1
        # Final layer: flatten first if no dense layer was emitted so far.
        if flatten == 0:
            f.write('\t\tflatten(outp_dim' + str(i-1) + ', out' + str(i-1) + ', out' + str(i) + ');\n\n')
            i += 1
            f.write('\t\tmlp_layer(out' + str(i-1) + ', out, flatten_dim, OUT_DIM, ')
            f.write('W1, B' + str(fi + dn - 1) + ', SB' + str(fi + dn - 1) + ', MV' + str(fi + dn - 1))
            f.write(', SV' + str(fi + dn - 1) + ');')
        else:
            f.write('\t\tmlp_layer(out' + str(i-1) + ', out, DENSE_DIM' + str(dn-1))
            f.write(', OUT_DIM, W' + str(dn) + ', B' + str(fi + dn - 1))
            f.write(', SB' + str(fi + dn - 1) + ', MV' + str(fi + dn - 1))
            f.write(', SV' + str(fi + dn - 1) + ');\n')
        f.write('\n\t\tpcount_enable(0);\n\n')
        f.write('\t\tputs("Output Layer Values:\\n");\n')
        f.write('\t\tfor(int i = 0; i < OUT_DIM; i++) {\n')
        f.write('\t\t\tputhex(out[i]);\n')
        f.write('\t\t\tputs("\\n");\n')
        f.write('\t\t}\n')
        f.write('\t}\n')
        f.write('}\n\n')
        f.write('int main(void) {\n\n')
        f.write('\tpcount_enable(0);\n\n')
        f.write('\t' + name + '();\n\n')
        f.write('\treturn 0;\n}')
    return
_OPT_CNN_POOL_TYPES = ("MaxPool2d", "AvgPool2d")


def _opt_cnn_is_depthwise(detail):
    """Return True when a Conv2d entry has in_channels == out_channels == groups != 1."""
    return detail["in_channels"] == detail["out_channels"] == detail["groups"] != 1


def _opt_cnn_layer_params(n):
    """Return the 'B<n>, SB<n>, MV<n>, SV<n>' argument tail used by the dense-layer calls."""
    return f'B{n}, SB{n}, MV{n}, SV{n}'


def _opt_cnn_preamble(name, input, cnn_details, int_weights):
    """Return the #include / #define header plus the opening line of the network function."""
    out = [
        '#include "simple_system_common.h"\n',
        '#include "cnn_weights.h"\n',
        '#include "fully_connected_opt.h"\n',
        '#include "ibex_cnn_params.h"\n',
        '#include "ibex_inputs.h"\n',
        '#include "conv2d_opt.h"\n',
    ]
    # The depthwise header is only pulled in when a non-final Conv2d layer needs it.
    if any(d["layer_type"] == "Conv2d" and _opt_cnn_is_depthwise(d)
           for d in cnn_details[:-1]):
        out.append('#include "dws_conv_opt.h"\n')
    out.append('\n')

    in_shape = np.shape(input)
    out.append(f'#define IMG_SZ {in_shape[2]}\n')
    out.append(f'#define NUM_FIL0 {in_shape[1]}\n\n')

    # Weights with 3 or 4 dimensions are treated as convolution filters.
    conv_weights = [w for w in int_weights if len(np.shape(w)) in (3, 4)]
    out.extend(f'#define FILTER{n} {w.shape[2]}\n' for n, w in enumerate(conv_weights, 1))
    out.append('\n')
    out.extend(f'#define NUM_FIL{n} {w.shape[0]}\n' for n, w in enumerate(conv_weights, 1))
    out.append('\n')

    conv_layers = [d for d in cnn_details if d["layer_type"] == "Conv2d"]
    out.extend(f'#define STRIDE{n} {d["stride"][0]}\n' for n, d in enumerate(conv_layers, 1))
    out.append('\n')

    for n, d in enumerate(conv_layers, 1):
        padding = d["padding"]
        if padding == 'same':
            pad = (d["kernel_size"][0] - 1) // 2
        elif padding == 'valid':
            pad = 0
        else:
            # Explicit padding: only the first entry is used, for both axes.
            pad = padding[0]
        out.append(f'#define PAD_TB{n} {pad}\n')
        out.append(f'#define PAD_LR{n} {pad}\n')
        out.append('\n')

    pool_layers = [d for d in cnn_details if d["layer_type"] in _OPT_CNN_POOL_TYPES]
    for n, d in enumerate(pool_layers, 1):
        out.append(f'#define POOL_STRIDE{n} {d["stride"]}\n')
        out.append(f'#define POOL_SIZE{n} {d["kernel_size"]}\n')
        out.append('\n')

    # Every 2-D weight except the last entry of int_weights is a hidden dense layer;
    # the last entry defines OUT_DIM.
    dense_weights = [w for w in int_weights[:-1] if len(np.shape(w)) == 2]
    out.extend(f'#define DENSE_DIM{n} {w.shape[0]}\n' for n, w in enumerate(dense_weights, 1))
    out.append(f'#define OUT_DIM {int_weights[-1].shape[0]}\n\n')
    out.append('#define SAMPLES 1\nint outs[SAMPLES][OUT_DIM];\n\n')
    out.append('void ' + name + '() {\n\n')
    return out


def _opt_cnn_shape_decls(cnn_details):
    """Return the dout/hout/wout declarations per layer, ending with flatten_dim."""
    out = []
    conv_no = 1
    pool_no = 1
    for idx, detail in enumerate(cnn_details, 1):
        kind = detail["layer_type"]
        if kind == "Conv2d":
            out.append(f'\tint dout{idx} = NUM_FIL{conv_no};\n')
            if idx == 1:
                # The first layer reads the input image rather than a previous output.
                out.append('\tint hout1 = ((IMG_SZ - FILTER1 + 2 * PAD_TB1)/STRIDE1) + 1;\n')
                out.append('\tint wout1 = ((IMG_SZ - FILTER1 + 2 * PAD_LR1)/STRIDE1) + 1;\n')
            else:
                out.append(f'\tint hout{idx} = ((hout{idx - 1} - FILTER{conv_no}'
                           f'+ 2 * PAD_TB{conv_no})/STRIDE{conv_no})+1;\n')
                out.append(f'\tint wout{idx} = ((wout{idx - 1} - FILTER{conv_no}'
                           f'+ 2 * PAD_LR{conv_no})/STRIDE{conv_no})+1;\n')
            conv_no += 1
        elif kind in _OPT_CNN_POOL_TYPES:
            out.append(f'\tint dout{idx} = dout{idx - 1};\n')
            out.append(f'\tint hout{idx} = hout{idx - 1}/POOL_STRIDE{pool_no};\n')
            out.append(f'\tint wout{idx} = wout{idx - 1}/POOL_STRIDE{pool_no};\n')
            pool_no += 1
        elif kind == "Linear":
            # The first Linear layer ends the spatial part of the network.
            out.append(f'\tint flatten_dim = dout{idx - 1} * hout{idx - 1} * wout{idx - 1};\n')
            break
        out.append('\n')
    out.append('\n')
    return out


def _opt_cnn_buffer_decls(cnn_details):
    """Return the declarations of the input buffer and every per-layer array."""
    out = [
        '\tint in[IMG_SZ][IMG_SZ][NUM_FIL0];\n',
        '\tint inp_dim[3] = {IMG_SZ, IMG_SZ, NUM_FIL0};\n\n',
    ]
    conv_no = 1
    dense_no = 1
    flattened = False
    for idx, detail in enumerate(cnn_details, 1):
        kind = detail["layer_type"]
        if kind == "Conv2d":
            out.append(f'\tint out{idx}[hout{idx}][wout{idx}][dout{idx}];\n')
            out.append(f'\tint pad_{idx}[4] = {{PAD_TB{conv_no}, PAD_TB{conv_no}, '
                       f'PAD_LR{conv_no}, PAD_LR{conv_no}}};\n')
            out.append(f'\tint outp_dim{idx}[3] = {{hout{idx}, wout{idx}, dout{idx}}};\n')
            out.append(f'\tint f_dim{idx}[4] = {{NUM_FIL{conv_no}, FILTER{conv_no}, '
                       f'FILTER{conv_no}, NUM_FIL{conv_no - 1}}};\n')
            conv_no += 1
        elif kind in _OPT_CNN_POOL_TYPES:
            out.append(f'\tint out{idx}[hout{idx}][wout{idx}][dout{idx}];\n')
            out.append(f'\tint outp_dim{idx}[3] = {{hout{idx}, wout{idx}, dout{idx}}};\n')
        elif kind == "Linear":
            if not flattened:
                # The first Linear slot holds the flattened feature map.
                out.append(f'\tint out{idx}[flatten_dim];')
                flattened = True
            else:
                out.append(f'\tint out{idx}[DENSE_DIM{dense_no}];')
                dense_no += 1
        out.append('\n')
    return out


def _opt_cnn_layer_calls(input, cnn_details, optimal_config):
    """Return the kernel calls for every layer, including the final output layer."""
    out = []
    idx = 1        # index of the outN / outp_dimN buffer being written
    cfg_no = 0     # position in optimal_config
    conv_no = 1
    pool_no = 1
    dense_no = 1
    flattened = False
    for detail in cnn_details[:-1]:
        kind = detail["layer_type"]
        if kind == "Conv2d":
            if _opt_cnn_is_depthwise(detail):
                kernel = 'dw_conv_opt'
            elif detail["kernel_size"][0] == 1:
                kernel = f'pw_conv_{optimal_config[cfg_no]}bits'
            else:
                kernel = f'conv2_{optimal_config[cfg_no]}bits'
            if idx == 1:
                if np.shape(input)[1] == 1:
                    kernel += '_1ch'
                out.append(f'\t\t{kernel}(inp_dim, f_dim1, outp_dim1, in, F1, B1, '
                           'out1, STRIDE1, pad_1, SB1, MV1, SV1);')
            else:
                out.append(f'\t\t{kernel}(outp_dim{idx - 1}, f_dim{idx}, outp_dim{idx}, '
                           f'out{idx - 1}, F{conv_no}, B{conv_no}, out{idx}, '
                           f'STRIDE{conv_no}, pad_{idx}, SB{conv_no}, '
                           f'MV{conv_no}, SV{conv_no});')
            cfg_no += 1
            conv_no += 1
        elif kind in _OPT_CNN_POOL_TYPES:
            pool_fn = 'maxpool2_compressed' if kind == "MaxPool2d" else 'avgpool2_compressed'
            out.append(f'\t\t{pool_fn}(outp_dim{idx - 1}, outp_dim{idx}, out{idx - 1}, '
                       f'out{idx}, POOL_SIZE{pool_no}, POOL_STRIDE{pool_no});\n')
            pool_no += 1
        elif kind == "Linear":
            if not flattened:
                out.append(f'\t\tflatten(outp_dim{idx - 1}, out{idx - 1}, out{idx});\n\n')
                # flatten() consumed one output slot; the dense layer writes the next.
                idx += 1
                dims = 'flatten_dim, DENSE_DIM1, W1'
                flattened = True
            else:
                dims = f'DENSE_DIM{dense_no - 1}, DENSE_DIM{dense_no}, W{dense_no}'
            out.append(f'\t\tmlp_layer_{optimal_config[cfg_no]}bits(out{idx - 1}, out{idx}, '
                       f'{dims}, {_opt_cnn_layer_params(conv_no + dense_no - 1)});\n')
            cfg_no += 1
            dense_no += 1
        out.append('\n')
        idx += 1

    # The last entry of cnn_details is always emitted as the dense output layer.
    tail = _opt_cnn_layer_params(conv_no + dense_no - 1)
    if not flattened:
        out.append(f'\t\tflatten(outp_dim{idx - 1}, out{idx - 1}, out{idx});\n\n')
        idx += 1
        out.append(f'\t\tmlp_layer_{optimal_config[cfg_no]}bits(out{idx - 1}, out, '
                   f'flatten_dim, OUT_DIM, W1, {tail});\n')
    else:
        out.append(f'\t\tmlp_layer_{optimal_config[-1]}bits(out{idx - 1}, out, '
                   f'DENSE_DIM{dense_no - 1}, OUT_DIM, W{dense_no}, {tail});\n')
    return out


def _opt_cnn_epilogue(name):
    """Return the result printout, the closing braces and the C main() function."""
    return [
        '\n\t\tpcount_enable(0);\n\n',
        '\t\tputs("Output Layer Values:\\n");\n',
        '\t\tfor(int i = 0; i < OUT_DIM; i++) {\n',
        '\t\t\tputhex((out[i] & 0xFF000000) >> 24);\n',
        '\t\t\tputs(" ");\n',
        '\t\t\tputhex((out[i] & 0xFF0000) >> 16);\n',
        '\t\t\tputs(" ");\n',
        '\t\t\tputhex((out[i] & 0xFF00) >> 8);\n',
        '\t\t\tputs(" ");\n',
        '\t\t\tputhex(out[i] & 0xFF);\n',
        '\t\t\tputs("\\n");\n',
        '\t\t}\n',
        '\t}\n',
        '}\n\n',
        'int main(void) {\n\n',
        '\tpcount_enable(0);\n\n',
        '\t' + name + '();\n\n',
        '\treturn 0;\n}',
    ]


def generate_opt_c_code_cnn(path, name, input, cnn_details, int_weights, optimal_config):
    """Write ``<path>/<name>.c``, a C program that runs the CNN with the optimised kernels.

    The whole source text is assembled in memory first and written in a single
    call, so an error while generating (e.g. an empty ``int_weights`` or a layer
    dict missing a key) no longer truncates an existing file or leaves a
    half-written one behind.

    Args:
        path: Directory that receives the generated file.
        name: Base name of the file; also used as the name of the generated
            C function that ``main`` calls.
        input: Array-like whose shape supplies ``NUM_FIL0`` (axis 1) and
            ``IMG_SZ`` (axis 2).
        cnn_details: Sequence of per-layer dicts. Each has a ``"layer_type"``
            of ``"Conv2d"``, ``"MaxPool2d"``, ``"AvgPool2d"`` or ``"Linear"``.
            Conv2d entries are read for ``in_channels``, ``out_channels``,
            ``groups``, ``kernel_size``, ``stride`` and ``padding``; pooling
            entries for ``stride`` and ``kernel_size``. The last entry is
            emitted as the dense output layer.
        int_weights: Sequence of weight arrays. 3-D/4-D entries are treated as
            convolution filters, 2-D entries as dense layers; the last entry
            defines ``OUT_DIM``.
        optimal_config: Sequence indexed once per Conv2d/Linear layer; each
            value is spliced into the kernel name as ``..._<value>bits``.

    Returns:
        None.
    """
    chunks = []
    chunks.extend(_opt_cnn_preamble(name, input, cnn_details, int_weights))
    chunks.extend(_opt_cnn_shape_decls(cnn_details))
    chunks.extend(_opt_cnn_buffer_decls(cnn_details))
    # Output buffer, sample loop header and the copy of one input sample into `in`.
    chunks.append('\n\tint out[OUT_DIM];\n\n\tfor (int iter = 0; iter < SAMPLES; iter++){\n\n')
    chunks.append('\t\tfor(int i = 0; i < IMG_SZ; i++){\n')
    chunks.append('\t\t\tfor(int j = 0; j < IMG_SZ; j++){\n')
    chunks.append('\t\t\t\tfor(int k = 0; k < NUM_FIL0; k++){\n')
    chunks.append('\t\t\t\t\tin[i][j][k] = input[i][j][k][iter];\n')
    chunks.append('\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tpcount_enable(1);\n\n')
    chunks.extend(_opt_cnn_layer_calls(input, cnn_details, optimal_config))
    chunks.extend(_opt_cnn_epilogue(name))
    source = ''.join(chunks)

    # Open the file only once generation has succeeded.
    with open(path + '/' + name + '.c', 'w') as f:
        f.write(source)
    return