Adding new features

This commit is contained in:
alexmr09
2024-07-23 13:00:49 +03:00
parent 9e044fd7fc
commit 745cc4ed6d
28 changed files with 33632 additions and 106 deletions
@@ -0,0 +1,15 @@
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Generate a baremetal application.
#
# PROGRAM is the name of the program; $(PROGRAM).c will be added as a
# source file.
PROGRAM = cifar10_dws_cnn
# Directory containing this Makefile (the last entry of MAKEFILE_LIST at this
# point, resolved to a real path), so the include below does not depend on
# the directory make is invoked from.
PROGRAM_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
# Any extra source files to include in the build. Use the upper case .S
# extension for assembly files.
EXTRA_SRCS :=
# Shared build rules, located two directories above this one.
include ${PROGRAM_DIR}/../../common/common.mk
@@ -0,0 +1,298 @@
#include "simple_system_common.h"
#include "cnn_weights.h"
#include "fully_connected_opt.h"
#include "ibex_cnn_params.h"
#include "ibex_inputs.h"
#include "conv2d_opt.h"
#include "dws_conv_opt.h"
/* Input image is IMG_SZ x IMG_SZ with NUM_FIL0 channels.
   NOTE(review): NUM_FIL0 is 1 here; presumably the colour channels are packed
   into one word for the optimised kernels - confirm against ibex_inputs.h. */
#define IMG_SZ 32
#define NUM_FIL0 1
/* Square kernel size of convolution n (3 for depthwise, 1 for pointwise). */
#define FILTER1 3
#define FILTER2 1
#define FILTER3 3
#define FILTER4 1
#define FILTER5 3
#define FILTER6 1
#define FILTER7 3
#define FILTER8 1
#define FILTER9 3
#define FILTER10 1
#define FILTER11 3
#define FILTER12 1
/* Filter count of convolution n; used as that layer's output depth. */
#define NUM_FIL1 1
#define NUM_FIL2 16
#define NUM_FIL3 16
#define NUM_FIL4 16
#define NUM_FIL5 16
#define NUM_FIL6 32
#define NUM_FIL7 32
#define NUM_FIL8 32
#define NUM_FIL9 32
#define NUM_FIL10 64
#define NUM_FIL11 64
#define NUM_FIL12 64
/* Stride of convolution n. */
#define STRIDE1 1
#define STRIDE2 1
#define STRIDE3 1
#define STRIDE4 1
#define STRIDE5 1
#define STRIDE6 1
#define STRIDE7 1
#define STRIDE8 1
#define STRIDE9 1
#define STRIDE10 1
#define STRIDE11 1
#define STRIDE12 1
/* Padding of convolution n: PAD_TB is applied top and bottom, PAD_LR left
   and right. */
#define PAD_TB1 1
#define PAD_LR1 1
#define PAD_TB2 0
#define PAD_LR2 0
#define PAD_TB3 1
#define PAD_LR3 1
#define PAD_TB4 0
#define PAD_LR4 0
#define PAD_TB5 1
#define PAD_LR5 1
#define PAD_TB6 0
#define PAD_LR6 0
#define PAD_TB7 1
#define PAD_LR7 1
#define PAD_TB8 0
#define PAD_LR8 0
#define PAD_TB9 1
#define PAD_LR9 1
#define PAD_TB10 0
#define PAD_LR10 0
#define PAD_TB11 1
#define PAD_LR11 1
#define PAD_TB12 0
#define PAD_LR12 0
/* Window size and stride of the three max-pooling stages. */
#define POOL_STRIDE1 2
#define POOL_SIZE1 2
#define POOL_STRIDE2 2
#define POOL_SIZE2 2
#define POOL_STRIDE3 2
#define POOL_SIZE3 2
/* Number of words produced by the final layer, and number of input samples
   to run. */
#define OUT_DIM 3
#define SAMPLES 1
/* Per-sample output storage; not referenced by the code visible in this
   file. */
int outs[SAMPLES][OUT_DIM];
void cifar10_dws_cnn() {
int dout1 = NUM_FIL1;
int hout1 = ((IMG_SZ - FILTER1 + 2 * PAD_TB1)/STRIDE1) + 1;
int wout1 = ((IMG_SZ - FILTER1 + 2 * PAD_LR1)/STRIDE1) + 1;
int dout2 = NUM_FIL2;
int hout2 = ((hout1 - FILTER2+ 2 * PAD_TB2)/STRIDE2)+1;
int wout2 = ((wout1 - FILTER2+ 2 * PAD_LR2)/STRIDE2)+1;
int dout3 = NUM_FIL3;
int hout3 = ((hout2 - FILTER3+ 2 * PAD_TB3)/STRIDE3)+1;
int wout3 = ((wout2 - FILTER3+ 2 * PAD_LR3)/STRIDE3)+1;
int dout4 = NUM_FIL4;
int hout4 = ((hout3 - FILTER4+ 2 * PAD_TB4)/STRIDE4)+1;
int wout4 = ((wout3 - FILTER4+ 2 * PAD_LR4)/STRIDE4)+1;
int dout5 = dout4;
int hout5 = hout4/POOL_STRIDE1;
int wout5 = wout4/POOL_STRIDE1;
int dout6 = NUM_FIL5;
int hout6 = ((hout5 - FILTER5+ 2 * PAD_TB5)/STRIDE5)+1;
int wout6 = ((wout5 - FILTER5+ 2 * PAD_LR5)/STRIDE5)+1;
int dout7 = NUM_FIL6;
int hout7 = ((hout6 - FILTER6+ 2 * PAD_TB6)/STRIDE6)+1;
int wout7 = ((wout6 - FILTER6+ 2 * PAD_LR6)/STRIDE6)+1;
int dout8 = NUM_FIL7;
int hout8 = ((hout7 - FILTER7+ 2 * PAD_TB7)/STRIDE7)+1;
int wout8 = ((wout7 - FILTER7+ 2 * PAD_LR7)/STRIDE7)+1;
int dout9 = NUM_FIL8;
int hout9 = ((hout8 - FILTER8+ 2 * PAD_TB8)/STRIDE8)+1;
int wout9 = ((wout8 - FILTER8+ 2 * PAD_LR8)/STRIDE8)+1;
int dout10 = dout9;
int hout10 = hout9/POOL_STRIDE2;
int wout10 = wout9/POOL_STRIDE2;
int dout11 = NUM_FIL9;
int hout11 = ((hout10 - FILTER9+ 2 * PAD_TB9)/STRIDE9)+1;
int wout11 = ((wout10 - FILTER9+ 2 * PAD_LR9)/STRIDE9)+1;
int dout12 = NUM_FIL10;
int hout12 = ((hout11 - FILTER10+ 2 * PAD_TB10)/STRIDE10)+1;
int wout12 = ((wout11 - FILTER10+ 2 * PAD_LR10)/STRIDE10)+1;
int dout13 = NUM_FIL11;
int hout13 = ((hout12 - FILTER11+ 2 * PAD_TB11)/STRIDE11)+1;
int wout13 = ((wout12 - FILTER11+ 2 * PAD_LR11)/STRIDE11)+1;
int dout14 = NUM_FIL12;
int hout14 = ((hout13 - FILTER12+ 2 * PAD_TB12)/STRIDE12)+1;
int wout14 = ((wout13 - FILTER12+ 2 * PAD_LR12)/STRIDE12)+1;
int dout15 = dout14;
int hout15 = hout14/POOL_STRIDE3;
int wout15 = wout14/POOL_STRIDE3;
int flatten_dim = dout15 * hout15 * wout15;
int in[IMG_SZ][IMG_SZ][NUM_FIL0];
int inp_dim[3] = {IMG_SZ, IMG_SZ, NUM_FIL0};
int out1[hout1][wout1][dout1];
int pad_1[4] = {PAD_TB1, PAD_TB1, PAD_LR1, PAD_LR1};
int outp_dim1[3] = {hout1, wout1, dout1};
int f_dim1[4] = {NUM_FIL1, FILTER1, FILTER1, NUM_FIL0};
int out2[hout2][wout2][dout2];
int pad_2[4] = {PAD_TB2, PAD_TB2, PAD_LR2, PAD_LR2};
int outp_dim2[3] = {hout2, wout2, dout2};
int f_dim2[4] = {NUM_FIL2, FILTER2, FILTER2, NUM_FIL1};
int out3[hout3][wout3][dout3];
int pad_3[4] = {PAD_TB3, PAD_TB3, PAD_LR3, PAD_LR3};
int outp_dim3[3] = {hout3, wout3, dout3};
int f_dim3[4] = {NUM_FIL3, FILTER3, FILTER3, NUM_FIL2};
int out4[hout4][wout4][dout4];
int pad_4[4] = {PAD_TB4, PAD_TB4, PAD_LR4, PAD_LR4};
int outp_dim4[3] = {hout4, wout4, dout4};
int f_dim4[4] = {NUM_FIL4, FILTER4, FILTER4, NUM_FIL3};
int out5[hout5][wout5][dout5];
int outp_dim5[3] = {hout5, wout5, dout5};
int out6[hout6][wout6][dout6];
int pad_6[4] = {PAD_TB5, PAD_TB5, PAD_LR5, PAD_LR5};
int outp_dim6[3] = {hout6, wout6, dout6};
int f_dim6[4] = {NUM_FIL5, FILTER5, FILTER5, NUM_FIL4};
int out7[hout7][wout7][dout7];
int pad_7[4] = {PAD_TB6, PAD_TB6, PAD_LR6, PAD_LR6};
int outp_dim7[3] = {hout7, wout7, dout7};
int f_dim7[4] = {NUM_FIL6, FILTER6, FILTER6, NUM_FIL5};
int out8[hout8][wout8][dout8];
int pad_8[4] = {PAD_TB7, PAD_TB7, PAD_LR7, PAD_LR7};
int outp_dim8[3] = {hout8, wout8, dout8};
int f_dim8[4] = {NUM_FIL7, FILTER7, FILTER7, NUM_FIL6};
int out9[hout9][wout9][dout9];
int pad_9[4] = {PAD_TB8, PAD_TB8, PAD_LR8, PAD_LR8};
int outp_dim9[3] = {hout9, wout9, dout9};
int f_dim9[4] = {NUM_FIL8, FILTER8, FILTER8, NUM_FIL7};
int out10[hout10][wout10][dout10];
int outp_dim10[3] = {hout10, wout10, dout10};
int out11[hout11][wout11][dout11];
int pad_11[4] = {PAD_TB9, PAD_TB9, PAD_LR9, PAD_LR9};
int outp_dim11[3] = {hout11, wout11, dout11};
int f_dim11[4] = {NUM_FIL9, FILTER9, FILTER9, NUM_FIL8};
int out12[hout12][wout12][dout12];
int pad_12[4] = {PAD_TB10, PAD_TB10, PAD_LR10, PAD_LR10};
int outp_dim12[3] = {hout12, wout12, dout12};
int f_dim12[4] = {NUM_FIL10, FILTER10, FILTER10, NUM_FIL9};
int out13[hout13][wout13][dout13];
int pad_13[4] = {PAD_TB11, PAD_TB11, PAD_LR11, PAD_LR11};
int outp_dim13[3] = {hout13, wout13, dout13};
int f_dim13[4] = {NUM_FIL11, FILTER11, FILTER11, NUM_FIL10};
int out14[hout14][wout14][dout14];
int pad_14[4] = {PAD_TB12, PAD_TB12, PAD_LR12, PAD_LR12};
int outp_dim14[3] = {hout14, wout14, dout14};
int f_dim14[4] = {NUM_FIL12, FILTER12, FILTER12, NUM_FIL11};
int out15[hout15][wout15][dout15];
int outp_dim15[3] = {hout15, wout15, dout15};
int out16[flatten_dim];
int out[OUT_DIM];
for (int iter = 0; iter < SAMPLES; iter++){
for(int i = 0; i < IMG_SZ; i++){
for(int j = 0; j < IMG_SZ; j++){
for(int k = 0; k < NUM_FIL0; k++){
in[i][j][k] = input[i][j][k][iter];
}
}
}
pcount_enable(1);
dw_conv_opt_1ch(inp_dim, f_dim1, outp_dim1, in, F1, B1, out1, STRIDE1, pad_1, SB1, MV1, SV1);
pw_conv_2bits(outp_dim1, f_dim2, outp_dim2, out1, F2, B2, out2, STRIDE2, pad_2, SB2, MV2, SV2);
dw_conv_opt(outp_dim2, f_dim3, outp_dim3, out2, F3, B3, out3, STRIDE3, pad_3, SB3, MV3, SV3);
pw_conv_8bits(outp_dim3, f_dim4, outp_dim4, out3, F4, B4, out4, STRIDE4, pad_4, SB4, MV4, SV4);
maxpool2_compressed(outp_dim4, outp_dim5, out4, out5, POOL_SIZE1, POOL_STRIDE1);
dw_conv_opt(outp_dim5, f_dim6, outp_dim6, out5, F5, B5, out6, STRIDE5, pad_6, SB5, MV5, SV5);
pw_conv_2bits(outp_dim6, f_dim7, outp_dim7, out6, F6, B6, out7, STRIDE6, pad_7, SB6, MV6, SV6);
dw_conv_opt(outp_dim7, f_dim8, outp_dim8, out7, F7, B7, out8, STRIDE7, pad_8, SB7, MV7, SV7);
pw_conv_8bits(outp_dim8, f_dim9, outp_dim9, out8, F8, B8, out9, STRIDE8, pad_9, SB8, MV8, SV8);
maxpool2_compressed(outp_dim9, outp_dim10, out9, out10, POOL_SIZE2, POOL_STRIDE2);
dw_conv_opt(outp_dim10, f_dim11, outp_dim11, out10, F9, B9, out11, STRIDE9, pad_11, SB9, MV9, SV9);
pw_conv_8bits(outp_dim11, f_dim12, outp_dim12, out11, F10, B10, out12, STRIDE10, pad_12, SB10, MV10, SV10);
dw_conv_opt(outp_dim12, f_dim13, outp_dim13, out12, F11, B11, out13, STRIDE11, pad_13, SB11, MV11, SV11);
pw_conv_8bits(outp_dim13, f_dim14, outp_dim14, out13, F12, B12, out14, STRIDE12, pad_14, SB12, MV12, SV12);
maxpool2_compressed(outp_dim14, outp_dim15, out14, out15, POOL_SIZE3, POOL_STRIDE3);
flatten(outp_dim15, out15, out16);
mlp_layer_8bits(out16, out, flatten_dim, OUT_DIM, W1, B13, SB13, MV13, SV13);
pcount_enable(0);
puts("Output Layer Values:\n");
for(int i = 0; i < OUT_DIM; i++) {
puthex((out[i] & 0xFF000000) >> 24);
puts(" ");
puthex((out[i] & 0xFF0000) >> 16);
puts(" ");
puthex((out[i] & 0xFF00) >> 8);
puts(" ");
puthex(out[i] & 0xFF);
puts("\n");
}
}
}
/* Entry point: run the CIFAR-10 depthwise-separable network once. */
int main(void) {
    /* Start with the performance counters disabled; cifar10_dws_cnn()
       enables them itself around the layer calls. */
    pcount_enable(0);

    cifar10_dws_cnn();

    return 0;
}
File diff suppressed because one or more lines are too long
@@ -0,0 +1,84 @@
#ifndef IBEX_CNN_PARAMS_H
#define IBEX_CNN_PARAMS_H
/* MVn: value passed as the second-to-last argument of the layer-n kernel
   call (see the SBn, MVn, SVn argument triples in the network source).
   NOTE(review): these look like several small multipliers packed into one
   32-bit word - confirm against the *_opt kernels. */
#define MV1 1263225675
#define MV2 1886417008
#define MV3 1381126738
#define MV4 1263225675
#define MV5 1465341783
#define MV6 1280068684
#define MV7 1869573999
#define MV8 1600085855
#define MV9 1600085855
#define MV10 1970632053
#define MV11 1145324612
#define MV12 1532713819
#define MV13 1296911693
/* SVn: value passed as the last argument of the layer-n kernel call.
   NOTE(review): SV10 and SV13 are larger than 2147483647, so they do not fit
   a 32-bit signed int; if the kernels take this argument as int the
   conversion is implementation-defined - confirm the parameter type or add
   an unsigned suffix. */
#define SV1 2029118401
#define SV2 946921921
#define SV3 2029118401
#define SV4 1893843841
#define SV5 1893843841
#define SV6 1082196481
#define SV7 2029118401
#define SV8 2029118401
#define SV9 2029118401
#define SV10 2164392961
#define SV11 2029118401
#define SV12 2029118401
#define SV13 2840765761
static const int SB1[1] = {
1
};
static const int SB2[16] = {
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
};
static const int SB3[16] = {
135266305, 1048577, 1, 8257, 8193, 135274497, 135266369, 8193, 1, 8193, 65, 1, 134217729, 1, 1, 134225921
};
static const int SB4[16] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1048576, 0, 0, 0, 0
};
static const int SB5[16] = {
134234177, 136323073, 135282689, 136331393, 270549121, 136331329, 136331329, 136323201, 270540929, 270549121, 270540801, 270532737, 2105473, 8321, 2105345, 2113601
};
static const int SB6[32] = {
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
};
static const int SB7[32] = {
402669825, 540041217, 537927937, 4194561, 272638209, 537952513, 540049665, 541098049, 404783361, 405831873, 2113793, 536879361, 403726593, 540049665, 271614209, 541089921, 272662721, 271614209, 406880513, 541081793, 272662785, 538992897, 272662785, 403726593, 540033281, 540049601, 1081537, 403726337, 269517057, 272646401, 3178625, 539001089
};
static const int SB8[32] = {
270565504, 536895744, 406880512, 541090048, 268435712, 406864128, 540049408, 541065216, 406872320, 541090048, 540049600, 405823552, 540041216, 4227264, 540049664, 271589632, 537952320, 4219008, 540033216, 540041408, 541090048, 540049600, 405823552, 405823680, 405823744, 541081856, 406880448, 402677888, 271606016, 138445056, 403726400, 405831680
};
static const int SB9[32] = {
677380417, 542146817, 806404417, 677421249, 677429569, 810598721, 677421185, 677429441, 408977665, 675332353, 536903937, 675283329, 675307905, 677429505, 811639105, 811639169, 809549953, 407945601, 676380929, 676380993, 810582273, 810598721, 677429569, 675299649, 541106433, 811630785, 675316097, 405848449, 811630913, 811630977, 806404225, 677421441
};
static const int SB10[64] = {
139501824, 676364608, 673227072, 810582336, 405840256, 408969536, 541114624, 810590528, 810557760, 675307776, 676331840, 811630848, 408994112, 676381056, 810598720, 537952576, 541114432, 674267392, 542162944, 677429568, 408985920, 677429504, 542155136, 676372864, 811639104, 407937344, 542146880, 811630976, 406896832, 675332416, 675316096, 674275712, 677421120, 810590528, 540066112, 408969536, 811647232, 407920960, 273727616, 677421312, 810582272, 676340096, 6332736, 671138176, 677421376, 677429568, 676372544, 540066176, 676372800, 536912192, 406872384, 676372800, 805347712, 810590464, 5284160, 274776448, 677413248, 541089984, 674283520, 541106560, 810598720, 137412992, 810598528, 811639168
};
static const int SB11[64] = {
810607041, 678486337, 945865089, 810607041, 673227201, 939565505, 946913729, 943767937, 946913729, 811647425, 944816449, 678478273, 811647361, 678478209, 812695937, 678453697, 943776065, 810598849, 944800129, 677437825, 678486401, 946913665, 946921793, 541114753, 945873345, 542163265, 544260417, 544244033, 939548801, 945865025, 678478145, 944824577, 812704129, 5300673, 946889089, 676389057, 941679041, 675340609, 809558465, 273735937, 678461889, 678478145, 812695873, 676381121, 678486465, 671138113, 810557825, 945856961, 944775489, 946921665, 946897345, 809533889, 812695937, 812687809, 812696001, 945865089, 676389249, 677413249, 945840449, 946913473, 943767937, 675332353, 676381121, 811647425
};
static const int SB12[64] = {
810598784, 811647296, 677404992, 809550144, 677429632, 811647296, 810582400, 675332480, 676381056, 810598720, 542163200, 543211840, 809533440, 673235328, 807444672, 675316096, 810582016, 541114560, 677396800, 810590528, 676381056, 138453376, 809550208, 810598784, 676372800, 810598784, 675332352, 542163328, 674242944, 677421440, 404799808, 542163328, 809542016, 809542016, 810598784, 139501952, 674283712, 541114752, 811622784, 676372672, 542155136, 543211904, 811639168, 811630912, 809542016, 676356480, 673218944, 811630976, 810598720, 810582208, 675307584, 810598784, 543203648, 542163264, 677404672, 811630784, 810590592, 810582400, 674275712, 810590528, 541098304, 675332416, 539001088, 811622784
};
static const int SB13[3] = {
273736128, 946913728, 675282944
};
#endif /* IBEX_CNN_PARAMS_H */
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,15 @@
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Generate a baremetal application.
#
# PROGRAM is the name of the program; $(PROGRAM).c will be added as a
# source file.
PROGRAM = cifar10_dws_cnn
# Directory containing this Makefile (the last entry of MAKEFILE_LIST at this
# point, resolved to a real path), so the include below does not depend on
# the directory make is invoked from.
PROGRAM_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
# Any extra source files to include in the build. Use the upper case .S
# extension for assembly files.
EXTRA_SRCS :=
# Shared build rules, located two directories above this one.
include ${PROGRAM_DIR}/../../common/common.mk
@@ -0,0 +1,292 @@
#include "simple_system_common.h"
#include "cnn_weights.h"
#include "fully_connected.h"
#include "ibex_cnn_params.h"
#include "ibex_inputs.h"
#include "conv2d.h"
#include "dws_conv.h"
/* Input image is IMG_SZ x IMG_SZ with NUM_FIL0 channels. */
#define IMG_SZ 32
#define NUM_FIL0 3
/* Square kernel size of convolution n (3 for depthwise, 1 for pointwise). */
#define FILTER1 3
#define FILTER2 1
#define FILTER3 3
#define FILTER4 1
#define FILTER5 3
#define FILTER6 1
#define FILTER7 3
#define FILTER8 1
#define FILTER9 3
#define FILTER10 1
#define FILTER11 3
#define FILTER12 1
/* Filter count of convolution n; used as that layer's output depth. */
#define NUM_FIL1 3
#define NUM_FIL2 64
#define NUM_FIL3 64
#define NUM_FIL4 64
#define NUM_FIL5 64
#define NUM_FIL6 128
#define NUM_FIL7 128
#define NUM_FIL8 128
#define NUM_FIL9 128
#define NUM_FIL10 256
#define NUM_FIL11 256
#define NUM_FIL12 256
/* Stride of convolution n. */
#define STRIDE1 1
#define STRIDE2 1
#define STRIDE3 1
#define STRIDE4 1
#define STRIDE5 1
#define STRIDE6 1
#define STRIDE7 1
#define STRIDE8 1
#define STRIDE9 1
#define STRIDE10 1
#define STRIDE11 1
#define STRIDE12 1
/* Padding of convolution n: PAD_TB is applied top and bottom, PAD_LR left
   and right. */
#define PAD_TB1 1
#define PAD_LR1 1
#define PAD_TB2 0
#define PAD_LR2 0
#define PAD_TB3 1
#define PAD_LR3 1
#define PAD_TB4 0
#define PAD_LR4 0
#define PAD_TB5 1
#define PAD_LR5 1
#define PAD_TB6 0
#define PAD_LR6 0
#define PAD_TB7 1
#define PAD_LR7 1
#define PAD_TB8 0
#define PAD_LR8 0
#define PAD_TB9 1
#define PAD_LR9 1
#define PAD_TB10 0
#define PAD_LR10 0
#define PAD_TB11 1
#define PAD_LR11 1
#define PAD_TB12 0
#define PAD_LR12 0
/* Window size and stride of the three max-pooling stages. */
#define POOL_STRIDE1 2
#define POOL_SIZE1 2
#define POOL_STRIDE2 2
#define POOL_SIZE2 2
#define POOL_STRIDE3 2
#define POOL_SIZE3 2
/* Number of values produced by the final layer, and number of input samples
   to run. */
#define OUT_DIM 10
#define SAMPLES 1
/* Per-sample output storage; not referenced by the code visible in this
   file. */
int outs[SAMPLES][OUT_DIM];
void cifar10_dws_cnn() {
int dout1 = NUM_FIL1;
int hout1 = ((IMG_SZ - FILTER1 + 2 * PAD_TB1)/STRIDE1) + 1;
int wout1 = ((IMG_SZ - FILTER1 + 2 * PAD_LR1)/STRIDE1) + 1;
int dout2 = NUM_FIL2;
int hout2 = ((hout1 - FILTER2+ 2 * PAD_TB2)/STRIDE2)+1;
int wout2 = ((wout1 - FILTER2+ 2 * PAD_LR2)/STRIDE2)+1;
int dout3 = NUM_FIL3;
int hout3 = ((hout2 - FILTER3+ 2 * PAD_TB3)/STRIDE3)+1;
int wout3 = ((wout2 - FILTER3+ 2 * PAD_LR3)/STRIDE3)+1;
int dout4 = NUM_FIL4;
int hout4 = ((hout3 - FILTER4+ 2 * PAD_TB4)/STRIDE4)+1;
int wout4 = ((wout3 - FILTER4+ 2 * PAD_LR4)/STRIDE4)+1;
int dout5 = dout4;
int hout5 = hout4/POOL_STRIDE1;
int wout5 = wout4/POOL_STRIDE1;
int dout6 = NUM_FIL5;
int hout6 = ((hout5 - FILTER5+ 2 * PAD_TB5)/STRIDE5)+1;
int wout6 = ((wout5 - FILTER5+ 2 * PAD_LR5)/STRIDE5)+1;
int dout7 = NUM_FIL6;
int hout7 = ((hout6 - FILTER6+ 2 * PAD_TB6)/STRIDE6)+1;
int wout7 = ((wout6 - FILTER6+ 2 * PAD_LR6)/STRIDE6)+1;
int dout8 = NUM_FIL7;
int hout8 = ((hout7 - FILTER7+ 2 * PAD_TB7)/STRIDE7)+1;
int wout8 = ((wout7 - FILTER7+ 2 * PAD_LR7)/STRIDE7)+1;
int dout9 = NUM_FIL8;
int hout9 = ((hout8 - FILTER8+ 2 * PAD_TB8)/STRIDE8)+1;
int wout9 = ((wout8 - FILTER8+ 2 * PAD_LR8)/STRIDE8)+1;
int dout10 = dout9;
int hout10 = hout9/POOL_STRIDE2;
int wout10 = wout9/POOL_STRIDE2;
int dout11 = NUM_FIL9;
int hout11 = ((hout10 - FILTER9+ 2 * PAD_TB9)/STRIDE9)+1;
int wout11 = ((wout10 - FILTER9+ 2 * PAD_LR9)/STRIDE9)+1;
int dout12 = NUM_FIL10;
int hout12 = ((hout11 - FILTER10+ 2 * PAD_TB10)/STRIDE10)+1;
int wout12 = ((wout11 - FILTER10+ 2 * PAD_LR10)/STRIDE10)+1;
int dout13 = NUM_FIL11;
int hout13 = ((hout12 - FILTER11+ 2 * PAD_TB11)/STRIDE11)+1;
int wout13 = ((wout12 - FILTER11+ 2 * PAD_LR11)/STRIDE11)+1;
int dout14 = NUM_FIL12;
int hout14 = ((hout13 - FILTER12+ 2 * PAD_TB12)/STRIDE12)+1;
int wout14 = ((wout13 - FILTER12+ 2 * PAD_LR12)/STRIDE12)+1;
int dout15 = dout14;
int hout15 = hout14/POOL_STRIDE3;
int wout15 = wout14/POOL_STRIDE3;
int flatten_dim = dout15 * hout15 * wout15;
int in[IMG_SZ][IMG_SZ][NUM_FIL0];
int inp_dim[3] = {IMG_SZ, IMG_SZ, NUM_FIL0};
int out1[hout1][wout1][dout1];
int pad_1[4] = {PAD_TB1, PAD_TB1, PAD_LR1, PAD_LR1};
int outp_dim1[3] = {hout1, wout1, dout1};
int f_dim1[4] = {NUM_FIL1, FILTER1, FILTER1, NUM_FIL0};
int out2[hout2][wout2][dout2];
int pad_2[4] = {PAD_TB2, PAD_TB2, PAD_LR2, PAD_LR2};
int outp_dim2[3] = {hout2, wout2, dout2};
int f_dim2[4] = {NUM_FIL2, FILTER2, FILTER2, NUM_FIL1};
int out3[hout3][wout3][dout3];
int pad_3[4] = {PAD_TB3, PAD_TB3, PAD_LR3, PAD_LR3};
int outp_dim3[3] = {hout3, wout3, dout3};
int f_dim3[4] = {NUM_FIL3, FILTER3, FILTER3, NUM_FIL2};
int out4[hout4][wout4][dout4];
int pad_4[4] = {PAD_TB4, PAD_TB4, PAD_LR4, PAD_LR4};
int outp_dim4[3] = {hout4, wout4, dout4};
int f_dim4[4] = {NUM_FIL4, FILTER4, FILTER4, NUM_FIL3};
int out5[hout5][wout5][dout5];
int outp_dim5[3] = {hout5, wout5, dout5};
int out6[hout6][wout6][dout6];
int pad_6[4] = {PAD_TB5, PAD_TB5, PAD_LR5, PAD_LR5};
int outp_dim6[3] = {hout6, wout6, dout6};
int f_dim6[4] = {NUM_FIL5, FILTER5, FILTER5, NUM_FIL4};
int out7[hout7][wout7][dout7];
int pad_7[4] = {PAD_TB6, PAD_TB6, PAD_LR6, PAD_LR6};
int outp_dim7[3] = {hout7, wout7, dout7};
int f_dim7[4] = {NUM_FIL6, FILTER6, FILTER6, NUM_FIL5};
int out8[hout8][wout8][dout8];
int pad_8[4] = {PAD_TB7, PAD_TB7, PAD_LR7, PAD_LR7};
int outp_dim8[3] = {hout8, wout8, dout8};
int f_dim8[4] = {NUM_FIL7, FILTER7, FILTER7, NUM_FIL6};
int out9[hout9][wout9][dout9];
int pad_9[4] = {PAD_TB8, PAD_TB8, PAD_LR8, PAD_LR8};
int outp_dim9[3] = {hout9, wout9, dout9};
int f_dim9[4] = {NUM_FIL8, FILTER8, FILTER8, NUM_FIL7};
int out10[hout10][wout10][dout10];
int outp_dim10[3] = {hout10, wout10, dout10};
int out11[hout11][wout11][dout11];
int pad_11[4] = {PAD_TB9, PAD_TB9, PAD_LR9, PAD_LR9};
int outp_dim11[3] = {hout11, wout11, dout11};
int f_dim11[4] = {NUM_FIL9, FILTER9, FILTER9, NUM_FIL8};
int out12[hout12][wout12][dout12];
int pad_12[4] = {PAD_TB10, PAD_TB10, PAD_LR10, PAD_LR10};
int outp_dim12[3] = {hout12, wout12, dout12};
int f_dim12[4] = {NUM_FIL10, FILTER10, FILTER10, NUM_FIL9};
int out13[hout13][wout13][dout13];
int pad_13[4] = {PAD_TB11, PAD_TB11, PAD_LR11, PAD_LR11};
int outp_dim13[3] = {hout13, wout13, dout13};
int f_dim13[4] = {NUM_FIL11, FILTER11, FILTER11, NUM_FIL10};
int out14[hout14][wout14][dout14];
int pad_14[4] = {PAD_TB12, PAD_TB12, PAD_LR12, PAD_LR12};
int outp_dim14[3] = {hout14, wout14, dout14};
int f_dim14[4] = {NUM_FIL12, FILTER12, FILTER12, NUM_FIL11};
int out15[hout15][wout15][dout15];
int outp_dim15[3] = {hout15, wout15, dout15};
int out16[flatten_dim];
int out[OUT_DIM];
for (int iter = 0; iter < SAMPLES; iter++){
for(int i = 0; i < IMG_SZ; i++){
for(int j = 0; j < IMG_SZ; j++){
for(int k = 0; k < NUM_FIL0; k++){
in[i][j][k] = input[i][j][k][iter];
}
}
}
pcount_enable(1);
dw_conv(inp_dim, f_dim1, outp_dim1, in, F1, B1, out1, STRIDE1, pad_1, SB1, MV1, SV1);
pw_conv(outp_dim1, f_dim2, outp_dim2, out1, F2, B2, out2, STRIDE2, pad_2, SB2, MV2, SV2);
dw_conv(outp_dim2, f_dim3, outp_dim3, out2, F3, B3, out3, STRIDE3, pad_3, SB3, MV3, SV3);
pw_conv(outp_dim3, f_dim4, outp_dim4, out3, F4, B4, out4, STRIDE4, pad_4, SB4, MV4, SV4);
maxpool2(outp_dim4, outp_dim5, out4, out5, POOL_SIZE1, POOL_STRIDE1);
dw_conv(outp_dim5, f_dim6, outp_dim6, out5, F5, B5, out6, STRIDE5, pad_6, SB5, MV5, SV5);
pw_conv(outp_dim6, f_dim7, outp_dim7, out6, F6, B6, out7, STRIDE6, pad_7, SB6, MV6, SV6);
dw_conv(outp_dim7, f_dim8, outp_dim8, out7, F7, B7, out8, STRIDE7, pad_8, SB7, MV7, SV7);
pw_conv(outp_dim8, f_dim9, outp_dim9, out8, F8, B8, out9, STRIDE8, pad_9, SB8, MV8, SV8);
maxpool2(outp_dim9, outp_dim10, out9, out10, POOL_SIZE2, POOL_STRIDE2);
dw_conv(outp_dim10, f_dim11, outp_dim11, out10, F9, B9, out11, STRIDE9, pad_11, SB9, MV9, SV9);
pw_conv(outp_dim11, f_dim12, outp_dim12, out11, F10, B10, out12, STRIDE10, pad_12, SB10, MV10, SV10);
dw_conv(outp_dim12, f_dim13, outp_dim13, out12, F11, B11, out13, STRIDE11, pad_13, SB11, MV11, SV11);
pw_conv(outp_dim13, f_dim14, outp_dim14, out13, F12, B12, out14, STRIDE12, pad_14, SB12, MV12, SV12);
maxpool2(outp_dim14, outp_dim15, out14, out15, POOL_SIZE3, POOL_STRIDE3);
flatten(outp_dim15, out15, out16);
mlp_layer(out16, out, flatten_dim, OUT_DIM, W1, B13, SB13, MV13, SV13);
pcount_enable(0);
puts("Output Layer Values:\n");
for(int i = 0; i < OUT_DIM; i++) {
puthex(out[i]);
puts("\n");
}
}
}
/* Entry point: run the CIFAR-10 depthwise-separable network once. */
int main(void) {
    /* Start with the performance counters disabled; cifar10_dws_cnn()
       enables them itself around the layer calls. */
    pcount_enable(0);

    cifar10_dws_cnn();

    return 0;
}
File diff suppressed because one or more lines are too long
@@ -0,0 +1,46 @@
/* Per-layer requantisation parameters for the reference (non-optimised)
   depthwise-separable network. For layer n the network passes SBn, MVn and
   SVn, in that order, as the last three arguments of the kernel call. */
#ifndef IBEX_CNN_PARAMS_H
#define IBEX_CNN_PARAMS_H
/* MVn: multiplier argument of layer n (quantized_multiplier in pw_conv). */
#define MV1 75
#define MV2 112
#define MV3 82
#define MV4 75
#define MV5 87
#define MV6 76
#define MV7 111
#define MV8 95
#define MV9 95
#define MV10 117
#define MV11 68
#define MV12 91
#define MV13 77
/* SVn: right-shift argument of layer n (out_shift_rl in pw_conv). */
#define SV1 15
#define SV2 7
#define SV3 15
#define SV4 14
#define SV5 14
#define SV6 8
#define SV7 15
#define SV8 15
#define SV9 15
#define SV10 16
#define SV11 15
#define SV12 15
#define SV13 21
/* SBn: bias-shift-mode argument of layer n (bias_shift_mode in pw_conv);
   all zero in this configuration. */
#define SB1 0
#define SB2 0
#define SB3 0
#define SB4 0
#define SB5 0
#define SB6 0
#define SB7 0
#define SB8 0
#define SB9 0
#define SB10 0
#define SB11 0
#define SB12 0
#define SB13 0
#endif /* IBEX_CNN_PARAMS_H */
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,15 @@
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Generate a baremetal application.
#
# PROGRAM is the name of the program; $(PROGRAM).c will be added as a
# source file.
PROGRAM = cmsis_cnn
# Directory containing this Makefile (the last entry of MAKEFILE_LIST at this
# point, resolved to a real path), so the include below does not depend on
# the directory make is invoked from.
PROGRAM_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
# Any extra source files to include in the build. Use the upper case .S
# extension for assembly files.
EXTRA_SRCS :=
# Shared build rules, located two directories above this one.
include ${PROGRAM_DIR}/../../common/common.mk
@@ -0,0 +1,153 @@
#include "simple_system_common.h"
#include "cnn_weights.h"
#include "fully_connected_opt.h"
#include "ibex_cnn_params.h"
#include "ibex_inputs.h"
#include "conv2d_opt.h"
/* Input image is IMG_SZ x IMG_SZ with NUM_FIL0 channels.
   NOTE(review): NUM_FIL0 is 1 here; presumably the colour channels are packed
   into one word for the optimised kernels - confirm against ibex_inputs.h. */
#define IMG_SZ 32
#define NUM_FIL0 1
/* Square kernel size of convolution n. */
#define FILTER1 5
#define FILTER2 5
#define FILTER3 5
/* Filter count of convolution n; used as that layer's output depth. */
#define NUM_FIL1 8
#define NUM_FIL2 8
#define NUM_FIL3 16
/* Stride of convolution n. */
#define STRIDE1 1
#define STRIDE2 1
#define STRIDE3 1
/* Padding of convolution n: PAD_TB is applied top and bottom, PAD_LR left
   and right. */
#define PAD_TB1 2
#define PAD_LR1 2
#define PAD_TB2 2
#define PAD_LR2 2
#define PAD_TB3 2
#define PAD_LR3 2
/* Window size and stride of the three max-pooling stages. */
#define POOL_STRIDE1 2
#define POOL_SIZE1 2
#define POOL_STRIDE2 2
#define POOL_SIZE2 2
#define POOL_STRIDE3 2
#define POOL_SIZE3 2
/* Number of words produced by the final layer, and number of input samples
   to run. */
#define OUT_DIM 3
#define SAMPLES 1
/* Per-sample output storage; not referenced by the code visible in this
   file. */
int outs[SAMPLES][OUT_DIM];
/*
 * Run the three-stage conv + max-pool network (optimised kernels) on each of
 * the SAMPLES inputs and print the OUT_DIM output words, one byte at a time.
 *
 * Every layer's output size is derived from the configuration macros, and the
 * intermediate feature maps live in local arrays sized from those values.
 * pcount_enable(1) / pcount_enable(0) bracket the layer calls only, so the
 * input copy and the printing are outside that window.
 */
void cmsis_cnn() {
    /* Network input. */
    int inp_dim[3] = {IMG_SZ, IMG_SZ, NUM_FIL0};
    int in[IMG_SZ][IMG_SZ][NUM_FIL0];

    /* Layer 1: convolution 1. */
    int hout1 = (IMG_SZ + 2 * PAD_TB1 - FILTER1) / STRIDE1 + 1;
    int wout1 = (IMG_SZ + 2 * PAD_LR1 - FILTER1) / STRIDE1 + 1;
    int dout1 = NUM_FIL1;
    int outp_dim1[3] = {hout1, wout1, dout1};
    int f_dim1[4] = {NUM_FIL1, FILTER1, FILTER1, NUM_FIL0};
    int pad_1[4] = {PAD_TB1, PAD_TB1, PAD_LR1, PAD_LR1};
    int out1[hout1][wout1][dout1];

    /* Layer 2: max-pool 1 (depth unchanged). */
    int hout2 = hout1 / POOL_STRIDE1;
    int wout2 = wout1 / POOL_STRIDE1;
    int dout2 = dout1;
    int outp_dim2[3] = {hout2, wout2, dout2};
    int out2[hout2][wout2][dout2];

    /* Layer 3: convolution 2. */
    int hout3 = (hout2 + 2 * PAD_TB2 - FILTER2) / STRIDE2 + 1;
    int wout3 = (wout2 + 2 * PAD_LR2 - FILTER2) / STRIDE2 + 1;
    int dout3 = NUM_FIL2;
    int outp_dim3[3] = {hout3, wout3, dout3};
    int f_dim3[4] = {NUM_FIL2, FILTER2, FILTER2, NUM_FIL1};
    int pad_3[4] = {PAD_TB2, PAD_TB2, PAD_LR2, PAD_LR2};
    int out3[hout3][wout3][dout3];

    /* Layer 4: max-pool 2 (depth unchanged). */
    int hout4 = hout3 / POOL_STRIDE2;
    int wout4 = wout3 / POOL_STRIDE2;
    int dout4 = dout3;
    int outp_dim4[3] = {hout4, wout4, dout4};
    int out4[hout4][wout4][dout4];

    /* Layer 5: convolution 3. */
    int hout5 = (hout4 + 2 * PAD_TB3 - FILTER3) / STRIDE3 + 1;
    int wout5 = (wout4 + 2 * PAD_LR3 - FILTER3) / STRIDE3 + 1;
    int dout5 = NUM_FIL3;
    int outp_dim5[3] = {hout5, wout5, dout5};
    int f_dim5[4] = {NUM_FIL3, FILTER3, FILTER3, NUM_FIL2};
    int pad_5[4] = {PAD_TB3, PAD_TB3, PAD_LR3, PAD_LR3};
    int out5[hout5][wout5][dout5];

    /* Layer 6: max-pool 3 (depth unchanged). */
    int hout6 = hout5 / POOL_STRIDE3;
    int wout6 = wout5 / POOL_STRIDE3;
    int dout6 = dout5;
    int outp_dim6[3] = {hout6, wout6, dout6};
    int out6[hout6][wout6][dout6];

    /* Flattened feature vector and final layer output. */
    int flatten_dim = dout6 * hout6 * wout6;
    int out7[flatten_dim];
    int out[OUT_DIM];

    for (int iter = 0; iter < SAMPLES; iter++) {
        /* Copy sample `iter` out of the global input tensor. */
        for (int row = 0; row < IMG_SZ; row++) {
            for (int col = 0; col < IMG_SZ; col++) {
                for (int ch = 0; ch < NUM_FIL0; ch++) {
                    in[row][col][ch] = input[row][col][ch][iter];
                }
            }
        }

        pcount_enable(1);

        conv2_8bits_1ch(inp_dim, f_dim1, outp_dim1, in, F1, B1, out1, STRIDE1, pad_1, SB1, MV1, SV1);
        maxpool2_compressed(outp_dim1, outp_dim2, out1, out2, POOL_SIZE1, POOL_STRIDE1);
        conv2_8bits(outp_dim2, f_dim3, outp_dim3, out2, F2, B2, out3, STRIDE2, pad_3, SB2, MV2, SV2);
        maxpool2_compressed(outp_dim3, outp_dim4, out3, out4, POOL_SIZE2, POOL_STRIDE2);
        conv2_2bits(outp_dim4, f_dim5, outp_dim5, out4, F3, B3, out5, STRIDE3, pad_5, SB3, MV3, SV3);
        maxpool2_compressed(outp_dim5, outp_dim6, out5, out6, POOL_SIZE3, POOL_STRIDE3);

        flatten(outp_dim6, out6, out7);
        mlp_layer_2bits(out7, out, flatten_dim, OUT_DIM, W1, B4, SB4, MV4, SV4);

        pcount_enable(0);

        puts("Output Layer Values:\n");
        for (int o = 0; o < OUT_DIM; o++) {
            /* Print each output word as four bytes, most significant first. */
            const unsigned int word = (unsigned int)out[o];
            for (int shift = 24; shift >= 0; shift -= 8) {
                puthex((word >> shift) & 0xFFu);
                puts(shift > 0 ? " " : "\n");
            }
        }
    }
}
/* Entry point: run the conv + max-pool network once. */
int main(void) {
    /* Start with the performance counters disabled; cmsis_cnn() enables
       them itself around the layer calls. */
    pcount_enable(0);

    cmsis_cnn();

    return 0;
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,30 @@
#ifndef IBEX_CNN_PARAMS_H
#define IBEX_CNN_PARAMS_H
/* MVn: value passed as the second-to-last argument of the layer-n kernel
   call (see the SBn, MVn, SVn argument triples in the network source).
   NOTE(review): these look like several small multipliers packed into one
   32-bit word - confirm against the *_opt kernels. */
#define MV1 1953789044
#define MV2 1229539657
#define MV3 1212696648
#define MV4 1330597711
/* SVn: value passed as the last argument of the layer-n kernel call.
   NOTE(review): SV1 and SV2 are larger than 2147483647, so they do not fit
   a 32-bit signed int; if the kernels take this argument as int the
   conversion is implementation-defined - confirm the parameter type or add
   an unsigned suffix. */
#define SV1 2164392961
#define SV2 2299667521
#define SV3 1488020161
#define SV4 1623294721
static const int SB1[8] = {
812696004, 946880900, 1079034308, 946913796, 945865156, 1081139524, 946930052, 545309060
};
static const int SB2[8] = {
945873216, 945832320, 945865152, 944816576, 674283904, 543211776, 945873280, 944824704
};
static const int SB3[16] = {
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
};
static const int SB4[3] = {
3, 3, 3
};
#endif /* IBEX_CNN_PARAMS_H */
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,15 @@
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Generate a baremetal application.
#
# PROGRAM is the name of the program; $(PROGRAM).c will be added as a
# source file.
PROGRAM = cmsis_cnn
# Directory containing this Makefile (the last entry of MAKEFILE_LIST at this
# point, resolved to a real path), so the include below does not depend on
# the directory make is invoked from.
PROGRAM_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
# Any extra source files to include in the build. Use the upper case .S
# extension for assembly files.
EXTRA_SRCS :=
# Shared build rules, located two directories above this one.
include ${PROGRAM_DIR}/../../common/common.mk
@@ -0,0 +1,147 @@
#include "simple_system_common.h"
#include "cnn_weights.h"
#include "fully_connected.h"
#include "ibex_cnn_params.h"
#include "ibex_inputs.h"
#include "conv2d.h"
/* Input image is IMG_SZ x IMG_SZ with NUM_FIL0 channels. */
#define IMG_SZ 32
#define NUM_FIL0 3
/* Square kernel size of convolution n. */
#define FILTER1 5
#define FILTER2 5
#define FILTER3 5
/* Filter count of convolution n; used as that layer's output depth. */
#define NUM_FIL1 32
#define NUM_FIL2 32
#define NUM_FIL3 64
/* Stride of convolution n. */
#define STRIDE1 1
#define STRIDE2 1
#define STRIDE3 1
/* Padding of convolution n: PAD_TB is applied top and bottom, PAD_LR left
   and right. */
#define PAD_TB1 2
#define PAD_LR1 2
#define PAD_TB2 2
#define PAD_LR2 2
#define PAD_TB3 2
#define PAD_LR3 2
/* Window size and stride of the three max-pooling stages. */
#define POOL_STRIDE1 2
#define POOL_SIZE1 2
#define POOL_STRIDE2 2
#define POOL_SIZE2 2
#define POOL_STRIDE3 2
#define POOL_SIZE3 2
/* Number of values produced by the final layer, and number of input samples
   to run. */
#define OUT_DIM 10
#define SAMPLES 1
/* Per-sample output storage; not referenced by the code visible in this
   file. */
int outs[SAMPLES][OUT_DIM];
void cmsis_cnn() {
int dout1 = NUM_FIL1;
int hout1 = ((IMG_SZ - FILTER1 + 2 * PAD_TB1)/STRIDE1) + 1;
int wout1 = ((IMG_SZ - FILTER1 + 2 * PAD_LR1)/STRIDE1) + 1;
int dout2 = dout1;
int hout2 = hout1/POOL_STRIDE1;
int wout2 = wout1/POOL_STRIDE1;
int dout3 = NUM_FIL2;
int hout3 = ((hout2 - FILTER2+ 2 * PAD_TB2)/STRIDE2)+1;
int wout3 = ((wout2 - FILTER2+ 2 * PAD_LR2)/STRIDE2)+1;
int dout4 = dout3;
int hout4 = hout3/POOL_STRIDE2;
int wout4 = wout3/POOL_STRIDE2;
int dout5 = NUM_FIL3;
int hout5 = ((hout4 - FILTER3+ 2 * PAD_TB3)/STRIDE3)+1;
int wout5 = ((wout4 - FILTER3+ 2 * PAD_LR3)/STRIDE3)+1;
int dout6 = dout5;
int hout6 = hout5/POOL_STRIDE3;
int wout6 = wout5/POOL_STRIDE3;
int flatten_dim = dout6 * hout6 * wout6;
int in[IMG_SZ][IMG_SZ][NUM_FIL0];
int inp_dim[3] = {IMG_SZ, IMG_SZ, NUM_FIL0};
int out1[hout1][wout1][dout1];
int pad_1[4] = {PAD_TB1, PAD_TB1, PAD_LR1, PAD_LR1};
int outp_dim1[3] = {hout1, wout1, dout1};
int f_dim1[4] = {NUM_FIL1, FILTER1, FILTER1, NUM_FIL0};
int out2[hout2][wout2][dout2];
int outp_dim2[3] = {hout2, wout2, dout2};
int out3[hout3][wout3][dout3];
int pad_3[4] = {PAD_TB2, PAD_TB2, PAD_LR2, PAD_LR2};
int outp_dim3[3] = {hout3, wout3, dout3};
int f_dim3[4] = {NUM_FIL2, FILTER2, FILTER2, NUM_FIL1};
int out4[hout4][wout4][dout4];
int outp_dim4[3] = {hout4, wout4, dout4};
int out5[hout5][wout5][dout5];
int pad_5[4] = {PAD_TB3, PAD_TB3, PAD_LR3, PAD_LR3};
int outp_dim5[3] = {hout5, wout5, dout5};
int f_dim5[4] = {NUM_FIL3, FILTER3, FILTER3, NUM_FIL2};
int out6[hout6][wout6][dout6];
int outp_dim6[3] = {hout6, wout6, dout6};
int out7[flatten_dim];
int out[OUT_DIM];
for (int iter = 0; iter < SAMPLES; iter++){
for(int i = 0; i < IMG_SZ; i++){
for(int j = 0; j < IMG_SZ; j++){
for(int k = 0; k < NUM_FIL0; k++){
in[i][j][k] = input[i][j][k][iter];
}
}
}
pcount_enable(1);
conv2(inp_dim, f_dim1, outp_dim1, in, F1, B1, out1, STRIDE1, pad_1, SB1, MV1, SV1);
maxpool2(outp_dim1, outp_dim2, out1, out2, POOL_SIZE1, POOL_STRIDE1);
conv2(outp_dim2, f_dim3, outp_dim3, out2, F2, B2, out3, STRIDE2, pad_3, SB2, MV2, SV2);
maxpool2(outp_dim3, outp_dim4, out3, out4, POOL_SIZE2, POOL_STRIDE2);
conv2(outp_dim4, f_dim5, outp_dim5, out4, F3, B3, out5, STRIDE3, pad_5, SB3, MV3, SV3);
maxpool2(outp_dim5, outp_dim6, out5, out6, POOL_SIZE3, POOL_STRIDE3);
flatten(outp_dim6, out6, out7);
mlp_layer(out7, out, flatten_dim, OUT_DIM, W1, B4, SB4, MV4, SV4);
pcount_enable(0);
puts("Output Layer Values:\n");
for(int i = 0; i < OUT_DIM; i++) {
puthex(out[i]);
puts("\n");
}
}
}
/* Entry point: run the conv + max-pool network once. */
int main(void) {
    /* Start with the performance counters disabled; cmsis_cnn() enables
       them itself around the layer calls. */
    pcount_enable(0);

    cmsis_cnn();

    return 0;
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,19 @@
/* Per-layer parameters for the reference (non-optimised) conv + max-pool
   network. For layer n the network passes SBn, MVn and SVn, in that order,
   as the last three arguments of the conv2() / mlp_layer() call. */
#ifndef IBEX_CNN_PARAMS_H
#define IBEX_CNN_PARAMS_H
/* MVn: second-to-last argument of the layer-n call.
   NOTE(review): presumably the requantisation multiplier - confirm against
   conv2.h / fully_connected.h. */
#define MV1 116
#define MV2 73
#define MV3 72
#define MV4 79
/* SVn: last argument of the layer-n call.
   NOTE(review): presumably the requantisation right-shift - confirm against
   conv2.h / fully_connected.h. */
#define SV1 16
#define SV2 17
#define SV3 11
#define SV4 12
/* SBn: third-from-last argument of the layer-n call; all zero in this
   configuration. */
#define SB1 0
#define SB2 0
#define SB3 0
#define SB4 0
#endif /* IBEX_CNN_PARAMS_H */
File diff suppressed because it is too large Load Diff
+70
View File
@@ -0,0 +1,70 @@
#ifndef DWS_CONV_H
#define DWS_CONV_H
/*
 * Reference (plain C) pointwise convolution with requantisation.
 *
 * For every output channel i and output position (j, k) the function starts
 * from bias[i], adds the dot product of the input pixel at (str1, str2) with
 * filter row fil[i][0 .. fil_dim[3]-1], then computes
 *     (quantized_multiplier * acc + rounding) >> out_shift_rl
 * and clamps the result to [0, 255].
 *
 * in_dim   : {height, width, depth} of inp
 * fil_dim  : only fil_dim[0] (rows of fil) and fil_dim[3] (taps per row) are read
 * out_dim  : {height, width, depth} of out
 * strides  : step of the input cursor per output row / column
 * pad      : pad[0] is the top padding, pad[2] the left padding; the other
 *            entries are not read
 * bias_shift_mode : accepted for signature compatibility, not used here
 * out_shift_rl    : right shift applied after the multiply; must be >= 0
 *
 * Output positions whose input coordinate falls outside the input (padding)
 * receive the requantised bias only.
 */
void pw_conv(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]],
             const int fil[fil_dim[0]][fil_dim[3]], const int bias[],
             int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[],
             const int bias_shift_mode, const int quantized_multiplier, const int out_shift_rl){
    int i, j, k, m, res, str1, str2, quant_prod;

    /* Round-to-nearest term. With a zero shift there is nothing to round, and
     * the original expression 1 << (0 - 1) would be undefined behaviour. */
    const int rounding = (out_shift_rl > 0) ? (1 << (out_shift_rl - 1)) : 0;

    (void)bias_shift_mode; /* unused in the reference implementation */

    for (i = 0; i < out_dim[2]; i++) { // output depth
        str1 = -pad[0] - strides;
        for (j = 0; j < out_dim[0]; j++) { // output height
            str1 += strides;
            str2 = -pad[2] - strides;
            for (k = 0; k < out_dim[1]; k++) { // output width
                res = bias[i];
                str2 += strides;
                if (str1 < in_dim[0] && str1 >= 0 && str2 >= 0 && str2 < in_dim[1]) {
                    for (m = 0; m < fil_dim[3]; m++) { // filters depth
                        res += inp[str1][str2][m] * fil[i][m];
                    }
                }
                quant_prod = (quantized_multiplier * res + rounding) >> out_shift_rl;
                if (quant_prod < 0) quant_prod = 0;     /* lower clamp */
                if (quant_prod > 255) quant_prod = 255; /* upper clamp */
                out[j][k][i] = quant_prod;
            }
        }
    }
}
/*
 * Reference (plain C) depthwise convolution with requantisation.
 *
 * Each output channel i is computed from input channel i only: starting from
 * bias[i], every filter tap depthwise_fil[i][p][n][0] whose input coordinate
 * lies inside the input is multiplied with inp[k1][k2][i] and accumulated.
 * The accumulator is then turned into
 *     (depthwise_multiplier * acc + rounding) >> depthwise_out_shift_rl
 * and clamped to [0, 255].
 *
 * in_dim            : {height, width, depth} of inp
 * depthwise_fil_dim : [1] is the filter height, [2] the filter width
 * out_dim           : {height, width, depth} of out
 * strides           : step of the window origin per output row / column
 * pad               : pad[0] is the top padding, pad[2] the left padding;
 *                     the other entries are not read
 * bias_shift_mode   : accepted for signature compatibility, not used here
 * depthwise_out_shift_rl : right shift after the multiply; must be >= 0
 *
 * Taps that fall into the padding region are skipped (zero padding).
 */
void dw_conv(int in_dim[3], int depthwise_fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]],
             const int depthwise_fil[depthwise_fil_dim[0]][depthwise_fil_dim[1]][depthwise_fil_dim[2]][1], const int bias[],
             int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[],
             const int bias_shift_mode, const int depthwise_multiplier, const int depthwise_out_shift_rl){
    int i, j, k, n, p, res, k1, k2, str1, str2, quant_prod;

    /* Round-to-nearest term. With a zero shift there is nothing to round, and
     * the original expression 1 << (0 - 1) would be undefined behaviour. */
    const int rounding = (depthwise_out_shift_rl > 0) ? (1 << (depthwise_out_shift_rl - 1)) : 0;

    (void)bias_shift_mode; /* unused in the reference implementation */

    // Depthwise convolution
    for (i = 0; i < out_dim[2]; i++) { // output depth
        str1 = -pad[0] - strides;
        for (j = 0; j < out_dim[0]; j++) { // output height
            str1 += strides;
            str2 = -pad[2] - strides;
            for (k = 0; k < out_dim[1]; k++) { // output width
                res = bias[i];
                str2 += strides;
                for (p = 0; p < depthwise_fil_dim[1]; p++) { // depthwise filter height
                    k1 = str1 + p;
                    if (k1 < 0 || k1 >= in_dim[0]) continue; /* whole filter row is in the padding */
                    for (n = 0; n < depthwise_fil_dim[2]; n++) { // depthwise filter width
                        k2 = str2 + n;
                        if (k2 >= 0 && k2 < in_dim[1]) {
                            res += inp[k1][k2][i] * depthwise_fil[i][p][n][0];
                        }
                    }
                }
                quant_prod = (depthwise_multiplier * res + rounding) >> depthwise_out_shift_rl;
                if (quant_prod < 0) quant_prod = 0;     /* lower clamp */
                if (quant_prod > 255) quant_prod = 255; /* upper clamp */
                out[j][k][i] = quant_prod;
            }
        }
    }
}
#endif /* DWS_CONV_H */
+171
View File
@@ -0,0 +1,171 @@
#ifndef DWS_CONV_OPT_H
#define DWS_CONV_OPT_H
/*
 * Pointwise convolution using the custom neur_init / nn_mac_8b / neur_res
 * instructions.
 *
 * For every output index i and output position (j, k): neur_init is issued
 * with bias[i] and bias_shift_mode[i]; for each input word
 * inp[str1][str2][m] four nn_mac_8b instructions are issued with the weight
 * words fil[i][4*m .. 4*m+3]; finally neur_res is issued with the multiplier
 * and shift, and its result is stored in out[j][k][i].
 *
 * The input cursor (str1, str2) starts at -pad - strides and advances by
 * `strides` per output row / column, the same scheme dw_conv_opt in this
 * header uses (the previous code advanced by 1, which is only correct for
 * strides == 1). Positions that fall outside the input issue no MACs instead
 * of reading inp out of bounds.
 *
 * NOTE(review): operand packing and the exact semantics of the three
 * instructions are defined by the hardware, not by this file -- confirm
 * against the ISA extension documentation before changing instruction order.
 */
void pw_conv_8bits(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]],
                   const int fil[fil_dim[0]][fil_dim[3] << 2], const int bias[fil_dim[0]],
                   int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[], const int bias_shift_mode[],
                   const int quantized_multiplier, const int out_shift_rl){
    int i, j, k, m, res, str1, str2, bias_val, w, in_cnn;
    for (i = 0; i < out_dim[2]; i++) { // output depth
        str1 = -pad[0] - strides;
        for (j = 0; j < out_dim[0]; j++) { // output height
            str1 += strides;
            str2 = -pad[2] - strides;
            for (k = 0; k < out_dim[1]; k++) { // output width
                bias_val = bias[i];
                str2 += strides;
                asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
                /* Skip the MACs for positions that lie in the padding region. */
                if (str1 >= 0 && str1 < in_dim[0] && str2 >= 0 && str2 < in_dim[1]) {
                    for (m = 0; m < fil_dim[3]; m++) { // filters depth
                        in_cnn = inp[str1][str2][m];
                        w = fil[i][4*m];
                        asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
                        w = fil[i][4*m+1];
                        asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
                        w = fil[i][4*m+2];
                        asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
                        w = fil[i][4*m+3];
                        asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
                    }
                }
                asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
                out[j][k][i] = res;
            }
        }
    }
}
/*
 * Pointwise convolution using the custom neur_init / nn_mac_4b / neur_res
 * instructions.
 *
 * For every output index i and output position (j, k): neur_init is issued
 * with bias[i] and bias_shift_mode[i]; for each input word
 * inp[str1][str2][m] two nn_mac_4b instructions are issued with the weight
 * words fil[i][2*m] and fil[i][2*m+1]; finally neur_res is issued with the
 * multiplier and shift, and its result is stored in out[j][k][i].
 *
 * The input cursor (str1, str2) starts at -pad - strides and advances by
 * `strides` per output row / column, the same scheme dw_conv_opt in this
 * header uses (the previous code advanced by 1, which is only correct for
 * strides == 1). Positions that fall outside the input issue no MACs instead
 * of reading inp out of bounds.
 *
 * NOTE(review): operand packing and the exact semantics of the three
 * instructions are defined by the hardware, not by this file -- confirm
 * against the ISA extension documentation before changing instruction order.
 */
void pw_conv_4bits(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]],
                   const int fil[fil_dim[0]][fil_dim[3] << 1], const int bias[fil_dim[0]],
                   int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[], const int bias_shift_mode[],
                   const int quantized_multiplier, const int out_shift_rl){
    int i, j, k, m, res, str1, str2, bias_val, w, in_cnn;
    for (i = 0; i < out_dim[2]; i++) { // output depth
        str1 = -pad[0] - strides;
        for (j = 0; j < out_dim[0]; j++) { // output height
            str1 += strides;
            str2 = -pad[2] - strides;
            for (k = 0; k < out_dim[1]; k++) { // output width
                bias_val = bias[i];
                str2 += strides;
                asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
                /* Skip the MACs for positions that lie in the padding region. */
                if (str1 >= 0 && str1 < in_dim[0] && str2 >= 0 && str2 < in_dim[1]) {
                    for (m = 0; m < fil_dim[3]; m++) { // filters depth
                        in_cnn = inp[str1][str2][m];
                        w = fil[i][2*m];
                        asm volatile("nn_mac_4b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
                        w = fil[i][2*m+1];
                        asm volatile("nn_mac_4b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
                    }
                }
                asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
                out[j][k][i] = res;
            }
        }
    }
}
/*
 * Pointwise convolution using the custom neur_init / nn_mac_2b / neur_res
 * instructions.
 *
 * For every output index i and output position (j, k): neur_init is issued
 * with bias[i] and bias_shift_mode[i]; for each input word
 * inp[str1][str2][m] one nn_mac_2b instruction is issued with the weight
 * word fil[i][m]; finally neur_res is issued with the multiplier and shift,
 * and its result is stored in out[j][k][i].
 *
 * The input cursor (str1, str2) starts at -pad - strides and advances by
 * `strides` per output row / column, the same scheme dw_conv_opt in this
 * header uses (the previous code advanced by 1, which is only correct for
 * strides == 1). Positions that fall outside the input issue no MACs instead
 * of reading inp out of bounds.
 *
 * NOTE(review): operand packing and the exact semantics of the three
 * instructions are defined by the hardware, not by this file -- confirm
 * against the ISA extension documentation before changing instruction order.
 */
void pw_conv_2bits(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]],
                   const int fil[fil_dim[0]][fil_dim[3]], const int bias[fil_dim[0]],
                   int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[], const int bias_shift_mode[],
                   const int quantized_multiplier, const int out_shift_rl){
    int i, j, k, m, res, str1, str2, bias_val, w, in_cnn;
    for (i = 0; i < out_dim[2]; i++) { // output depth
        str1 = -pad[0] - strides;
        for (j = 0; j < out_dim[0]; j++) { // output height
            str1 += strides;
            str2 = -pad[2] - strides;
            for (k = 0; k < out_dim[1]; k++) { // output width
                bias_val = bias[i];
                str2 += strides;
                asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
                /* Skip the MACs for positions that lie in the padding region. */
                if (str1 >= 0 && str1 < in_dim[0] && str2 >= 0 && str2 < in_dim[1]) {
                    for (m = 0; m < fil_dim[3]; m++) { // filters depth
                        in_cnn = inp[str1][str2][m];
                        w = fil[i][m];
                        asm volatile("nn_mac_2b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
                    }
                }
                asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
                out[j][k][i] = res;
            }
        }
    }
}
/*
 * Depthwise convolution driven through the custom neur_init / nn_mac_8b /
 * neur_res instructions.
 *
 * For every output channel i and output position (j, k): neur_init is issued
 * with bias[i] and bias_shift_mode[i]; one nn_mac_8b is issued per filter tap
 * depthwise_fil[i][p][n] whose input coordinate (k1, k2) lies inside the
 * input; neur_res is then issued with quantized_multiplier and out_shift_rl
 * and its result is stored in out[j][k][i].
 *
 * pad[0] is used as the top padding and pad[2] as the left padding; the other
 * pad entries are not read. The window origin advances by `strides`.
 *
 * NOTE(review): the value written to `res` by neur_init / nn_mac_8b is never
 * read here (it is overwritten by neur_res), so the running sum presumably
 * lives inside the core -- confirm against the ISA extension documentation,
 * and keep the instruction order unchanged.
 */
void dw_conv_opt(int in_dim[3], int depthwise_fil_dim[4], int out_dim[3],
int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int depthwise_fil[depthwise_fil_dim[0]][depthwise_fil_dim[1]][depthwise_fil_dim[2]],
const int bias[depthwise_fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]],
int strides, int pad[], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
int i, j, k, n, p, res, k1, k2, str1, str2, bias_val, in_cnn, w;
// Depthwise convolution
for (i = 0; i < out_dim[2]; i++){ // output depth
str1 = -pad[0] - strides;
for (j = 0; j < out_dim[0]; j++) { // output height
str1 += strides;
str2 = -pad[2] - strides;
for (k = 0; k < out_dim[1]; k++) { // output width
bias_val = bias[i];
str2 += strides;
asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
for (p = 0; p < depthwise_fil_dim[1]; p++) { // depthwise filter height
for (n = 0; n < depthwise_fil_dim[2]; n++) { // depthwise filter width
k1 = str1 + p;
k2 = str2 + n;
// taps that fall into the padding region issue no MAC
if (k1 < in_dim[0] && k1 >= 0 && k2 >= 0 && k2 < in_dim[1]) {
in_cnn = inp[k1][k2][i];
w = depthwise_fil[i][p][n];
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
}
}
}
asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
out[j][k][i] = res;
}
}
}
}
/*
 * Single-channel variant of the depthwise convolution: there is no loop over
 * the output depth, and only index 0 of bias, bias_shift_mode, depthwise_fil,
 * the input depth and the output depth is accessed.
 *
 * For every output position (j, k): neur_init is issued with bias[0] and
 * bias_shift_mode[0]; one nn_mac_8b is issued per filter tap whose input
 * coordinate (k1, k2) lies inside the input; neur_res is then issued with
 * quantized_multiplier and out_shift_rl and its result is stored in
 * out[j][k][0].
 *
 * pad[0] is used as the top padding and pad[2] as the left padding; the other
 * pad entries are not read. The window origin advances by `strides`.
 *
 * NOTE(review): the value written to `res` by neur_init / nn_mac_8b is never
 * read here (it is overwritten by neur_res), so the running sum presumably
 * lives inside the core -- confirm against the ISA extension documentation,
 * and keep the instruction order unchanged.
 */
void dw_conv_opt_1ch(int in_dim[3], int depthwise_fil_dim[4], int out_dim[3],
int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int depthwise_fil[depthwise_fil_dim[0]][depthwise_fil_dim[1]][depthwise_fil_dim[2]],
const int bias[depthwise_fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]],
int strides, int pad[], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
int j, k, n, p, res, k1, k2, str1, str2, bias_val, in_cnn, w;
// Depthwise convolution
str1 = -pad[0] - strides;
for (j = 0; j < out_dim[0]; j++) { // output height
str1 += strides;
str2 = -pad[2] - strides;
for (k = 0; k < out_dim[1]; k++) { // output width
bias_val = bias[0];
str2 += strides;
asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[0]):);
for (p = 0; p < depthwise_fil_dim[1]; p++) { // depthwise filter height
for (n = 0; n < depthwise_fil_dim[2]; n++) { // depthwise filter width
k1 = str1 + p;
k2 = str2 + n;
// taps that fall into the padding region issue no MAC
if (k1 < in_dim[0] && k1 >= 0 && k2 >= 0 && k2 < in_dim[1]) {
in_cnn = inp[k1][k2][0];
w = depthwise_fil[0][p][n];
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
}
}
}
asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
out[j][k][0] = res;
}
}
}
#endif /* DWS_CONV_OPT_H */
+89
View File
@@ -0,0 +1,89 @@
import init_utils
import common
# Initialize the environment and get the name
name = init_utils.initialize_environment(__file__)
args = init_utils.get_args()
# Set arguments from command line
max_acc_drop = args.max_acc_drop
device = args.device
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.nn.functional as F
import tensorflow as tf
import numpy as np
# Load our Dataset
# CIFAR-10 through the Keras dataset helper, as (images, labels) pairs for train and test.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
# Remove the length-1 axis 1 from the label arrays.
y_train = np.squeeze(y_train, axis = 1)
y_test = np.squeeze(y_test, axis = 1)
# Hold out 15% of the training data for validation. No random_state is passed,
# so the split is not fixed by this script.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size = 0.15)
# Move axis 3 to position 1 (presumably channels-last -> channels-first for the
# torch layers below -- confirm). The pixel values are not rescaled here.
X_train = (np.transpose(X_train, (0,3,1,2)))
X_test = (np.transpose(X_test, (0,3,1,2)))
X_val = (np.transpose(X_val, (0,3,1,2)))
# Training hyperparameters, passed to common.create_ibex_qnn at the end of the script.
BATCH_SIZE = 128
epochs = 1
lr = 0.0001
class DepthwiseBlock(nn.Module):
    """Depthwise-separable convolution block.

    The block is ``depthwise conv -> ReLU -> 1x1 pointwise conv -> ReLU``,
    stored as ``self.block`` (an ``nn.Sequential`` with the layers at indices
    0..3, in that order).

    Args:
        in_channels: Number of input channels. The depthwise convolution uses
            ``groups = in_channels`` and keeps the channel count.
        out_channels: Number of channels produced by the pointwise convolution.
        kernel_size: Side length of the square depthwise kernel. Must be odd so
            that ``padding = kernel_size // 2`` preserves the spatial size.
            Defaults to 3, which reproduces the original fixed 3x3 / padding 1
            configuration.

    Raises:
        ValueError: If ``kernel_size`` is not a positive odd integer.
    """

    def __init__(self, in_channels, out_channels, kernel_size = 3):
        super().__init__()

        if kernel_size < 1 or kernel_size % 2 == 0:
            raise ValueError("kernel_size must be a positive odd integer, got "
                             + str(kernel_size))

        # Layers are created in the same order as before so that parameter
        # initialisation and the state-dict keys (block.0.*, block.2.*) are
        # unchanged.
        self.block = nn.Sequential(
            # Depthwise convolution: one filter per input channel
            nn.Conv2d(in_channels = in_channels, out_channels = in_channels,
                      kernel_size = kernel_size, padding = kernel_size // 2,
                      groups = in_channels),
            nn.ReLU(inplace = True),
            # Pointwise convolution: 1x1 mixing across channels
            nn.Conv2d(in_channels = in_channels, out_channels = out_channels,
                      kernel_size = 1, padding = 0),
            nn.ReLU(inplace = True),
        )

    def forward(self, x):
        """Apply the block to ``x`` and return the result."""
        return self.block(x)
class Cifar10_Dws_CNN(nn.Module):
    """Depthwise-separable CNN classifier.

    ``features`` is three stages of two ``DepthwiseBlock`` modules followed by
    a 2x2 / stride-2 max pool, with 64, 128 and 256 channels. The flattened
    feature map goes through a single linear layer and ``log_softmax``.

    Args:
        in_channels: Channels of the input image. Defaults to 3.
        num_classes: Number of output classes. Defaults to 10.
        input_size: Height (= width) of the square input image. Defaults to 32.
            The three pooling layers halve the size three times, so the
            classifier sees ``256 * (input_size // 8) ** 2`` features; with the
            defaults this is the original ``256 * 4 * 4``.

    Raises:
        ValueError: If ``input_size`` is smaller than 8, which would leave an
            empty feature map after the three pooling layers.
    """

    def __init__(self, in_channels = 3, num_classes = 10, input_size = 32):
        super().__init__()

        # Each MaxPool2d(2, 2) floors the spatial size to half.
        feature_side = input_size // 8
        if feature_side < 1:
            raise ValueError("input_size must be at least 8, got " + str(input_size))

        self.features = nn.Sequential(
            DepthwiseBlock(in_channels = in_channels, out_channels = 64),
            DepthwiseBlock(in_channels = 64, out_channels = 64),
            nn.MaxPool2d(kernel_size = 2, stride = 2),

            DepthwiseBlock(in_channels = 64, out_channels = 128),
            DepthwiseBlock(in_channels = 128, out_channels = 128),
            nn.MaxPool2d(kernel_size = 2, stride = 2),

            DepthwiseBlock(in_channels = 128, out_channels = 256),
            DepthwiseBlock(in_channels = 256, out_channels = 256),
            nn.MaxPool2d(kernel_size = 2, stride = 2)
        )

        self.flatten = nn.Flatten()

        # Kept as a one-element Sequential so the parameter names
        # (classifier.0.weight / classifier.0.bias) stay the same.
        self.classifier = nn.Sequential(
            nn.Linear(256 * feature_side * feature_side, num_classes)
        )

    def forward(self, x):
        """Return per-class log-probabilities for the batch ``x``."""
        x = self.features(x)
        x = self.flatten(x)
        x = self.classifier(x)
        return F.log_softmax(x, dim = 1)
# Build the floating-point model and pass it, together with the data splits and
# the training hyperparameters, to common.create_ibex_qnn.
# NOTE(review): create_ibex_qnn presumably trains, quantises and exports the
# network for the Ibex core, keyed by `name` -- confirm in common.py.
net = Cifar10_Dws_CNN()
common.create_ibex_qnn(net, name, device, X_train, y_train, X_test, y_test,
X_val = X_val, y_val = y_val, BATCH_SIZE = BATCH_SIZE,
epochs = epochs, lr = lr, max_acc_drop = max_acc_drop)
+77
View File
@@ -0,0 +1,77 @@
import init_utils
import common
# Initialize the environment and get the name
name = init_utils.initialize_environment(__file__)
args = init_utils.get_args()
# Set arguments from command line
max_acc_drop = args.max_acc_drop
device = args.device
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.nn.functional as F
import tensorflow as tf
import numpy as np
# Load our Dataset
# CIFAR-10 through the Keras dataset helper, as (images, labels) pairs for train and test.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
# Remove the length-1 axis 1 from the label arrays.
y_train = np.squeeze(y_train, axis = 1)
y_test = np.squeeze(y_test, axis = 1)
# Hold out 15% of the training data for validation. No random_state is passed,
# so the split is not fixed by this script.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size = 0.15)
# Move axis 3 to position 1 (presumably channels-last -> channels-first for the
# torch layers below -- confirm), then shift by 128 and divide by 255; for
# 8-bit pixel values this gives roughly [-0.5, 0.5].
X_train = (np.transpose(X_train, (0,3,1,2)) - 128.0)/255.0
X_test = (np.transpose(X_test, (0,3,1,2)) - 128.0)/255.0
X_val = (np.transpose(X_val, (0,3,1,2)) - 128.0)/255.0
# Training hyperparameters, passed to common.create_ibex_qnn at the end of the script.
BATCH_SIZE = 32
epochs = 1
lr = 0.0001
class CMSIS_CNN(nn.Module):
    """Three-stage 5x5 convolutional classifier.

    Each stage is ``Conv2d(5x5, padding 2) -> ReLU -> MaxPool2d(2, 2) ->
    Dropout`` with 32, 32 and 64 output channels and dropout probabilities
    0.25, 0.25 and 0.4. The flattened result goes through one linear layer and
    ``log_softmax``.

    Args:
        in_channels: Channels of the input image. Defaults to 3.
        num_classes: Number of output classes. Defaults to 10.
        input_size: Height (= width) of the square input image. Defaults to 32.
            The three pooling layers halve the size three times, so the linear
            layer sees ``64 * (input_size // 8) ** 2`` features; with the
            defaults this is the original 1024.

    Raises:
        ValueError: If ``input_size`` is smaller than 8, which would leave an
            empty feature map after the three pooling layers.
    """

    def __init__(self, in_channels = 3, num_classes = 10, input_size = 32):
        super().__init__()

        # Each MaxPool2d(2, 2) floors the spatial size to half.
        feature_side = input_size // 8
        if feature_side < 1:
            raise ValueError("input_size must be at least 8, got " + str(input_size))

        # Attribute names and creation order are kept exactly as before, so the
        # order of named_children() and the state-dict keys do not change.
        self.conv1 = nn.Conv2d(in_channels = in_channels, out_channels = 32, kernel_size = 5, padding = 2)
        self.relu1 = nn.ReLU()
        self.max1 = nn.MaxPool2d(2, 2)
        self.d1 = nn.Dropout(p = 0.25)

        self.conv2 = nn.Conv2d(in_channels = 32, out_channels = 32, kernel_size = 5, padding = 2)
        self.relu2 = nn.ReLU()
        self.max2 = nn.MaxPool2d(2, 2)
        self.d2 = nn.Dropout(p = 0.25)

        self.conv3 = nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 5, padding = 2)
        self.relu3 = nn.ReLU()
        self.max3 = nn.MaxPool2d(2, 2)
        self.d3 = nn.Dropout(p = 0.4)

        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(64 * feature_side * feature_side, num_classes)

    def forward(self, X):
        """Return per-class log-probabilities for the batch ``X``."""
        X = self.d1(self.max1(self.relu1(self.conv1(X))))
        X = self.d2(self.max2(self.relu2(self.conv2(X))))
        X = self.d3(self.max3(self.relu3(self.conv3(X))))

        X = self.linear1(self.flatten(X))
        return F.log_softmax(X, dim = 1)
# Build the floating-point model and pass it, together with the data splits and
# the training hyperparameters, to common.create_ibex_qnn.
# NOTE(review): create_ibex_qnn presumably trains, quantises and exports the
# network for the Ibex core, keyed by `name` -- confirm in common.py.
net = CMSIS_CNN()
common.create_ibex_qnn(net, name, device, X_train, y_train, X_test, y_test,
X_val = X_val, y_val = y_val, BATCH_SIZE = BATCH_SIZE,
epochs = epochs, lr = lr, max_acc_drop = max_acc_drop)
+10
View File
@@ -125,3 +125,13 @@ def create_ibex_qnn(net, name, device, X_train, y_train, X_test, y_test, X_val =
print('\nSIMULATING MODEL ON IBEX CORE\nUSE THE OUTPUTS TO VERIFY THAT THE RESULTS ARE CORRECT !!') print('\nSIMULATING MODEL ON IBEX CORE\nUSE THE OUTPUTS TO VERIFY THAT THE RESULTS ARE CORRECT !!')
ibex_model = simulate_ibex.create_lenet_model(int_weights, int_og_bias, mul_vals, shift_vals) ibex_model = simulate_ibex.create_lenet_model(int_weights, int_og_bias, mul_vals, shift_vals)
simulate_ibex.eval_sim_model(quant_net, ibex_model, test_loader) simulate_ibex.eval_sim_model(quant_net, ibex_model, test_loader)
elif(name == 'cmsis_cnn'):
print('\nSIMULATING MODEL ON IBEX CORE\nUSE THE OUTPUTS TO VERIFY THAT THE RESULTS ARE CORRECT !!')
ibex_model = simulate_ibex.create_cmsis_cnn_model(int_weights, int_og_bias, mul_vals, shift_vals)
simulate_ibex.eval_sim_model(quant_net, ibex_model, test_loader)
elif(name == 'cifar10_dws_cnn'):
print('\nSIMULATING MODEL ON IBEX CORE\nUSE THE OUTPUTS TO VERIFY THAT THE RESULTS ARE CORRECT !!')
ibex_model = simulate_ibex.create_ibex_dws_model(int_weights, int_og_bias, mul_vals, shift_vals)
simulate_ibex.eval_sim_model(quant_net, ibex_model, test_loader)
+145 -32
View File
@@ -27,26 +27,34 @@ def quantize_multiplier(real_multiplier):
return quantized_multiplier, right_shift return quantized_multiplier, right_shift
def get_int_params(quant_net): def get_int_params(quant_net):
int_weights = [] int_weights = []
int_bias = [] int_bias = []
in_scales = [] in_scales = []
act_scales = [] act_scales = []
for _, module in quant_net.sequential.named_children(): def extract_quant_params(module):
if hasattr(module, 'weight') and module.weight is not None: for name, submodule in module.named_children():
int_weights.append(module.int_weight().cpu().numpy()) # Check if the submodule has weights and append them if present
int_bias.append(module.int_bias().cpu().numpy()) if hasattr(submodule, 'weight') and submodule.weight is not None:
in_scales.append(module.quant_bias_scale().cpu().detach().numpy()) int_weights.append(submodule.int_weight().cpu().detach().numpy())
int_bias.append(submodule.int_bias().cpu().detach().numpy())
in_scales.append(submodule.quant_bias_scale().cpu().detach().numpy())
if hasattr(module, 'quant_act_scale') and module.quant_act_scale is not None: # Check if the submodule has activation scale and append it if present
act_scales.append(module.quant_act_scale().cpu().detach().numpy()) if hasattr(submodule, 'quant_act_scale') and submodule.quant_act_scale is not None:
act_scales.append(submodule.quant_act_scale().cpu().detach().numpy())
act_scales.append(quant_net.o_quant.quant_act_scale().cpu().detach().numpy()) # Recursively extract parameters from the children modules
extract_quant_params(submodule)
# Start extraction from the top-level module
extract_quant_params(quant_net)
mul_vals, shift_vals = [], [] mul_vals, shift_vals = [], []
for i in range(len(act_scales)): for i in range(len(act_scales)-1):
M = in_scales[i]/act_scales[i] M = in_scales[i]/act_scales[i+1]
mul, shift = quantize_multiplier(M[0]) mul, shift = quantize_multiplier(M[0])
mul_vals.append(mul) mul_vals.append(mul)
shift_vals.append(shift) shift_vals.append(shift)
@@ -87,7 +95,12 @@ def decide_mode(network, weight_bit_width, input_uint8 = True):
for name, module in network.named_modules(): for name, module in network.named_modules():
if isinstance(module, layer_types_py): if isinstance(module, layer_types_py):
layer_type_name = module.__class__.__name__ layer_type_name = module.__class__.__name__
if(layer_type_name == 'Conv2d' or layer_type_name == 'Linear' or layer_type_name == 'DepthwiseConv2d'): if(layer_type_name == 'Linear'):
layer_type.append(layer_type_name)
if(layer_type_name == 'Conv2d'):
if(module.groups == module.in_channels):
layer_type.append('DepthwiseConv2d')
else:
layer_type.append(layer_type_name) layer_type.append(layer_type_name)
else: else:
if(layer_type_name == 'ReLU' or layer_type_name == 'Sigmoid'): if(layer_type_name == 'ReLU' or layer_type_name == 'Sigmoid'):
@@ -96,13 +109,13 @@ def decide_mode(network, weight_bit_width, input_uint8 = True):
for i in range(len(weight_bit_width)): for i in range(len(weight_bit_width)):
signed_input = 4 * input_sign[i] signed_input = 4 * input_sign[i]
if(layer_type[i] == 'DepthwiseConv2d'):
mode_per_layer.append(signed_input + 1)
else:
if(weight_bit_width[i] == 2): if(weight_bit_width[i] == 2):
mode_per_layer.append(signed_input + 3) mode_per_layer.append(signed_input + 3)
elif(weight_bit_width[i] == 4): elif(weight_bit_width[i] == 4):
mode_per_layer.append(signed_input + 2) mode_per_layer.append(signed_input + 2)
else:
if(layer_type[i] == 'DepthwiseConv2d'):
mode_per_layer.append(signed_input + 1)
else: else:
mode_per_layer.append(signed_input) mode_per_layer.append(signed_input)
@@ -161,6 +174,7 @@ def pad_inputs_weights(quant_net, test_loader, mode_per_layer,
else: else:
new_size_0 = a * 4 new_size_0 = a * 4
if((mode_per_layer[i] != 1) and (mode_per_layer[i] != 5)):
b = w.shape[1] // 4 b = w.shape[1] // 4
if(w.shape[1] % 4 != 0): if(w.shape[1] % 4 != 0):
new_size_1 = (b + 1) * 4 new_size_1 = (b + 1) * 4
@@ -170,6 +184,12 @@ def pad_inputs_weights(quant_net, test_loader, mode_per_layer,
new_w = np.zeros((new_size_0, new_size_1, w.shape[2], w.shape[3])).astype(np.int8) new_w = np.zeros((new_size_0, new_size_1, w.shape[2], w.shape[3])).astype(np.int8)
new_w[:w.shape[0], :w.shape[1], :, :] = w new_w[:w.shape[0], :w.shape[1], :, :] = w
else:
new_size_1 = 1
new_w = np.zeros((new_size_0, new_size_1, w.shape[2], w.shape[3])).astype(np.int8)
new_w[:w.shape[0], :w.shape[1], :, :] = w
new_w = np.squeeze(new_w, axis = 1)
padded_int_weights.append(new_w) padded_int_weights.append(new_w)
padded_int_biases = [] padded_int_biases = []
@@ -325,6 +345,15 @@ def concat_inputs_weights(mode_per_layer, padded_input, padded_int_weights, padd
comb = combine_values(vector) comb = combine_values(vector)
new_mat[i][j] = comb new_mat[i][j] = comb
elif(len(dims) == 3):
new_mat = np.zeros((int(dims[0]//4), dims[1], dims[2]), dtype = np.int64)
for i in range(int(dims[0]//4)):
for j in range(dims[1]):
for k in range(dims[2]):
vector = layer_weight[4*i : 4*(i+1), j, k]
comb = combine_values(vector)
new_mat[i][j][k] = comb
elif(len(dims) == 4): elif(len(dims) == 4):
if((mode_per_layer[iter] == 0) | (mode_per_layer[iter] == 4)): if((mode_per_layer[iter] == 0) | (mode_per_layer[iter] == 4)):
new_mat = np.zeros((int(dims[0]//4), dims[1], dims[2], dims[3]), dtype = np.int64) new_mat = np.zeros((int(dims[0]//4), dims[1], dims[2], dims[3]), dtype = np.int64)
@@ -602,9 +631,17 @@ def save_cnn_net_params(path, int_weights, int_biases, mul_vals, shift_vals, shi
dims = np.shape(int_weights[k]) dims = np.shape(int_weights[k])
mat = int_weights[k] mat = int_weights[k]
if(len(dims) == 2 or ((len(dims) == 4) and dims[2] == dims[3] == 1)):
f.write('static const int ')
if(len(dims) == 2): if(len(dims) == 2):
wi += 1 wi += 1
st = 'static const int W' + str(wi) + '[' + str(dims[0]) + ']' + '[' + str(dims[1]) + '] = {\n' f.write('W' + str(wi))
else:
mat = np.squeeze(mat, axis = (2,3))
fi += 1
f.write('F' + str(fi))
st = '[' + str(dims[0]) + ']' + '[' + str(dims[1]) + '] = {\n'
f.write(st) f.write(st)
for n in range(dims[0]): for n in range(dims[0]):
f.write('\t{') f.write('\t{')
@@ -619,6 +656,32 @@ def save_cnn_net_params(path, int_weights, int_biases, mul_vals, shift_vals, shi
f.write('\n') f.write('\n')
f.write('};\n\n') f.write('};\n\n')
elif (len(dims) == 3):
dims = np.shape(mat)
fi += 1
st = 'static const int F' + str(fi) + '[' + str(dims[0]) + '][' + str(dims[1])
st += '][' + str(dims[2]) + '] = {\n'
f.write(st)
for n in range(dims[0]):
f.write('\t{\n')
for l in range(dims[1]):
f.write('\t\t{')
for h in range(dims[2] - 1):
f.write(str(mat[n][l][h]) + ', ')
if dims[2] != 1:
f.write(str(mat[n][l][dims[2] - 1]) + '}')
else:
f.write(str(mat[n][l][0]) + '}')
if (l != dims[1] - 1):
f.write(',')
f.write('\n')
f.write('\t}')
if n != dims[0] - 1:
f.write(',')
f.write('\n')
f.write('};\n\n')
elif(len(dims) == 4): elif(len(dims) == 4):
mat = np.transpose(mat, (0, 2, 3, 1)) mat = np.transpose(mat, (0, 2, 3, 1))
dims = np.shape(mat) dims = np.shape(mat)
@@ -856,9 +919,11 @@ def generate_opt_c_code_mlp(path, name, int_weights, optimal_config, type_of_lay
f.write('\t' + name + '();\n\n') f.write('\t' + name + '();\n\n')
f.write('\treturn 0;\n}') f.write('\treturn 0;\n}')
def get_cnn_details(model): def get_cnn_details(module, details = None):
if details is None:
details = [] details = []
for layer in model.children():
for layer in module.children():
if isinstance(layer, nn.Conv2d): if isinstance(layer, nn.Conv2d):
details.append({ details.append({
"layer_type": "Conv2d", "layer_type": "Conv2d",
@@ -866,10 +931,11 @@ def get_cnn_details(model):
"out_channels": layer.out_channels, "out_channels": layer.out_channels,
"kernel_size": layer.kernel_size, "kernel_size": layer.kernel_size,
"stride": layer.stride, "stride": layer.stride,
"padding": layer.padding "padding": layer.padding,
"groups": layer.groups
}) })
elif (isinstance(layer, nn.MaxPool2d)): elif isinstance(layer, nn.MaxPool2d):
details.append({ details.append({
"layer_type": "MaxPool2d", "layer_type": "MaxPool2d",
"kernel_size": layer.kernel_size, "kernel_size": layer.kernel_size,
@@ -877,7 +943,7 @@ def get_cnn_details(model):
"padding": layer.padding "padding": layer.padding
}) })
elif (isinstance(layer, nn.AvgPool2d)): elif isinstance(layer, nn.AvgPool2d):
details.append({ details.append({
"layer_type": "AvgPool2d", "layer_type": "AvgPool2d",
"kernel_size": layer.kernel_size, "kernel_size": layer.kernel_size,
@@ -891,6 +957,10 @@ def get_cnn_details(model):
"in_features": layer.in_features, "in_features": layer.in_features,
"out_features": layer.out_features "out_features": layer.out_features
}) })
# Recursively apply to children modules
get_cnn_details(layer, details)
return details return details
def generate_og_c_code_cnn(path, name, input, cnn_details, int_weights): def generate_og_c_code_cnn(path, name, input, cnn_details, int_weights):
@@ -900,10 +970,17 @@ def generate_og_c_code_cnn(path, name, input, cnn_details, int_weights):
f.write('#include "fully_connected.h"\n') f.write('#include "fully_connected.h"\n')
f.write('#include "ibex_cnn_params.h"\n') f.write('#include "ibex_cnn_params.h"\n')
f.write('#include "ibex_inputs.h"\n') f.write('#include "ibex_inputs.h"\n')
f.write('#include "conv2d.h"\n\n') f.write('#include "conv2d.h"\n')
f.write('#define IMG_SZ ' + str(input.shape[2]) + '\n') for detail in cnn_details[:-1]:
f.write('#define NUM_FIL0 ' + str(int_weights[0].shape[1]) + '\n\n') if detail["layer_type"] == "Conv2d":
if(detail["in_channels"] == detail["out_channels"] == detail["groups"] != 1):
f.write('#include "dws_conv.h"\n')
break
f.write('\n')
f.write('#define IMG_SZ ' + str(np.shape(input)[2]) + '\n')
f.write('#define NUM_FIL0 ' + str(np.shape(input)[1]) + '\n\n')
i = 1 i = 1
for w in int_weights: for w in int_weights:
if(len(np.shape(w)) == 4): if(len(np.shape(w)) == 4):
@@ -1050,11 +1127,17 @@ def generate_og_c_code_cnn(path, name, input, cnn_details, int_weights):
for detail in cnn_details[:-1]: for detail in cnn_details[:-1]:
if detail["layer_type"] == "Conv2d": if detail["layer_type"] == "Conv2d":
if(detail["in_channels"] == detail["out_channels"] == detail["groups"] != 1):
conv_type = 'dw_conv'
elif(detail["kernel_size"][0] == 1):
conv_type = 'pw_conv'
else:
conv_type = "conv2"
if(i == 1): if(i == 1):
f.write('\t\tconv2(inp_dim, f_dim1, outp_dim1, in, F1, B1, ') f.write('\t\t' + conv_type + '(inp_dim, f_dim1, outp_dim1, in, F1, B1, ')
f.write('out1, STRIDE1, pad_1, SB1, MV1, SV1);') f.write('out1, STRIDE1, pad_1, SB1, MV1, SV1);')
else: else:
f.write('\t\tconv2(outp_dim' + str(i-1) + ', f_dim' + str(i) + ', outp_dim' + str(i)) f.write('\t\t' + conv_type + '(outp_dim' + str(i-1) + ', f_dim' + str(i) + ', outp_dim' + str(i))
f.write(', out' + str(i-1) + ', F' + str(fi) + ', B' + str(fi) + ', out' + str(i)) f.write(', out' + str(i-1) + ', F' + str(fi) + ', B' + str(fi) + ', out' + str(i))
f.write(', STRIDE' + str(fi) + ', pad_' + str(i) + ', SB' + str(fi)) f.write(', STRIDE' + str(fi) + ', pad_' + str(i) + ', SB' + str(fi))
f.write(', MV' + str(fi) + ', SV' + str(fi) + ');') f.write(', MV' + str(fi) + ', SV' + str(fi) + ');')
@@ -1091,6 +1174,13 @@ def generate_og_c_code_cnn(path, name, input, cnn_details, int_weights):
f.write('\n') f.write('\n')
i += 1 i += 1
if flatten == 0:
f.write('\t\tflatten(outp_dim' + str(i-1) + ', out' + str(i-1) + ', out' + str(i) + ');\n\n')
i += 1
f.write('\t\tmlp_layer(out' + str(i-1) + ', out, flatten_dim, OUT_DIM, ')
f.write('W1, B' + str(fi + dn - 1) + ', SB' + str(fi + dn - 1) + ', MV' + str(fi + dn - 1))
f.write(', SV' + str(fi + dn - 1) + ');')
else:
f.write('\t\tmlp_layer(out' + str(i-1) + ', out, DENSE_DIM' + str(dn-1)) f.write('\t\tmlp_layer(out' + str(i-1) + ', out, DENSE_DIM' + str(dn-1))
f.write(', OUT_DIM, W' + str(dn) + ', B' + str(fi + dn - 1)) f.write(', OUT_DIM, W' + str(dn) + ', B' + str(fi + dn - 1))
f.write(', SB' + str(fi + dn - 1) + ', MV' + str(fi + dn - 1)) f.write(', SB' + str(fi + dn - 1) + ', MV' + str(fi + dn - 1))
@@ -1119,13 +1209,21 @@ def generate_opt_c_code_cnn(path, name, input, cnn_details, int_weights, optimal
f.write('#include "fully_connected_opt.h"\n') f.write('#include "fully_connected_opt.h"\n')
f.write('#include "ibex_cnn_params.h"\n') f.write('#include "ibex_cnn_params.h"\n')
f.write('#include "ibex_inputs.h"\n') f.write('#include "ibex_inputs.h"\n')
f.write('#include "conv2d_opt.h"\n\n') f.write('#include "conv2d_opt.h"\n')
for detail in cnn_details[:-1]:
if detail["layer_type"] == "Conv2d":
if(detail["in_channels"] == detail["out_channels"] == detail["groups"] != 1):
f.write('#include "dws_conv_opt.h"\n')
break
f.write('\n')
f.write('#define IMG_SZ ' + str(np.shape(input)[2]) + '\n') f.write('#define IMG_SZ ' + str(np.shape(input)[2]) + '\n')
f.write('#define NUM_FIL0 ' + str(np.shape(input)[0]) + '\n\n') f.write('#define NUM_FIL0 ' + str(np.shape(input)[1]) + '\n\n')
i = 1 i = 1
for w in int_weights: for w in int_weights:
if(len(np.shape(w)) == 4): if(len(np.shape(w)) == 4 or len(np.shape(w)) == 3):
f.write('#define FILTER' + str(i) + ' ' + str(w.shape[2]) + '\n') f.write('#define FILTER' + str(i) + ' ' + str(w.shape[2]) + '\n')
i += 1 i += 1
@@ -1133,7 +1231,7 @@ def generate_opt_c_code_cnn(path, name, input, cnn_details, int_weights, optimal
i = 1 i = 1
for w in int_weights: for w in int_weights:
if(len(np.shape(w)) == 4): if(len(np.shape(w)) == 4 or len(np.shape(w)) == 3):
f.write('#define NUM_FIL' + str(i) + ' ' + str(w.shape[0]) + '\n') f.write('#define NUM_FIL' + str(i) + ' ' + str(w.shape[0]) + '\n')
i += 1 i += 1
@@ -1270,14 +1368,21 @@ def generate_opt_c_code_cnn(path, name, input, cnn_details, int_weights, optimal
for detail in cnn_details[:-1]: for detail in cnn_details[:-1]:
if detail["layer_type"] == "Conv2d": if detail["layer_type"] == "Conv2d":
if(detail["in_channels"] == detail["out_channels"] == detail["groups"] != 1):
conv_type = 'dw_conv_opt'
elif(detail["kernel_size"][0] == 1):
conv_type = 'pw_conv_' + str(optimal_config[j]) + 'bits'
else:
conv_type = 'conv2_' + str(optimal_config[j]) + 'bits'
if(i == 1): if(i == 1):
f.write('\t\tconv2_' + str(optimal_config[j]) + 'bits') f.write('\t\t' + conv_type)
if(np.shape(input)[0] == 1): if(np.shape(input)[1] == 1):
f.write('_1ch') f.write('_1ch')
f.write('(inp_dim, f_dim1, outp_dim1, in, F1, B1, ') f.write('(inp_dim, f_dim1, outp_dim1, in, F1, B1, ')
f.write('out1, STRIDE1, pad_1, SB1, MV1, SV1);') f.write('out1, STRIDE1, pad_1, SB1, MV1, SV1);')
else: else:
f.write('\t\tconv2_' + str(optimal_config[j]) + 'bits(outp_dim' + str(i-1) + ', f_dim' + str(i)) f.write('\t\t' + conv_type + '(outp_dim' + str(i-1) + ', f_dim' + str(i))
f.write(', outp_dim' + str(i) + ', out' + str(i-1) + ', F' + str(fi) + ', B' + str(fi) + ', out') f.write(', outp_dim' + str(i) + ', out' + str(i-1) + ', F' + str(fi) + ', B' + str(fi) + ', out')
f.write(str(i) + ', STRIDE' + str(fi) + ', pad_' + str(i) + ', SB' + str(fi)) f.write(str(i) + ', STRIDE' + str(fi) + ', pad_' + str(i) + ', SB' + str(fi))
f.write(', MV' + str(fi) + ', SV' + str(fi) + ');') f.write(', MV' + str(fi) + ', SV' + str(fi) + ');')
@@ -1314,11 +1419,19 @@ def generate_opt_c_code_cnn(path, name, input, cnn_details, int_weights, optimal
f.write('\n') f.write('\n')
i += 1 i += 1
if flatten == 0:
f.write('\t\tflatten(outp_dim' + str(i-1) + ', out' + str(i-1) + ', out' + str(i) + ');\n\n')
i += 1
f.write('\t\tmlp_layer_' + str(optimal_config[j]) + 'bits(out' + str(i-1) + ', out, ')
f.write('flatten_dim, OUT_DIM, W1, B' + str(fi + dn - 1) + ', SB' + str(fi + dn - 1) + ', MV')
f.write(str(fi + dn - 1) + ', SV' + str(fi + dn - 1) + ');\n')
else:
f.write('\t\tmlp_layer_' + str(optimal_config[-1]) + 'bits(out' + str(i-1) + ', out, DENSE_DIM' + str(dn-1)) f.write('\t\tmlp_layer_' + str(optimal_config[-1]) + 'bits(out' + str(i-1) + ', out, DENSE_DIM' + str(dn-1))
f.write(', OUT_DIM, W' + str(dn) + ', B' + str(fi + dn - 1)) f.write(', OUT_DIM, W' + str(dn) + ', B' + str(fi + dn - 1))
f.write(', SB' + str(fi + dn - 1) + ', MV' + str(fi + dn - 1)) f.write(', SB' + str(fi + dn - 1) + ', MV' + str(fi + dn - 1))
f.write(', SV' + str(fi + dn - 1) + ');\n') f.write(', SV' + str(fi + dn - 1) + ');\n')
f.write('\n\t\tpcount_enable(0);\n\n')
f.write('\t\tputs("Output Layer Values:\\n");\n') f.write('\t\tputs("Output Layer Values:\\n");\n')
f.write('\t\tfor(int i = 0; i < OUT_DIM; i++) {\n') f.write('\t\tfor(int i = 0; i < OUT_DIM; i++) {\n')
f.write('\t\t\tputhex((out[i] & 0xFF000000) >> 24);\n') f.write('\t\t\tputhex((out[i] & 0xFF000000) >> 24);\n')
+111 -20
View File
@@ -11,6 +11,9 @@ from torch import nn, optim
import brevitas.nn as qnn import brevitas.nn as qnn
from brevitas.quant import * from brevitas.quant import *
from brevitas.core.restrict_val import RestrictValueType
from collections import defaultdict
from torchinfo import summary from torchinfo import summary
def net_input_size(X_train): def net_input_size(X_train):
@@ -202,7 +205,21 @@ def generate_sequences(length, values = [2, 4, 8]):
def create_weight_confs(macc_per_layer): def create_weight_confs(macc_per_layer):
total_macc_opt = [] total_macc_opt = []
weights_per_layer = generate_sequences(len(macc_per_layer))
cc = 0
idx = []
if(len(macc_per_layer) >= 6):
for i, mpl in enumerate(macc_per_layer):
if(mpl/max(macc_per_layer) < 0.05):
cc += 1
idx.append(i)
weights_per_layer = generate_sequences(len(macc_per_layer) - cc)
for w in weights_per_layer:
for i in idx:
w.insert(i, 8)
for w_conf in weights_per_layer: for w_conf in weights_per_layer:
macc = 0 macc = 0
@@ -234,24 +251,47 @@ def create_weight_confs(macc_per_layer):
# Define a mapping from PyTorch layers to Brevitas layers # Define a mapping from PyTorch layers to Brevitas layers
def create_layer_mapping(bit_width): def create_layer_mapping(bit_width):
mapping = { mapping = {
nn.Conv2d: lambda layer, bw: qnn.QuantConv2d(in_channels = layer.in_channels, nn.Conv2d: lambda layer, bw: (qnn.QuantConv2d(in_channels=layer.in_channels,
out_channels = layer.out_channels, out_channels=layer.out_channels,
kernel_size = layer.kernel_size, kernel_size=layer.kernel_size,
stride = layer.stride[0], stride=layer.stride[0],
padding = layer.padding, padding=layer.padding,
bias = True, groups=layer.groups,
cache_inference_bias = True, bias=True,
bias_quant = Int32Bias, cache_inference_bias=True,
weight_bit_width = bw, bias_quant=Int32Bias,
weight_quant = Int8WeightPerTensorFloat), weight_bit_width=bw,
weight_quant=Int8WeightPerTensorFloat,
weight_scaling_min_val=2e-16,
restrict_scaling_type=RestrictValueType.LOG_FP,
return_quant_tensor=True
) if layer.groups != layer.in_channels else (
# Special case for depthwise convolutions
qnn.QuantConv2d(in_channels=layer.in_channels,
out_channels=layer.out_channels,
kernel_size=layer.kernel_size,
stride=layer.stride[0],
padding=layer.padding,
groups=layer.groups,
bias=True,
cache_inference_bias=True,
bias_quant=Int32Bias,
weight_bit_width=8, # Fixed bit width for depthwise convolutions
weight_quant=Int8WeightPerTensorFloat,
weight_scaling_min_val=2e-16,
restrict_scaling_type=RestrictValueType.LOG_FP,
return_quant_tensor=True))),
nn.Linear: lambda layer, bw: qnn.QuantLinear(in_features = layer.in_features, nn.Linear: lambda layer, bw: qnn.QuantLinear(in_features = layer.in_features,
out_features = layer.out_features, out_features = layer.out_features,
cache_inference_bias = True, cache_inference_bias = True,
weight_quant = Int8WeightPerTensorFloat,
bias_quant = Int32Bias, bias_quant = Int32Bias,
bias = True, bias = True,
weight_bit_width = bw),
weight_quant = Int8WeightPerTensorFloat,
weight_bit_width = bw,
return_quant_tensor=True),
nn.ReLU: lambda _, bw: qnn.QuantReLU(bit_width = bw, nn.ReLU: lambda _, bw: qnn.QuantReLU(bit_width = bw,
return_quant_tensor = True), return_quant_tensor = True),
@@ -278,13 +318,11 @@ def convert_layer(layer, bit_width, layer_mapping):
return layer return layer
# Function to convert a PyTorch model to a Brevitas model # Function to convert a PyTorch model to a Brevitas model
def convert_model(module, bit_widths, layer_mapping): def convert_model(module, bit_widths, layer_mapping, layer_idx = [0]):
layer_idx = [0]
brevitas_module = nn.Sequential() brevitas_module = nn.Sequential()
for name, layer in module.named_children(): for name, layer in module.named_children():
if list(layer.children()): # If the layer has children, recurse if list(layer.children()): # If the layer has children, recurse
brevitas_module.add_module(name, convert_model(layer, bit_widths, layer_mapping)) brevitas_module.add_module(name, convert_model(layer, bit_widths, layer_mapping, layer_idx))
else: else:
layer_type = type(layer) layer_type = type(layer)
if layer_type in [nn.Conv2d, nn.Linear]: if layer_type in [nn.Conv2d, nn.Linear]:
@@ -293,6 +331,7 @@ def convert_model(module, bit_widths, layer_mapping):
else: else:
bit_width = 8 bit_width = 8
brevitas_module.add_module(name, convert_layer(layer, bit_width, layer_mapping)) brevitas_module.add_module(name, convert_layer(layer, bit_width, layer_mapping))
return brevitas_module return brevitas_module
class Quant_Model(nn.Module): class Quant_Model(nn.Module):
@@ -300,13 +339,15 @@ class Quant_Model(nn.Module):
super(Quant_Model, self).__init__() super(Quant_Model, self).__init__()
if(input_sign): if(input_sign):
self.quant_inp = qnn.QuantIdentity(bit_width = 8, return_quant_tensor = True, self.quant_inp = qnn.QuantIdentity(bit_width = 8, return_quant_tensor = True,
act_quant = Uint8ActPerTensorFloat) act_quant = Uint8ActPerTensorFloat, scaling_min_val = 2e-16,
restrict_scaling_type = RestrictValueType.LOG_FP)
else: else:
self.quant_inp = qnn.QuantIdentity(bit_width = 8, return_quant_tensor = True, self.quant_inp = qnn.QuantIdentity(bit_width = 8, return_quant_tensor = True,
act_quant = Int8ActPerTensorFloat) act_quant = Int8ActPerTensorFloat, scaling_min_val = 2e-16,
restrict_scaling_type = RestrictValueType.LOG_FP)
self.sequential = convert_model(og_model, w, layer_mapping) self.sequential = convert_model(og_model, w, layer_mapping, [0])
self.o_quant = qnn.QuantIdentity(bit_width = 8, return_quant_tensor = True) self.o_quant = qnn.QuantIdentity(bit_width = 8, return_quant_tensor = True)
def forward(self, X): def forward(self, X):
@@ -315,6 +356,35 @@ class Quant_Model(nn.Module):
X = self.o_quant(X) X = self.o_quant(X)
return F.log_softmax(X, dim = 1) return F.log_softmax(X, dim = 1)
def count_layers_in_sequential(module):
    """Count Conv2d and Linear layers held directly by each nn.Sequential.

    The module tree rooted at ``module`` is walked depth-first in pre-order
    (a container is visited before its children, and children are visited
    in registration order).  Every ``nn.Sequential`` met during the walk
    contributes one ``(conv_count, linear_count)`` tuple that counts only
    its *direct* children; layers nested deeper are attributed to the
    nested ``nn.Sequential`` that owns them.

    Returns:
        The collected tuples in visiting order with the first one dropped,
        i.e. the entry of the first ``nn.Sequential`` reached by the walk
        is not reported.  An empty list is returned when fewer than two
        ``nn.Sequential`` modules exist.
    """
    tallies = []

    # Explicit stack instead of recursion.  Children are pushed in reverse
    # so that they are popped (and therefore visited) in registration order.
    stack = [module]
    while stack:
        node = stack.pop()
        direct_children = list(node.children())

        if isinstance(node, nn.Sequential):
            convs = sum(1 for c in direct_children if isinstance(c, nn.Conv2d))
            # Conv2d takes precedence, so a layer is never counted twice.
            linears = sum(
                1 for c in direct_children
                if isinstance(c, nn.Linear) and not isinstance(c, nn.Conv2d)
            )
            tallies.append((convs, linears))

        stack.extend(reversed(direct_children))

    return tallies[1:]
def train_quant_model(quant_net, train_loader, val_loader = None, device = 'cpu', def train_quant_model(quant_net, train_loader, val_loader = None, device = 'cpu',
epochs = 20, lr = 0.0001): epochs = 20, lr = 0.0001):
@@ -392,6 +462,7 @@ def dse(og_model, max_acc_drop, weights_per_layer, fp_accuracy, train_loader, te
device = 'cpu', epochs = 5, lr = 0.0001): device = 'cpu', epochs = 5, lr = 0.0001):
sign = calculate_minimum(train_loader) >= 0 sign = calculate_minimum(train_loader) >= 0
seq_counts = count_layers_in_sequential(og_model)
if max_acc_drop is not None: if max_acc_drop is not None:
print('\nDSE STARTING ... BINARY SEARCH') print('\nDSE STARTING ... BINARY SEARCH')
@@ -402,6 +473,16 @@ def dse(og_model, max_acc_drop, weights_per_layer, fp_accuracy, train_loader, te
mid = (low + high) // 2 mid = (low + high) // 2
w = weights_per_layer[mid] w = weights_per_layer[mid]
f_w = []
for i in range(len(seq_counts)):
t_w = w[i]
c,l = seq_counts[i]
for j in range(c+l):
f_w.append(t_w)
if(len(seq_counts) > 0):
w = f_w
# Create and train the quantized network # Create and train the quantized network
layer_mapping = create_layer_mapping(w) layer_mapping = create_layer_mapping(w)
quant_net = Quant_Model(og_model, w, layer_mapping, sign) quant_net = Quant_Model(og_model, w, layer_mapping, sign)
@@ -436,6 +517,16 @@ def dse(og_model, max_acc_drop, weights_per_layer, fp_accuracy, train_loader, te
print('\nDSE STARTING ... EXHAUSTIVE SEARCH') print('\nDSE STARTING ... EXHAUSTIVE SEARCH')
test_accuracy = [] test_accuracy = []
for i, w in enumerate(weights_per_layer): for i, w in enumerate(weights_per_layer):
f_w = []
for i in range(len(seq_counts)):
t_w = w[i]
c,l = seq_counts[i]
for j in range(c+l):
f_w.append(t_w)
if(len(seq_counts) > 0):
w = f_w
layer_mapping = create_layer_mapping(w) layer_mapping = create_layer_mapping(w)
quant_net = Quant_Model(og_model, w, layer_mapping, sign) quant_net = Quant_Model(og_model, w, layer_mapping, sign)
quant_net = quant_net.to(device) quant_net = quant_net.to(device)
+165 -25
View File
@@ -151,15 +151,154 @@ class Ibex_Lenet5(nn.Module):
return X return X
class Ibex_CMSIS_CNN(nn.Module):
    """Integer-arithmetic reference model of a three-stage 5x5 CNN.

    The network is conv(3->32) / conv(32->32) / conv(32->64), each with a
    5x5 kernel, 'same' padding and a 2x2 max-pool, followed by a
    ``Linear(1024, 10)`` classifier.  1024 = 64 channels * 4 * 4, which
    corresponds to a 32x32 input image after the three pooling stages.

    Every convolution / linear output is requantized with
    :meth:`_requantize` (integer multiply, rounding add, right shift,
    saturation to [0, 255]).

    Args:
        mul_vals: Indexable with at least four multipliers, one per layer
            in the order conv1, conv2, conv3, linear1.
        shift_vals: Indexable with at least four shift amounts in the same
            order; 7 is added to each before it is stored.
    """

    def __init__(self, mul_vals, shift_vals):
        super(Ibex_CMSIS_CNN, self).__init__()

        self.m0 = mul_vals[0]
        self.m1 = mul_vals[1]
        self.m2 = mul_vals[2]
        self.m3 = mul_vals[3]

        self.s0 = shift_vals[0] + 7
        self.s1 = shift_vals[1] + 7
        self.s2 = shift_vals[2] + 7
        self.s3 = shift_vals[3] + 7

        # Registration order fixes the state_dict key order
        # (conv1, conv2, conv3, linear1), so it must not be changed.
        self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 32, kernel_size = 5, padding = 'same')
        self.max1 = nn.MaxPool2d(2,2)

        self.conv2 = nn.Conv2d(in_channels = 32, out_channels = 32, kernel_size = 5, padding = 'same')
        self.max2 = nn.MaxPool2d(2,2)

        self.conv3 = nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 5, padding = 'same')
        self.max3 = nn.MaxPool2d(2,2)

        self.linear1 = nn.Linear(1024, 10)

    @staticmethod
    def _requantize(X, mul, shift):
        """Scale, round, shift and saturate a layer output.

        Computes ``clamp((X * mul + (1 << (shift - 1))) >> shift, 0, 255)``.
        Adding half of the divisor before the arithmetic right shift makes
        the shift round to nearest instead of flooring.

        The casts use ``Tensor.to(dtype)`` rather than
        ``Tensor.type(torch.LongTensor)``: the latter converts to a *CPU*
        tensor type and would pull the activations off an accelerator.
        """
        X = torch.mul(X, mul)
        rounding = torch.bitwise_left_shift(torch.tensor(1), shift - 1)
        X = torch.add(X, rounding).to(torch.int64)
        X = torch.bitwise_right_shift(X, shift).to(torch.float32)
        return torch.clamp(X, min = 0, max = 255)

    def forward(self, X, print_out = False):
        """Run the network.

        Args:
            X: Input batch with 3 channels (32x32 spatial size for the
                flattened features to match ``linear1``).
            print_out: When true, print the final output tensor.

        Returns:
            Float tensor of shape ``(batch, 10)`` holding integer values
            saturated to [0, 255].
        """
        X = self.max1(self._requantize(self.conv1(X), self.m0, self.s0))
        X = self.max2(self._requantize(self.conv2(X), self.m1, self.s1))
        X = self.max3(self._requantize(self.conv3(X), self.m2, self.s2))

        X = X.reshape(X.shape[0], -1)
        X = self._requantize(self.linear1(X), self.m3, self.s3)

        if(print_out):
            print(X)

        return X
class Ibex_DepthwiseBlock(nn.Module):
    """Depthwise-separable convolution block with integer requantization.

    A 3x3 depthwise convolution (``groups == in_channels``, padding 1) is
    followed by a 1x1 pointwise convolution; both preserve the spatial
    size.  Each convolution output is requantized with
    :meth:`_requantize`.

    Args:
        in_channels: Channels of the input (and of the depthwise output).
        out_channels: Channels produced by the pointwise convolution.
        mul_vals: Indexable with two multipliers: depthwise, pointwise.
        shift_vals: Indexable with two shift amounts in the same order;
            7 is added to each before it is stored.
    """

    def __init__(self, in_channels, out_channels, mul_vals, shift_vals):
        super(Ibex_DepthwiseBlock, self).__init__()

        # Registration order fixes the state_dict key order (dw, then pw).
        self.dw = nn.Conv2d(in_channels = in_channels, out_channels = in_channels,
                            kernel_size = 3, padding = 1, groups = in_channels)
        self.pw = nn.Conv2d(in_channels = in_channels, out_channels = out_channels,
                            kernel_size = 1, padding = 0)

        self.m0 = mul_vals[0]
        self.m1 = mul_vals[1]
        self.s0 = shift_vals[0] + 7
        self.s1 = shift_vals[1] + 7

    @staticmethod
    def _requantize(X, mul, shift):
        """Scale, round, shift and saturate a layer output.

        Computes ``clamp((X * mul + (1 << (shift - 1))) >> shift, 0, 255)``.
        Adding half of the divisor before the arithmetic right shift makes
        the shift round to nearest instead of flooring.

        The casts use ``Tensor.to(dtype)`` rather than
        ``Tensor.type(torch.LongTensor)``: the latter converts to a *CPU*
        tensor type and would pull the activations off an accelerator.
        """
        X = torch.mul(X, mul)
        rounding = torch.bitwise_left_shift(torch.tensor(1), shift - 1)
        X = torch.add(X, rounding).to(torch.int64)
        X = torch.bitwise_right_shift(X, shift).to(torch.float32)
        return torch.clamp(X, min = 0, max = 255)

    def forward(self, X):
        """Apply depthwise then pointwise convolution, requantizing after each.

        Returns:
            Float tensor with ``out_channels`` channels and the same spatial
            size as ``X``, holding integer values saturated to [0, 255].
        """
        X = self._requantize(self.dw(X), self.m0, self.s0)
        X = self._requantize(self.pw(X), self.m1, self.s1)
        return X
class Ibex_Cifar10_Dws_CNN(nn.Module):
    """Integer-arithmetic depthwise-separable CNN with a 10-way classifier.

    ``features`` stacks six ``Ibex_DepthwiseBlock`` modules
    (3->64, 64->64, 64->128, 128->128, 128->256, 256->256) with a 2x2
    max-pool after every second block.  The classifier is
    ``Linear(256 * 4 * 4, 10)``, which corresponds to a 32x32 input image
    after the three pooling stages.

    Args:
        mul_vals: Indexable/sliceable with at least 13 multipliers: two per
            depthwise block (indices 0-11) and one for the classifier
            (index 12).
        shift_vals: Shift amounts laid out like ``mul_vals``.  7 is added
            to the classifier shift here; the block shifts are passed on
            unchanged to ``Ibex_DepthwiseBlock``.
    """

    def __init__(self, mul_vals, shift_vals):
        super(Ibex_Cifar10_Dws_CNN, self).__init__()
        self.features = nn.Sequential(
            Ibex_DepthwiseBlock(3, 64, mul_vals[0:2], shift_vals[0:2]),
            Ibex_DepthwiseBlock(64, 64, mul_vals[2:4], shift_vals[2:4]),
            nn.MaxPool2d(kernel_size = 2, stride = 2),

            Ibex_DepthwiseBlock(64, 128, mul_vals[4:6], shift_vals[4:6]),
            Ibex_DepthwiseBlock(128, 128, mul_vals[6:8], shift_vals[6:8]),
            nn.MaxPool2d(kernel_size = 2, stride = 2),

            Ibex_DepthwiseBlock(128, 256, mul_vals[8:10], shift_vals[8:10]),
            Ibex_DepthwiseBlock(256, 256, mul_vals[10:12], shift_vals[10:12]),
            nn.MaxPool2d(kernel_size = 2, stride = 2)
        )
        self.flatten = nn.Flatten()
        self.classifier = nn.Sequential(
            nn.Linear(256 * 4 * 4, 10)
        )

        self.m_cl = mul_vals[12]
        self.s_cl = shift_vals[12] + 7

    def forward(self, x, print_out = False):
        """Run the network.

        Args:
            x: Input batch with 3 channels (32x32 spatial size for the
                flattened features to match the classifier).
            print_out: When true, print the final output tensor.

        Returns:
            Float tensor of shape ``(batch, 10)`` holding integer values
            saturated to [0, 255].
        """
        x = self.features(x)
        x = self.flatten(x)
        x = self.classifier(x)

        # Requantize the classifier output:
        # clamp((x * m + (1 << (s - 1))) >> s, 0, 255).
        # Tensor.to(dtype) is used instead of Tensor.type(torch.LongTensor),
        # because the latter converts to a CPU tensor type and would move
        # the activations off an accelerator.
        x = torch.mul(x, self.m_cl)
        rounding = torch.bitwise_left_shift(torch.tensor(1), self.s_cl - 1)
        x = torch.add(x, rounding).to(torch.int64)
        x = torch.bitwise_right_shift(x, self.s_cl).to(torch.float32)
        x = torch.clamp(x, min = 0, max = 255)

        if(print_out):
            print(x)

        return x
def configure_network(ibex_model_dict, int_weights, int_biases):
    """Overwrite state-dict entries with integer weights and biases.

    Entries are visited in the dictionary's iteration order and are treated
    as alternating weight, bias, weight, bias, ...: the entry at position
    ``2k`` receives ``torch.tensor(int_weights[k])`` and the entry at
    position ``2k + 1`` receives ``torch.tensor(int_biases[k])``.

    The dictionary is modified in place and is also returned.
    """
    for position, key in enumerate(ibex_model_dict):
        # Even positions are weights, odd positions are biases.
        layer, is_bias = divmod(position, 2)
        source = int_biases if is_bias else int_weights
        ibex_model_dict[key] = torch.tensor(source[layer])
    return ibex_model_dict
def create_fann_model(int_weights, int_biases, mul_vals, shift_vals):
    """Build an ``Ibex_FANN`` and load the given integer parameters into it.

    The model's state dict is filled by ``configure_network`` from
    ``int_weights`` / ``int_biases`` and loaded back into the model, which
    is then returned.
    """
    network = Ibex_FANN(mul_vals, shift_vals)
    parameters = configure_network(network.state_dict(), int_weights, int_biases)
    network.load_state_dict(parameters)
    return network
@@ -168,15 +307,7 @@ def create_uci_model(int_weights, int_biases, mul_vals, shift_vals):
ibex_model = Ibex_UCI_MLP(mul_vals, shift_vals) ibex_model = Ibex_UCI_MLP(mul_vals, shift_vals)
ibex_model_dict = ibex_model.state_dict() ibex_model_dict = ibex_model.state_dict()
ibex_model_dict['fc0.weight'] = torch.tensor(int_weights[0]) ibex_model_dict = configure_network(ibex_model_dict, int_weights, int_biases)
ibex_model_dict['fc1.weight'] = torch.tensor(int_weights[1])
ibex_model_dict['fc2.weight'] = torch.tensor(int_weights[2])
ibex_model_dict['fc3.weight'] = torch.tensor(int_weights[3])
ibex_model_dict['fc0.bias'] = torch.tensor(int_biases[0])
ibex_model_dict['fc1.bias'] = torch.tensor(int_biases[1])
ibex_model_dict['fc2.bias'] = torch.tensor(int_biases[2])
ibex_model_dict['fc3.bias'] = torch.tensor(int_biases[3])
ibex_model.load_state_dict(ibex_model_dict) ibex_model.load_state_dict(ibex_model_dict)
@@ -186,24 +317,33 @@ def create_lenet_model(int_weights, int_biases, mul_vals, shift_vals):
ibex_model = Ibex_Lenet5(mul_vals, shift_vals) ibex_model = Ibex_Lenet5(mul_vals, shift_vals)
ibex_model_dict = ibex_model.state_dict() ibex_model_dict = ibex_model.state_dict()
ibex_model_dict['conv1.weight'] = torch.tensor(int_weights[0]) ibex_model_dict = configure_network(ibex_model_dict, int_weights, int_biases)
ibex_model_dict['conv2.weight'] = torch.tensor(int_weights[1])
ibex_model_dict['fc1.weight'] = torch.tensor(int_weights[2])
ibex_model_dict['fc2.weight'] = torch.tensor(int_weights[3])
ibex_model_dict['fc3.weight'] = torch.tensor(int_weights[4])
ibex_model_dict['conv1.bias'] = torch.tensor(int_biases[0]) ibex_model.load_state_dict(ibex_model_dict)
ibex_model_dict['conv2.bias'] = torch.tensor(int_biases[1])
ibex_model_dict['fc1.bias'] = torch.tensor(int_biases[2]) return ibex_model
ibex_model_dict['fc2.bias'] = torch.tensor(int_biases[3])
def create_cmsis_cnn_model(int_weights, int_biases, mul_vals, shift_vals):
    """Build an ``Ibex_CMSIS_CNN`` and load the given integer parameters.

    The model's state dict is filled by ``configure_network`` from
    ``int_weights`` / ``int_biases`` and loaded back into the model, which
    is then returned.
    """
    network = Ibex_CMSIS_CNN(mul_vals, shift_vals)
    parameters = configure_network(network.state_dict(), int_weights, int_biases)
    network.load_state_dict(parameters)
    return network
def create_ibex_dws_model(int_weights, int_biases, mul_vals, shift_vals):
    """Build an ``Ibex_Cifar10_Dws_CNN`` and load the given integer parameters.

    The model's state dict is filled by ``configure_network`` from
    ``int_weights`` / ``int_biases`` and loaded back into the model, which
    is then returned.
    """
    network = Ibex_Cifar10_Dws_CNN(mul_vals, shift_vals)
    parameters = configure_network(network.state_dict(), int_weights, int_biases)
    network.load_state_dict(parameters)
    return network
def eval_sim_model(quant_model, ibex_model, test_loader): def eval_sim_model(quant_model, ibex_model, test_loader):
# Turn off gradients for validation
with torch.no_grad(): with torch.no_grad():
ibex_model.eval() ibex_model.eval()
correct = 0 correct = 0