Adding new features
This commit is contained in:
@@ -0,0 +1,15 @@
|
||||
# Copyright lowRISC contributors.
|
||||
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Generate a baremetal application
|
||||
|
||||
# Name of the program; $(PROGRAM).c will be added as a source file.
PROGRAM = cifar10_dws_cnn

# Directory holding this Makefile: the last word of MAKEFILE_LIST is the
# makefile currently being read, realpath canonicalises it and dirname strips
# the file name.
PROGRAM_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))

# Any extra source files to include in the build. Use the upper case .S
# extension for assembly files.
EXTRA_SRCS :=

# Pull in common.mk from the "common" directory two levels up.
include ${PROGRAM_DIR}/../../common/common.mk
|
||||
@@ -0,0 +1,298 @@
|
||||
#include "simple_system_common.h"
|
||||
#include "cnn_weights.h"
|
||||
#include "fully_connected_opt.h"
|
||||
#include "ibex_cnn_params.h"
|
||||
#include "ibex_inputs.h"
|
||||
#include "conv2d_opt.h"
|
||||
#include "dws_conv_opt.h"
|
||||
|
||||
/* Input geometry: the network consumes an IMG_SZ x IMG_SZ image with
 * NUM_FIL0 channels. */
#define IMG_SZ   32
#define NUM_FIL0 1

/* Kernel side length of convolution n (kernels are FILTERn x FILTERn). */
#define FILTER1  3
#define FILTER2  1
#define FILTER3  3
#define FILTER4  1
#define FILTER5  3
#define FILTER6  1
#define FILTER7  3
#define FILTER8  1
#define FILTER9  3
#define FILTER10 1
#define FILTER11 3
#define FILTER12 1

/* Number of filters (output depth) of convolution n. */
#define NUM_FIL1  1
#define NUM_FIL2  16
#define NUM_FIL3  16
#define NUM_FIL4  16
#define NUM_FIL5  16
#define NUM_FIL6  32
#define NUM_FIL7  32
#define NUM_FIL8  32
#define NUM_FIL9  32
#define NUM_FIL10 64
#define NUM_FIL11 64
#define NUM_FIL12 64

/* Stride of convolution n. */
#define STRIDE1  1
#define STRIDE2  1
#define STRIDE3  1
#define STRIDE4  1
#define STRIDE5  1
#define STRIDE6  1
#define STRIDE7  1
#define STRIDE8  1
#define STRIDE9  1
#define STRIDE10 1
#define STRIDE11 1
#define STRIDE12 1

/* Padding of convolution n. PAD_TBn feeds the height computation and PAD_LRn
 * the width computation (presumably top/bottom and left/right). */
#define PAD_TB1  1
#define PAD_LR1  1

#define PAD_TB2  0
#define PAD_LR2  0

#define PAD_TB3  1
#define PAD_LR3  1

#define PAD_TB4  0
#define PAD_LR4  0

#define PAD_TB5  1
#define PAD_LR5  1

#define PAD_TB6  0
#define PAD_LR6  0

#define PAD_TB7  1
#define PAD_LR7  1

#define PAD_TB8  0
#define PAD_LR8  0

#define PAD_TB9  1
#define PAD_LR9  1

#define PAD_TB10 0
#define PAD_LR10 0

#define PAD_TB11 1
#define PAD_LR11 1

#define PAD_TB12 0
#define PAD_LR12 0

/* Window size and stride of the three max-pooling stages. */
#define POOL_STRIDE1 2
#define POOL_SIZE1   2

#define POOL_STRIDE2 2
#define POOL_SIZE2   2

#define POOL_STRIDE3 2
#define POOL_SIZE3   2

/* Number of values produced by the final fully connected layer. */
#define OUT_DIM 3

/* Number of input samples processed per run. */
#define SAMPLES 1

/* NOTE(review): not referenced anywhere in the visible part of this file;
 * confirm whether another translation unit uses it before removing. */
int outs[SAMPLES][OUT_DIM];
|
||||
|
||||
void cifar10_dws_cnn() {
|
||||
|
||||
int dout1 = NUM_FIL1;
|
||||
int hout1 = ((IMG_SZ - FILTER1 + 2 * PAD_TB1)/STRIDE1) + 1;
|
||||
int wout1 = ((IMG_SZ - FILTER1 + 2 * PAD_LR1)/STRIDE1) + 1;
|
||||
|
||||
int dout2 = NUM_FIL2;
|
||||
int hout2 = ((hout1 - FILTER2+ 2 * PAD_TB2)/STRIDE2)+1;
|
||||
int wout2 = ((wout1 - FILTER2+ 2 * PAD_LR2)/STRIDE2)+1;
|
||||
|
||||
int dout3 = NUM_FIL3;
|
||||
int hout3 = ((hout2 - FILTER3+ 2 * PAD_TB3)/STRIDE3)+1;
|
||||
int wout3 = ((wout2 - FILTER3+ 2 * PAD_LR3)/STRIDE3)+1;
|
||||
|
||||
int dout4 = NUM_FIL4;
|
||||
int hout4 = ((hout3 - FILTER4+ 2 * PAD_TB4)/STRIDE4)+1;
|
||||
int wout4 = ((wout3 - FILTER4+ 2 * PAD_LR4)/STRIDE4)+1;
|
||||
|
||||
int dout5 = dout4;
|
||||
int hout5 = hout4/POOL_STRIDE1;
|
||||
int wout5 = wout4/POOL_STRIDE1;
|
||||
|
||||
int dout6 = NUM_FIL5;
|
||||
int hout6 = ((hout5 - FILTER5+ 2 * PAD_TB5)/STRIDE5)+1;
|
||||
int wout6 = ((wout5 - FILTER5+ 2 * PAD_LR5)/STRIDE5)+1;
|
||||
|
||||
int dout7 = NUM_FIL6;
|
||||
int hout7 = ((hout6 - FILTER6+ 2 * PAD_TB6)/STRIDE6)+1;
|
||||
int wout7 = ((wout6 - FILTER6+ 2 * PAD_LR6)/STRIDE6)+1;
|
||||
|
||||
int dout8 = NUM_FIL7;
|
||||
int hout8 = ((hout7 - FILTER7+ 2 * PAD_TB7)/STRIDE7)+1;
|
||||
int wout8 = ((wout7 - FILTER7+ 2 * PAD_LR7)/STRIDE7)+1;
|
||||
|
||||
int dout9 = NUM_FIL8;
|
||||
int hout9 = ((hout8 - FILTER8+ 2 * PAD_TB8)/STRIDE8)+1;
|
||||
int wout9 = ((wout8 - FILTER8+ 2 * PAD_LR8)/STRIDE8)+1;
|
||||
|
||||
int dout10 = dout9;
|
||||
int hout10 = hout9/POOL_STRIDE2;
|
||||
int wout10 = wout9/POOL_STRIDE2;
|
||||
|
||||
int dout11 = NUM_FIL9;
|
||||
int hout11 = ((hout10 - FILTER9+ 2 * PAD_TB9)/STRIDE9)+1;
|
||||
int wout11 = ((wout10 - FILTER9+ 2 * PAD_LR9)/STRIDE9)+1;
|
||||
|
||||
int dout12 = NUM_FIL10;
|
||||
int hout12 = ((hout11 - FILTER10+ 2 * PAD_TB10)/STRIDE10)+1;
|
||||
int wout12 = ((wout11 - FILTER10+ 2 * PAD_LR10)/STRIDE10)+1;
|
||||
|
||||
int dout13 = NUM_FIL11;
|
||||
int hout13 = ((hout12 - FILTER11+ 2 * PAD_TB11)/STRIDE11)+1;
|
||||
int wout13 = ((wout12 - FILTER11+ 2 * PAD_LR11)/STRIDE11)+1;
|
||||
|
||||
int dout14 = NUM_FIL12;
|
||||
int hout14 = ((hout13 - FILTER12+ 2 * PAD_TB12)/STRIDE12)+1;
|
||||
int wout14 = ((wout13 - FILTER12+ 2 * PAD_LR12)/STRIDE12)+1;
|
||||
|
||||
int dout15 = dout14;
|
||||
int hout15 = hout14/POOL_STRIDE3;
|
||||
int wout15 = wout14/POOL_STRIDE3;
|
||||
|
||||
int flatten_dim = dout15 * hout15 * wout15;
|
||||
|
||||
int in[IMG_SZ][IMG_SZ][NUM_FIL0];
|
||||
int inp_dim[3] = {IMG_SZ, IMG_SZ, NUM_FIL0};
|
||||
|
||||
int out1[hout1][wout1][dout1];
|
||||
int pad_1[4] = {PAD_TB1, PAD_TB1, PAD_LR1, PAD_LR1};
|
||||
int outp_dim1[3] = {hout1, wout1, dout1};
|
||||
int f_dim1[4] = {NUM_FIL1, FILTER1, FILTER1, NUM_FIL0};
|
||||
|
||||
int out2[hout2][wout2][dout2];
|
||||
int pad_2[4] = {PAD_TB2, PAD_TB2, PAD_LR2, PAD_LR2};
|
||||
int outp_dim2[3] = {hout2, wout2, dout2};
|
||||
int f_dim2[4] = {NUM_FIL2, FILTER2, FILTER2, NUM_FIL1};
|
||||
|
||||
int out3[hout3][wout3][dout3];
|
||||
int pad_3[4] = {PAD_TB3, PAD_TB3, PAD_LR3, PAD_LR3};
|
||||
int outp_dim3[3] = {hout3, wout3, dout3};
|
||||
int f_dim3[4] = {NUM_FIL3, FILTER3, FILTER3, NUM_FIL2};
|
||||
|
||||
int out4[hout4][wout4][dout4];
|
||||
int pad_4[4] = {PAD_TB4, PAD_TB4, PAD_LR4, PAD_LR4};
|
||||
int outp_dim4[3] = {hout4, wout4, dout4};
|
||||
int f_dim4[4] = {NUM_FIL4, FILTER4, FILTER4, NUM_FIL3};
|
||||
|
||||
int out5[hout5][wout5][dout5];
|
||||
int outp_dim5[3] = {hout5, wout5, dout5};
|
||||
|
||||
int out6[hout6][wout6][dout6];
|
||||
int pad_6[4] = {PAD_TB5, PAD_TB5, PAD_LR5, PAD_LR5};
|
||||
int outp_dim6[3] = {hout6, wout6, dout6};
|
||||
int f_dim6[4] = {NUM_FIL5, FILTER5, FILTER5, NUM_FIL4};
|
||||
|
||||
int out7[hout7][wout7][dout7];
|
||||
int pad_7[4] = {PAD_TB6, PAD_TB6, PAD_LR6, PAD_LR6};
|
||||
int outp_dim7[3] = {hout7, wout7, dout7};
|
||||
int f_dim7[4] = {NUM_FIL6, FILTER6, FILTER6, NUM_FIL5};
|
||||
|
||||
int out8[hout8][wout8][dout8];
|
||||
int pad_8[4] = {PAD_TB7, PAD_TB7, PAD_LR7, PAD_LR7};
|
||||
int outp_dim8[3] = {hout8, wout8, dout8};
|
||||
int f_dim8[4] = {NUM_FIL7, FILTER7, FILTER7, NUM_FIL6};
|
||||
|
||||
int out9[hout9][wout9][dout9];
|
||||
int pad_9[4] = {PAD_TB8, PAD_TB8, PAD_LR8, PAD_LR8};
|
||||
int outp_dim9[3] = {hout9, wout9, dout9};
|
||||
int f_dim9[4] = {NUM_FIL8, FILTER8, FILTER8, NUM_FIL7};
|
||||
|
||||
int out10[hout10][wout10][dout10];
|
||||
int outp_dim10[3] = {hout10, wout10, dout10};
|
||||
|
||||
int out11[hout11][wout11][dout11];
|
||||
int pad_11[4] = {PAD_TB9, PAD_TB9, PAD_LR9, PAD_LR9};
|
||||
int outp_dim11[3] = {hout11, wout11, dout11};
|
||||
int f_dim11[4] = {NUM_FIL9, FILTER9, FILTER9, NUM_FIL8};
|
||||
|
||||
int out12[hout12][wout12][dout12];
|
||||
int pad_12[4] = {PAD_TB10, PAD_TB10, PAD_LR10, PAD_LR10};
|
||||
int outp_dim12[3] = {hout12, wout12, dout12};
|
||||
int f_dim12[4] = {NUM_FIL10, FILTER10, FILTER10, NUM_FIL9};
|
||||
|
||||
int out13[hout13][wout13][dout13];
|
||||
int pad_13[4] = {PAD_TB11, PAD_TB11, PAD_LR11, PAD_LR11};
|
||||
int outp_dim13[3] = {hout13, wout13, dout13};
|
||||
int f_dim13[4] = {NUM_FIL11, FILTER11, FILTER11, NUM_FIL10};
|
||||
|
||||
int out14[hout14][wout14][dout14];
|
||||
int pad_14[4] = {PAD_TB12, PAD_TB12, PAD_LR12, PAD_LR12};
|
||||
int outp_dim14[3] = {hout14, wout14, dout14};
|
||||
int f_dim14[4] = {NUM_FIL12, FILTER12, FILTER12, NUM_FIL11};
|
||||
|
||||
int out15[hout15][wout15][dout15];
|
||||
int outp_dim15[3] = {hout15, wout15, dout15};
|
||||
|
||||
int out16[flatten_dim];
|
||||
|
||||
int out[OUT_DIM];
|
||||
|
||||
for (int iter = 0; iter < SAMPLES; iter++){
|
||||
|
||||
for(int i = 0; i < IMG_SZ; i++){
|
||||
for(int j = 0; j < IMG_SZ; j++){
|
||||
for(int k = 0; k < NUM_FIL0; k++){
|
||||
in[i][j][k] = input[i][j][k][iter];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pcount_enable(1);
|
||||
|
||||
dw_conv_opt_1ch(inp_dim, f_dim1, outp_dim1, in, F1, B1, out1, STRIDE1, pad_1, SB1, MV1, SV1);
|
||||
pw_conv_2bits(outp_dim1, f_dim2, outp_dim2, out1, F2, B2, out2, STRIDE2, pad_2, SB2, MV2, SV2);
|
||||
dw_conv_opt(outp_dim2, f_dim3, outp_dim3, out2, F3, B3, out3, STRIDE3, pad_3, SB3, MV3, SV3);
|
||||
pw_conv_8bits(outp_dim3, f_dim4, outp_dim4, out3, F4, B4, out4, STRIDE4, pad_4, SB4, MV4, SV4);
|
||||
maxpool2_compressed(outp_dim4, outp_dim5, out4, out5, POOL_SIZE1, POOL_STRIDE1);
|
||||
|
||||
dw_conv_opt(outp_dim5, f_dim6, outp_dim6, out5, F5, B5, out6, STRIDE5, pad_6, SB5, MV5, SV5);
|
||||
pw_conv_2bits(outp_dim6, f_dim7, outp_dim7, out6, F6, B6, out7, STRIDE6, pad_7, SB6, MV6, SV6);
|
||||
dw_conv_opt(outp_dim7, f_dim8, outp_dim8, out7, F7, B7, out8, STRIDE7, pad_8, SB7, MV7, SV7);
|
||||
pw_conv_8bits(outp_dim8, f_dim9, outp_dim9, out8, F8, B8, out9, STRIDE8, pad_9, SB8, MV8, SV8);
|
||||
maxpool2_compressed(outp_dim9, outp_dim10, out9, out10, POOL_SIZE2, POOL_STRIDE2);
|
||||
|
||||
dw_conv_opt(outp_dim10, f_dim11, outp_dim11, out10, F9, B9, out11, STRIDE9, pad_11, SB9, MV9, SV9);
|
||||
pw_conv_8bits(outp_dim11, f_dim12, outp_dim12, out11, F10, B10, out12, STRIDE10, pad_12, SB10, MV10, SV10);
|
||||
dw_conv_opt(outp_dim12, f_dim13, outp_dim13, out12, F11, B11, out13, STRIDE11, pad_13, SB11, MV11, SV11);
|
||||
pw_conv_8bits(outp_dim13, f_dim14, outp_dim14, out13, F12, B12, out14, STRIDE12, pad_14, SB12, MV12, SV12);
|
||||
maxpool2_compressed(outp_dim14, outp_dim15, out14, out15, POOL_SIZE3, POOL_STRIDE3);
|
||||
|
||||
flatten(outp_dim15, out15, out16);
|
||||
|
||||
mlp_layer_8bits(out16, out, flatten_dim, OUT_DIM, W1, B13, SB13, MV13, SV13);
|
||||
|
||||
pcount_enable(0);
|
||||
|
||||
puts("Output Layer Values:\n");
|
||||
for(int i = 0; i < OUT_DIM; i++) {
|
||||
puthex((out[i] & 0xFF000000) >> 24);
|
||||
puts(" ");
|
||||
puthex((out[i] & 0xFF0000) >> 16);
|
||||
puts(" ");
|
||||
puthex((out[i] & 0xFF00) >> 8);
|
||||
puts(" ");
|
||||
puthex(out[i] & 0xFF);
|
||||
puts("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Program entry point: calls pcount_enable(0) before handing over to
 * cifar10_dws_cnn(), which re-enables it only around the layer calls.
 * Always returns 0.
 */
int main(void) {
  pcount_enable(0);

  cifar10_dws_cnn();

  return 0;
}
|
||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1,84 @@
|
||||
#ifndef IBEX_CNN_PARAMS_H
|
||||
#define IBEX_CNN_PARAMS_H
|
||||
|
||||
/*
 * MVn: per-layer constant passed as the second-to-last argument of the layer
 * kernels. The values checked by hand are one byte replicated into all four
 * bytes of a 32-bit word (e.g. MV1 == 0x4B4B4B4B, i.e. 75 in every byte) --
 * presumably a packed multiplier; confirm against the kernel sources.
 */
#define MV1  1263225675
#define MV2  1886417008
#define MV3  1381126738
#define MV4  1263225675
#define MV5  1465341783
#define MV6  1280068684
#define MV7  1869573999
#define MV8  1600085855
#define MV9  1600085855
#define MV10 1970632053
#define MV11 1145324612
#define MV12 1532713819
#define MV13 1296911693

/*
 * SVn: per-layer constant passed as the last argument of the layer kernels.
 *
 * SV10 and SV13 have bit 31 set and therefore do not fit in a 32-bit int. An
 * unsuffixed decimal constant that large is typed `long`/`long long`, i.e.
 * 64 bits on a 32-bit target, so they carry a `u` suffix to stay 32-bit
 * unsigned words with the same bit pattern.
 * NOTE(review): the kernel parameter type is not visible here; confirm it is
 * a 32-bit type.
 */
#define SV1  2029118401
#define SV2  946921921
#define SV3  2029118401
#define SV4  1893843841
#define SV5  1893843841
#define SV6  1082196481
#define SV7  2029118401
#define SV8  2029118401
#define SV9  2029118401
#define SV10 2164392961u
#define SV11 2029118401
#define SV12 2029118401
#define SV13 2840765761u
|
||||
|
||||
/*
 * SBn: per-layer constant tables passed to the layer kernels. The table
 * lengths are 1, 16, 16, 16, 16, 32, 32, 32, 32, 64, 64, 64 and 3.
 * NOTE(review): these look like one entry per output channel of layer n;
 * the meaning of the packed values is not visible here -- confirm against
 * the kernel sources.
 */
static const int SB1[1] = {
  1
};

static const int SB2[16] = {
  3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3
};

static const int SB3[16] = {
  135266305, 1048577, 1, 8257, 8193, 135274497, 135266369, 8193,
  1, 8193, 65, 1, 134217729, 1, 1, 134225921
};

static const int SB4[16] = {
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 1048576, 0, 0, 0, 0
};

static const int SB5[16] = {
  134234177, 136323073, 135282689, 136331393, 270549121, 136331329, 136331329, 136323201,
  270540929, 270549121, 270540801, 270532737, 2105473, 8321, 2105345, 2113601
};

static const int SB6[32] = {
  3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3
};

static const int SB7[32] = {
  402669825, 540041217, 537927937, 4194561, 272638209, 537952513, 540049665, 541098049,
  404783361, 405831873, 2113793, 536879361, 403726593, 540049665, 271614209, 541089921,
  272662721, 271614209, 406880513, 541081793, 272662785, 538992897, 272662785, 403726593,
  540033281, 540049601, 1081537, 403726337, 269517057, 272646401, 3178625, 539001089
};

static const int SB8[32] = {
  270565504, 536895744, 406880512, 541090048, 268435712, 406864128, 540049408, 541065216,
  406872320, 541090048, 540049600, 405823552, 540041216, 4227264, 540049664, 271589632,
  537952320, 4219008, 540033216, 540041408, 541090048, 540049600, 405823552, 405823680,
  405823744, 541081856, 406880448, 402677888, 271606016, 138445056, 403726400, 405831680
};

static const int SB9[32] = {
  677380417, 542146817, 806404417, 677421249, 677429569, 810598721, 677421185, 677429441,
  408977665, 675332353, 536903937, 675283329, 675307905, 677429505, 811639105, 811639169,
  809549953, 407945601, 676380929, 676380993, 810582273, 810598721, 677429569, 675299649,
  541106433, 811630785, 675316097, 405848449, 811630913, 811630977, 806404225, 677421441
};

static const int SB10[64] = {
  139501824, 676364608, 673227072, 810582336, 405840256, 408969536, 541114624, 810590528,
  810557760, 675307776, 676331840, 811630848, 408994112, 676381056, 810598720, 537952576,
  541114432, 674267392, 542162944, 677429568, 408985920, 677429504, 542155136, 676372864,
  811639104, 407937344, 542146880, 811630976, 406896832, 675332416, 675316096, 674275712,
  677421120, 810590528, 540066112, 408969536, 811647232, 407920960, 273727616, 677421312,
  810582272, 676340096, 6332736, 671138176, 677421376, 677429568, 676372544, 540066176,
  676372800, 536912192, 406872384, 676372800, 805347712, 810590464, 5284160, 274776448,
  677413248, 541089984, 674283520, 541106560, 810598720, 137412992, 810598528, 811639168
};

static const int SB11[64] = {
  810607041, 678486337, 945865089, 810607041, 673227201, 939565505, 946913729, 943767937,
  946913729, 811647425, 944816449, 678478273, 811647361, 678478209, 812695937, 678453697,
  943776065, 810598849, 944800129, 677437825, 678486401, 946913665, 946921793, 541114753,
  945873345, 542163265, 544260417, 544244033, 939548801, 945865025, 678478145, 944824577,
  812704129, 5300673, 946889089, 676389057, 941679041, 675340609, 809558465, 273735937,
  678461889, 678478145, 812695873, 676381121, 678486465, 671138113, 810557825, 945856961,
  944775489, 946921665, 946897345, 809533889, 812695937, 812687809, 812696001, 945865089,
  676389249, 677413249, 945840449, 946913473, 943767937, 675332353, 676381121, 811647425
};

static const int SB12[64] = {
  810598784, 811647296, 677404992, 809550144, 677429632, 811647296, 810582400, 675332480,
  676381056, 810598720, 542163200, 543211840, 809533440, 673235328, 807444672, 675316096,
  810582016, 541114560, 677396800, 810590528, 676381056, 138453376, 809550208, 810598784,
  676372800, 810598784, 675332352, 542163328, 674242944, 677421440, 404799808, 542163328,
  809542016, 809542016, 810598784, 139501952, 674283712, 541114752, 811622784, 676372672,
  542155136, 543211904, 811639168, 811630912, 809542016, 676356480, 673218944, 811630976,
  810598720, 810582208, 675307584, 810598784, 543203648, 542163264, 677404672, 811630784,
  810590592, 810582400, 674275712, 810590528, 541098304, 675332416, 539001088, 811622784
};

static const int SB13[3] = {
  273736128, 946913728, 675282944
};
|
||||
|
||||
#endif /* IBEX_CNN_PARAMS_H */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,15 @@
|
||||
# Copyright lowRISC contributors.
|
||||
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Generate a baremetal application
|
||||
|
||||
# Name of the program; $(PROGRAM).c will be added as a source file.
PROGRAM = cifar10_dws_cnn

# Directory holding this Makefile: the last word of MAKEFILE_LIST is the
# makefile currently being read, realpath canonicalises it and dirname strips
# the file name.
PROGRAM_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))

# Any extra source files to include in the build. Use the upper case .S
# extension for assembly files.
EXTRA_SRCS :=

# Pull in common.mk from the "common" directory two levels up.
include ${PROGRAM_DIR}/../../common/common.mk
|
||||
@@ -0,0 +1,292 @@
|
||||
#include "simple_system_common.h"
|
||||
#include "cnn_weights.h"
|
||||
#include "fully_connected.h"
|
||||
#include "ibex_cnn_params.h"
|
||||
#include "ibex_inputs.h"
|
||||
#include "conv2d.h"
|
||||
#include "dws_conv.h"
|
||||
|
||||
/* Input geometry: the network consumes an IMG_SZ x IMG_SZ image with
 * NUM_FIL0 channels. */
#define IMG_SZ   32
#define NUM_FIL0 3

/* Kernel side length of convolution n (kernels are FILTERn x FILTERn). */
#define FILTER1  3
#define FILTER2  1
#define FILTER3  3
#define FILTER4  1
#define FILTER5  3
#define FILTER6  1
#define FILTER7  3
#define FILTER8  1
#define FILTER9  3
#define FILTER10 1
#define FILTER11 3
#define FILTER12 1

/* Number of filters (output depth) of convolution n. */
#define NUM_FIL1  3
#define NUM_FIL2  64
#define NUM_FIL3  64
#define NUM_FIL4  64
#define NUM_FIL5  64
#define NUM_FIL6  128
#define NUM_FIL7  128
#define NUM_FIL8  128
#define NUM_FIL9  128
#define NUM_FIL10 256
#define NUM_FIL11 256
#define NUM_FIL12 256

/* Stride of convolution n. */
#define STRIDE1  1
#define STRIDE2  1
#define STRIDE3  1
#define STRIDE4  1
#define STRIDE5  1
#define STRIDE6  1
#define STRIDE7  1
#define STRIDE8  1
#define STRIDE9  1
#define STRIDE10 1
#define STRIDE11 1
#define STRIDE12 1

/* Padding of convolution n. PAD_TBn feeds the height computation and PAD_LRn
 * the width computation (presumably top/bottom and left/right). */
#define PAD_TB1  1
#define PAD_LR1  1

#define PAD_TB2  0
#define PAD_LR2  0

#define PAD_TB3  1
#define PAD_LR3  1

#define PAD_TB4  0
#define PAD_LR4  0

#define PAD_TB5  1
#define PAD_LR5  1

#define PAD_TB6  0
#define PAD_LR6  0

#define PAD_TB7  1
#define PAD_LR7  1

#define PAD_TB8  0
#define PAD_LR8  0

#define PAD_TB9  1
#define PAD_LR9  1

#define PAD_TB10 0
#define PAD_LR10 0

#define PAD_TB11 1
#define PAD_LR11 1

#define PAD_TB12 0
#define PAD_LR12 0

/* Window size and stride of the three max-pooling stages. */
#define POOL_STRIDE1 2
#define POOL_SIZE1   2

#define POOL_STRIDE2 2
#define POOL_SIZE2   2

#define POOL_STRIDE3 2
#define POOL_SIZE3   2

/* Number of values produced by the final fully connected layer. */
#define OUT_DIM 10

/* Number of input samples processed per run. */
#define SAMPLES 1

/* NOTE(review): not referenced anywhere in the visible part of this file;
 * confirm whether another translation unit uses it before removing. */
int outs[SAMPLES][OUT_DIM];
|
||||
|
||||
void cifar10_dws_cnn() {
|
||||
|
||||
int dout1 = NUM_FIL1;
|
||||
int hout1 = ((IMG_SZ - FILTER1 + 2 * PAD_TB1)/STRIDE1) + 1;
|
||||
int wout1 = ((IMG_SZ - FILTER1 + 2 * PAD_LR1)/STRIDE1) + 1;
|
||||
|
||||
int dout2 = NUM_FIL2;
|
||||
int hout2 = ((hout1 - FILTER2+ 2 * PAD_TB2)/STRIDE2)+1;
|
||||
int wout2 = ((wout1 - FILTER2+ 2 * PAD_LR2)/STRIDE2)+1;
|
||||
|
||||
int dout3 = NUM_FIL3;
|
||||
int hout3 = ((hout2 - FILTER3+ 2 * PAD_TB3)/STRIDE3)+1;
|
||||
int wout3 = ((wout2 - FILTER3+ 2 * PAD_LR3)/STRIDE3)+1;
|
||||
|
||||
int dout4 = NUM_FIL4;
|
||||
int hout4 = ((hout3 - FILTER4+ 2 * PAD_TB4)/STRIDE4)+1;
|
||||
int wout4 = ((wout3 - FILTER4+ 2 * PAD_LR4)/STRIDE4)+1;
|
||||
|
||||
int dout5 = dout4;
|
||||
int hout5 = hout4/POOL_STRIDE1;
|
||||
int wout5 = wout4/POOL_STRIDE1;
|
||||
|
||||
int dout6 = NUM_FIL5;
|
||||
int hout6 = ((hout5 - FILTER5+ 2 * PAD_TB5)/STRIDE5)+1;
|
||||
int wout6 = ((wout5 - FILTER5+ 2 * PAD_LR5)/STRIDE5)+1;
|
||||
|
||||
int dout7 = NUM_FIL6;
|
||||
int hout7 = ((hout6 - FILTER6+ 2 * PAD_TB6)/STRIDE6)+1;
|
||||
int wout7 = ((wout6 - FILTER6+ 2 * PAD_LR6)/STRIDE6)+1;
|
||||
|
||||
int dout8 = NUM_FIL7;
|
||||
int hout8 = ((hout7 - FILTER7+ 2 * PAD_TB7)/STRIDE7)+1;
|
||||
int wout8 = ((wout7 - FILTER7+ 2 * PAD_LR7)/STRIDE7)+1;
|
||||
|
||||
int dout9 = NUM_FIL8;
|
||||
int hout9 = ((hout8 - FILTER8+ 2 * PAD_TB8)/STRIDE8)+1;
|
||||
int wout9 = ((wout8 - FILTER8+ 2 * PAD_LR8)/STRIDE8)+1;
|
||||
|
||||
int dout10 = dout9;
|
||||
int hout10 = hout9/POOL_STRIDE2;
|
||||
int wout10 = wout9/POOL_STRIDE2;
|
||||
|
||||
int dout11 = NUM_FIL9;
|
||||
int hout11 = ((hout10 - FILTER9+ 2 * PAD_TB9)/STRIDE9)+1;
|
||||
int wout11 = ((wout10 - FILTER9+ 2 * PAD_LR9)/STRIDE9)+1;
|
||||
|
||||
int dout12 = NUM_FIL10;
|
||||
int hout12 = ((hout11 - FILTER10+ 2 * PAD_TB10)/STRIDE10)+1;
|
||||
int wout12 = ((wout11 - FILTER10+ 2 * PAD_LR10)/STRIDE10)+1;
|
||||
|
||||
int dout13 = NUM_FIL11;
|
||||
int hout13 = ((hout12 - FILTER11+ 2 * PAD_TB11)/STRIDE11)+1;
|
||||
int wout13 = ((wout12 - FILTER11+ 2 * PAD_LR11)/STRIDE11)+1;
|
||||
|
||||
int dout14 = NUM_FIL12;
|
||||
int hout14 = ((hout13 - FILTER12+ 2 * PAD_TB12)/STRIDE12)+1;
|
||||
int wout14 = ((wout13 - FILTER12+ 2 * PAD_LR12)/STRIDE12)+1;
|
||||
|
||||
int dout15 = dout14;
|
||||
int hout15 = hout14/POOL_STRIDE3;
|
||||
int wout15 = wout14/POOL_STRIDE3;
|
||||
|
||||
int flatten_dim = dout15 * hout15 * wout15;
|
||||
|
||||
int in[IMG_SZ][IMG_SZ][NUM_FIL0];
|
||||
int inp_dim[3] = {IMG_SZ, IMG_SZ, NUM_FIL0};
|
||||
|
||||
int out1[hout1][wout1][dout1];
|
||||
int pad_1[4] = {PAD_TB1, PAD_TB1, PAD_LR1, PAD_LR1};
|
||||
int outp_dim1[3] = {hout1, wout1, dout1};
|
||||
int f_dim1[4] = {NUM_FIL1, FILTER1, FILTER1, NUM_FIL0};
|
||||
|
||||
int out2[hout2][wout2][dout2];
|
||||
int pad_2[4] = {PAD_TB2, PAD_TB2, PAD_LR2, PAD_LR2};
|
||||
int outp_dim2[3] = {hout2, wout2, dout2};
|
||||
int f_dim2[4] = {NUM_FIL2, FILTER2, FILTER2, NUM_FIL1};
|
||||
|
||||
int out3[hout3][wout3][dout3];
|
||||
int pad_3[4] = {PAD_TB3, PAD_TB3, PAD_LR3, PAD_LR3};
|
||||
int outp_dim3[3] = {hout3, wout3, dout3};
|
||||
int f_dim3[4] = {NUM_FIL3, FILTER3, FILTER3, NUM_FIL2};
|
||||
|
||||
int out4[hout4][wout4][dout4];
|
||||
int pad_4[4] = {PAD_TB4, PAD_TB4, PAD_LR4, PAD_LR4};
|
||||
int outp_dim4[3] = {hout4, wout4, dout4};
|
||||
int f_dim4[4] = {NUM_FIL4, FILTER4, FILTER4, NUM_FIL3};
|
||||
|
||||
int out5[hout5][wout5][dout5];
|
||||
int outp_dim5[3] = {hout5, wout5, dout5};
|
||||
|
||||
int out6[hout6][wout6][dout6];
|
||||
int pad_6[4] = {PAD_TB5, PAD_TB5, PAD_LR5, PAD_LR5};
|
||||
int outp_dim6[3] = {hout6, wout6, dout6};
|
||||
int f_dim6[4] = {NUM_FIL5, FILTER5, FILTER5, NUM_FIL4};
|
||||
|
||||
int out7[hout7][wout7][dout7];
|
||||
int pad_7[4] = {PAD_TB6, PAD_TB6, PAD_LR6, PAD_LR6};
|
||||
int outp_dim7[3] = {hout7, wout7, dout7};
|
||||
int f_dim7[4] = {NUM_FIL6, FILTER6, FILTER6, NUM_FIL5};
|
||||
|
||||
int out8[hout8][wout8][dout8];
|
||||
int pad_8[4] = {PAD_TB7, PAD_TB7, PAD_LR7, PAD_LR7};
|
||||
int outp_dim8[3] = {hout8, wout8, dout8};
|
||||
int f_dim8[4] = {NUM_FIL7, FILTER7, FILTER7, NUM_FIL6};
|
||||
|
||||
int out9[hout9][wout9][dout9];
|
||||
int pad_9[4] = {PAD_TB8, PAD_TB8, PAD_LR8, PAD_LR8};
|
||||
int outp_dim9[3] = {hout9, wout9, dout9};
|
||||
int f_dim9[4] = {NUM_FIL8, FILTER8, FILTER8, NUM_FIL7};
|
||||
|
||||
int out10[hout10][wout10][dout10];
|
||||
int outp_dim10[3] = {hout10, wout10, dout10};
|
||||
|
||||
int out11[hout11][wout11][dout11];
|
||||
int pad_11[4] = {PAD_TB9, PAD_TB9, PAD_LR9, PAD_LR9};
|
||||
int outp_dim11[3] = {hout11, wout11, dout11};
|
||||
int f_dim11[4] = {NUM_FIL9, FILTER9, FILTER9, NUM_FIL8};
|
||||
|
||||
int out12[hout12][wout12][dout12];
|
||||
int pad_12[4] = {PAD_TB10, PAD_TB10, PAD_LR10, PAD_LR10};
|
||||
int outp_dim12[3] = {hout12, wout12, dout12};
|
||||
int f_dim12[4] = {NUM_FIL10, FILTER10, FILTER10, NUM_FIL9};
|
||||
|
||||
int out13[hout13][wout13][dout13];
|
||||
int pad_13[4] = {PAD_TB11, PAD_TB11, PAD_LR11, PAD_LR11};
|
||||
int outp_dim13[3] = {hout13, wout13, dout13};
|
||||
int f_dim13[4] = {NUM_FIL11, FILTER11, FILTER11, NUM_FIL10};
|
||||
|
||||
int out14[hout14][wout14][dout14];
|
||||
int pad_14[4] = {PAD_TB12, PAD_TB12, PAD_LR12, PAD_LR12};
|
||||
int outp_dim14[3] = {hout14, wout14, dout14};
|
||||
int f_dim14[4] = {NUM_FIL12, FILTER12, FILTER12, NUM_FIL11};
|
||||
|
||||
int out15[hout15][wout15][dout15];
|
||||
int outp_dim15[3] = {hout15, wout15, dout15};
|
||||
|
||||
int out16[flatten_dim];
|
||||
|
||||
|
||||
int out[OUT_DIM];
|
||||
|
||||
for (int iter = 0; iter < SAMPLES; iter++){
|
||||
|
||||
for(int i = 0; i < IMG_SZ; i++){
|
||||
for(int j = 0; j < IMG_SZ; j++){
|
||||
for(int k = 0; k < NUM_FIL0; k++){
|
||||
in[i][j][k] = input[i][j][k][iter];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pcount_enable(1);
|
||||
|
||||
dw_conv(inp_dim, f_dim1, outp_dim1, in, F1, B1, out1, STRIDE1, pad_1, SB1, MV1, SV1);
|
||||
pw_conv(outp_dim1, f_dim2, outp_dim2, out1, F2, B2, out2, STRIDE2, pad_2, SB2, MV2, SV2);
|
||||
dw_conv(outp_dim2, f_dim3, outp_dim3, out2, F3, B3, out3, STRIDE3, pad_3, SB3, MV3, SV3);
|
||||
pw_conv(outp_dim3, f_dim4, outp_dim4, out3, F4, B4, out4, STRIDE4, pad_4, SB4, MV4, SV4);
|
||||
maxpool2(outp_dim4, outp_dim5, out4, out5, POOL_SIZE1, POOL_STRIDE1);
|
||||
|
||||
dw_conv(outp_dim5, f_dim6, outp_dim6, out5, F5, B5, out6, STRIDE5, pad_6, SB5, MV5, SV5);
|
||||
pw_conv(outp_dim6, f_dim7, outp_dim7, out6, F6, B6, out7, STRIDE6, pad_7, SB6, MV6, SV6);
|
||||
dw_conv(outp_dim7, f_dim8, outp_dim8, out7, F7, B7, out8, STRIDE7, pad_8, SB7, MV7, SV7);
|
||||
pw_conv(outp_dim8, f_dim9, outp_dim9, out8, F8, B8, out9, STRIDE8, pad_9, SB8, MV8, SV8);
|
||||
maxpool2(outp_dim9, outp_dim10, out9, out10, POOL_SIZE2, POOL_STRIDE2);
|
||||
|
||||
dw_conv(outp_dim10, f_dim11, outp_dim11, out10, F9, B9, out11, STRIDE9, pad_11, SB9, MV9, SV9);
|
||||
pw_conv(outp_dim11, f_dim12, outp_dim12, out11, F10, B10, out12, STRIDE10, pad_12, SB10, MV10, SV10);
|
||||
dw_conv(outp_dim12, f_dim13, outp_dim13, out12, F11, B11, out13, STRIDE11, pad_13, SB11, MV11, SV11);
|
||||
pw_conv(outp_dim13, f_dim14, outp_dim14, out13, F12, B12, out14, STRIDE12, pad_14, SB12, MV12, SV12);
|
||||
maxpool2(outp_dim14, outp_dim15, out14, out15, POOL_SIZE3, POOL_STRIDE3);
|
||||
|
||||
flatten(outp_dim15, out15, out16);
|
||||
|
||||
mlp_layer(out16, out, flatten_dim, OUT_DIM, W1, B13, SB13, MV13, SV13);
|
||||
pcount_enable(0);
|
||||
|
||||
puts("Output Layer Values:\n");
|
||||
for(int i = 0; i < OUT_DIM; i++) {
|
||||
puthex(out[i]);
|
||||
puts("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Program entry point: calls pcount_enable(0) before handing over to
 * cifar10_dws_cnn(), which re-enables it only around the layer calls.
 * Always returns 0.
 */
int main(void) {
  pcount_enable(0);

  cifar10_dws_cnn();

  return 0;
}
|
||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1,46 @@
|
||||
#ifndef IBEX_CNN_PARAMS_H
|
||||
#define IBEX_CNN_PARAMS_H
|
||||
|
||||
/* MVn: per-layer constant passed as the second-to-last argument of the layer
 * kernels (presumably a requantisation multiplier -- confirm against the
 * kernel sources). */
#define MV1  75
#define MV2  112
#define MV3  82
#define MV4  75
#define MV5  87
#define MV6  76
#define MV7  111
#define MV8  95
#define MV9  95
#define MV10 117
#define MV11 68
#define MV12 91
#define MV13 77

/* SVn: per-layer constant passed as the last argument of the layer kernels
 * (presumably a requantisation shift -- confirm against the kernel sources). */
#define SV1  15
#define SV2  7
#define SV3  15
#define SV4  14
#define SV5  14
#define SV6  8
#define SV7  15
#define SV8  15
#define SV9  15
#define SV10 16
#define SV11 15
#define SV12 15
#define SV13 21

/* SBn: per-layer constant passed just before MVn; zero for every layer in
 * this configuration. */
#define SB1  0
#define SB2  0
#define SB3  0
#define SB4  0
#define SB5  0
#define SB6  0
#define SB7  0
#define SB8  0
#define SB9  0
#define SB10 0
#define SB11 0
#define SB12 0
#define SB13 0
|
||||
|
||||
#endif /* IBEX_CNN_PARAMS_H */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,15 @@
|
||||
# Copyright lowRISC contributors.
|
||||
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Generate a baremetal application
|
||||
|
||||
# Name of the program; $(PROGRAM).c will be added as a source file.
PROGRAM = cmsis_cnn

# Directory holding this Makefile: the last word of MAKEFILE_LIST is the
# makefile currently being read, realpath canonicalises it and dirname strips
# the file name.
PROGRAM_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))

# Any extra source files to include in the build. Use the upper case .S
# extension for assembly files.
EXTRA_SRCS :=

# Pull in common.mk from the "common" directory two levels up.
include ${PROGRAM_DIR}/../../common/common.mk
|
||||
@@ -0,0 +1,153 @@
|
||||
#include "simple_system_common.h"
|
||||
#include "cnn_weights.h"
|
||||
#include "fully_connected_opt.h"
|
||||
#include "ibex_cnn_params.h"
|
||||
#include "ibex_inputs.h"
|
||||
#include "conv2d_opt.h"
|
||||
|
||||
/* Input geometry: the network consumes an IMG_SZ x IMG_SZ image with
 * NUM_FIL0 channels. */
#define IMG_SZ   32
#define NUM_FIL0 1

/* Kernel side length of convolution n (kernels are FILTERn x FILTERn). */
#define FILTER1 5
#define FILTER2 5
#define FILTER3 5

/* Number of filters (output depth) of convolution n. */
#define NUM_FIL1 8
#define NUM_FIL2 8
#define NUM_FIL3 16

/* Stride of convolution n. */
#define STRIDE1 1
#define STRIDE2 1
#define STRIDE3 1

/* Padding of convolution n. PAD_TBn feeds the height computation and PAD_LRn
 * the width computation (presumably top/bottom and left/right). */
#define PAD_TB1 2
#define PAD_LR1 2

#define PAD_TB2 2
#define PAD_LR2 2

#define PAD_TB3 2
#define PAD_LR3 2

/* Window size and stride of the three max-pooling stages. */
#define POOL_STRIDE1 2
#define POOL_SIZE1   2

#define POOL_STRIDE2 2
#define POOL_SIZE2   2

#define POOL_STRIDE3 2
#define POOL_SIZE3   2

/* Number of values produced by the final fully connected layer. */
#define OUT_DIM 3

/* Number of input samples processed per run. */
#define SAMPLES 1

/* NOTE(review): not referenced anywhere in the visible part of this file;
 * confirm whether another translation unit uses it before removing. */
int outs[SAMPLES][OUT_DIM];
|
||||
|
||||
void cmsis_cnn() {
|
||||
|
||||
int dout1 = NUM_FIL1;
|
||||
int hout1 = ((IMG_SZ - FILTER1 + 2 * PAD_TB1)/STRIDE1) + 1;
|
||||
int wout1 = ((IMG_SZ - FILTER1 + 2 * PAD_LR1)/STRIDE1) + 1;
|
||||
|
||||
int dout2 = dout1;
|
||||
int hout2 = hout1/POOL_STRIDE1;
|
||||
int wout2 = wout1/POOL_STRIDE1;
|
||||
|
||||
int dout3 = NUM_FIL2;
|
||||
int hout3 = ((hout2 - FILTER2+ 2 * PAD_TB2)/STRIDE2)+1;
|
||||
int wout3 = ((wout2 - FILTER2+ 2 * PAD_LR2)/STRIDE2)+1;
|
||||
|
||||
int dout4 = dout3;
|
||||
int hout4 = hout3/POOL_STRIDE2;
|
||||
int wout4 = wout3/POOL_STRIDE2;
|
||||
|
||||
int dout5 = NUM_FIL3;
|
||||
int hout5 = ((hout4 - FILTER3+ 2 * PAD_TB3)/STRIDE3)+1;
|
||||
int wout5 = ((wout4 - FILTER3+ 2 * PAD_LR3)/STRIDE3)+1;
|
||||
|
||||
int dout6 = dout5;
|
||||
int hout6 = hout5/POOL_STRIDE3;
|
||||
int wout6 = wout5/POOL_STRIDE3;
|
||||
|
||||
int flatten_dim = dout6 * hout6 * wout6;
|
||||
|
||||
int in[IMG_SZ][IMG_SZ][NUM_FIL0];
|
||||
int inp_dim[3] = {IMG_SZ, IMG_SZ, NUM_FIL0};
|
||||
|
||||
int out1[hout1][wout1][dout1];
|
||||
int pad_1[4] = {PAD_TB1, PAD_TB1, PAD_LR1, PAD_LR1};
|
||||
int outp_dim1[3] = {hout1, wout1, dout1};
|
||||
int f_dim1[4] = {NUM_FIL1, FILTER1, FILTER1, NUM_FIL0};
|
||||
|
||||
int out2[hout2][wout2][dout2];
|
||||
int outp_dim2[3] = {hout2, wout2, dout2};
|
||||
|
||||
int out3[hout3][wout3][dout3];
|
||||
int pad_3[4] = {PAD_TB2, PAD_TB2, PAD_LR2, PAD_LR2};
|
||||
int outp_dim3[3] = {hout3, wout3, dout3};
|
||||
int f_dim3[4] = {NUM_FIL2, FILTER2, FILTER2, NUM_FIL1};
|
||||
|
||||
int out4[hout4][wout4][dout4];
|
||||
int outp_dim4[3] = {hout4, wout4, dout4};
|
||||
|
||||
int out5[hout5][wout5][dout5];
|
||||
int pad_5[4] = {PAD_TB3, PAD_TB3, PAD_LR3, PAD_LR3};
|
||||
int outp_dim5[3] = {hout5, wout5, dout5};
|
||||
int f_dim5[4] = {NUM_FIL3, FILTER3, FILTER3, NUM_FIL2};
|
||||
|
||||
int out6[hout6][wout6][dout6];
|
||||
int outp_dim6[3] = {hout6, wout6, dout6};
|
||||
|
||||
int out7[flatten_dim];
|
||||
|
||||
int out[OUT_DIM];
|
||||
|
||||
for (int iter = 0; iter < SAMPLES; iter++){
|
||||
|
||||
for(int i = 0; i < IMG_SZ; i++){
|
||||
for(int j = 0; j < IMG_SZ; j++){
|
||||
for(int k = 0; k < NUM_FIL0; k++){
|
||||
in[i][j][k] = input[i][j][k][iter];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pcount_enable(1);
|
||||
|
||||
conv2_8bits_1ch(inp_dim, f_dim1, outp_dim1, in, F1, B1, out1, STRIDE1, pad_1, SB1, MV1, SV1);
|
||||
maxpool2_compressed(outp_dim1, outp_dim2, out1, out2, POOL_SIZE1, POOL_STRIDE1);
|
||||
|
||||
conv2_8bits(outp_dim2, f_dim3, outp_dim3, out2, F2, B2, out3, STRIDE2, pad_3, SB2, MV2, SV2);
|
||||
maxpool2_compressed(outp_dim3, outp_dim4, out3, out4, POOL_SIZE2, POOL_STRIDE2);
|
||||
|
||||
conv2_2bits(outp_dim4, f_dim5, outp_dim5, out4, F3, B3, out5, STRIDE3, pad_5, SB3, MV3, SV3);
|
||||
maxpool2_compressed(outp_dim5, outp_dim6, out5, out6, POOL_SIZE3, POOL_STRIDE3);
|
||||
|
||||
flatten(outp_dim6, out6, out7);
|
||||
|
||||
mlp_layer_2bits(out7, out, flatten_dim, OUT_DIM, W1, B4, SB4, MV4, SV4);
|
||||
|
||||
pcount_enable(0);
|
||||
|
||||
puts("Output Layer Values:\n");
|
||||
for(int i = 0; i < OUT_DIM; i++) {
|
||||
puthex((out[i] & 0xFF000000) >> 24);
|
||||
puts(" ");
|
||||
puthex((out[i] & 0xFF0000) >> 16);
|
||||
puts(" ");
|
||||
puthex((out[i] & 0xFF00) >> 8);
|
||||
puts(" ");
|
||||
puthex(out[i] & 0xFF);
|
||||
puts("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Program entry point: call pcount_enable(0) first, then run the network.
 * cmsis_cnn() itself calls pcount_enable(1)/pcount_enable(0) around each
 * inference.
 * NOTE(review): assumes pcount_enable() gates the performance counters.
 */
int main(void) {
    pcount_enable(0);
    cmsis_cnn();
    return 0;
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,30 @@
|
||||
#ifndef IBEX_CNN_PARAMS_H
|
||||
#define IBEX_CNN_PARAMS_H
|
||||
|
||||
#define MV1 1953789044
|
||||
#define MV2 1229539657
|
||||
#define MV3 1212696648
|
||||
#define MV4 1330597711
|
||||
|
||||
#define SV1 2164392961
|
||||
#define SV2 2299667521
|
||||
#define SV3 1488020161
|
||||
#define SV4 1623294721
|
||||
|
||||
static const int SB1[8] = {
|
||||
812696004, 946880900, 1079034308, 946913796, 945865156, 1081139524, 946930052, 545309060
|
||||
};
|
||||
|
||||
static const int SB2[8] = {
|
||||
945873216, 945832320, 945865152, 944816576, 674283904, 543211776, 945873280, 944824704
|
||||
};
|
||||
|
||||
static const int SB3[16] = {
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||
};
|
||||
|
||||
static const int SB4[3] = {
|
||||
3, 3, 3
|
||||
};
|
||||
|
||||
#endif /* IBEX_CNN_PARAMS_H */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,15 @@
|
||||
# Copyright lowRISC contributors.
|
||||
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Generate a baremetal application
|
||||
|
||||
# Name of the program $(PROGRAM).c will be added as a source file
|
||||
|
||||
PROGRAM = cmsis_cnn
|
||||
PROGRAM_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
|
||||
# Any extra source files to include in the build. Use the upper case .S
|
||||
# extension for assembly files
|
||||
EXTRA_SRCS :=
|
||||
|
||||
include ${PROGRAM_DIR}/../../common/common.mk
|
||||
@@ -0,0 +1,147 @@
|
||||
#include "simple_system_common.h"
|
||||
#include "cnn_weights.h"
|
||||
#include "fully_connected.h"
|
||||
#include "ibex_cnn_params.h"
|
||||
#include "ibex_inputs.h"
|
||||
#include "conv2d.h"
|
||||
|
||||
#define IMG_SZ 32
|
||||
#define NUM_FIL0 3
|
||||
|
||||
#define FILTER1 5
|
||||
#define FILTER2 5
|
||||
#define FILTER3 5
|
||||
|
||||
#define NUM_FIL1 32
|
||||
#define NUM_FIL2 32
|
||||
#define NUM_FIL3 64
|
||||
|
||||
#define STRIDE1 1
|
||||
#define STRIDE2 1
|
||||
#define STRIDE3 1
|
||||
|
||||
#define PAD_TB1 2
|
||||
#define PAD_LR1 2
|
||||
|
||||
#define PAD_TB2 2
|
||||
#define PAD_LR2 2
|
||||
|
||||
#define PAD_TB3 2
|
||||
#define PAD_LR3 2
|
||||
|
||||
#define POOL_STRIDE1 2
|
||||
#define POOL_SIZE1 2
|
||||
|
||||
#define POOL_STRIDE2 2
|
||||
#define POOL_SIZE2 2
|
||||
|
||||
#define POOL_STRIDE3 2
|
||||
#define POOL_SIZE3 2
|
||||
|
||||
#define OUT_DIM 10
|
||||
|
||||
#define SAMPLES 1
|
||||
int outs[SAMPLES][OUT_DIM];
|
||||
|
||||
void cmsis_cnn() {
|
||||
|
||||
int dout1 = NUM_FIL1;
|
||||
int hout1 = ((IMG_SZ - FILTER1 + 2 * PAD_TB1)/STRIDE1) + 1;
|
||||
int wout1 = ((IMG_SZ - FILTER1 + 2 * PAD_LR1)/STRIDE1) + 1;
|
||||
|
||||
int dout2 = dout1;
|
||||
int hout2 = hout1/POOL_STRIDE1;
|
||||
int wout2 = wout1/POOL_STRIDE1;
|
||||
|
||||
int dout3 = NUM_FIL2;
|
||||
int hout3 = ((hout2 - FILTER2+ 2 * PAD_TB2)/STRIDE2)+1;
|
||||
int wout3 = ((wout2 - FILTER2+ 2 * PAD_LR2)/STRIDE2)+1;
|
||||
|
||||
int dout4 = dout3;
|
||||
int hout4 = hout3/POOL_STRIDE2;
|
||||
int wout4 = wout3/POOL_STRIDE2;
|
||||
|
||||
int dout5 = NUM_FIL3;
|
||||
int hout5 = ((hout4 - FILTER3+ 2 * PAD_TB3)/STRIDE3)+1;
|
||||
int wout5 = ((wout4 - FILTER3+ 2 * PAD_LR3)/STRIDE3)+1;
|
||||
|
||||
int dout6 = dout5;
|
||||
int hout6 = hout5/POOL_STRIDE3;
|
||||
int wout6 = wout5/POOL_STRIDE3;
|
||||
|
||||
int flatten_dim = dout6 * hout6 * wout6;
|
||||
|
||||
int in[IMG_SZ][IMG_SZ][NUM_FIL0];
|
||||
int inp_dim[3] = {IMG_SZ, IMG_SZ, NUM_FIL0};
|
||||
|
||||
int out1[hout1][wout1][dout1];
|
||||
int pad_1[4] = {PAD_TB1, PAD_TB1, PAD_LR1, PAD_LR1};
|
||||
int outp_dim1[3] = {hout1, wout1, dout1};
|
||||
int f_dim1[4] = {NUM_FIL1, FILTER1, FILTER1, NUM_FIL0};
|
||||
|
||||
int out2[hout2][wout2][dout2];
|
||||
int outp_dim2[3] = {hout2, wout2, dout2};
|
||||
|
||||
int out3[hout3][wout3][dout3];
|
||||
int pad_3[4] = {PAD_TB2, PAD_TB2, PAD_LR2, PAD_LR2};
|
||||
int outp_dim3[3] = {hout3, wout3, dout3};
|
||||
int f_dim3[4] = {NUM_FIL2, FILTER2, FILTER2, NUM_FIL1};
|
||||
|
||||
int out4[hout4][wout4][dout4];
|
||||
int outp_dim4[3] = {hout4, wout4, dout4};
|
||||
|
||||
int out5[hout5][wout5][dout5];
|
||||
int pad_5[4] = {PAD_TB3, PAD_TB3, PAD_LR3, PAD_LR3};
|
||||
int outp_dim5[3] = {hout5, wout5, dout5};
|
||||
int f_dim5[4] = {NUM_FIL3, FILTER3, FILTER3, NUM_FIL2};
|
||||
|
||||
int out6[hout6][wout6][dout6];
|
||||
int outp_dim6[3] = {hout6, wout6, dout6};
|
||||
|
||||
int out7[flatten_dim];
|
||||
|
||||
|
||||
int out[OUT_DIM];
|
||||
|
||||
for (int iter = 0; iter < SAMPLES; iter++){
|
||||
|
||||
for(int i = 0; i < IMG_SZ; i++){
|
||||
for(int j = 0; j < IMG_SZ; j++){
|
||||
for(int k = 0; k < NUM_FIL0; k++){
|
||||
in[i][j][k] = input[i][j][k][iter];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pcount_enable(1);
|
||||
|
||||
conv2(inp_dim, f_dim1, outp_dim1, in, F1, B1, out1, STRIDE1, pad_1, SB1, MV1, SV1);
|
||||
maxpool2(outp_dim1, outp_dim2, out1, out2, POOL_SIZE1, POOL_STRIDE1);
|
||||
|
||||
conv2(outp_dim2, f_dim3, outp_dim3, out2, F2, B2, out3, STRIDE2, pad_3, SB2, MV2, SV2);
|
||||
maxpool2(outp_dim3, outp_dim4, out3, out4, POOL_SIZE2, POOL_STRIDE2);
|
||||
|
||||
conv2(outp_dim4, f_dim5, outp_dim5, out4, F3, B3, out5, STRIDE3, pad_5, SB3, MV3, SV3);
|
||||
maxpool2(outp_dim5, outp_dim6, out5, out6, POOL_SIZE3, POOL_STRIDE3);
|
||||
|
||||
flatten(outp_dim6, out6, out7);
|
||||
|
||||
mlp_layer(out7, out, flatten_dim, OUT_DIM, W1, B4, SB4, MV4, SV4);
|
||||
pcount_enable(0);
|
||||
|
||||
puts("Output Layer Values:\n");
|
||||
for(int i = 0; i < OUT_DIM; i++) {
|
||||
puthex(out[i]);
|
||||
puts("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Program entry point: call pcount_enable(0) first, then run the reference
 * network. cmsis_cnn() itself calls pcount_enable(1)/pcount_enable(0) around
 * each inference.
 * NOTE(review): assumes pcount_enable() gates the performance counters.
 */
int main(void) {
    pcount_enable(0);
    cmsis_cnn();
    return 0;
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,19 @@
|
||||
#ifndef IBEX_CNN_PARAMS_H
|
||||
#define IBEX_CNN_PARAMS_H
|
||||
|
||||
#define MV1 116
|
||||
#define MV2 73
|
||||
#define MV3 72
|
||||
#define MV4 79
|
||||
|
||||
#define SV1 16
|
||||
#define SV2 17
|
||||
#define SV3 11
|
||||
#define SV4 12
|
||||
|
||||
#define SB1 0
|
||||
#define SB2 0
|
||||
#define SB3 0
|
||||
#define SB4 0
|
||||
|
||||
#endif /* IBEX_CNN_PARAMS_H */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,70 @@
|
||||
#ifndef DWS_CONV_H
|
||||
#define DWS_CONV_H
|
||||
|
||||
/*
 * Reference (plain C) pointwise convolution with fused requantisation.
 *
 * For every output channel and output position the accumulator starts at
 * bias[channel], gains the dot product of the sampled input pixel's channel
 * vector with row fil[channel][0 .. fil_dim[3]-1], and is then rescaled as
 *     (quantized_multiplier * acc + (1 << (out_shift_rl - 1))) >> out_shift_rl
 * and clamped to [0, 255].
 *
 *   in_dim, out_dim : {height, width, depth} of inp / out.
 *   fil_dim         : only [3] (taps per filter row) is read here.
 *   strides         : step between sampled input pixels, used for both axes.
 *   pad             : only pad[0] (top) and pad[2] (left) are read.
 *   bias_shift_mode : not referenced by this implementation.
 *
 * An output position that maps outside the input receives the requantised
 * bias only.
 */
void pw_conv(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]],
             const int fil[fil_dim[0]][fil_dim[3]], const int bias[],
             int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[],
             const int bias_shift_mode, const int quantized_multiplier, const int out_shift_rl){

    int oc, oy, ox, ic;

    for (oc = 0; oc < out_dim[2]; oc++) {               // output depth
        for (oy = 0; oy < out_dim[0]; oy++) {           // output height
            const int iy = oy * strides - pad[0];       // sampled input row
            for (ox = 0; ox < out_dim[1]; ox++) {       // output width
                const int ix = ox * strides - pad[2];   // sampled input column
                int acc = bias[oc];
                int scaled;

                if (iy >= 0 && iy < in_dim[0] && ix >= 0 && ix < in_dim[1]) {
                    for (ic = 0; ic < fil_dim[3]; ic++) {   // filter depth
                        acc += inp[iy][ix][ic] * fil[oc][ic];
                    }
                }

                // Round-to-nearest fixed-point rescale, then saturate to 8 bits unsigned.
                scaled = (quantized_multiplier * acc + (1 << (out_shift_rl - 1))) >> out_shift_rl;
                scaled = (scaled < 0) ? 0 : ((scaled > 255) ? 255 : scaled);
                out[oy][ox][oc] = scaled;
            }
        }
    }
}
|
||||
|
||||
/*
 * Reference (plain C) depthwise convolution with fused requantisation.
 *
 * Channel c of the output is computed from channel c of the input only, using
 * the single-channel kernel depthwise_fil[c][..][..][0]. The accumulator starts
 * at bias[c]; taps that fall outside the input are skipped (implicit zero
 * padding). The sum is rescaled as
 *     (depthwise_multiplier * acc + (1 << (shift - 1))) >> shift
 * with shift = depthwise_out_shift_rl, then clamped to [0, 255].
 *
 *   in_dim, out_dim   : {height, width, depth} of inp / out.
 *   depthwise_fil_dim : [1] = kernel height, [2] = kernel width.
 *   strides           : step between windows, used for both axes.
 *   pad               : only pad[0] (top) and pad[2] (left) are read.
 *   bias_shift_mode   : not referenced by this implementation.
 */
void dw_conv(int in_dim[3], int depthwise_fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]],
             const int depthwise_fil[depthwise_fil_dim[0]][depthwise_fil_dim[1]][depthwise_fil_dim[2]][1], const int bias[],
             int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[],
             const int bias_shift_mode, const int depthwise_multiplier, const int depthwise_out_shift_rl){

    int ch, oy, ox, ky, kx;

    for (ch = 0; ch < out_dim[2]; ch++) {                   // output depth
        for (oy = 0; oy < out_dim[0]; oy++) {               // output height
            const int top = oy * strides - pad[0];          // first input row of the window
            for (ox = 0; ox < out_dim[1]; ox++) {           // output width
                const int left = ox * strides - pad[2];     // first input column of the window
                int acc = bias[ch];
                int scaled;

                for (ky = 0; ky < depthwise_fil_dim[1]; ky++) {         // kernel height
                    for (kx = 0; kx < depthwise_fil_dim[2]; kx++) {     // kernel width
                        const int iy = top + ky;
                        const int ix = left + kx;

                        if (iy >= 0 && iy < in_dim[0] && ix >= 0 && ix < in_dim[1]) {
                            acc += inp[iy][ix][ch] * depthwise_fil[ch][ky][kx][0];
                        }
                    }
                }

                // Round-to-nearest fixed-point rescale, then saturate to 8 bits unsigned.
                scaled = (depthwise_multiplier * acc + (1 << (depthwise_out_shift_rl - 1)))
                         >> depthwise_out_shift_rl;
                scaled = (scaled < 0) ? 0 : ((scaled > 255) ? 255 : scaled);
                out[oy][ox][ch] = scaled;
            }
        }
    }
}
|
||||
|
||||
#endif /* DWS_CONV_H */
|
||||
@@ -0,0 +1,171 @@
|
||||
#ifndef DWS_CONV_OPT_H
|
||||
#define DWS_CONV_OPT_H
|
||||
|
||||
void pw_conv_8bits(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]],
|
||||
const int fil[fil_dim[0]][fil_dim[3] << 2], const int bias[fil_dim[0]],
|
||||
int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[], const int bias_shift_mode[],
|
||||
const int quantized_multiplier, const int out_shift_rl){
|
||||
|
||||
int i, j, k, m, res, str1, str2, bias_val, w, in_cnn;
|
||||
|
||||
for (i = 0; i < out_dim[2]; i++) { // output depth
|
||||
str1 = -pad[0] - strides;
|
||||
for (j = 0; j < out_dim[0]; j++) { // output height
|
||||
str1 += 1;
|
||||
str2 = -pad[2] - strides;
|
||||
for (k = 0; k < out_dim[1]; k++) { // output width
|
||||
bias_val = bias[i];
|
||||
str2 += 1;
|
||||
asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
|
||||
|
||||
for (m = 0; m < fil_dim[3]; m++) { // filters depth
|
||||
in_cnn = inp[str1][str2][m];
|
||||
w = fil[i][4*m];
|
||||
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
|
||||
w = fil[i][4*m+1];
|
||||
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
|
||||
w = fil[i][4*m+2];
|
||||
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
|
||||
w = fil[i][4*m+3];
|
||||
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
}
|
||||
asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
|
||||
out[j][k][i] = res;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void pw_conv_4bits(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]],
|
||||
const int fil[fil_dim[0]][fil_dim[3] << 1], const int bias[fil_dim[0]],
|
||||
int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[], const int bias_shift_mode[],
|
||||
const int quantized_multiplier, const int out_shift_rl){
|
||||
|
||||
int i, j, k, m, res, str1, str2, bias_val, w, in_cnn;
|
||||
|
||||
for (i = 0; i < out_dim[2]; i++) { // output depth
|
||||
str1 = -pad[0] - strides;
|
||||
for (j = 0; j < out_dim[0]; j++) { // output height
|
||||
str1 += 1;
|
||||
str2 = -pad[2] - strides;
|
||||
for (k = 0; k < out_dim[1]; k++) { // output width
|
||||
bias_val = bias[i];
|
||||
str2 += 1;
|
||||
asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
|
||||
for (m = 0; m < fil_dim[3]; m++) { // filters depth
|
||||
in_cnn = inp[str1][str2][m];
|
||||
w = fil[i][2*m];
|
||||
asm volatile("nn_mac_4b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
|
||||
w = fil[i][2*m+1];
|
||||
asm volatile("nn_mac_4b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
}
|
||||
asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
|
||||
out[j][k][i] = res;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void pw_conv_2bits(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]],
|
||||
const int fil[fil_dim[0]][fil_dim[3]], const int bias[fil_dim[0]],
|
||||
int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[], const int bias_shift_mode[],
|
||||
const int quantized_multiplier, const int out_shift_rl){
|
||||
|
||||
int i, j, k, m, res, str1, str2, bias_val, w, in_cnn;
|
||||
|
||||
for (i = 0; i < out_dim[2]; i++) { // output depth
|
||||
str1 = -pad[0] - strides;
|
||||
for (j = 0; j < out_dim[0]; j++) { // output height
|
||||
str1 += 1;
|
||||
str2 = -pad[2] - strides;
|
||||
for (k = 0; k < out_dim[1]; k++) { // output width
|
||||
bias_val = bias[i];
|
||||
str2 += 1;
|
||||
asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
|
||||
for (m = 0; m < fil_dim[3]; m++) { // filters depth
|
||||
in_cnn = inp[str1][str2][m];
|
||||
w = fil[i][m];
|
||||
asm volatile("nn_mac_2b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
}
|
||||
asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
|
||||
out[j][k][i] = res;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dw_conv_opt(int in_dim[3], int depthwise_fil_dim[4], int out_dim[3],
|
||||
int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int depthwise_fil[depthwise_fil_dim[0]][depthwise_fil_dim[1]][depthwise_fil_dim[2]],
|
||||
const int bias[depthwise_fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]],
|
||||
int strides, int pad[], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
|
||||
|
||||
int i, j, k, n, p, res, k1, k2, str1, str2, bias_val, in_cnn, w;
|
||||
|
||||
// Depthwise convolution
|
||||
for (i = 0; i < out_dim[2]; i++){ // output depth
|
||||
str1 = -pad[0] - strides;
|
||||
for (j = 0; j < out_dim[0]; j++) { // output height
|
||||
str1 += strides;
|
||||
str2 = -pad[2] - strides;
|
||||
for (k = 0; k < out_dim[1]; k++) { // output width
|
||||
bias_val = bias[i];
|
||||
str2 += strides;
|
||||
asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
|
||||
for (p = 0; p < depthwise_fil_dim[1]; p++) { // depthwise filter height
|
||||
for (n = 0; n < depthwise_fil_dim[2]; n++) { // depthwise filter width
|
||||
k1 = str1 + p;
|
||||
k2 = str2 + n;
|
||||
|
||||
if (k1 < in_dim[0] && k1 >= 0 && k2 >= 0 && k2 < in_dim[1]) {
|
||||
in_cnn = inp[k1][k2][i];
|
||||
w = depthwise_fil[i][p][n];
|
||||
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
}
|
||||
}
|
||||
}
|
||||
asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
|
||||
out[j][k][i] = res;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dw_conv_opt_1ch(int in_dim[3], int depthwise_fil_dim[4], int out_dim[3],
|
||||
int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int depthwise_fil[depthwise_fil_dim[0]][depthwise_fil_dim[1]][depthwise_fil_dim[2]],
|
||||
const int bias[depthwise_fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]],
|
||||
int strides, int pad[], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
|
||||
|
||||
int j, k, n, p, res, k1, k2, str1, str2, bias_val, in_cnn, w;
|
||||
|
||||
// Depthwise convolution
|
||||
str1 = -pad[0] - strides;
|
||||
for (j = 0; j < out_dim[0]; j++) { // output height
|
||||
str1 += strides;
|
||||
str2 = -pad[2] - strides;
|
||||
for (k = 0; k < out_dim[1]; k++) { // output width
|
||||
bias_val = bias[0];
|
||||
str2 += strides;
|
||||
asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[0]):);
|
||||
for (p = 0; p < depthwise_fil_dim[1]; p++) { // depthwise filter height
|
||||
for (n = 0; n < depthwise_fil_dim[2]; n++) { // depthwise filter width
|
||||
k1 = str1 + p;
|
||||
k2 = str2 + n;
|
||||
|
||||
if (k1 < in_dim[0] && k1 >= 0 && k2 >= 0 && k2 < in_dim[1]) {
|
||||
in_cnn = inp[k1][k2][0];
|
||||
w = depthwise_fil[0][p][n];
|
||||
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
}
|
||||
}
|
||||
}
|
||||
asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
|
||||
out[j][k][0] = res;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* DWS_CONV_OPT_H */
|
||||
@@ -0,0 +1,89 @@
|
||||
import init_utils
|
||||
import common
|
||||
|
||||
# Initialize the environment and get the name
|
||||
name = init_utils.initialize_environment(__file__)
|
||||
args = init_utils.get_args()
|
||||
|
||||
# Set arguments from command line
|
||||
max_acc_drop = args.max_acc_drop
|
||||
device = args.device
|
||||
|
||||
from sklearn.model_selection import train_test_split
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
|
||||
# Load our Dataset
|
||||
|
||||
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
|
||||
|
||||
y_train = np.squeeze(y_train, axis = 1)
|
||||
y_test = np.squeeze(y_test, axis = 1)
|
||||
|
||||
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size = 0.15)
|
||||
|
||||
X_train = (np.transpose(X_train, (0,3,1,2)))
|
||||
X_test = (np.transpose(X_test, (0,3,1,2)))
|
||||
X_val = (np.transpose(X_val, (0,3,1,2)))
|
||||
|
||||
BATCH_SIZE = 128
|
||||
epochs = 1
|
||||
lr = 0.0001
|
||||
|
||||
class DepthwiseBlock(nn.Module):
    """Depthwise-separable convolution block.

    Applies, in order: a 3x3 depthwise convolution (``groups == in_channels``,
    padding 1, so spatial size is kept), ReLU, a 1x1 pointwise convolution
    mapping ``in_channels`` to ``out_channels``, and a final ReLU. The four
    layers live in ``self.block`` at indices 0-3.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        # One 3x3 filter per input channel.
        depthwise = nn.Conv2d(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=3,
            padding=1,
            groups=in_channels,
        )
        # 1x1 convolution that mixes channels.
        pointwise = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            padding=0,
        )

        self.block = nn.Sequential(
            depthwise,
            nn.ReLU(inplace=True),
            pointwise,
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Run ``x`` through the depthwise/pointwise stack."""
        return self.block(x)
|
||||
|
||||
class Cifar10_Dws_CNN(nn.Module):
    """Depthwise-separable CNN with a 10-way log-softmax output.

    ``features`` holds three stages, each made of two ``DepthwiseBlock``s
    followed by a 2x2 max-pool with stride 2; the channel widths are
    3 -> 64 -> 64, 64 -> 128 -> 128 and 128 -> 256 -> 256. The result is
    flattened and passed to a single linear layer with ``256 * 4 * 4`` inputs.
    """

    def __init__(self):
        super().__init__()

        # (input channels, output channels) of each stage.
        stage_channels = [(3, 64), (64, 128), (128, 256)]

        layers = []
        for c_in, c_out in stage_channels:
            layers.extend([
                DepthwiseBlock(in_channels=c_in, out_channels=c_out),
                DepthwiseBlock(in_channels=c_out, out_channels=c_out),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ])
        self.features = nn.Sequential(*layers)

        self.flatten = nn.Flatten()

        # Assuming input size is (32, 32): after 3 max pooling layers the
        # feature map is (4, 4).
        self.classifier = nn.Sequential(nn.Linear(256 * 4 * 4, 10))

    def forward(self, x):
        """Return per-class log-probabilities for the batch ``x``."""
        flat = self.flatten(self.features(x))
        logits = self.classifier(flat)
        return F.log_softmax(logits, dim=1)
|
||||
|
||||
net = Cifar10_Dws_CNN()
|
||||
|
||||
common.create_ibex_qnn(net, name, device, X_train, y_train, X_test, y_test,
|
||||
X_val = X_val, y_val = y_val, BATCH_SIZE = BATCH_SIZE,
|
||||
epochs = epochs, lr = lr, max_acc_drop = max_acc_drop)
|
||||
@@ -0,0 +1,77 @@
|
||||
import init_utils
|
||||
import common
|
||||
|
||||
# Initialize the environment and get the name
|
||||
name = init_utils.initialize_environment(__file__)
|
||||
args = init_utils.get_args()
|
||||
|
||||
# Set arguments from command line
|
||||
max_acc_drop = args.max_acc_drop
|
||||
device = args.device
|
||||
|
||||
from sklearn.model_selection import train_test_split
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
|
||||
# Load our Dataset
|
||||
|
||||
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
|
||||
y_train = np.squeeze(y_train, axis = 1)
|
||||
y_test = np.squeeze(y_test, axis = 1)
|
||||
|
||||
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size = 0.15)
|
||||
|
||||
X_train = (np.transpose(X_train, (0,3,1,2)) - 128.0)/255.0
|
||||
X_test = (np.transpose(X_test, (0,3,1,2)) - 128.0)/255.0
|
||||
X_val = (np.transpose(X_val, (0,3,1,2)) - 128.0)/255.0
|
||||
|
||||
BATCH_SIZE = 32
|
||||
epochs = 1
|
||||
lr = 0.0001
|
||||
|
||||
class CMSIS_CNN(nn.Module):
    """Three-stage CNN with a 10-way log-softmax output.

    Each stage ``n`` (1-3) consists of ``conv{n}`` (5x5, padding 2),
    ``relu{n}``, ``max{n}`` (2x2 max-pool, stride 2) and ``d{n}`` (dropout).
    Channel widths are 3 -> 32 -> 32 -> 64 and the dropout rates are
    0.25, 0.25 and 0.4. The flattened features (1024 values) feed ``linear1``.
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels, dropout probability) per stage.
        stage_plan = ((3, 32, 0.25), (32, 32, 0.25), (32, 64, 0.4))

        # Sub-modules are registered as conv1, relu1, max1, d1, conv2, ... in
        # that order.
        for idx, (c_in, c_out, p_drop) in enumerate(stage_plan, start=1):
            setattr(self, f"conv{idx}",
                    nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=5, padding=2))
            setattr(self, f"relu{idx}", nn.ReLU())
            setattr(self, f"max{idx}", nn.MaxPool2d(2, 2))
            setattr(self, f"d{idx}", nn.Dropout(p=p_drop))

        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(1024, 10)

    def forward(self, X):
        """Return per-class log-probabilities for the batch ``X``."""
        stages = (
            (self.conv1, self.relu1, self.max1, self.d1),
            (self.conv2, self.relu2, self.max2, self.d2),
            (self.conv3, self.relu3, self.max3, self.d3),
        )
        for conv, relu, pool, drop in stages:
            X = drop(pool(relu(conv(X))))

        X = self.linear1(self.flatten(X))
        return F.log_softmax(X, dim=1)
|
||||
|
||||
net = CMSIS_CNN()
|
||||
|
||||
common.create_ibex_qnn(net, name, device, X_train, y_train, X_test, y_test,
|
||||
X_val = X_val, y_val = y_val, BATCH_SIZE = BATCH_SIZE,
|
||||
epochs = epochs, lr = lr, max_acc_drop = max_acc_drop)
|
||||
@@ -125,3 +125,13 @@ def create_ibex_qnn(net, name, device, X_train, y_train, X_test, y_test, X_val =
|
||||
print('\nSIMULATING MODEL ON IBEX CORE\nUSE THE OUTPUTS TO VERIFY THAT THE RESULTS ARE CORRECT !!')
|
||||
ibex_model = simulate_ibex.create_lenet_model(int_weights, int_og_bias, mul_vals, shift_vals)
|
||||
simulate_ibex.eval_sim_model(quant_net, ibex_model, test_loader)
|
||||
|
||||
elif(name == 'cmsis_cnn'):
|
||||
print('\nSIMULATING MODEL ON IBEX CORE\nUSE THE OUTPUTS TO VERIFY THAT THE RESULTS ARE CORRECT !!')
|
||||
ibex_model = simulate_ibex.create_cmsis_cnn_model(int_weights, int_og_bias, mul_vals, shift_vals)
|
||||
simulate_ibex.eval_sim_model(quant_net, ibex_model, test_loader)
|
||||
|
||||
elif(name == 'cifar10_dws_cnn'):
|
||||
print('\nSIMULATING MODEL ON IBEX CORE\nUSE THE OUTPUTS TO VERIFY THAT THE RESULTS ARE CORRECT !!')
|
||||
ibex_model = simulate_ibex.create_ibex_dws_model(int_weights, int_og_bias, mul_vals, shift_vals)
|
||||
simulate_ibex.eval_sim_model(quant_net, ibex_model, test_loader)
|
||||
|
||||
+145
-32
@@ -27,26 +27,34 @@ def quantize_multiplier(real_multiplier):
|
||||
return quantized_multiplier, right_shift
|
||||
|
||||
def get_int_params(quant_net):
|
||||
|
||||
int_weights = []
|
||||
int_bias = []
|
||||
in_scales = []
|
||||
act_scales = []
|
||||
|
||||
for _, module in quant_net.sequential.named_children():
|
||||
if hasattr(module, 'weight') and module.weight is not None:
|
||||
int_weights.append(module.int_weight().cpu().numpy())
|
||||
int_bias.append(module.int_bias().cpu().numpy())
|
||||
in_scales.append(module.quant_bias_scale().cpu().detach().numpy())
|
||||
def extract_quant_params(module):
|
||||
for name, submodule in module.named_children():
|
||||
# Check if the submodule has weights and append them if present
|
||||
if hasattr(submodule, 'weight') and submodule.weight is not None:
|
||||
int_weights.append(submodule.int_weight().cpu().detach().numpy())
|
||||
int_bias.append(submodule.int_bias().cpu().detach().numpy())
|
||||
in_scales.append(submodule.quant_bias_scale().cpu().detach().numpy())
|
||||
|
||||
if hasattr(module, 'quant_act_scale') and module.quant_act_scale is not None:
|
||||
act_scales.append(module.quant_act_scale().cpu().detach().numpy())
|
||||
# Check if the submodule has activation scale and append it if present
|
||||
if hasattr(submodule, 'quant_act_scale') and submodule.quant_act_scale is not None:
|
||||
act_scales.append(submodule.quant_act_scale().cpu().detach().numpy())
|
||||
|
||||
act_scales.append(quant_net.o_quant.quant_act_scale().cpu().detach().numpy())
|
||||
# Recursively extract parameters from the children modules
|
||||
extract_quant_params(submodule)
|
||||
|
||||
# Start extraction from the top-level module
|
||||
extract_quant_params(quant_net)
|
||||
|
||||
mul_vals, shift_vals = [], []
|
||||
|
||||
for i in range(len(act_scales)):
|
||||
M = in_scales[i]/act_scales[i]
|
||||
for i in range(len(act_scales)-1):
|
||||
M = in_scales[i]/act_scales[i+1]
|
||||
mul, shift = quantize_multiplier(M[0])
|
||||
mul_vals.append(mul)
|
||||
shift_vals.append(shift)
|
||||
@@ -87,7 +95,12 @@ def decide_mode(network, weight_bit_width, input_uint8 = True):
|
||||
for name, module in network.named_modules():
|
||||
if isinstance(module, layer_types_py):
|
||||
layer_type_name = module.__class__.__name__
|
||||
if(layer_type_name == 'Conv2d' or layer_type_name == 'Linear' or layer_type_name == 'DepthwiseConv2d'):
|
||||
if(layer_type_name == 'Linear'):
|
||||
layer_type.append(layer_type_name)
|
||||
if(layer_type_name == 'Conv2d'):
|
||||
if(module.groups == module.in_channels):
|
||||
layer_type.append('DepthwiseConv2d')
|
||||
else:
|
||||
layer_type.append(layer_type_name)
|
||||
else:
|
||||
if(layer_type_name == 'ReLU' or layer_type_name == 'Sigmoid'):
|
||||
@@ -96,13 +109,13 @@ def decide_mode(network, weight_bit_width, input_uint8 = True):
|
||||
|
||||
for i in range(len(weight_bit_width)):
|
||||
signed_input = 4 * input_sign[i]
|
||||
if(layer_type[i] == 'DepthwiseConv2d'):
|
||||
mode_per_layer.append(signed_input + 1)
|
||||
else:
|
||||
if(weight_bit_width[i] == 2):
|
||||
mode_per_layer.append(signed_input + 3)
|
||||
elif(weight_bit_width[i] == 4):
|
||||
mode_per_layer.append(signed_input + 2)
|
||||
else:
|
||||
if(layer_type[i] == 'DepthwiseConv2d'):
|
||||
mode_per_layer.append(signed_input + 1)
|
||||
else:
|
||||
mode_per_layer.append(signed_input)
|
||||
|
||||
@@ -161,6 +174,7 @@ def pad_inputs_weights(quant_net, test_loader, mode_per_layer,
|
||||
else:
|
||||
new_size_0 = a * 4
|
||||
|
||||
if((mode_per_layer[i] != 1) and (mode_per_layer[i] != 5)):
|
||||
b = w.shape[1] // 4
|
||||
if(w.shape[1] % 4 != 0):
|
||||
new_size_1 = (b + 1) * 4
|
||||
@@ -170,6 +184,12 @@ def pad_inputs_weights(quant_net, test_loader, mode_per_layer,
|
||||
new_w = np.zeros((new_size_0, new_size_1, w.shape[2], w.shape[3])).astype(np.int8)
|
||||
new_w[:w.shape[0], :w.shape[1], :, :] = w
|
||||
|
||||
else:
|
||||
new_size_1 = 1
|
||||
new_w = np.zeros((new_size_0, new_size_1, w.shape[2], w.shape[3])).astype(np.int8)
|
||||
new_w[:w.shape[0], :w.shape[1], :, :] = w
|
||||
new_w = np.squeeze(new_w, axis = 1)
|
||||
|
||||
padded_int_weights.append(new_w)
|
||||
|
||||
padded_int_biases = []
|
||||
@@ -325,6 +345,15 @@ def concat_inputs_weights(mode_per_layer, padded_input, padded_int_weights, padd
|
||||
comb = combine_values(vector)
|
||||
new_mat[i][j] = comb
|
||||
|
||||
elif(len(dims) == 3):
|
||||
new_mat = np.zeros((int(dims[0]//4), dims[1], dims[2]), dtype = np.int64)
|
||||
for i in range(int(dims[0]//4)):
|
||||
for j in range(dims[1]):
|
||||
for k in range(dims[2]):
|
||||
vector = layer_weight[4*i : 4*(i+1), j, k]
|
||||
comb = combine_values(vector)
|
||||
new_mat[i][j][k] = comb
|
||||
|
||||
elif(len(dims) == 4):
|
||||
if((mode_per_layer[iter] == 0) | (mode_per_layer[iter] == 4)):
|
||||
new_mat = np.zeros((int(dims[0]//4), dims[1], dims[2], dims[3]), dtype = np.int64)
|
||||
@@ -602,9 +631,17 @@ def save_cnn_net_params(path, int_weights, int_biases, mul_vals, shift_vals, shi
|
||||
dims = np.shape(int_weights[k])
|
||||
mat = int_weights[k]
|
||||
|
||||
if(len(dims) == 2 or ((len(dims) == 4) and dims[2] == dims[3] == 1)):
|
||||
f.write('static const int ')
|
||||
if(len(dims) == 2):
|
||||
wi += 1
|
||||
st = 'static const int W' + str(wi) + '[' + str(dims[0]) + ']' + '[' + str(dims[1]) + '] = {\n'
|
||||
f.write('W' + str(wi))
|
||||
else:
|
||||
mat = np.squeeze(mat, axis = (2,3))
|
||||
fi += 1
|
||||
f.write('F' + str(fi))
|
||||
|
||||
st = '[' + str(dims[0]) + ']' + '[' + str(dims[1]) + '] = {\n'
|
||||
f.write(st)
|
||||
for n in range(dims[0]):
|
||||
f.write('\t{')
|
||||
@@ -619,6 +656,32 @@ def save_cnn_net_params(path, int_weights, int_biases, mul_vals, shift_vals, shi
|
||||
f.write('\n')
|
||||
f.write('};\n\n')
|
||||
|
||||
elif (len(dims) == 3):
|
||||
dims = np.shape(mat)
|
||||
fi += 1
|
||||
st = 'static const int F' + str(fi) + '[' + str(dims[0]) + '][' + str(dims[1])
|
||||
st += '][' + str(dims[2]) + '] = {\n'
|
||||
f.write(st)
|
||||
|
||||
for n in range(dims[0]):
|
||||
f.write('\t{\n')
|
||||
for l in range(dims[1]):
|
||||
f.write('\t\t{')
|
||||
for h in range(dims[2] - 1):
|
||||
f.write(str(mat[n][l][h]) + ', ')
|
||||
if dims[2] != 1:
|
||||
f.write(str(mat[n][l][dims[2] - 1]) + '}')
|
||||
else:
|
||||
f.write(str(mat[n][l][0]) + '}')
|
||||
if (l != dims[1] - 1):
|
||||
f.write(',')
|
||||
f.write('\n')
|
||||
f.write('\t}')
|
||||
if n != dims[0] - 1:
|
||||
f.write(',')
|
||||
f.write('\n')
|
||||
f.write('};\n\n')
|
||||
|
||||
elif(len(dims) == 4):
|
||||
mat = np.transpose(mat, (0, 2, 3, 1))
|
||||
dims = np.shape(mat)
|
||||
@@ -856,9 +919,11 @@ def generate_opt_c_code_mlp(path, name, int_weights, optimal_config, type_of_lay
|
||||
f.write('\t' + name + '();\n\n')
|
||||
f.write('\treturn 0;\n}')
|
||||
|
||||
def get_cnn_details(model):
|
||||
def get_cnn_details(module, details = None):
|
||||
if details is None:
|
||||
details = []
|
||||
for layer in model.children():
|
||||
|
||||
for layer in module.children():
|
||||
if isinstance(layer, nn.Conv2d):
|
||||
details.append({
|
||||
"layer_type": "Conv2d",
|
||||
@@ -866,10 +931,11 @@ def get_cnn_details(model):
|
||||
"out_channels": layer.out_channels,
|
||||
"kernel_size": layer.kernel_size,
|
||||
"stride": layer.stride,
|
||||
"padding": layer.padding
|
||||
"padding": layer.padding,
|
||||
"groups": layer.groups
|
||||
})
|
||||
|
||||
elif (isinstance(layer, nn.MaxPool2d)):
|
||||
elif isinstance(layer, nn.MaxPool2d):
|
||||
details.append({
|
||||
"layer_type": "MaxPool2d",
|
||||
"kernel_size": layer.kernel_size,
|
||||
@@ -877,7 +943,7 @@ def get_cnn_details(model):
|
||||
"padding": layer.padding
|
||||
})
|
||||
|
||||
elif (isinstance(layer, nn.AvgPool2d)):
|
||||
elif isinstance(layer, nn.AvgPool2d):
|
||||
details.append({
|
||||
"layer_type": "AvgPool2d",
|
||||
"kernel_size": layer.kernel_size,
|
||||
@@ -891,6 +957,10 @@ def get_cnn_details(model):
|
||||
"in_features": layer.in_features,
|
||||
"out_features": layer.out_features
|
||||
})
|
||||
|
||||
# Recursively apply to children modules
|
||||
get_cnn_details(layer, details)
|
||||
|
||||
return details
|
||||
|
||||
def generate_og_c_code_cnn(path, name, input, cnn_details, int_weights):
|
||||
@@ -900,10 +970,17 @@ def generate_og_c_code_cnn(path, name, input, cnn_details, int_weights):
|
||||
f.write('#include "fully_connected.h"\n')
|
||||
f.write('#include "ibex_cnn_params.h"\n')
|
||||
f.write('#include "ibex_inputs.h"\n')
|
||||
f.write('#include "conv2d.h"\n\n')
|
||||
f.write('#include "conv2d.h"\n')
|
||||
|
||||
f.write('#define IMG_SZ ' + str(input.shape[2]) + '\n')
|
||||
f.write('#define NUM_FIL0 ' + str(int_weights[0].shape[1]) + '\n\n')
|
||||
for detail in cnn_details[:-1]:
|
||||
if detail["layer_type"] == "Conv2d":
|
||||
if(detail["in_channels"] == detail["out_channels"] == detail["groups"] != 1):
|
||||
f.write('#include "dws_conv.h"\n')
|
||||
break
|
||||
|
||||
f.write('\n')
|
||||
f.write('#define IMG_SZ ' + str(np.shape(input)[2]) + '\n')
|
||||
f.write('#define NUM_FIL0 ' + str(np.shape(input)[1]) + '\n\n')
|
||||
i = 1
|
||||
for w in int_weights:
|
||||
if(len(np.shape(w)) == 4):
|
||||
@@ -1050,11 +1127,17 @@ def generate_og_c_code_cnn(path, name, input, cnn_details, int_weights):
|
||||
|
||||
for detail in cnn_details[:-1]:
|
||||
if detail["layer_type"] == "Conv2d":
|
||||
if(detail["in_channels"] == detail["out_channels"] == detail["groups"] != 1):
|
||||
conv_type = 'dw_conv'
|
||||
elif(detail["kernel_size"][0] == 1):
|
||||
conv_type = 'pw_conv'
|
||||
else:
|
||||
conv_type = "conv2"
|
||||
if(i == 1):
|
||||
f.write('\t\tconv2(inp_dim, f_dim1, outp_dim1, in, F1, B1, ')
|
||||
f.write('\t\t' + conv_type + '(inp_dim, f_dim1, outp_dim1, in, F1, B1, ')
|
||||
f.write('out1, STRIDE1, pad_1, SB1, MV1, SV1);')
|
||||
else:
|
||||
f.write('\t\tconv2(outp_dim' + str(i-1) + ', f_dim' + str(i) + ', outp_dim' + str(i))
|
||||
f.write('\t\t' + conv_type + '(outp_dim' + str(i-1) + ', f_dim' + str(i) + ', outp_dim' + str(i))
|
||||
f.write(', out' + str(i-1) + ', F' + str(fi) + ', B' + str(fi) + ', out' + str(i))
|
||||
f.write(', STRIDE' + str(fi) + ', pad_' + str(i) + ', SB' + str(fi))
|
||||
f.write(', MV' + str(fi) + ', SV' + str(fi) + ');')
|
||||
@@ -1091,6 +1174,13 @@ def generate_og_c_code_cnn(path, name, input, cnn_details, int_weights):
|
||||
f.write('\n')
|
||||
i += 1
|
||||
|
||||
if flatten == 0:
|
||||
f.write('\t\tflatten(outp_dim' + str(i-1) + ', out' + str(i-1) + ', out' + str(i) + ');\n\n')
|
||||
i += 1
|
||||
f.write('\t\tmlp_layer(out' + str(i-1) + ', out, flatten_dim, OUT_DIM, ')
|
||||
f.write('W1, B' + str(fi + dn - 1) + ', SB' + str(fi + dn - 1) + ', MV' + str(fi + dn - 1))
|
||||
f.write(', SV' + str(fi + dn - 1) + ');')
|
||||
else:
|
||||
f.write('\t\tmlp_layer(out' + str(i-1) + ', out, DENSE_DIM' + str(dn-1))
|
||||
f.write(', OUT_DIM, W' + str(dn) + ', B' + str(fi + dn - 1))
|
||||
f.write(', SB' + str(fi + dn - 1) + ', MV' + str(fi + dn - 1))
|
||||
@@ -1119,13 +1209,21 @@ def generate_opt_c_code_cnn(path, name, input, cnn_details, int_weights, optimal
|
||||
f.write('#include "fully_connected_opt.h"\n')
|
||||
f.write('#include "ibex_cnn_params.h"\n')
|
||||
f.write('#include "ibex_inputs.h"\n')
|
||||
f.write('#include "conv2d_opt.h"\n\n')
|
||||
f.write('#include "conv2d_opt.h"\n')
|
||||
|
||||
for detail in cnn_details[:-1]:
|
||||
if detail["layer_type"] == "Conv2d":
|
||||
if(detail["in_channels"] == detail["out_channels"] == detail["groups"] != 1):
|
||||
f.write('#include "dws_conv_opt.h"\n')
|
||||
break
|
||||
|
||||
f.write('\n')
|
||||
|
||||
f.write('#define IMG_SZ ' + str(np.shape(input)[2]) + '\n')
|
||||
f.write('#define NUM_FIL0 ' + str(np.shape(input)[0]) + '\n\n')
|
||||
f.write('#define NUM_FIL0 ' + str(np.shape(input)[1]) + '\n\n')
|
||||
i = 1
|
||||
for w in int_weights:
|
||||
if(len(np.shape(w)) == 4):
|
||||
if(len(np.shape(w)) == 4 or len(np.shape(w)) == 3):
|
||||
f.write('#define FILTER' + str(i) + ' ' + str(w.shape[2]) + '\n')
|
||||
i += 1
|
||||
|
||||
@@ -1133,7 +1231,7 @@ def generate_opt_c_code_cnn(path, name, input, cnn_details, int_weights, optimal
|
||||
|
||||
i = 1
|
||||
for w in int_weights:
|
||||
if(len(np.shape(w)) == 4):
|
||||
if(len(np.shape(w)) == 4 or len(np.shape(w)) == 3):
|
||||
f.write('#define NUM_FIL' + str(i) + ' ' + str(w.shape[0]) + '\n')
|
||||
i += 1
|
||||
|
||||
@@ -1270,14 +1368,21 @@ def generate_opt_c_code_cnn(path, name, input, cnn_details, int_weights, optimal
|
||||
|
||||
for detail in cnn_details[:-1]:
|
||||
if detail["layer_type"] == "Conv2d":
|
||||
if(detail["in_channels"] == detail["out_channels"] == detail["groups"] != 1):
|
||||
conv_type = 'dw_conv_opt'
|
||||
elif(detail["kernel_size"][0] == 1):
|
||||
conv_type = 'pw_conv_' + str(optimal_config[j]) + 'bits'
|
||||
else:
|
||||
conv_type = 'conv2_' + str(optimal_config[j]) + 'bits'
|
||||
|
||||
if(i == 1):
|
||||
f.write('\t\tconv2_' + str(optimal_config[j]) + 'bits')
|
||||
if(np.shape(input)[0] == 1):
|
||||
f.write('\t\t' + conv_type)
|
||||
if(np.shape(input)[1] == 1):
|
||||
f.write('_1ch')
|
||||
f.write('(inp_dim, f_dim1, outp_dim1, in, F1, B1, ')
|
||||
f.write('out1, STRIDE1, pad_1, SB1, MV1, SV1);')
|
||||
else:
|
||||
f.write('\t\tconv2_' + str(optimal_config[j]) + 'bits(outp_dim' + str(i-1) + ', f_dim' + str(i))
|
||||
f.write('\t\t' + conv_type + '(outp_dim' + str(i-1) + ', f_dim' + str(i))
|
||||
f.write(', outp_dim' + str(i) + ', out' + str(i-1) + ', F' + str(fi) + ', B' + str(fi) + ', out')
|
||||
f.write(str(i) + ', STRIDE' + str(fi) + ', pad_' + str(i) + ', SB' + str(fi))
|
||||
f.write(', MV' + str(fi) + ', SV' + str(fi) + ');')
|
||||
@@ -1314,11 +1419,19 @@ def generate_opt_c_code_cnn(path, name, input, cnn_details, int_weights, optimal
|
||||
f.write('\n')
|
||||
i += 1
|
||||
|
||||
if flatten == 0:
|
||||
f.write('\t\tflatten(outp_dim' + str(i-1) + ', out' + str(i-1) + ', out' + str(i) + ');\n\n')
|
||||
i += 1
|
||||
f.write('\t\tmlp_layer_' + str(optimal_config[j]) + 'bits(out' + str(i-1) + ', out, ')
|
||||
f.write('flatten_dim, OUT_DIM, W1, B' + str(fi + dn - 1) + ', SB' + str(fi + dn - 1) + ', MV')
|
||||
f.write(str(fi + dn - 1) + ', SV' + str(fi + dn - 1) + ');\n')
|
||||
else:
|
||||
f.write('\t\tmlp_layer_' + str(optimal_config[-1]) + 'bits(out' + str(i-1) + ', out, DENSE_DIM' + str(dn-1))
|
||||
f.write(', OUT_DIM, W' + str(dn) + ', B' + str(fi + dn - 1))
|
||||
f.write(', SB' + str(fi + dn - 1) + ', MV' + str(fi + dn - 1))
|
||||
f.write(', SV' + str(fi + dn - 1) + ');\n')
|
||||
|
||||
f.write('\n\t\tpcount_enable(0);\n\n')
|
||||
f.write('\t\tputs("Output Layer Values:\\n");\n')
|
||||
f.write('\t\tfor(int i = 0; i < OUT_DIM; i++) {\n')
|
||||
f.write('\t\t\tputhex((out[i] & 0xFF000000) >> 24);\n')
|
||||
|
||||
+103
-12
@@ -11,6 +11,9 @@ from torch import nn, optim
|
||||
|
||||
import brevitas.nn as qnn
|
||||
from brevitas.quant import *
|
||||
from brevitas.core.restrict_val import RestrictValueType
|
||||
|
||||
from collections import defaultdict
|
||||
from torchinfo import summary
|
||||
|
||||
def net_input_size(X_train):
|
||||
@@ -202,7 +205,21 @@ def generate_sequences(length, values = [2, 4, 8]):
|
||||
|
||||
def create_weight_confs(macc_per_layer):
|
||||
total_macc_opt = []
|
||||
weights_per_layer = generate_sequences(len(macc_per_layer))
|
||||
|
||||
cc = 0
|
||||
idx = []
|
||||
|
||||
if(len(macc_per_layer) >= 6):
|
||||
for i, mpl in enumerate(macc_per_layer):
|
||||
if(mpl/max(macc_per_layer) < 0.05):
|
||||
cc += 1
|
||||
idx.append(i)
|
||||
|
||||
weights_per_layer = generate_sequences(len(macc_per_layer) - cc)
|
||||
|
||||
for w in weights_per_layer:
|
||||
for i in idx:
|
||||
w.insert(i, 8)
|
||||
|
||||
for w_conf in weights_per_layer:
|
||||
macc = 0
|
||||
@@ -234,24 +251,47 @@ def create_weight_confs(macc_per_layer):
|
||||
# Define a mapping from PyTorch layers to Brevitas layers
|
||||
def create_layer_mapping(bit_width):
|
||||
mapping = {
|
||||
nn.Conv2d: lambda layer, bw: qnn.QuantConv2d(in_channels = layer.in_channels,
|
||||
nn.Conv2d: lambda layer, bw: (qnn.QuantConv2d(in_channels=layer.in_channels,
|
||||
out_channels=layer.out_channels,
|
||||
kernel_size=layer.kernel_size,
|
||||
stride=layer.stride[0],
|
||||
padding=layer.padding,
|
||||
groups=layer.groups,
|
||||
bias=True,
|
||||
cache_inference_bias=True,
|
||||
bias_quant=Int32Bias,
|
||||
weight_bit_width=bw,
|
||||
weight_quant = Int8WeightPerTensorFloat),
|
||||
weight_quant=Int8WeightPerTensorFloat,
|
||||
weight_scaling_min_val=2e-16,
|
||||
restrict_scaling_type=RestrictValueType.LOG_FP,
|
||||
return_quant_tensor=True
|
||||
) if layer.groups != layer.in_channels else (
|
||||
# Special case for depthwise convolutions
|
||||
qnn.QuantConv2d(in_channels=layer.in_channels,
|
||||
out_channels=layer.out_channels,
|
||||
kernel_size=layer.kernel_size,
|
||||
stride=layer.stride[0],
|
||||
padding=layer.padding,
|
||||
groups=layer.groups,
|
||||
bias=True,
|
||||
cache_inference_bias=True,
|
||||
bias_quant=Int32Bias,
|
||||
weight_bit_width=8, # Fixed bit width for depthwise convolutions
|
||||
weight_quant=Int8WeightPerTensorFloat,
|
||||
weight_scaling_min_val=2e-16,
|
||||
restrict_scaling_type=RestrictValueType.LOG_FP,
|
||||
return_quant_tensor=True))),
|
||||
|
||||
nn.Linear: lambda layer, bw: qnn.QuantLinear(in_features = layer.in_features,
|
||||
out_features = layer.out_features,
|
||||
|
||||
cache_inference_bias = True,
|
||||
weight_quant = Int8WeightPerTensorFloat,
|
||||
bias_quant = Int32Bias,
|
||||
bias = True,
|
||||
weight_bit_width = bw),
|
||||
|
||||
weight_quant = Int8WeightPerTensorFloat,
|
||||
weight_bit_width = bw,
|
||||
return_quant_tensor=True),
|
||||
|
||||
nn.ReLU: lambda _, bw: qnn.QuantReLU(bit_width = bw,
|
||||
return_quant_tensor = True),
|
||||
@@ -278,13 +318,11 @@ def convert_layer(layer, bit_width, layer_mapping):
|
||||
return layer
|
||||
|
||||
# Function to convert a PyTorch model to a Brevitas model
|
||||
def convert_model(module, bit_widths, layer_mapping):
|
||||
layer_idx = [0]
|
||||
def convert_model(module, bit_widths, layer_mapping, layer_idx = [0]):
|
||||
brevitas_module = nn.Sequential()
|
||||
|
||||
for name, layer in module.named_children():
|
||||
if list(layer.children()): # If the layer has children, recurse
|
||||
brevitas_module.add_module(name, convert_model(layer, bit_widths, layer_mapping))
|
||||
brevitas_module.add_module(name, convert_model(layer, bit_widths, layer_mapping, layer_idx))
|
||||
else:
|
||||
layer_type = type(layer)
|
||||
if layer_type in [nn.Conv2d, nn.Linear]:
|
||||
@@ -293,6 +331,7 @@ def convert_model(module, bit_widths, layer_mapping):
|
||||
else:
|
||||
bit_width = 8
|
||||
brevitas_module.add_module(name, convert_layer(layer, bit_width, layer_mapping))
|
||||
|
||||
return brevitas_module
|
||||
|
||||
class Quant_Model(nn.Module):
|
||||
@@ -300,13 +339,15 @@ class Quant_Model(nn.Module):
|
||||
super(Quant_Model, self).__init__()
|
||||
if(input_sign):
|
||||
self.quant_inp = qnn.QuantIdentity(bit_width = 8, return_quant_tensor = True,
|
||||
act_quant = Uint8ActPerTensorFloat)
|
||||
act_quant = Uint8ActPerTensorFloat, scaling_min_val = 2e-16,
|
||||
restrict_scaling_type = RestrictValueType.LOG_FP)
|
||||
|
||||
else:
|
||||
self.quant_inp = qnn.QuantIdentity(bit_width = 8, return_quant_tensor = True,
|
||||
act_quant = Int8ActPerTensorFloat)
|
||||
act_quant = Int8ActPerTensorFloat, scaling_min_val = 2e-16,
|
||||
restrict_scaling_type = RestrictValueType.LOG_FP)
|
||||
|
||||
self.sequential = convert_model(og_model, w, layer_mapping)
|
||||
self.sequential = convert_model(og_model, w, layer_mapping, [0])
|
||||
self.o_quant = qnn.QuantIdentity(bit_width = 8, return_quant_tensor = True)
|
||||
|
||||
def forward(self, X):
|
||||
@@ -315,6 +356,35 @@ class Quant_Model(nn.Module):
|
||||
X = self.o_quant(X)
|
||||
return F.log_softmax(X, dim = 1)
|
||||
|
||||
def count_layers_in_sequential(module):
    """Count Conv2d and Linear layers held directly by each nn.Sequential.

    The module tree is walked depth-first in registration order. Every
    nn.Sequential that is encountered contributes one
    ``(conv_count, linear_count)`` tuple which counts only its *direct*
    children; layers nested deeper are counted by the nested container.

    Args:
        module: Root nn.Module to inspect.

    Returns:
        List of ``(conv_count, linear_count)`` tuples in visiting order,
        with the first recorded entry dropped.
    """
    sequential_counts = []

    def _count_layers(submodule):
        if isinstance(submodule, nn.Sequential):
            conv_count = 0
            linear_count = 0
            for child in submodule.children():
                if isinstance(child, nn.Conv2d):
                    conv_count += 1
                elif isinstance(child, nn.Linear):
                    linear_count += 1
            # Record this container before descending so that entries stay
            # in pre-order (parent first, then its nested containers).
            sequential_counts.append((conv_count, linear_count))

        # Every module, Sequential or not, is descended into so that nested
        # nn.Sequential containers are found wherever they live.
        for child in submodule.children():
            _count_layers(child)

    _count_layers(module)

    # NOTE(review): the first entry is always discarded - presumably it
    # belongs to the outermost container of the model; confirm with callers.
    return sequential_counts[1:]
|
||||
|
||||
def train_quant_model(quant_net, train_loader, val_loader = None, device = 'cpu',
|
||||
epochs = 20, lr = 0.0001):
|
||||
|
||||
@@ -392,6 +462,7 @@ def dse(og_model, max_acc_drop, weights_per_layer, fp_accuracy, train_loader, te
|
||||
device = 'cpu', epochs = 5, lr = 0.0001):
|
||||
|
||||
sign = calculate_minimum(train_loader) >= 0
|
||||
seq_counts = count_layers_in_sequential(og_model)
|
||||
|
||||
if max_acc_drop is not None:
|
||||
print('\nDSE STARTING ... BINARY SEARCH')
|
||||
@@ -402,6 +473,16 @@ def dse(og_model, max_acc_drop, weights_per_layer, fp_accuracy, train_loader, te
|
||||
mid = (low + high) // 2
|
||||
w = weights_per_layer[mid]
|
||||
|
||||
f_w = []
|
||||
for i in range(len(seq_counts)):
|
||||
t_w = w[i]
|
||||
c,l = seq_counts[i]
|
||||
for j in range(c+l):
|
||||
f_w.append(t_w)
|
||||
|
||||
if(len(seq_counts) > 0):
|
||||
w = f_w
|
||||
|
||||
# Create and train the quantized network
|
||||
layer_mapping = create_layer_mapping(w)
|
||||
quant_net = Quant_Model(og_model, w, layer_mapping, sign)
|
||||
@@ -436,6 +517,16 @@ def dse(og_model, max_acc_drop, weights_per_layer, fp_accuracy, train_loader, te
|
||||
print('\nDSE STARTING ... EXHAUSTIVE SEARCH')
|
||||
test_accuracy = []
|
||||
for i, w in enumerate(weights_per_layer):
|
||||
f_w = []
|
||||
for i in range(len(seq_counts)):
|
||||
t_w = w[i]
|
||||
c,l = seq_counts[i]
|
||||
for j in range(c+l):
|
||||
f_w.append(t_w)
|
||||
|
||||
if(len(seq_counts) > 0):
|
||||
w = f_w
|
||||
|
||||
layer_mapping = create_layer_mapping(w)
|
||||
quant_net = Quant_Model(og_model, w, layer_mapping, sign)
|
||||
quant_net = quant_net.to(device)
|
||||
|
||||
+165
-25
@@ -151,15 +151,154 @@ class Ibex_Lenet5(nn.Module):
|
||||
|
||||
return X
|
||||
|
||||
class Ibex_CMSIS_CNN(nn.Module):
    """Integer-arithmetic model of a three-stage CNN with a linear classifier.

    The network is three 5x5 'same'-padded convolutions (3->32, 32->32,
    32->64 channels), each followed by requantization and a 2x2 max pool,
    then a flatten and a Linear(1024, 10) layer that is requantized as well.

    Args:
        mul_vals: Indexable holding one multiplier per layer (4 entries used).
        shift_vals: Indexable holding one shift per layer (4 entries used);
            each stored shift is ``shift_vals[i] + 7``.
    """

    def __init__(self, mul_vals, shift_vals):
        super(Ibex_CMSIS_CNN, self).__init__()

        self.m0 = mul_vals[0]
        self.m1 = mul_vals[1]
        self.m2 = mul_vals[2]
        self.m3 = mul_vals[3]

        self.s0 = shift_vals[0] + 7
        self.s1 = shift_vals[1] + 7
        self.s2 = shift_vals[2] + 7
        self.s3 = shift_vals[3] + 7

        # Registration order defines the state_dict key order, keep it stable.
        self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 32, kernel_size = 5, padding = 'same')
        self.max1 = nn.MaxPool2d(2, 2)

        self.conv2 = nn.Conv2d(in_channels = 32, out_channels = 32, kernel_size = 5, padding = 'same')
        self.max2 = nn.MaxPool2d(2, 2)

        self.conv3 = nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 5, padding = 'same')
        self.max3 = nn.MaxPool2d(2, 2)

        self.linear1 = nn.Linear(1024, 10)

    @staticmethod
    def _requantize(X, mul, shift):
        """Scale by ``mul``, round-shift right by ``shift``, clamp to [0, 255]."""
        X = torch.mul(X, mul)
        # Adding 1 << (shift - 1) before the shift rounds instead of truncating.
        rounding = torch.bitwise_left_shift(torch.tensor(1), shift - 1)
        # Cast by dtype only: .type(torch.LongTensor) would also drag the
        # activations onto the CPU and break models placed on another device.
        X = torch.add(X, rounding).to(torch.int64)
        X = torch.bitwise_right_shift(X, shift).to(torch.float32)
        return torch.clamp(X, min = 0, max = 255)

    def forward(self, X, print_out = False):
        """Run the network on ``X``; print the result when ``print_out`` is set.

        Returns:
            Float tensor of requantized classifier outputs clamped to [0, 255].
        """
        X = self.max1(self._requantize(self.conv1(X), self.m0, self.s0))
        X = self.max2(self._requantize(self.conv2(X), self.m1, self.s1))
        X = self.max3(self._requantize(self.conv3(X), self.m2, self.s2))

        # Flatten everything but the batch dimension for the linear layer.
        X = X.reshape(X.shape[0], -1)
        X = self._requantize(self.linear1(X), self.m3, self.s3)

        if print_out:
            print(X)
        return X
|
||||
|
||||
class Ibex_DepthwiseBlock(nn.Module):
    """Depthwise 3x3 convolution followed by a pointwise 1x1 convolution.

    Both convolutions are followed by an integer requantization step
    (multiply, rounding right shift, clamp to [0, 255]).

    Args:
        in_channels: Channels of the input; also the group count of ``dw``.
        out_channels: Channels produced by the pointwise convolution.
        mul_vals: Indexable with the multipliers for ``dw`` and ``pw``.
        shift_vals: Indexable with the shifts for ``dw`` and ``pw``; each
            stored shift is ``shift_vals[i] + 7``.
    """

    def __init__(self, in_channels, out_channels, mul_vals, shift_vals):
        super(Ibex_DepthwiseBlock, self).__init__()

        # groups == in_channels: every input channel gets its own 3x3 filter.
        self.dw = nn.Conv2d(in_channels = in_channels, out_channels = in_channels,
                            kernel_size = 3, padding = 1, groups = in_channels)

        self.pw = nn.Conv2d(in_channels = in_channels, out_channels = out_channels,
                            kernel_size = 1, padding = 0)

        self.m0 = mul_vals[0]
        self.m1 = mul_vals[1]

        self.s0 = shift_vals[0] + 7
        self.s1 = shift_vals[1] + 7

    @staticmethod
    def _requantize(X, mul, shift):
        """Scale by ``mul``, round-shift right by ``shift``, clamp to [0, 255]."""
        X = torch.mul(X, mul)
        # Adding 1 << (shift - 1) before the shift rounds instead of truncating.
        rounding = torch.bitwise_left_shift(torch.tensor(1), shift - 1)
        # Cast by dtype only: .type(torch.LongTensor) would also drag the
        # activations onto the CPU and break models placed on another device.
        X = torch.add(X, rounding).to(torch.int64)
        X = torch.bitwise_right_shift(X, shift).to(torch.float32)
        return torch.clamp(X, min = 0, max = 255)

    def forward(self, X):
        """Apply depthwise then pointwise convolution, requantizing after each."""
        X = self._requantize(self.dw(X), self.m0, self.s0)
        X = self._requantize(self.pw(X), self.m1, self.s1)
        return X
|
||||
|
||||
class Ibex_Cifar10_Dws_CNN(nn.Module):
    """Integer-arithmetic depthwise-separable CNN with a 10-way classifier.

    ``features`` stacks six Ibex_DepthwiseBlock modules (3->64, 64->64,
    64->128, 128->128, 128->256, 256->256 channels) with a 2x2 max pool
    after every second block. The flattened result feeds a
    Linear(256 * 4 * 4, 10) layer whose output is requantized.

    Args:
        mul_vals: Indexable of multipliers; entries 0-11 are handed to the
            blocks in pairs, entry 12 is used for the classifier.
        shift_vals: Indexable of shifts with the same layout; the classifier
            shift is stored as ``shift_vals[12] + 7``.
    """

    def __init__(self, mul_vals, shift_vals):
        super(Ibex_Cifar10_Dws_CNN, self).__init__()
        self.features = nn.Sequential(
            Ibex_DepthwiseBlock(3, 64, mul_vals[0:2], shift_vals[0:2]),
            Ibex_DepthwiseBlock(64, 64, mul_vals[2:4], shift_vals[2:4]),
            nn.MaxPool2d(kernel_size = 2, stride = 2),

            Ibex_DepthwiseBlock(64, 128, mul_vals[4:6], shift_vals[4:6]),
            Ibex_DepthwiseBlock(128, 128, mul_vals[6:8], shift_vals[6:8]),
            nn.MaxPool2d(kernel_size = 2, stride = 2),

            Ibex_DepthwiseBlock(128, 256, mul_vals[8:10], shift_vals[8:10]),
            Ibex_DepthwiseBlock(256, 256, mul_vals[10:12], shift_vals[10:12]),
            nn.MaxPool2d(kernel_size = 2, stride = 2)
        )

        self.flatten = nn.Flatten()

        self.classifier = nn.Sequential(
            nn.Linear(256 * 4 * 4, 10)
        )

        self.m_cl = mul_vals[12]
        self.s_cl = shift_vals[12] + 7

    def forward(self, x, print_out = False):
        """Run the network on ``x``; print the result when ``print_out`` is set.

        Returns:
            Float tensor of requantized classifier outputs clamped to [0, 255].
        """
        x = self.features(x)
        x = self.flatten(x)
        x = self.classifier(x)

        x = torch.mul(x, self.m_cl)
        # Adding 1 << (shift - 1) before the shift rounds instead of truncating.
        rounding = torch.bitwise_left_shift(torch.tensor(1), self.s_cl - 1)
        # Cast by dtype only: .type(torch.LongTensor) would also drag the
        # activations onto the CPU regardless of where the model lives.
        x = torch.add(x, rounding).to(torch.int64)
        x = torch.bitwise_right_shift(x, self.s_cl).to(torch.float32)
        x = torch.clamp(x, min = 0, max = 255)

        if print_out:
            print(x)

        return x
|
||||
|
||||
def configure_network(ibex_model_dict, int_weights, int_biases):
    """Overwrite every entry of a state dict with integer weights and biases.

    Entries are visited in the dict's iteration order: entries at even
    positions receive ``int_weights[position // 2]`` and entries at odd
    positions receive ``int_biases[position // 2]``, each wrapped in
    ``torch.tensor``.

    NOTE(review): this presumably relies on every layer contributing a weight
    entry immediately followed by its bias entry - confirm for new models.

    Args:
        ibex_model_dict: Mapping of parameter names to values; modified in place.
        int_weights: Per-layer weight values, indexable by layer position.
        int_biases: Per-layer bias values, indexable by layer position.

    Returns:
        The same ``ibex_model_dict`` object, after modification.
    """
    # Index 0 selects the weights, index 1 the biases (the position parity).
    sources = (int_weights, int_biases)

    for position, key in enumerate(list(ibex_model_dict)):
        layer, parity = divmod(position, 2)
        ibex_model_dict[key] = torch.tensor(sources[parity][layer])

    return ibex_model_dict
|
||||
|
||||
def create_fann_model(int_weights, int_biases, mul_vals, shift_vals):
    """Build an Ibex_FANN model and load integer weights and biases into it.

    Args:
        int_weights: Per-layer integer weight values.
        int_biases: Per-layer integer bias values.
        mul_vals: Multipliers forwarded to the Ibex_FANN constructor.
        shift_vals: Shifts forwarded to the Ibex_FANN constructor.

    Returns:
        The Ibex_FANN instance with the updated state dict loaded.
    """
    ibex_model = Ibex_FANN(mul_vals, shift_vals)
    ibex_model_dict = ibex_model.state_dict()

    # configure_network reassigns every entry of the state dict, so the
    # former per-key 'linear1'/'linear2' assignments were dead code.
    ibex_model_dict = configure_network(ibex_model_dict, int_weights, int_biases)

    ibex_model.load_state_dict(ibex_model_dict)
    return ibex_model
|
||||
@@ -168,15 +307,7 @@ def create_uci_model(int_weights, int_biases, mul_vals, shift_vals):
|
||||
ibex_model = Ibex_UCI_MLP(mul_vals, shift_vals)
|
||||
ibex_model_dict = ibex_model.state_dict()
|
||||
|
||||
ibex_model_dict['fc0.weight'] = torch.tensor(int_weights[0])
|
||||
ibex_model_dict['fc1.weight'] = torch.tensor(int_weights[1])
|
||||
ibex_model_dict['fc2.weight'] = torch.tensor(int_weights[2])
|
||||
ibex_model_dict['fc3.weight'] = torch.tensor(int_weights[3])
|
||||
|
||||
ibex_model_dict['fc0.bias'] = torch.tensor(int_biases[0])
|
||||
ibex_model_dict['fc1.bias'] = torch.tensor(int_biases[1])
|
||||
ibex_model_dict['fc2.bias'] = torch.tensor(int_biases[2])
|
||||
ibex_model_dict['fc3.bias'] = torch.tensor(int_biases[3])
|
||||
ibex_model_dict = configure_network(ibex_model_dict, int_weights, int_biases)
|
||||
|
||||
ibex_model.load_state_dict(ibex_model_dict)
|
||||
|
||||
@@ -186,24 +317,33 @@ def create_lenet_model(int_weights, int_biases, mul_vals, shift_vals):
|
||||
ibex_model = Ibex_Lenet5(mul_vals, shift_vals)
|
||||
ibex_model_dict = ibex_model.state_dict()
|
||||
|
||||
ibex_model_dict['conv1.weight'] = torch.tensor(int_weights[0])
|
||||
ibex_model_dict['conv2.weight'] = torch.tensor(int_weights[1])
|
||||
ibex_model_dict['fc1.weight'] = torch.tensor(int_weights[2])
|
||||
ibex_model_dict['fc2.weight'] = torch.tensor(int_weights[3])
|
||||
ibex_model_dict['fc3.weight'] = torch.tensor(int_weights[4])
|
||||
ibex_model_dict = configure_network(ibex_model_dict, int_weights, int_biases)
|
||||
|
||||
ibex_model_dict['conv1.bias'] = torch.tensor(int_biases[0])
|
||||
ibex_model_dict['conv2.bias'] = torch.tensor(int_biases[1])
|
||||
ibex_model_dict['fc1.bias'] = torch.tensor(int_biases[2])
|
||||
ibex_model_dict['fc2.bias'] = torch.tensor(int_biases[3])
|
||||
ibex_model_dict['fc3.bias'] = torch.tensor(int_biases[4])
|
||||
ibex_model.load_state_dict(ibex_model_dict)
|
||||
|
||||
return ibex_model
|
||||
|
||||
def create_cmsis_cnn_model(int_weights, int_biases, mul_vals, shift_vals):
    """Build an Ibex_CMSIS_CNN model and load integer weights and biases.

    Args:
        int_weights: Per-layer integer weight values.
        int_biases: Per-layer integer bias values.
        mul_vals: Multipliers forwarded to the model constructor.
        shift_vals: Shifts forwarded to the model constructor.

    Returns:
        The Ibex_CMSIS_CNN instance with the updated state dict loaded.
    """
    ibex_model = Ibex_CMSIS_CNN(mul_vals, shift_vals)

    # Fill the freshly created state dict, then push it back into the model.
    quantized_state = configure_network(ibex_model.state_dict(), int_weights, int_biases)
    ibex_model.load_state_dict(quantized_state)

    return ibex_model
|
||||
|
||||
def create_ibex_dws_model(int_weights, int_biases, mul_vals, shift_vals):
    """Build an Ibex_Cifar10_Dws_CNN model and load integer weights and biases.

    Args:
        int_weights: Per-layer integer weight values.
        int_biases: Per-layer integer bias values.
        mul_vals: Multipliers forwarded to the model constructor.
        shift_vals: Shifts forwarded to the model constructor.

    Returns:
        The Ibex_Cifar10_Dws_CNN instance with the updated state dict loaded.
    """
    ibex_model = Ibex_Cifar10_Dws_CNN(mul_vals, shift_vals)

    # Fill the freshly created state dict, then push it back into the model.
    quantized_state = configure_network(ibex_model.state_dict(), int_weights, int_biases)
    ibex_model.load_state_dict(quantized_state)

    return ibex_model
|
||||
|
||||
def eval_sim_model(quant_model, ibex_model, test_loader):
|
||||
# Turn off gradients for validation
|
||||
with torch.no_grad():
|
||||
ibex_model.eval()
|
||||
correct = 0
|
||||
|
||||
Reference in New Issue
Block a user