Adding new features

This commit is contained in:
alexmr09
2024-07-23 13:00:49 +03:00
parent 9e044fd7fc
commit 745cc4ed6d
28 changed files with 33632 additions and 106 deletions
@@ -0,0 +1,15 @@
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Generate a baremetal application
# Name of the program $(PROGRAM).c will be added as a source file
PROGRAM = cifar10_dws_cnn
# Directory containing this Makefile: the most recently read entry of
# MAKEFILE_LIST (this file, since nothing is included before this line) is
# canonicalised with realpath and reduced to its directory part.
PROGRAM_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
# Any extra source files to include in the build. Use the upper case .S
# extension for assembly files
EXTRA_SRCS :=
# Shared rules live in common/common.mk, two directory levels above this one.
include ${PROGRAM_DIR}/../../common/common.mk
@@ -0,0 +1,298 @@
#include "simple_system_common.h"
#include "cnn_weights.h"
#include "fully_connected_opt.h"
#include "ibex_cnn_params.h"
#include "ibex_inputs.h"
#include "conv2d_opt.h"
#include "dws_conv_opt.h"
/* The input buffer is declared as in[IMG_SZ][IMG_SZ][NUM_FIL0]. */
#define IMG_SZ 32
#define NUM_FIL0 1
/* FILTERn: kernel side length of convolution layer n (used for both the
 * height and the width entry of the filter-dimension arrays). */
#define FILTER1 3
#define FILTER2 1
#define FILTER3 3
#define FILTER4 1
#define FILTER5 3
#define FILTER6 1
#define FILTER7 3
#define FILTER8 1
#define FILTER9 3
#define FILTER10 1
#define FILTER11 3
#define FILTER12 1
/* NUM_FILn: output depth of convolution layer n. */
#define NUM_FIL1 1
#define NUM_FIL2 16
#define NUM_FIL3 16
#define NUM_FIL4 16
#define NUM_FIL5 16
#define NUM_FIL6 32
#define NUM_FIL7 32
#define NUM_FIL8 32
#define NUM_FIL9 32
#define NUM_FIL10 64
#define NUM_FIL11 64
#define NUM_FIL12 64
/* STRIDEn: stride of convolution layer n. */
#define STRIDE1 1
#define STRIDE2 1
#define STRIDE3 1
#define STRIDE4 1
#define STRIDE5 1
#define STRIDE6 1
#define STRIDE7 1
#define STRIDE8 1
#define STRIDE9 1
#define STRIDE10 1
#define STRIDE11 1
#define STRIDE12 1
/* PAD_TBn / PAD_LRn: padding of convolution layer n; each value is used
 * twice in the 4-entry pad arrays ({TB, TB, LR, LR}). */
#define PAD_TB1 1
#define PAD_LR1 1
#define PAD_TB2 0
#define PAD_LR2 0
#define PAD_TB3 1
#define PAD_LR3 1
#define PAD_TB4 0
#define PAD_LR4 0
#define PAD_TB5 1
#define PAD_LR5 1
#define PAD_TB6 0
#define PAD_LR6 0
#define PAD_TB7 1
#define PAD_LR7 1
#define PAD_TB8 0
#define PAD_LR8 0
#define PAD_TB9 1
#define PAD_LR9 1
#define PAD_TB10 0
#define PAD_LR10 0
#define PAD_TB11 1
#define PAD_LR11 1
#define PAD_TB12 0
#define PAD_LR12 0
/* Window size and stride of the three pooling stages. */
#define POOL_STRIDE1 2
#define POOL_SIZE1 2
#define POOL_STRIDE2 2
#define POOL_SIZE2 2
#define POOL_STRIDE3 2
#define POOL_SIZE3 2
/* OUT_DIM: number of int words in the final output vector.
 * SAMPLES: number of input samples processed per run. */
#define OUT_DIM 3
#define SAMPLES 1
/* File-scope result buffer; not referenced elsewhere in the visible part of
 * this file. */
int outs[SAMPLES][OUT_DIM];
/*
 * Runs the whole network once per sample and prints the final output vector.
 *
 * The function first derives the dimensions of every stage, then declares
 * one automatic (variable-length) activation buffer per stage, and finally,
 * for each of the SAMPLES inputs, copies the sample out of `input`, calls
 * the layer kernels in order and prints `out`.
 */
void cifar10_dws_cnn() {
/* Stage dimensions. Convolution stages use
 * ((in - filter + 2 * pad) / stride) + 1 for height and width; pooling
 * stages keep the depth and divide height and width by the pool stride. */
int dout1 = NUM_FIL1;
int hout1 = ((IMG_SZ - FILTER1 + 2 * PAD_TB1)/STRIDE1) + 1;
int wout1 = ((IMG_SZ - FILTER1 + 2 * PAD_LR1)/STRIDE1) + 1;
int dout2 = NUM_FIL2;
int hout2 = ((hout1 - FILTER2+ 2 * PAD_TB2)/STRIDE2)+1;
int wout2 = ((wout1 - FILTER2+ 2 * PAD_LR2)/STRIDE2)+1;
int dout3 = NUM_FIL3;
int hout3 = ((hout2 - FILTER3+ 2 * PAD_TB3)/STRIDE3)+1;
int wout3 = ((wout2 - FILTER3+ 2 * PAD_LR3)/STRIDE3)+1;
int dout4 = NUM_FIL4;
int hout4 = ((hout3 - FILTER4+ 2 * PAD_TB4)/STRIDE4)+1;
int wout4 = ((wout3 - FILTER4+ 2 * PAD_LR4)/STRIDE4)+1;
/* Stage 5: first pooling stage. */
int dout5 = dout4;
int hout5 = hout4/POOL_STRIDE1;
int wout5 = wout4/POOL_STRIDE1;
int dout6 = NUM_FIL5;
int hout6 = ((hout5 - FILTER5+ 2 * PAD_TB5)/STRIDE5)+1;
int wout6 = ((wout5 - FILTER5+ 2 * PAD_LR5)/STRIDE5)+1;
int dout7 = NUM_FIL6;
int hout7 = ((hout6 - FILTER6+ 2 * PAD_TB6)/STRIDE6)+1;
int wout7 = ((wout6 - FILTER6+ 2 * PAD_LR6)/STRIDE6)+1;
int dout8 = NUM_FIL7;
int hout8 = ((hout7 - FILTER7+ 2 * PAD_TB7)/STRIDE7)+1;
int wout8 = ((wout7 - FILTER7+ 2 * PAD_LR7)/STRIDE7)+1;
int dout9 = NUM_FIL8;
int hout9 = ((hout8 - FILTER8+ 2 * PAD_TB8)/STRIDE8)+1;
int wout9 = ((wout8 - FILTER8+ 2 * PAD_LR8)/STRIDE8)+1;
/* Stage 10: second pooling stage. */
int dout10 = dout9;
int hout10 = hout9/POOL_STRIDE2;
int wout10 = wout9/POOL_STRIDE2;
int dout11 = NUM_FIL9;
int hout11 = ((hout10 - FILTER9+ 2 * PAD_TB9)/STRIDE9)+1;
int wout11 = ((wout10 - FILTER9+ 2 * PAD_LR9)/STRIDE9)+1;
int dout12 = NUM_FIL10;
int hout12 = ((hout11 - FILTER10+ 2 * PAD_TB10)/STRIDE10)+1;
int wout12 = ((wout11 - FILTER10+ 2 * PAD_LR10)/STRIDE10)+1;
int dout13 = NUM_FIL11;
int hout13 = ((hout12 - FILTER11+ 2 * PAD_TB11)/STRIDE11)+1;
int wout13 = ((wout12 - FILTER11+ 2 * PAD_LR11)/STRIDE11)+1;
int dout14 = NUM_FIL12;
int hout14 = ((hout13 - FILTER12+ 2 * PAD_TB12)/STRIDE12)+1;
int wout14 = ((wout13 - FILTER12+ 2 * PAD_LR12)/STRIDE12)+1;
/* Stage 15: third pooling stage; its element count is the flattened size. */
int dout15 = dout14;
int hout15 = hout14/POOL_STRIDE3;
int wout15 = wout14/POOL_STRIDE3;
int flatten_dim = dout15 * hout15 * wout15;
/* Per-stage buffers and descriptors. outN / outp_dimN / pad_N / f_dimN are
 * numbered by stage (pooling stages included), so e.g. pad_6 and f_dim6
 * describe the fifth convolution. All outN arrays are automatic objects
 * whose sizes come from the variables above. */
int in[IMG_SZ][IMG_SZ][NUM_FIL0];
int inp_dim[3] = {IMG_SZ, IMG_SZ, NUM_FIL0};
int out1[hout1][wout1][dout1];
int pad_1[4] = {PAD_TB1, PAD_TB1, PAD_LR1, PAD_LR1};
int outp_dim1[3] = {hout1, wout1, dout1};
int f_dim1[4] = {NUM_FIL1, FILTER1, FILTER1, NUM_FIL0};
int out2[hout2][wout2][dout2];
int pad_2[4] = {PAD_TB2, PAD_TB2, PAD_LR2, PAD_LR2};
int outp_dim2[3] = {hout2, wout2, dout2};
int f_dim2[4] = {NUM_FIL2, FILTER2, FILTER2, NUM_FIL1};
int out3[hout3][wout3][dout3];
int pad_3[4] = {PAD_TB3, PAD_TB3, PAD_LR3, PAD_LR3};
int outp_dim3[3] = {hout3, wout3, dout3};
int f_dim3[4] = {NUM_FIL3, FILTER3, FILTER3, NUM_FIL2};
int out4[hout4][wout4][dout4];
int pad_4[4] = {PAD_TB4, PAD_TB4, PAD_LR4, PAD_LR4};
int outp_dim4[3] = {hout4, wout4, dout4};
int f_dim4[4] = {NUM_FIL4, FILTER4, FILTER4, NUM_FIL3};
int out5[hout5][wout5][dout5];
int outp_dim5[3] = {hout5, wout5, dout5};
int out6[hout6][wout6][dout6];
int pad_6[4] = {PAD_TB5, PAD_TB5, PAD_LR5, PAD_LR5};
int outp_dim6[3] = {hout6, wout6, dout6};
int f_dim6[4] = {NUM_FIL5, FILTER5, FILTER5, NUM_FIL4};
int out7[hout7][wout7][dout7];
int pad_7[4] = {PAD_TB6, PAD_TB6, PAD_LR6, PAD_LR6};
int outp_dim7[3] = {hout7, wout7, dout7};
int f_dim7[4] = {NUM_FIL6, FILTER6, FILTER6, NUM_FIL5};
int out8[hout8][wout8][dout8];
int pad_8[4] = {PAD_TB7, PAD_TB7, PAD_LR7, PAD_LR7};
int outp_dim8[3] = {hout8, wout8, dout8};
int f_dim8[4] = {NUM_FIL7, FILTER7, FILTER7, NUM_FIL6};
int out9[hout9][wout9][dout9];
int pad_9[4] = {PAD_TB8, PAD_TB8, PAD_LR8, PAD_LR8};
int outp_dim9[3] = {hout9, wout9, dout9};
int f_dim9[4] = {NUM_FIL8, FILTER8, FILTER8, NUM_FIL7};
int out10[hout10][wout10][dout10];
int outp_dim10[3] = {hout10, wout10, dout10};
int out11[hout11][wout11][dout11];
int pad_11[4] = {PAD_TB9, PAD_TB9, PAD_LR9, PAD_LR9};
int outp_dim11[3] = {hout11, wout11, dout11};
int f_dim11[4] = {NUM_FIL9, FILTER9, FILTER9, NUM_FIL8};
int out12[hout12][wout12][dout12];
int pad_12[4] = {PAD_TB10, PAD_TB10, PAD_LR10, PAD_LR10};
int outp_dim12[3] = {hout12, wout12, dout12};
int f_dim12[4] = {NUM_FIL10, FILTER10, FILTER10, NUM_FIL9};
int out13[hout13][wout13][dout13];
int pad_13[4] = {PAD_TB11, PAD_TB11, PAD_LR11, PAD_LR11};
int outp_dim13[3] = {hout13, wout13, dout13};
int f_dim13[4] = {NUM_FIL11, FILTER11, FILTER11, NUM_FIL10};
int out14[hout14][wout14][dout14];
int pad_14[4] = {PAD_TB12, PAD_TB12, PAD_LR12, PAD_LR12};
int outp_dim14[3] = {hout14, wout14, dout14};
int f_dim14[4] = {NUM_FIL12, FILTER12, FILTER12, NUM_FIL11};
int out15[hout15][wout15][dout15];
int outp_dim15[3] = {hout15, wout15, dout15};
int out16[flatten_dim];
int out[OUT_DIM];
for (int iter = 0; iter < SAMPLES; iter++){
/* Copy sample `iter` out of `input`, whose last index selects the sample. */
for(int i = 0; i < IMG_SZ; i++){
for(int j = 0; j < IMG_SZ; j++){
for(int k = 0; k < NUM_FIL0; k++){
in[i][j][k] = input[i][j][k][iter];
}
}
}
/* The layer calls are bracketed by pcount_enable(1) / pcount_enable(0);
 * presumably this limits performance counting to the inference itself --
 * confirm in simple_system_common.h. */
pcount_enable(1);
dw_conv_opt_1ch(inp_dim, f_dim1, outp_dim1, in, F1, B1, out1, STRIDE1, pad_1, SB1, MV1, SV1);
pw_conv_2bits(outp_dim1, f_dim2, outp_dim2, out1, F2, B2, out2, STRIDE2, pad_2, SB2, MV2, SV2);
dw_conv_opt(outp_dim2, f_dim3, outp_dim3, out2, F3, B3, out3, STRIDE3, pad_3, SB3, MV3, SV3);
pw_conv_8bits(outp_dim3, f_dim4, outp_dim4, out3, F4, B4, out4, STRIDE4, pad_4, SB4, MV4, SV4);
maxpool2_compressed(outp_dim4, outp_dim5, out4, out5, POOL_SIZE1, POOL_STRIDE1);
dw_conv_opt(outp_dim5, f_dim6, outp_dim6, out5, F5, B5, out6, STRIDE5, pad_6, SB5, MV5, SV5);
pw_conv_2bits(outp_dim6, f_dim7, outp_dim7, out6, F6, B6, out7, STRIDE6, pad_7, SB6, MV6, SV6);
dw_conv_opt(outp_dim7, f_dim8, outp_dim8, out7, F7, B7, out8, STRIDE7, pad_8, SB7, MV7, SV7);
pw_conv_8bits(outp_dim8, f_dim9, outp_dim9, out8, F8, B8, out9, STRIDE8, pad_9, SB8, MV8, SV8);
maxpool2_compressed(outp_dim9, outp_dim10, out9, out10, POOL_SIZE2, POOL_STRIDE2);
dw_conv_opt(outp_dim10, f_dim11, outp_dim11, out10, F9, B9, out11, STRIDE9, pad_11, SB9, MV9, SV9);
pw_conv_8bits(outp_dim11, f_dim12, outp_dim12, out11, F10, B10, out12, STRIDE10, pad_12, SB10, MV10, SV10);
dw_conv_opt(outp_dim12, f_dim13, outp_dim13, out12, F11, B11, out13, STRIDE11, pad_13, SB11, MV11, SV11);
pw_conv_8bits(outp_dim13, f_dim14, outp_dim14, out13, F12, B12, out14, STRIDE12, pad_14, SB12, MV12, SV12);
maxpool2_compressed(outp_dim14, outp_dim15, out14, out15, POOL_SIZE3, POOL_STRIDE3);
flatten(outp_dim15, out15, out16);
mlp_layer_8bits(out16, out, flatten_dim, OUT_DIM, W1, B13, SB13, MV13, SV13);
pcount_enable(0);
puts("Output Layer Values:\n");
/* Each output word is printed as four separate bytes, most significant
 * byte first, separated by spaces. */
for(int i = 0; i < OUT_DIM; i++) {
puthex((out[i] & 0xFF000000) >> 24);
puts(" ");
puthex((out[i] & 0xFF0000) >> 16);
puts(" ");
puthex((out[i] & 0xFF00) >> 8);
puts(" ");
puthex(out[i] & 0xFF);
puts("\n");
}
}
}
/* Program entry point: calls pcount_enable(0) before running the network,
 * which switches it on only around its own layer calls. */
int main(void) {
pcount_enable(0);
cifar10_dws_cnn();
return 0;
}
File diff suppressed because one or more lines are too long
@@ -0,0 +1,84 @@
#ifndef IBEX_CNN_PARAMS_H
#define IBEX_CNN_PARAMS_H
/* MVn: per-layer multiplier constant, passed as the second-to-last argument
 * of the layer-n kernel call. The values are byte-replicated words, e.g.
 * MV1 == 0x4B4B4B4B. */
#define MV1 1263225675
#define MV2 1886417008
#define MV3 1381126738
#define MV4 1263225675
#define MV5 1465341783
#define MV6 1280068684
#define MV7 1869573999
#define MV8 1600085855
#define MV9 1600085855
#define MV10 1970632053
#define MV11 1145324612
#define MV12 1532713819
#define MV13 1296911693
/* SVn: per-layer shift constant, passed as the last argument of the layer-n
 * kernel call.
 * NOTE(review): SV10 and SV13 are larger than INT_MAX (2147483647) where int
 * is 32 bits, so these unsuffixed constants have a wider type than int and
 * are narrowed at the call site if the parameter is an int -- confirm the
 * receiving parameter type in the *_opt kernels. */
#define SV1 2029118401
#define SV2 946921921
#define SV3 2029118401
#define SV4 1893843841
#define SV5 1893843841
#define SV6 1082196481
#define SV7 2029118401
#define SV8 2029118401
#define SV9 2029118401
#define SV10 2164392961
#define SV11 2029118401
#define SV12 2029118401
#define SV13 2840765761
static const int SB1[1] = {
1
};
static const int SB2[16] = {
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
};
static const int SB3[16] = {
135266305, 1048577, 1, 8257, 8193, 135274497, 135266369, 8193, 1, 8193, 65, 1, 134217729, 1, 1, 134225921
};
static const int SB4[16] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1048576, 0, 0, 0, 0
};
static const int SB5[16] = {
134234177, 136323073, 135282689, 136331393, 270549121, 136331329, 136331329, 136323201, 270540929, 270549121, 270540801, 270532737, 2105473, 8321, 2105345, 2113601
};
static const int SB6[32] = {
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
};
static const int SB7[32] = {
402669825, 540041217, 537927937, 4194561, 272638209, 537952513, 540049665, 541098049, 404783361, 405831873, 2113793, 536879361, 403726593, 540049665, 271614209, 541089921, 272662721, 271614209, 406880513, 541081793, 272662785, 538992897, 272662785, 403726593, 540033281, 540049601, 1081537, 403726337, 269517057, 272646401, 3178625, 539001089
};
static const int SB8[32] = {
270565504, 536895744, 406880512, 541090048, 268435712, 406864128, 540049408, 541065216, 406872320, 541090048, 540049600, 405823552, 540041216, 4227264, 540049664, 271589632, 537952320, 4219008, 540033216, 540041408, 541090048, 540049600, 405823552, 405823680, 405823744, 541081856, 406880448, 402677888, 271606016, 138445056, 403726400, 405831680
};
static const int SB9[32] = {
677380417, 542146817, 806404417, 677421249, 677429569, 810598721, 677421185, 677429441, 408977665, 675332353, 536903937, 675283329, 675307905, 677429505, 811639105, 811639169, 809549953, 407945601, 676380929, 676380993, 810582273, 810598721, 677429569, 675299649, 541106433, 811630785, 675316097, 405848449, 811630913, 811630977, 806404225, 677421441
};
static const int SB10[64] = {
139501824, 676364608, 673227072, 810582336, 405840256, 408969536, 541114624, 810590528, 810557760, 675307776, 676331840, 811630848, 408994112, 676381056, 810598720, 537952576, 541114432, 674267392, 542162944, 677429568, 408985920, 677429504, 542155136, 676372864, 811639104, 407937344, 542146880, 811630976, 406896832, 675332416, 675316096, 674275712, 677421120, 810590528, 540066112, 408969536, 811647232, 407920960, 273727616, 677421312, 810582272, 676340096, 6332736, 671138176, 677421376, 677429568, 676372544, 540066176, 676372800, 536912192, 406872384, 676372800, 805347712, 810590464, 5284160, 274776448, 677413248, 541089984, 674283520, 541106560, 810598720, 137412992, 810598528, 811639168
};
static const int SB11[64] = {
810607041, 678486337, 945865089, 810607041, 673227201, 939565505, 946913729, 943767937, 946913729, 811647425, 944816449, 678478273, 811647361, 678478209, 812695937, 678453697, 943776065, 810598849, 944800129, 677437825, 678486401, 946913665, 946921793, 541114753, 945873345, 542163265, 544260417, 544244033, 939548801, 945865025, 678478145, 944824577, 812704129, 5300673, 946889089, 676389057, 941679041, 675340609, 809558465, 273735937, 678461889, 678478145, 812695873, 676381121, 678486465, 671138113, 810557825, 945856961, 944775489, 946921665, 946897345, 809533889, 812695937, 812687809, 812696001, 945865089, 676389249, 677413249, 945840449, 946913473, 943767937, 675332353, 676381121, 811647425
};
static const int SB12[64] = {
810598784, 811647296, 677404992, 809550144, 677429632, 811647296, 810582400, 675332480, 676381056, 810598720, 542163200, 543211840, 809533440, 673235328, 807444672, 675316096, 810582016, 541114560, 677396800, 810590528, 676381056, 138453376, 809550208, 810598784, 676372800, 810598784, 675332352, 542163328, 674242944, 677421440, 404799808, 542163328, 809542016, 809542016, 810598784, 139501952, 674283712, 541114752, 811622784, 676372672, 542155136, 543211904, 811639168, 811630912, 809542016, 676356480, 673218944, 811630976, 810598720, 810582208, 675307584, 810598784, 543203648, 542163264, 677404672, 811630784, 810590592, 810582400, 674275712, 810590528, 541098304, 675332416, 539001088, 811622784
};
static const int SB13[3] = {
273736128, 946913728, 675282944
};
#endif /* IBEX_CNN_PARAMS_H */
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,15 @@
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Generate a baremetal application
# Name of the program $(PROGRAM).c will be added as a source file
PROGRAM = cifar10_dws_cnn
# Directory containing this Makefile: the most recently read entry of
# MAKEFILE_LIST (this file, since nothing is included before this line) is
# canonicalised with realpath and reduced to its directory part.
PROGRAM_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
# Any extra source files to include in the build. Use the upper case .S
# extension for assembly files
EXTRA_SRCS :=
# Shared rules live in common/common.mk, two directory levels above this one.
include ${PROGRAM_DIR}/../../common/common.mk
@@ -0,0 +1,292 @@
#include "simple_system_common.h"
#include "cnn_weights.h"
#include "fully_connected.h"
#include "ibex_cnn_params.h"
#include "ibex_inputs.h"
#include "conv2d.h"
#include "dws_conv.h"
/* The input buffer is declared as in[IMG_SZ][IMG_SZ][NUM_FIL0]. */
#define IMG_SZ 32
#define NUM_FIL0 3
/* FILTERn: kernel side length of convolution layer n (used for both the
 * height and the width entry of the filter-dimension arrays). */
#define FILTER1 3
#define FILTER2 1
#define FILTER3 3
#define FILTER4 1
#define FILTER5 3
#define FILTER6 1
#define FILTER7 3
#define FILTER8 1
#define FILTER9 3
#define FILTER10 1
#define FILTER11 3
#define FILTER12 1
/* NUM_FILn: output depth of convolution layer n. */
#define NUM_FIL1 3
#define NUM_FIL2 64
#define NUM_FIL3 64
#define NUM_FIL4 64
#define NUM_FIL5 64
#define NUM_FIL6 128
#define NUM_FIL7 128
#define NUM_FIL8 128
#define NUM_FIL9 128
#define NUM_FIL10 256
#define NUM_FIL11 256
#define NUM_FIL12 256
/* STRIDEn: stride of convolution layer n. */
#define STRIDE1 1
#define STRIDE2 1
#define STRIDE3 1
#define STRIDE4 1
#define STRIDE5 1
#define STRIDE6 1
#define STRIDE7 1
#define STRIDE8 1
#define STRIDE9 1
#define STRIDE10 1
#define STRIDE11 1
#define STRIDE12 1
/* PAD_TBn / PAD_LRn: padding of convolution layer n; each value is used
 * twice in the 4-entry pad arrays ({TB, TB, LR, LR}). */
#define PAD_TB1 1
#define PAD_LR1 1
#define PAD_TB2 0
#define PAD_LR2 0
#define PAD_TB3 1
#define PAD_LR3 1
#define PAD_TB4 0
#define PAD_LR4 0
#define PAD_TB5 1
#define PAD_LR5 1
#define PAD_TB6 0
#define PAD_LR6 0
#define PAD_TB7 1
#define PAD_LR7 1
#define PAD_TB8 0
#define PAD_LR8 0
#define PAD_TB9 1
#define PAD_LR9 1
#define PAD_TB10 0
#define PAD_LR10 0
#define PAD_TB11 1
#define PAD_LR11 1
#define PAD_TB12 0
#define PAD_LR12 0
/* Window size and stride of the three pooling stages. */
#define POOL_STRIDE1 2
#define POOL_SIZE1 2
#define POOL_STRIDE2 2
#define POOL_SIZE2 2
#define POOL_STRIDE3 2
#define POOL_SIZE3 2
/* OUT_DIM: number of entries in the final output vector.
 * SAMPLES: number of input samples processed per run. */
#define OUT_DIM 10
#define SAMPLES 1
/* File-scope result buffer; not referenced elsewhere in the visible part of
 * this file. */
int outs[SAMPLES][OUT_DIM];
/*
 * Runs the whole network once per sample and prints the final output vector.
 *
 * The function first derives the dimensions of every stage, then declares
 * one automatic (variable-length) activation buffer per stage, and finally,
 * for each of the SAMPLES inputs, copies the sample out of `input`, calls
 * the layer kernels in order and prints `out`.
 */
void cifar10_dws_cnn() {
/* Stage dimensions. Convolution stages use
 * ((in - filter + 2 * pad) / stride) + 1 for height and width; pooling
 * stages keep the depth and divide height and width by the pool stride. */
int dout1 = NUM_FIL1;
int hout1 = ((IMG_SZ - FILTER1 + 2 * PAD_TB1)/STRIDE1) + 1;
int wout1 = ((IMG_SZ - FILTER1 + 2 * PAD_LR1)/STRIDE1) + 1;
int dout2 = NUM_FIL2;
int hout2 = ((hout1 - FILTER2+ 2 * PAD_TB2)/STRIDE2)+1;
int wout2 = ((wout1 - FILTER2+ 2 * PAD_LR2)/STRIDE2)+1;
int dout3 = NUM_FIL3;
int hout3 = ((hout2 - FILTER3+ 2 * PAD_TB3)/STRIDE3)+1;
int wout3 = ((wout2 - FILTER3+ 2 * PAD_LR3)/STRIDE3)+1;
int dout4 = NUM_FIL4;
int hout4 = ((hout3 - FILTER4+ 2 * PAD_TB4)/STRIDE4)+1;
int wout4 = ((wout3 - FILTER4+ 2 * PAD_LR4)/STRIDE4)+1;
/* Stage 5: first pooling stage. */
int dout5 = dout4;
int hout5 = hout4/POOL_STRIDE1;
int wout5 = wout4/POOL_STRIDE1;
int dout6 = NUM_FIL5;
int hout6 = ((hout5 - FILTER5+ 2 * PAD_TB5)/STRIDE5)+1;
int wout6 = ((wout5 - FILTER5+ 2 * PAD_LR5)/STRIDE5)+1;
int dout7 = NUM_FIL6;
int hout7 = ((hout6 - FILTER6+ 2 * PAD_TB6)/STRIDE6)+1;
int wout7 = ((wout6 - FILTER6+ 2 * PAD_LR6)/STRIDE6)+1;
int dout8 = NUM_FIL7;
int hout8 = ((hout7 - FILTER7+ 2 * PAD_TB7)/STRIDE7)+1;
int wout8 = ((wout7 - FILTER7+ 2 * PAD_LR7)/STRIDE7)+1;
int dout9 = NUM_FIL8;
int hout9 = ((hout8 - FILTER8+ 2 * PAD_TB8)/STRIDE8)+1;
int wout9 = ((wout8 - FILTER8+ 2 * PAD_LR8)/STRIDE8)+1;
/* Stage 10: second pooling stage. */
int dout10 = dout9;
int hout10 = hout9/POOL_STRIDE2;
int wout10 = wout9/POOL_STRIDE2;
int dout11 = NUM_FIL9;
int hout11 = ((hout10 - FILTER9+ 2 * PAD_TB9)/STRIDE9)+1;
int wout11 = ((wout10 - FILTER9+ 2 * PAD_LR9)/STRIDE9)+1;
int dout12 = NUM_FIL10;
int hout12 = ((hout11 - FILTER10+ 2 * PAD_TB10)/STRIDE10)+1;
int wout12 = ((wout11 - FILTER10+ 2 * PAD_LR10)/STRIDE10)+1;
int dout13 = NUM_FIL11;
int hout13 = ((hout12 - FILTER11+ 2 * PAD_TB11)/STRIDE11)+1;
int wout13 = ((wout12 - FILTER11+ 2 * PAD_LR11)/STRIDE11)+1;
int dout14 = NUM_FIL12;
int hout14 = ((hout13 - FILTER12+ 2 * PAD_TB12)/STRIDE12)+1;
int wout14 = ((wout13 - FILTER12+ 2 * PAD_LR12)/STRIDE12)+1;
/* Stage 15: third pooling stage; its element count is the flattened size. */
int dout15 = dout14;
int hout15 = hout14/POOL_STRIDE3;
int wout15 = wout14/POOL_STRIDE3;
int flatten_dim = dout15 * hout15 * wout15;
/* Per-stage buffers and descriptors. outN / outp_dimN / pad_N / f_dimN are
 * numbered by stage (pooling stages included), so e.g. pad_6 and f_dim6
 * describe the fifth convolution. All outN arrays are automatic objects
 * whose sizes come from the variables above. */
int in[IMG_SZ][IMG_SZ][NUM_FIL0];
int inp_dim[3] = {IMG_SZ, IMG_SZ, NUM_FIL0};
int out1[hout1][wout1][dout1];
int pad_1[4] = {PAD_TB1, PAD_TB1, PAD_LR1, PAD_LR1};
int outp_dim1[3] = {hout1, wout1, dout1};
int f_dim1[4] = {NUM_FIL1, FILTER1, FILTER1, NUM_FIL0};
int out2[hout2][wout2][dout2];
int pad_2[4] = {PAD_TB2, PAD_TB2, PAD_LR2, PAD_LR2};
int outp_dim2[3] = {hout2, wout2, dout2};
int f_dim2[4] = {NUM_FIL2, FILTER2, FILTER2, NUM_FIL1};
int out3[hout3][wout3][dout3];
int pad_3[4] = {PAD_TB3, PAD_TB3, PAD_LR3, PAD_LR3};
int outp_dim3[3] = {hout3, wout3, dout3};
int f_dim3[4] = {NUM_FIL3, FILTER3, FILTER3, NUM_FIL2};
int out4[hout4][wout4][dout4];
int pad_4[4] = {PAD_TB4, PAD_TB4, PAD_LR4, PAD_LR4};
int outp_dim4[3] = {hout4, wout4, dout4};
int f_dim4[4] = {NUM_FIL4, FILTER4, FILTER4, NUM_FIL3};
int out5[hout5][wout5][dout5];
int outp_dim5[3] = {hout5, wout5, dout5};
int out6[hout6][wout6][dout6];
int pad_6[4] = {PAD_TB5, PAD_TB5, PAD_LR5, PAD_LR5};
int outp_dim6[3] = {hout6, wout6, dout6};
int f_dim6[4] = {NUM_FIL5, FILTER5, FILTER5, NUM_FIL4};
int out7[hout7][wout7][dout7];
int pad_7[4] = {PAD_TB6, PAD_TB6, PAD_LR6, PAD_LR6};
int outp_dim7[3] = {hout7, wout7, dout7};
int f_dim7[4] = {NUM_FIL6, FILTER6, FILTER6, NUM_FIL5};
int out8[hout8][wout8][dout8];
int pad_8[4] = {PAD_TB7, PAD_TB7, PAD_LR7, PAD_LR7};
int outp_dim8[3] = {hout8, wout8, dout8};
int f_dim8[4] = {NUM_FIL7, FILTER7, FILTER7, NUM_FIL6};
int out9[hout9][wout9][dout9];
int pad_9[4] = {PAD_TB8, PAD_TB8, PAD_LR8, PAD_LR8};
int outp_dim9[3] = {hout9, wout9, dout9};
int f_dim9[4] = {NUM_FIL8, FILTER8, FILTER8, NUM_FIL7};
int out10[hout10][wout10][dout10];
int outp_dim10[3] = {hout10, wout10, dout10};
int out11[hout11][wout11][dout11];
int pad_11[4] = {PAD_TB9, PAD_TB9, PAD_LR9, PAD_LR9};
int outp_dim11[3] = {hout11, wout11, dout11};
int f_dim11[4] = {NUM_FIL9, FILTER9, FILTER9, NUM_FIL8};
int out12[hout12][wout12][dout12];
int pad_12[4] = {PAD_TB10, PAD_TB10, PAD_LR10, PAD_LR10};
int outp_dim12[3] = {hout12, wout12, dout12};
int f_dim12[4] = {NUM_FIL10, FILTER10, FILTER10, NUM_FIL9};
int out13[hout13][wout13][dout13];
int pad_13[4] = {PAD_TB11, PAD_TB11, PAD_LR11, PAD_LR11};
int outp_dim13[3] = {hout13, wout13, dout13};
int f_dim13[4] = {NUM_FIL11, FILTER11, FILTER11, NUM_FIL10};
int out14[hout14][wout14][dout14];
int pad_14[4] = {PAD_TB12, PAD_TB12, PAD_LR12, PAD_LR12};
int outp_dim14[3] = {hout14, wout14, dout14};
int f_dim14[4] = {NUM_FIL12, FILTER12, FILTER12, NUM_FIL11};
int out15[hout15][wout15][dout15];
int outp_dim15[3] = {hout15, wout15, dout15};
int out16[flatten_dim];
int out[OUT_DIM];
for (int iter = 0; iter < SAMPLES; iter++){
/* Copy sample `iter` out of `input`, whose last index selects the sample. */
for(int i = 0; i < IMG_SZ; i++){
for(int j = 0; j < IMG_SZ; j++){
for(int k = 0; k < NUM_FIL0; k++){
in[i][j][k] = input[i][j][k][iter];
}
}
}
/* The layer calls are bracketed by pcount_enable(1) / pcount_enable(0);
 * presumably this limits performance counting to the inference itself --
 * confirm in simple_system_common.h. */
pcount_enable(1);
dw_conv(inp_dim, f_dim1, outp_dim1, in, F1, B1, out1, STRIDE1, pad_1, SB1, MV1, SV1);
pw_conv(outp_dim1, f_dim2, outp_dim2, out1, F2, B2, out2, STRIDE2, pad_2, SB2, MV2, SV2);
dw_conv(outp_dim2, f_dim3, outp_dim3, out2, F3, B3, out3, STRIDE3, pad_3, SB3, MV3, SV3);
pw_conv(outp_dim3, f_dim4, outp_dim4, out3, F4, B4, out4, STRIDE4, pad_4, SB4, MV4, SV4);
maxpool2(outp_dim4, outp_dim5, out4, out5, POOL_SIZE1, POOL_STRIDE1);
dw_conv(outp_dim5, f_dim6, outp_dim6, out5, F5, B5, out6, STRIDE5, pad_6, SB5, MV5, SV5);
pw_conv(outp_dim6, f_dim7, outp_dim7, out6, F6, B6, out7, STRIDE6, pad_7, SB6, MV6, SV6);
dw_conv(outp_dim7, f_dim8, outp_dim8, out7, F7, B7, out8, STRIDE7, pad_8, SB7, MV7, SV7);
pw_conv(outp_dim8, f_dim9, outp_dim9, out8, F8, B8, out9, STRIDE8, pad_9, SB8, MV8, SV8);
maxpool2(outp_dim9, outp_dim10, out9, out10, POOL_SIZE2, POOL_STRIDE2);
dw_conv(outp_dim10, f_dim11, outp_dim11, out10, F9, B9, out11, STRIDE9, pad_11, SB9, MV9, SV9);
pw_conv(outp_dim11, f_dim12, outp_dim12, out11, F10, B10, out12, STRIDE10, pad_12, SB10, MV10, SV10);
dw_conv(outp_dim12, f_dim13, outp_dim13, out12, F11, B11, out13, STRIDE11, pad_13, SB11, MV11, SV11);
pw_conv(outp_dim13, f_dim14, outp_dim14, out13, F12, B12, out14, STRIDE12, pad_14, SB12, MV12, SV12);
maxpool2(outp_dim14, outp_dim15, out14, out15, POOL_SIZE3, POOL_STRIDE3);
flatten(outp_dim15, out15, out16);
mlp_layer(out16, out, flatten_dim, OUT_DIM, W1, B13, SB13, MV13, SV13);
pcount_enable(0);
puts("Output Layer Values:\n");
/* One output value per line. */
for(int i = 0; i < OUT_DIM; i++) {
puthex(out[i]);
puts("\n");
}
}
}
/* Program entry point: calls pcount_enable(0) before running the network,
 * which switches it on only around its own layer calls. */
int main(void) {
pcount_enable(0);
cifar10_dws_cnn();
return 0;
}
File diff suppressed because one or more lines are too long
@@ -0,0 +1,46 @@
#ifndef IBEX_CNN_PARAMS_H
#define IBEX_CNN_PARAMS_H
/* MVn: per-layer multiplier, passed as the second-to-last argument of the
 * layer-n kernel call (the quantized_multiplier parameter of pw_conv). */
#define MV1 75
#define MV2 112
#define MV3 82
#define MV4 75
#define MV5 87
#define MV6 76
#define MV7 111
#define MV8 95
#define MV9 95
#define MV10 117
#define MV11 68
#define MV12 91
#define MV13 77
/* SVn: per-layer right-shift amount, passed as the last argument of the
 * layer-n kernel call (the out_shift_rl parameter of pw_conv). */
#define SV1 15
#define SV2 7
#define SV3 15
#define SV4 14
#define SV5 14
#define SV6 8
#define SV7 15
#define SV8 15
#define SV9 15
#define SV10 16
#define SV11 15
#define SV12 15
#define SV13 21
/* SBn: passed as the bias_shift_mode argument; pw_conv does not read it. */
#define SB1 0
#define SB2 0
#define SB3 0
#define SB4 0
#define SB5 0
#define SB6 0
#define SB7 0
#define SB8 0
#define SB9 0
#define SB10 0
#define SB11 0
#define SB12 0
#define SB13 0
#endif /* IBEX_CNN_PARAMS_H */
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,15 @@
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Generate a baremetal application
# Name of the program $(PROGRAM).c will be added as a source file
PROGRAM = cmsis_cnn
# Directory containing this Makefile: the most recently read entry of
# MAKEFILE_LIST (this file, since nothing is included before this line) is
# canonicalised with realpath and reduced to its directory part.
PROGRAM_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
# Any extra source files to include in the build. Use the upper case .S
# extension for assembly files
EXTRA_SRCS :=
# Shared rules live in common/common.mk, two directory levels above this one.
include ${PROGRAM_DIR}/../../common/common.mk
@@ -0,0 +1,153 @@
#include "simple_system_common.h"
#include "cnn_weights.h"
#include "fully_connected_opt.h"
#include "ibex_cnn_params.h"
#include "ibex_inputs.h"
#include "conv2d_opt.h"
/* The input buffer is declared as in[IMG_SZ][IMG_SZ][NUM_FIL0]. */
#define IMG_SZ 32
#define NUM_FIL0 1
/* FILTERn: kernel side length of convolution layer n (used for both the
 * height and the width entry of the filter-dimension arrays). */
#define FILTER1 5
#define FILTER2 5
#define FILTER3 5
/* NUM_FILn: output depth of convolution layer n. */
#define NUM_FIL1 8
#define NUM_FIL2 8
#define NUM_FIL3 16
/* STRIDEn: stride of convolution layer n. */
#define STRIDE1 1
#define STRIDE2 1
#define STRIDE3 1
/* PAD_TBn / PAD_LRn: padding of convolution layer n; each value is used
 * twice in the 4-entry pad arrays ({TB, TB, LR, LR}). */
#define PAD_TB1 2
#define PAD_LR1 2
#define PAD_TB2 2
#define PAD_LR2 2
#define PAD_TB3 2
#define PAD_LR3 2
/* Window size and stride of the three pooling stages. */
#define POOL_STRIDE1 2
#define POOL_SIZE1 2
#define POOL_STRIDE2 2
#define POOL_SIZE2 2
#define POOL_STRIDE3 2
#define POOL_SIZE3 2
/* OUT_DIM: number of int words in the final output vector.
 * SAMPLES: number of input samples processed per run. */
#define OUT_DIM 3
#define SAMPLES 1
/* File-scope result buffer; not referenced elsewhere in the visible part of
 * this file. */
int outs[SAMPLES][OUT_DIM];
/*
 * Runs the whole network once per sample and prints the final output vector.
 *
 * Three convolution stages, each followed by a pooling stage, feed a
 * flatten step and one final mlp_layer_2bits call. Stage dimensions are
 * derived first, then one automatic (variable-length) buffer is declared per
 * stage, then the kernels are called in order for each of the SAMPLES inputs.
 */
void cmsis_cnn() {
/* Stage dimensions. Convolution stages use
 * ((in - filter + 2 * pad) / stride) + 1 for height and width; pooling
 * stages keep the depth and divide height and width by the pool stride. */
int dout1 = NUM_FIL1;
int hout1 = ((IMG_SZ - FILTER1 + 2 * PAD_TB1)/STRIDE1) + 1;
int wout1 = ((IMG_SZ - FILTER1 + 2 * PAD_LR1)/STRIDE1) + 1;
int dout2 = dout1;
int hout2 = hout1/POOL_STRIDE1;
int wout2 = wout1/POOL_STRIDE1;
int dout3 = NUM_FIL2;
int hout3 = ((hout2 - FILTER2+ 2 * PAD_TB2)/STRIDE2)+1;
int wout3 = ((wout2 - FILTER2+ 2 * PAD_LR2)/STRIDE2)+1;
int dout4 = dout3;
int hout4 = hout3/POOL_STRIDE2;
int wout4 = wout3/POOL_STRIDE2;
int dout5 = NUM_FIL3;
int hout5 = ((hout4 - FILTER3+ 2 * PAD_TB3)/STRIDE3)+1;
int wout5 = ((wout4 - FILTER3+ 2 * PAD_LR3)/STRIDE3)+1;
int dout6 = dout5;
int hout6 = hout5/POOL_STRIDE3;
int wout6 = wout5/POOL_STRIDE3;
int flatten_dim = dout6 * hout6 * wout6;
/* Per-stage buffers and descriptors, numbered by stage (pooling stages
 * included), so pad_3 / f_dim3 describe the second convolution and
 * pad_5 / f_dim5 the third. */
int in[IMG_SZ][IMG_SZ][NUM_FIL0];
int inp_dim[3] = {IMG_SZ, IMG_SZ, NUM_FIL0};
int out1[hout1][wout1][dout1];
int pad_1[4] = {PAD_TB1, PAD_TB1, PAD_LR1, PAD_LR1};
int outp_dim1[3] = {hout1, wout1, dout1};
int f_dim1[4] = {NUM_FIL1, FILTER1, FILTER1, NUM_FIL0};
int out2[hout2][wout2][dout2];
int outp_dim2[3] = {hout2, wout2, dout2};
int out3[hout3][wout3][dout3];
int pad_3[4] = {PAD_TB2, PAD_TB2, PAD_LR2, PAD_LR2};
int outp_dim3[3] = {hout3, wout3, dout3};
int f_dim3[4] = {NUM_FIL2, FILTER2, FILTER2, NUM_FIL1};
int out4[hout4][wout4][dout4];
int outp_dim4[3] = {hout4, wout4, dout4};
int out5[hout5][wout5][dout5];
int pad_5[4] = {PAD_TB3, PAD_TB3, PAD_LR3, PAD_LR3};
int outp_dim5[3] = {hout5, wout5, dout5};
int f_dim5[4] = {NUM_FIL3, FILTER3, FILTER3, NUM_FIL2};
int out6[hout6][wout6][dout6];
int outp_dim6[3] = {hout6, wout6, dout6};
int out7[flatten_dim];
int out[OUT_DIM];
for (int iter = 0; iter < SAMPLES; iter++){
/* Copy sample `iter` out of `input`, whose last index selects the sample. */
for(int i = 0; i < IMG_SZ; i++){
for(int j = 0; j < IMG_SZ; j++){
for(int k = 0; k < NUM_FIL0; k++){
in[i][j][k] = input[i][j][k][iter];
}
}
}
/* The layer calls are bracketed by pcount_enable(1) / pcount_enable(0);
 * presumably this limits performance counting to the inference itself --
 * confirm in simple_system_common.h. */
pcount_enable(1);
conv2_8bits_1ch(inp_dim, f_dim1, outp_dim1, in, F1, B1, out1, STRIDE1, pad_1, SB1, MV1, SV1);
maxpool2_compressed(outp_dim1, outp_dim2, out1, out2, POOL_SIZE1, POOL_STRIDE1);
conv2_8bits(outp_dim2, f_dim3, outp_dim3, out2, F2, B2, out3, STRIDE2, pad_3, SB2, MV2, SV2);
maxpool2_compressed(outp_dim3, outp_dim4, out3, out4, POOL_SIZE2, POOL_STRIDE2);
conv2_2bits(outp_dim4, f_dim5, outp_dim5, out4, F3, B3, out5, STRIDE3, pad_5, SB3, MV3, SV3);
maxpool2_compressed(outp_dim5, outp_dim6, out5, out6, POOL_SIZE3, POOL_STRIDE3);
flatten(outp_dim6, out6, out7);
mlp_layer_2bits(out7, out, flatten_dim, OUT_DIM, W1, B4, SB4, MV4, SV4);
pcount_enable(0);
puts("Output Layer Values:\n");
/* Each output word is printed as four separate bytes, most significant
 * byte first, separated by spaces. */
for(int i = 0; i < OUT_DIM; i++) {
puthex((out[i] & 0xFF000000) >> 24);
puts(" ");
puthex((out[i] & 0xFF0000) >> 16);
puts(" ");
puthex((out[i] & 0xFF00) >> 8);
puts(" ");
puthex(out[i] & 0xFF);
puts("\n");
}
}
}
/* Program entry point: calls pcount_enable(0) before running the network,
 * which switches it on only around its own layer calls. */
int main(void) {
pcount_enable(0);
cmsis_cnn();
return 0;
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,30 @@
#ifndef IBEX_CNN_PARAMS_H
#define IBEX_CNN_PARAMS_H
/* MVn: per-layer multiplier constant, passed as the second-to-last argument
 * of the layer-n kernel call. The values are byte-replicated words, e.g.
 * MV1 == 0x74747474. */
#define MV1 1953789044
#define MV2 1229539657
#define MV3 1212696648
#define MV4 1330597711
/* SVn: per-layer shift constant, passed as the last argument of the layer-n
 * kernel call.
 * NOTE(review): SV1 and SV2 are larger than INT_MAX (2147483647) where int
 * is 32 bits, so these unsuffixed constants have a wider type than int and
 * are narrowed at the call site if the parameter is an int -- confirm the
 * receiving parameter type in the *_opt kernels. */
#define SV1 2164392961
#define SV2 2299667521
#define SV3 1488020161
#define SV4 1623294721
static const int SB1[8] = {
812696004, 946880900, 1079034308, 946913796, 945865156, 1081139524, 946930052, 545309060
};
static const int SB2[8] = {
945873216, 945832320, 945865152, 944816576, 674283904, 543211776, 945873280, 944824704
};
static const int SB3[16] = {
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
};
static const int SB4[3] = {
3, 3, 3
};
#endif /* IBEX_CNN_PARAMS_H */
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,15 @@
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Generate a baremetal application
# Name of the program $(PROGRAM).c will be added as a source file
PROGRAM = cmsis_cnn
# Directory containing this Makefile: the most recently read entry of
# MAKEFILE_LIST (this file, since nothing is included before this line) is
# canonicalised with realpath and reduced to its directory part.
PROGRAM_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
# Any extra source files to include in the build. Use the upper case .S
# extension for assembly files
EXTRA_SRCS :=
# Shared rules live in common/common.mk, two directory levels above this one.
include ${PROGRAM_DIR}/../../common/common.mk
@@ -0,0 +1,147 @@
#include "simple_system_common.h"
#include "cnn_weights.h"
#include "fully_connected.h"
#include "ibex_cnn_params.h"
#include "ibex_inputs.h"
#include "conv2d.h"
/* The input buffer is declared as in[IMG_SZ][IMG_SZ][NUM_FIL0]. */
#define IMG_SZ 32
#define NUM_FIL0 3
/* FILTERn: kernel side length of convolution layer n (used for both the
 * height and the width entry of the filter-dimension arrays). */
#define FILTER1 5
#define FILTER2 5
#define FILTER3 5
/* NUM_FILn: output depth of convolution layer n. */
#define NUM_FIL1 32
#define NUM_FIL2 32
#define NUM_FIL3 64
/* STRIDEn: stride of convolution layer n. */
#define STRIDE1 1
#define STRIDE2 1
#define STRIDE3 1
/* PAD_TBn / PAD_LRn: padding of convolution layer n; each value is used
 * twice in the 4-entry pad arrays ({TB, TB, LR, LR}). */
#define PAD_TB1 2
#define PAD_LR1 2
#define PAD_TB2 2
#define PAD_LR2 2
#define PAD_TB3 2
#define PAD_LR3 2
/* Window size and stride of the three pooling stages. */
#define POOL_STRIDE1 2
#define POOL_SIZE1 2
#define POOL_STRIDE2 2
#define POOL_SIZE2 2
#define POOL_STRIDE3 2
#define POOL_SIZE3 2
/* OUT_DIM: number of entries in the final output vector.
 * SAMPLES: number of input samples processed per run. */
#define OUT_DIM 10
#define SAMPLES 1
/* File-scope result buffer; not referenced elsewhere in the visible part of
 * this file. */
int outs[SAMPLES][OUT_DIM];
/*
 * Runs the whole network once per sample and prints the final output vector.
 *
 * Three convolution stages, each followed by a pooling stage, feed a
 * flatten step and one final mlp_layer call. Stage dimensions are derived
 * first, then one automatic (variable-length) buffer is declared per stage,
 * then the kernels are called in order for each of the SAMPLES inputs.
 */
void cmsis_cnn() {
/* Stage dimensions. Convolution stages use
 * ((in - filter + 2 * pad) / stride) + 1 for height and width; pooling
 * stages keep the depth and divide height and width by the pool stride. */
int dout1 = NUM_FIL1;
int hout1 = ((IMG_SZ - FILTER1 + 2 * PAD_TB1)/STRIDE1) + 1;
int wout1 = ((IMG_SZ - FILTER1 + 2 * PAD_LR1)/STRIDE1) + 1;
int dout2 = dout1;
int hout2 = hout1/POOL_STRIDE1;
int wout2 = wout1/POOL_STRIDE1;
int dout3 = NUM_FIL2;
int hout3 = ((hout2 - FILTER2+ 2 * PAD_TB2)/STRIDE2)+1;
int wout3 = ((wout2 - FILTER2+ 2 * PAD_LR2)/STRIDE2)+1;
int dout4 = dout3;
int hout4 = hout3/POOL_STRIDE2;
int wout4 = wout3/POOL_STRIDE2;
int dout5 = NUM_FIL3;
int hout5 = ((hout4 - FILTER3+ 2 * PAD_TB3)/STRIDE3)+1;
int wout5 = ((wout4 - FILTER3+ 2 * PAD_LR3)/STRIDE3)+1;
int dout6 = dout5;
int hout6 = hout5/POOL_STRIDE3;
int wout6 = wout5/POOL_STRIDE3;
int flatten_dim = dout6 * hout6 * wout6;
/* Per-stage buffers and descriptors, numbered by stage (pooling stages
 * included), so pad_3 / f_dim3 describe the second convolution and
 * pad_5 / f_dim5 the third. */
int in[IMG_SZ][IMG_SZ][NUM_FIL0];
int inp_dim[3] = {IMG_SZ, IMG_SZ, NUM_FIL0};
int out1[hout1][wout1][dout1];
int pad_1[4] = {PAD_TB1, PAD_TB1, PAD_LR1, PAD_LR1};
int outp_dim1[3] = {hout1, wout1, dout1};
int f_dim1[4] = {NUM_FIL1, FILTER1, FILTER1, NUM_FIL0};
int out2[hout2][wout2][dout2];
int outp_dim2[3] = {hout2, wout2, dout2};
int out3[hout3][wout3][dout3];
int pad_3[4] = {PAD_TB2, PAD_TB2, PAD_LR2, PAD_LR2};
int outp_dim3[3] = {hout3, wout3, dout3};
int f_dim3[4] = {NUM_FIL2, FILTER2, FILTER2, NUM_FIL1};
int out4[hout4][wout4][dout4];
int outp_dim4[3] = {hout4, wout4, dout4};
int out5[hout5][wout5][dout5];
int pad_5[4] = {PAD_TB3, PAD_TB3, PAD_LR3, PAD_LR3};
int outp_dim5[3] = {hout5, wout5, dout5};
int f_dim5[4] = {NUM_FIL3, FILTER3, FILTER3, NUM_FIL2};
int out6[hout6][wout6][dout6];
int outp_dim6[3] = {hout6, wout6, dout6};
int out7[flatten_dim];
int out[OUT_DIM];
for (int iter = 0; iter < SAMPLES; iter++){
/* Copy sample `iter` out of `input`, whose last index selects the sample. */
for(int i = 0; i < IMG_SZ; i++){
for(int j = 0; j < IMG_SZ; j++){
for(int k = 0; k < NUM_FIL0; k++){
in[i][j][k] = input[i][j][k][iter];
}
}
}
/* The layer calls are bracketed by pcount_enable(1) / pcount_enable(0);
 * presumably this limits performance counting to the inference itself --
 * confirm in simple_system_common.h. */
pcount_enable(1);
conv2(inp_dim, f_dim1, outp_dim1, in, F1, B1, out1, STRIDE1, pad_1, SB1, MV1, SV1);
maxpool2(outp_dim1, outp_dim2, out1, out2, POOL_SIZE1, POOL_STRIDE1);
conv2(outp_dim2, f_dim3, outp_dim3, out2, F2, B2, out3, STRIDE2, pad_3, SB2, MV2, SV2);
maxpool2(outp_dim3, outp_dim4, out3, out4, POOL_SIZE2, POOL_STRIDE2);
conv2(outp_dim4, f_dim5, outp_dim5, out4, F3, B3, out5, STRIDE3, pad_5, SB3, MV3, SV3);
maxpool2(outp_dim5, outp_dim6, out5, out6, POOL_SIZE3, POOL_STRIDE3);
flatten(outp_dim6, out6, out7);
mlp_layer(out7, out, flatten_dim, OUT_DIM, W1, B4, SB4, MV4, SV4);
pcount_enable(0);
puts("Output Layer Values:\n");
/* One output value per line. */
for(int i = 0; i < OUT_DIM; i++) {
puthex(out[i]);
puts("\n");
}
}
}
/* Program entry point: calls pcount_enable(0) before running the network,
 * which switches it on only around its own layer calls. */
int main(void) {
pcount_enable(0);
cmsis_cnn();
return 0;
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,19 @@
#ifndef IBEX_CNN_PARAMS_H
#define IBEX_CNN_PARAMS_H
/* MVn: per-layer multiplier, passed as the second-to-last argument of the
 * layer-n kernel call. */
#define MV1 116
#define MV2 73
#define MV3 72
#define MV4 79
/* SVn: per-layer shift amount, passed as the last argument of the layer-n
 * kernel call. */
#define SV1 16
#define SV2 17
#define SV3 11
#define SV4 12
/* SBn: passed as the third-to-last argument of the layer-n kernel call;
 * all zero in this configuration. */
#define SB1 0
#define SB2 0
#define SB3 0
#define SB4 0
#endif /* IBEX_CNN_PARAMS_H */
File diff suppressed because it is too large Load Diff
+70
View File
@@ -0,0 +1,70 @@
#ifndef DWS_CONV_H
#define DWS_CONV_H
/*
 * Plain-C pointwise (1x1) convolution followed by requantization.
 *
 * For each output channel i and output position (j, k) the accumulator starts
 * at bias[i]. If the matching input position lies inside the input, the
 * products inp[row][col][m] * fil[i][m] for m in [0, fil_dim[3]) are added;
 * positions that fall in the padding contribute nothing. The accumulator is
 * then multiplied by quantized_multiplier, rounded to nearest, shifted right
 * by out_shift_rl and clamped to [0, 255].
 *
 * in_dim               input  {height, width, depth}
 * fil_dim              only fil_dim[0] (rows of fil) and fil_dim[3] (depth) are used
 * out_dim              output {height, width, depth}
 * inp, fil, bias       input tensor, weights and per-output-channel bias
 * out                  result tensor, written for every (j, k, i)
 * strides              step between consecutive input rows/columns
 * pad                  pad[0] is the top padding, pad[2] the left padding
 * bias_shift_mode      accepted for signature compatibility, not used here
 * quantized_multiplier scale applied to the accumulator
 * out_shift_rl         right shift applied after scaling; a value <= 0 means
 *                      "no shift, no rounding" (values >= 32 are not supported)
 */
void pw_conv(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]],
             const int fil[fil_dim[0]][fil_dim[3]], const int bias[],
             int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[],
             const int bias_shift_mode, const int quantized_multiplier, const int out_shift_rl){
  int i, j, k, m, res, str1, str2, quant_prod;
  /* 1 << (shift - 1) is undefined for shift == 0, so the rounding term is
   * only formed when there is an actual shift to round for. */
  const int rounding = (out_shift_rl > 0) ? (1 << (out_shift_rl - 1)) : 0;

  (void)bias_shift_mode;

  for (i = 0; i < out_dim[2]; i++) { // output depth
    str1 = -pad[0] - strides;
    for (j = 0; j < out_dim[0]; j++) { // output height
      str1 += strides;
      str2 = -pad[2] - strides;
      for (k = 0; k < out_dim[1]; k++) { // output width
        res = bias[i];
        str2 += strides;
        if (str1 < in_dim[0] && str1 >= 0 && str2 >= 0 && str2 < in_dim[1]) {
          for (m = 0; m < fil_dim[3]; m++) { // filters depth
            res += inp[str1][str2][m] * fil[i][m];
          }
        }
        quant_prod = quantized_multiplier * res + rounding;
        if (out_shift_rl > 0) {
          quant_prod = quant_prod >> out_shift_rl;
        }
        if (quant_prod < 0) {
          quant_prod = 0;
        }
        if (quant_prod > 255) {
          quant_prod = 255;
        }
        out[j][k][i] = quant_prod;
      }
    }
  }
}
/*
 * Plain-C depthwise convolution followed by requantization.
 *
 * Each channel i is filtered independently: for output position (j, k) the
 * accumulator starts at bias[i] and adds inp[k1][k2][i] * depthwise_fil[i][p][n][0]
 * for every filter tap (p, n) whose input coordinate (k1, k2) lies inside the
 * input; taps that land in the padding are skipped. The accumulator is then
 * multiplied by depthwise_multiplier, rounded to nearest, shifted right by
 * depthwise_out_shift_rl and clamped to [0, 255].
 *
 * in_dim                 input  {height, width, depth}
 * depthwise_fil_dim      {channels, filter height, filter width, unused}
 * out_dim                output {height, width, depth}
 * inp, depthwise_fil     input tensor and per-channel filter taps
 * bias                   per-channel bias
 * out                    result tensor, written for every (j, k, i)
 * strides                step between consecutive filter placements
 * pad                    pad[0] is the top padding, pad[2] the left padding
 * bias_shift_mode        accepted for signature compatibility, not used here
 * depthwise_multiplier   scale applied to the accumulator
 * depthwise_out_shift_rl right shift applied after scaling; a value <= 0 means
 *                        "no shift, no rounding" (values >= 32 are not supported)
 */
void dw_conv(int in_dim[3], int depthwise_fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]],
             const int depthwise_fil[depthwise_fil_dim[0]][depthwise_fil_dim[1]][depthwise_fil_dim[2]][1], const int bias[],
             int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[],
             const int bias_shift_mode, const int depthwise_multiplier, const int depthwise_out_shift_rl){
  int i, j, k, n, p, res, k1, k2, str1, str2, quant_prod;
  /* 1 << (shift - 1) is undefined for shift == 0, so the rounding term is
   * only formed when there is an actual shift to round for. */
  const int rounding = (depthwise_out_shift_rl > 0) ? (1 << (depthwise_out_shift_rl - 1)) : 0;

  (void)bias_shift_mode;

  // Depthwise convolution
  for (i = 0; i < out_dim[2]; i++) { // output depth
    str1 = -pad[0] - strides;
    for (j = 0; j < out_dim[0]; j++) { // output height
      str1 += strides;
      str2 = -pad[2] - strides;
      for (k = 0; k < out_dim[1]; k++) { // output width
        res = bias[i];
        str2 += strides;
        for (p = 0; p < depthwise_fil_dim[1]; p++) { // depthwise filter height
          for (n = 0; n < depthwise_fil_dim[2]; n++) { // depthwise filter width
            k1 = str1 + p;
            k2 = str2 + n;
            if (k1 < in_dim[0] && k1 >= 0 && k2 >= 0 && k2 < in_dim[1]) {
              res += inp[k1][k2][i] * depthwise_fil[i][p][n][0];
            }
          }
        }
        quant_prod = depthwise_multiplier * res + rounding;
        if (depthwise_out_shift_rl > 0) {
          quant_prod = quant_prod >> depthwise_out_shift_rl;
        }
        if (quant_prod < 0) {
          quant_prod = 0;
        }
        if (quant_prod > 255) {
          quant_prod = 255;
        }
        out[j][k][i] = quant_prod;
      }
    }
  }
}
#endif /* DWS_CONV_H */
+171
View File
@@ -0,0 +1,171 @@
#ifndef DWS_CONV_OPT_H
#define DWS_CONV_OPT_H
/*
 * Pointwise (1x1) convolution built on the neur_init / nn_mac_8b / neur_res
 * instructions.
 *
 * For each output channel i and output position (j, k):
 *   1. neur_init is issued with bias[i] and bias_shift_mode[i];
 *   2. for every input word m in [0, fil_dim[3]) at the matching input
 *      position, four nn_mac_8b instructions are issued, pairing that same
 *      input word with the weight words fil[i][4*m] .. fil[i][4*m + 3];
 *   3. neur_res is issued with quantized_multiplier and out_shift_rl and its
 *      destination value is stored in out[j][k][i].
 * NOTE(review): the three instructions are not defined in this file;
 * presumably they initialise, accumulate into and requantize a hardware
 * accumulator - confirm against the core's ISA-extension documentation.
 *
 * The row/column cursors advance by `strides`, matching their
 * `-pad - strides` initialisation. Positions that fall outside the input
 * (padding) skip the MAC sequence instead of reading out of bounds, so only
 * the bias reaches neur_res there - the same treatment pw_conv() applies.
 *
 * in_dim / out_dim  {height, width, depth} of input / output
 * fil_dim           only fil_dim[0] and fil_dim[3] are used
 * fil               fil_dim[0] rows of (fil_dim[3] << 2) weight words
 * pad               pad[0] is the top padding, pad[2] the left padding
 */
void pw_conv_8bits(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]],
                   const int fil[fil_dim[0]][fil_dim[3] << 2], const int bias[fil_dim[0]],
                   int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[], const int bias_shift_mode[],
                   const int quantized_multiplier, const int out_shift_rl){
  int i, j, k, m, res, str1, str2, bias_val, w, in_cnn;
  for (i = 0; i < out_dim[2]; i++) { // output depth
    str1 = -pad[0] - strides;
    for (j = 0; j < out_dim[0]; j++) { // output height
      str1 += strides;
      str2 = -pad[2] - strides;
      for (k = 0; k < out_dim[1]; k++) { // output width
        bias_val = bias[i];
        str2 += strides;
        asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
        if (str1 >= 0 && str1 < in_dim[0] && str2 >= 0 && str2 < in_dim[1]) {
          for (m = 0; m < fil_dim[3]; m++) { // filters depth
            in_cnn = inp[str1][str2][m];
            w = fil[i][4*m];
            asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
            w = fil[i][4*m+1];
            asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
            w = fil[i][4*m+2];
            asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
            w = fil[i][4*m+3];
            asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
          }
        }
        asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
        out[j][k][i] = res;
      }
    }
  }
}
/*
 * Pointwise (1x1) convolution built on the neur_init / nn_mac_4b / neur_res
 * instructions.
 *
 * For each output channel i and output position (j, k):
 *   1. neur_init is issued with bias[i] and bias_shift_mode[i];
 *   2. for every input word m in [0, fil_dim[3]) at the matching input
 *      position, two nn_mac_4b instructions are issued, pairing that same
 *      input word with the weight words fil[i][2*m] and fil[i][2*m + 1];
 *   3. neur_res is issued with quantized_multiplier and out_shift_rl and its
 *      destination value is stored in out[j][k][i].
 * NOTE(review): the three instructions are not defined in this file;
 * presumably they initialise, accumulate into and requantize a hardware
 * accumulator - confirm against the core's ISA-extension documentation.
 *
 * The row/column cursors advance by `strides`, matching their
 * `-pad - strides` initialisation. Positions that fall outside the input
 * (padding) skip the MAC sequence instead of reading out of bounds, so only
 * the bias reaches neur_res there - the same treatment pw_conv() applies.
 *
 * in_dim / out_dim  {height, width, depth} of input / output
 * fil_dim           only fil_dim[0] and fil_dim[3] are used
 * fil               fil_dim[0] rows of (fil_dim[3] << 1) weight words
 * pad               pad[0] is the top padding, pad[2] the left padding
 */
void pw_conv_4bits(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]],
                   const int fil[fil_dim[0]][fil_dim[3] << 1], const int bias[fil_dim[0]],
                   int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[], const int bias_shift_mode[],
                   const int quantized_multiplier, const int out_shift_rl){
  int i, j, k, m, res, str1, str2, bias_val, w, in_cnn;
  for (i = 0; i < out_dim[2]; i++) { // output depth
    str1 = -pad[0] - strides;
    for (j = 0; j < out_dim[0]; j++) { // output height
      str1 += strides;
      str2 = -pad[2] - strides;
      for (k = 0; k < out_dim[1]; k++) { // output width
        bias_val = bias[i];
        str2 += strides;
        asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
        if (str1 >= 0 && str1 < in_dim[0] && str2 >= 0 && str2 < in_dim[1]) {
          for (m = 0; m < fil_dim[3]; m++) { // filters depth
            in_cnn = inp[str1][str2][m];
            w = fil[i][2*m];
            asm volatile("nn_mac_4b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
            w = fil[i][2*m+1];
            asm volatile("nn_mac_4b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
          }
        }
        asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
        out[j][k][i] = res;
      }
    }
  }
}
/*
 * Pointwise (1x1) convolution built on the neur_init / nn_mac_2b / neur_res
 * instructions.
 *
 * For each output channel i and output position (j, k):
 *   1. neur_init is issued with bias[i] and bias_shift_mode[i];
 *   2. for every input word m in [0, fil_dim[3]) at the matching input
 *      position, one nn_mac_2b instruction is issued with the weight word
 *      fil[i][m] and that input word;
 *   3. neur_res is issued with quantized_multiplier and out_shift_rl and its
 *      destination value is stored in out[j][k][i].
 * NOTE(review): the three instructions are not defined in this file;
 * presumably they initialise, accumulate into and requantize a hardware
 * accumulator - confirm against the core's ISA-extension documentation.
 *
 * The row/column cursors advance by `strides`, matching their
 * `-pad - strides` initialisation. Positions that fall outside the input
 * (padding) skip the MAC sequence instead of reading out of bounds, so only
 * the bias reaches neur_res there - the same treatment pw_conv() applies.
 *
 * in_dim / out_dim  {height, width, depth} of input / output
 * fil_dim           only fil_dim[0] and fil_dim[3] are used
 * fil               fil_dim[0] rows of fil_dim[3] weight words
 * pad               pad[0] is the top padding, pad[2] the left padding
 */
void pw_conv_2bits(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]],
                   const int fil[fil_dim[0]][fil_dim[3]], const int bias[fil_dim[0]],
                   int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[], const int bias_shift_mode[],
                   const int quantized_multiplier, const int out_shift_rl){
  int i, j, k, m, res, str1, str2, bias_val, w, in_cnn;
  for (i = 0; i < out_dim[2]; i++) { // output depth
    str1 = -pad[0] - strides;
    for (j = 0; j < out_dim[0]; j++) { // output height
      str1 += strides;
      str2 = -pad[2] - strides;
      for (k = 0; k < out_dim[1]; k++) { // output width
        bias_val = bias[i];
        str2 += strides;
        asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
        if (str1 >= 0 && str1 < in_dim[0] && str2 >= 0 && str2 < in_dim[1]) {
          for (m = 0; m < fil_dim[3]; m++) { // filters depth
            in_cnn = inp[str1][str2][m];
            w = fil[i][m];
            asm volatile("nn_mac_2b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
          }
        }
        asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
        out[j][k][i] = res;
      }
    }
  }
}
/*
 * Depthwise convolution built on the neur_init / nn_mac_8b / neur_res
 * instructions.
 *
 * Every channel is filtered on its own. For each output element, neur_init is
 * issued with the channel's bias and bias_shift_mode entry, one nn_mac_8b is
 * issued per filter tap whose input coordinate lies inside the input (taps in
 * the padding are skipped), and neur_res is issued with quantized_multiplier
 * and out_shift_rl; its destination value is written to out.
 * NOTE(review): the instructions themselves are not defined in this file;
 * presumably they drive a hardware accumulator - confirm against the core's
 * ISA-extension documentation.
 *
 * in_dim / out_dim   {height, width, depth} of input / output
 * depthwise_fil_dim  {channels, filter height, filter width, unused}
 * pad                pad[0] is the top padding, pad[2] the left padding
 */
void dw_conv_opt(int in_dim[3], int depthwise_fil_dim[4], int out_dim[3],
                 int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int depthwise_fil[depthwise_fil_dim[0]][depthwise_fil_dim[1]][depthwise_fil_dim[2]],
                 const int bias[depthwise_fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]],
                 int strides, int pad[], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl)
{
  for (int ch = 0; ch < out_dim[2]; ch++) {
    /* Start one stride before the first row so the loop can pre-increment. */
    int row_origin = -pad[0] - strides;

    for (int out_row = 0; out_row < out_dim[0]; out_row++) {
      row_origin += strides;
      int col_origin = -pad[2] - strides;

      for (int out_col = 0; out_col < out_dim[1]; out_col++) {
        int acc;
        int bias_word = bias[ch];

        col_origin += strides;
        asm volatile("neur_init %0, %1, %2\n":"=r"(acc):"r"(bias_word),"r"(bias_shift_mode[ch]):);

        for (int tap_row = 0; tap_row < depthwise_fil_dim[1]; tap_row++) {
          for (int tap_col = 0; tap_col < depthwise_fil_dim[2]; tap_col++) {
            int in_row = row_origin + tap_row;
            int in_col = col_origin + tap_col;

            /* Taps that land in the padding contribute nothing. */
            if (in_row < 0 || in_row >= in_dim[0] || in_col < 0 || in_col >= in_dim[1]) {
              continue;
            }

            int activation = inp[in_row][in_col][ch];
            int weight = depthwise_fil[ch][tap_row][tap_col];
            asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(acc):"r"(weight),"r"(activation):);
          }
        }

        asm volatile("neur_res %0, %1, %2\n":"=r"(acc):"r"(quantized_multiplier),"r"(out_shift_rl):);
        out[out_row][out_col][ch] = acc;
      }
    }
  }
}
/*
 * Single-channel variant of the depthwise convolution built on the
 * neur_init / nn_mac_8b / neur_res instructions: only channel 0 of inp,
 * depthwise_fil, bias, bias_shift_mode and out is touched.
 *
 * For each output element, neur_init is issued with bias[0] and
 * bias_shift_mode[0], one nn_mac_8b is issued per filter tap whose input
 * coordinate lies inside the input (taps in the padding are skipped), and
 * neur_res is issued with quantized_multiplier and out_shift_rl; its
 * destination value is written to out[row][col][0].
 * NOTE(review): the instructions themselves are not defined in this file;
 * presumably they drive a hardware accumulator - confirm against the core's
 * ISA-extension documentation.
 *
 * in_dim / out_dim   {height, width, depth} of input / output
 * depthwise_fil_dim  {channels, filter height, filter width, unused}
 * pad                pad[0] is the top padding, pad[2] the left padding
 */
void dw_conv_opt_1ch(int in_dim[3], int depthwise_fil_dim[4], int out_dim[3],
                     int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int depthwise_fil[depthwise_fil_dim[0]][depthwise_fil_dim[1]][depthwise_fil_dim[2]],
                     const int bias[depthwise_fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]],
                     int strides, int pad[], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl)
{
  /* Start one stride before the first row so the loop can pre-increment. */
  int row_origin = -pad[0] - strides;

  for (int out_row = 0; out_row < out_dim[0]; out_row++) {
    row_origin += strides;
    int col_origin = -pad[2] - strides;

    for (int out_col = 0; out_col < out_dim[1]; out_col++) {
      int acc;
      int bias_word = bias[0];

      col_origin += strides;
      asm volatile("neur_init %0, %1, %2\n":"=r"(acc):"r"(bias_word),"r"(bias_shift_mode[0]):);

      for (int tap_row = 0; tap_row < depthwise_fil_dim[1]; tap_row++) {
        for (int tap_col = 0; tap_col < depthwise_fil_dim[2]; tap_col++) {
          int in_row = row_origin + tap_row;
          int in_col = col_origin + tap_col;

          /* Taps that land in the padding contribute nothing. */
          if (in_row < 0 || in_row >= in_dim[0] || in_col < 0 || in_col >= in_dim[1]) {
            continue;
          }

          int activation = inp[in_row][in_col][0];
          int weight = depthwise_fil[0][tap_row][tap_col];
          asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(acc):"r"(weight),"r"(activation):);
        }
      }

      asm volatile("neur_res %0, %1, %2\n":"=r"(acc):"r"(quantized_multiplier),"r"(out_shift_rl):);
      out[out_row][out_col][0] = acc;
    }
  }
}
#endif /* DWS_CONV_OPT_H */