Adding files

This commit is contained in:
alexmr09
2024-07-19 13:30:31 +03:00
commit 08fb8ef728
7245 changed files with 3055662 additions and 0 deletions
+77
View File
@@ -0,0 +1,77 @@
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
# Shared build rules for simple_system software.
#
# Inputs (set by the including makefile or on the command line):
#   PROGRAM        - base name of the program; enables the .elf/.vmem/.bin targets
#   EXTRA_SRCS     - additional .c / .S sources to build
#   PROGRAM_CFLAGS - extra compiler flags appended to the default CFLAGS
#   LIBS           - extra libraries passed to the link step

# Directory containing this makefile; the common sources and headers live here.
COMMON_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))

COMMON_SRCS = $(wildcard $(COMMON_DIR)/*.c)
INCS := -I$(COMMON_DIR)

#ARCH = rv32im # to disable compressed instructions
ARCH ?= rv32imc

ifdef PROGRAM
PROGRAM_C := $(PROGRAM).c
endif

SRCS = $(COMMON_SRCS) $(PROGRAM_C) $(EXTRA_SRCS)

C_SRCS = $(filter %.c, $(SRCS))
ASM_SRCS = $(filter %.S, $(SRCS))
# $(filter) takes "pattern, text"; the comma between them is required.
CPLUSPLUS = $(filter %.cpp, $(SRCS))

CC = riscv32-unknown-elf-gcc

CROSS_COMPILE = $(patsubst %-gcc,%-,$(CC))

OBJCOPY ?= $(CROSS_COMPILE)objcopy

OBJDUMP ?= $(CROSS_COMPILE)objdump

LINKER_SCRIPT ?= $(COMMON_DIR)/link.ld
CRT ?= $(COMMON_DIR)/crt0.S
CFLAGS ?= -march=$(ARCH) -mabi=ilp32 -static -mcmodel=medany -Wall -g -O3\
	-fvisibility=hidden -nostartfiles -ffreestanding $(PROGRAM_CFLAGS)

OBJS := ${C_SRCS:.c=.o} ${ASM_SRCS:.S=.o} ${CRT:.S=.o}
DEPS = $(OBJS:%.o=%.d)

ifdef PROGRAM
OUTFILES := $(PROGRAM).elf $(PROGRAM).vmem $(PROGRAM).bin
else
OUTFILES := $(OBJS)
endif

# These targets do not name files, so they must always be considered out of date.
.PHONY: all clean distclean

all: $(OUTFILES)

ifdef PROGRAM
$(PROGRAM).elf: $(OBJS) $(LINKER_SCRIPT)
	$(CC) $(CFLAGS) -T $(LINKER_SCRIPT) $(OBJS) -o $@ $(LIBS)

.PHONY: disassemble
disassemble: $(PROGRAM).dis
endif

%.dis: %.elf
	$(OBJDUMP) -fhSD $^ > $@

# Note: this target requires the srecord package to be installed.
# XXX: This could be replaced by objcopy once
# https://sourceware.org/bugzilla/show_bug.cgi?id=19921
# is widely available.
%.vmem: %.bin
	srec_cat $^ -binary -offset 0x0000 -byte-swap 4 -o $@ -vmem

%.bin: %.elf
	$(OBJCOPY) -O binary $^ $@

# -MMD writes a .d file next to each object; -MP adds a phony target per header
# so that deleting a header does not break the build.
%.o: %.c
	$(CC) $(CFLAGS) -MMD -MP -c $(INCS) -o $@ $<

%.o: %.S
	$(CC) $(CFLAGS) -MMD -MP -c $(INCS) -o $@ $<

clean:
	$(RM) -f $(OBJS) $(DEPS)

distclean: clean
	$(RM) -f $(OUTFILES)

# Pull in the generated header dependencies so that editing a header rebuilds
# the objects that include it. Kept after "all" so the default goal is unchanged;
# the leading "-" ignores dependency files that do not exist yet.
-include $(DEPS)
+109
View File
@@ -0,0 +1,109 @@
#ifndef CONV2D_H
#define CONV2D_H
/*
 * Reference (plain C) quantized 2-D convolution.
 *
 * Array layouts, as indexed below:
 *   inp[row][col][channel], fil[out_channel][tap_row][tap_col][channel],
 *   out[row][col][out_channel].
 *
 * in_dim / out_dim          {rows, cols, channels} of the input / output
 * fil_dim                   {out_channels, tap_rows, tap_cols, channels}
 * bias                      one accumulator start value per output channel
 * strides                   step between consecutive windows, both directions
 * pad                       only pad[0] (rows) and pad[2] (cols) are read; they
 *                           shift the first window up/left of the input
 * bias_shift_mode           unused by this reference implementation
 * quantized_multiplier      scale applied to the accumulator
 * out_shift_rl              right shift applied after scaling (round to nearest);
 *                           a value <= 0 applies no shift
 *
 * Filter taps falling outside the input are skipped, i.e. padding is treated as
 * zeros. Every result is clamped to [0, 255] before it is stored.
 */
void conv2(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][fil_dim[3]], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode, const int quantized_multiplier, const int out_shift_rl){
    /* 1 << (out_shift_rl - 1) is undefined for out_shift_rl == 0 (shift by -1),
     * so the rounding term is only formed when a real shift is requested. */
    const int rounding = (out_shift_rl > 0) ? (1 << (out_shift_rl - 1)) : 0;

    (void)bias_shift_mode;

    for (int i = 0; i < out_dim[2]; i++) {                 /* output depth */
        for (int j = 0; j < out_dim[0]; j++) {             /* output height */
            const int row0 = j * strides - pad[0];
            for (int k = 0; k < out_dim[1]; k++) {         /* output width */
                const int col0 = k * strides - pad[2];
                int res = bias[i];

                for (int p = 0; p < fil_dim[1]; p++) {     /* filter height */
                    const int k1 = row0 + p;
                    if (k1 < 0 || k1 >= in_dim[0]) {
                        continue;
                    }
                    for (int n = 0; n < fil_dim[2]; n++) { /* filter width */
                        const int k2 = col0 + n;
                        if (k2 < 0 || k2 >= in_dim[1]) {
                            continue;
                        }
                        for (int m = 0; m < fil_dim[3]; m++) { /* filter depth */
                            res += inp[k1][k2][m] * fil[i][p][n][m];
                        }
                    }
                }

                int quant_prod = quantized_multiplier * res;
                if (out_shift_rl > 0) {
                    quant_prod = (quant_prod + rounding) >> out_shift_rl;
                }
                if (quant_prod < 0) {
                    quant_prod = 0;
                }
                if (quant_prod > 255) {
                    quant_prod = 255;
                }
                out[j][k][i] = quant_prod;
            }
        }
    }
}
/*
 * Max pooling over square windows, one channel at a time.
 *
 * Window (oy, ox) starts at input position (oy * strides, ox * strides) and
 * spans pool_size x pool_size elements; positions outside the input are
 * ignored. The running maximum starts at 0, so a window holding only negative
 * values (or no in-bounds value at all) yields 0.
 */
void maxpool2(int in_dim[3], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int pool_size, int strides) {
    for (int ch = 0; ch < out_dim[2]; ch++) {
        for (int oy = 0; oy < out_dim[0]; oy++) {
            const int row0 = oy * strides;
            for (int ox = 0; ox < out_dim[1]; ox++) {
                const int col0 = ox * strides;
                int best = 0;

                for (int dy = 0; dy < pool_size; dy++) {
                    const int r = row0 + dy;
                    if (r < 0 || r >= in_dim[0]) {
                        continue;
                    }
                    for (int dx = 0; dx < pool_size; dx++) {
                        const int c = col0 + dx;
                        if (c < 0 || c >= in_dim[1]) {
                            continue;
                        }
                        const int sample = inp[r][c][ch];
                        if (sample > best) {
                            best = sample;
                        }
                    }
                }
                out[oy][ox][ch] = best;
            }
        }
    }
}
/*
 * Average pooling over square windows, one channel at a time.
 *
 * Window (oy, ox) starts at input position (oy * strides, ox * strides) and
 * spans pool_size x pool_size elements. Positions outside the input contribute
 * nothing to the sum, but the sum is always divided by pool_size * pool_size
 * (integer division), so edge windows are averaged as if padded with zeros.
 */
void avgpool2(int in_dim[3], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int pool_size, int strides) {
    for (int ch = 0; ch < out_dim[2]; ch++) {
        for (int oy = 0; oy < out_dim[0]; oy++) {
            const int row0 = oy * strides;
            for (int ox = 0; ox < out_dim[1]; ox++) {
                const int col0 = ox * strides;
                int sum = 0;

                for (int dy = 0; dy < pool_size; dy++) {
                    const int r = row0 + dy;
                    if (r < 0 || r >= in_dim[0]) {
                        continue;
                    }
                    for (int dx = 0; dx < pool_size; dx++) {
                        const int c = col0 + dx;
                        if (c < 0 || c >= in_dim[1]) {
                            continue;
                        }
                        sum += inp[r][c][ch];
                    }
                }
                out[oy][ox][ch] = sum / (pool_size * pool_size);
            }
        }
    }
}
/*
 * Copies a 3-D tensor indexed inp[row][col][channel] into the 1-D array out in
 * channel-major order: all elements of channel 0 (rows outermost, columns
 * innermost), then channel 1, and so on. out must hold
 * in_dim[0] * in_dim[1] * in_dim[2] elements.
 */
void flatten(int in_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], int out[]){
    int *dst = out;

    for (int ch = 0; ch < in_dim[2]; ch++) {
        for (int row = 0; row < in_dim[0]; row++) {
            for (int col = 0; col < in_dim[1]; col++) {
                *dst++ = inp[row][col][ch];
            }
        }
    }
}
#endif /* CONV2D_H */
+358
View File
@@ -0,0 +1,358 @@
#ifndef CONV2D_OPT_H
#define CONV2D_OPT_H
/*
 * 2-D convolution issued through the custom neur_init / nn_mac_8b / neur_res
 * instructions (inline asm).
 *
 * For every output element out[j][k][i] the function:
 *   1. issues neur_init with bias[i] and bias_shift_mode[i];
 *   2. for every filter tap that lands inside the input and every input word
 *      m, issues four nn_mac_8b instructions pairing that same input word with
 *      the weight words fil[i][p][n][4*m] .. fil[i][p][n][4*m+3];
 *   3. issues neur_res with quantized_multiplier and out_shift_rl and stores
 *      the value it returns.
 *
 * The innermost filter dimension is therefore fil_dim[3] << 2 words. Only
 * pad[0] (rows) and pad[2] (columns) are read; taps outside the input are
 * skipped.
 *
 * NOTE(review): the C code never accumulates `res` itself -- every asm simply
 * overwrites it -- so the accumulator presumably lives in hardware state shared
 * by the three instructions; confirm against the core's custom-ISA description.
 */
void conv2_8bits(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][fil_dim[3] << 2], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
int i, j, k, m, n, p, res, k1, k2, str1, str2, w, in_cnn, bias_val;
for (i = 0; i < out_dim[2]; i++) { // output depth
// Start one stride before the first window; the loop adds the stride back.
str1 = -pad[0] - strides;
for (j = 0; j < out_dim[0]; j++) { // output height
str1 += strides;
str2 = -pad[2] - strides;
for (k = 0; k < out_dim[1]; k++) { // output width
bias_val = bias[i];
asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
str2 += strides;
for (p = 0; p < fil_dim[1]; p++) { // filters height
for (n = 0; n < fil_dim[2]; n++) { // filters width
k1 = str1 + p;
k2 = str2 + n;
// Skip taps that fall into the padding region.
if (k1 < in_dim[0] && k1 >= 0 && k2 >= 0 && k2 < in_dim[1]) {
for (m = 0; m < fil_dim[3]; m++) { // filters depth
in_cnn = inp[k1][k2][m];
// Four weight words are consumed per input word.
w = fil[i][p][n][4*m];
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
w = fil[i][p][n][4*m+1];
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
w = fil[i][p][n][4*m+2];
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
w = fil[i][p][n][4*m+3];
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
}
}
}
}
asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
out[j][k][i] = res;
}
}
}
}
/*
 * Variant of conv2_8bits that reads only input word 0 of every position
 * (inp[k1][k2][0]) and weight words fil[i][p][n][0..3]; there is no loop over
 * the filter depth.
 *
 * Per output element: neur_init with bias[i] and bias_shift_mode[i], four
 * nn_mac_8b per in-bounds filter tap, then neur_res with quantized_multiplier
 * and out_shift_rl, whose result is stored in out[j][k][i]. Only pad[0] and
 * pad[2] are read.
 *
 * NOTE(review): `res` is overwritten by every asm and never accumulated in C;
 * presumably the accumulator is hardware state -- confirm against the ISA.
 */
void conv2_8bits_1ch(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][fil_dim[3] << 2], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
int i, j, k, n, p, res, k1, k2, str1, str2, w, in_cnn, bias_val;
for (i = 0; i < out_dim[2]; i++) { // output depth
// Start one stride before the first window; the loop adds the stride back.
str1 = -pad[0] - strides;
for (j = 0; j < out_dim[0]; j++) { // output height
str1 += strides;
str2 = -pad[2] - strides;
for (k = 0; k < out_dim[1]; k++) { // output width
bias_val = bias[i];
asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
str2 += strides;
for (p = 0; p < fil_dim[1]; p++) { // filters height
for (n = 0; n < fil_dim[2]; n++) { // filters width
k1 = str1 + p;
k2 = str2 + n;
// Skip taps that fall into the padding region.
if (k1 < in_dim[0] && k1 >= 0 && k2 >= 0 && k2 < in_dim[1]) {
in_cnn = inp[k1][k2][0];
w = fil[i][p][n][0];
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
w = fil[i][p][n][1];
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
w = fil[i][p][n][2];
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
w = fil[i][p][n][3];
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
}
}
}
asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
out[j][k][i] = res;
}
}
}
}
/*
 * 2-D convolution issued through the custom neur_init / nn_mac_4b / neur_res
 * instructions (inline asm).
 *
 * Same structure as conv2_8bits, but two weight words are consumed per input
 * word: fil[i][p][n][2*m] and fil[i][p][n][2*m+1], so the innermost filter
 * dimension is fil_dim[3] << 1 words. Only pad[0] and pad[2] are read; taps
 * outside the input are skipped.
 *
 * NOTE(review): `res` is overwritten by every asm and never accumulated in C;
 * presumably the accumulator is hardware state -- confirm against the ISA.
 */
void conv2_4bits(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][fil_dim[3] << 1], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
int i, j, k, m, n, p, res, k1, k2, str1, str2, w, in_cnn, bias_val;
for (i = 0; i < out_dim[2]; i++) { // output depth
// The first window starts at -pad; later windows advance by one stride.
str1 = -pad[0];
for (j = 0; j < out_dim[0]; j++) { // output height
if (j != 0) str1 += strides;
str2 = -pad[2];
for (k = 0; k < out_dim[1]; k++) { // output width
bias_val = bias[i];
asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
if (k != 0) str2 += strides;
for (p = 0; p < fil_dim[1]; p++) { // filters height
for (n = 0; n < fil_dim[2]; n++) { // filters width
k1 = str1 + p;
k2 = str2 + n;
// Skip taps that fall into the padding region.
if (k1 < in_dim[0] && k1 >= 0 && k2 >= 0 && k2 < in_dim[1]) {
for (m = 0; m < fil_dim[3]; m++) { // filters depth
in_cnn = inp[k1][k2][m];
// Two weight words are consumed per input word.
w = fil[i][p][n][2*m];
asm volatile("nn_mac_4b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
w = fil[i][p][n][2*m+1];
asm volatile("nn_mac_4b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
}
}
}
}
asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
out[j][k][i] = res;
}
}
}
}
/*
 * Variant of conv2_4bits that reads only input word 0 of every position
 * (inp[k1][k2][0]) and the two weight words fil[i][p][n][0] and
 * fil[i][p][n][1]; there is no loop over the filter depth.
 *
 * Per output element: neur_init with bias[i] and bias_shift_mode[i], two
 * nn_mac_4b per in-bounds filter tap, then neur_res with quantized_multiplier
 * and out_shift_rl, whose result is stored in out[j][k][i]. Only pad[0] (rows)
 * and pad[2] (columns) are read; taps outside the input are skipped.
 *
 * The multiply-accumulate instruction is nn_mac_4b, matching conv2_4bits and
 * the two-words-per-input weight layout (fil_dim[3] << 1). This function
 * previously issued nn_mac_2b, the instruction the 2-bit kernels use with a
 * single weight word per input.
 *
 * NOTE(review): `res` is overwritten by every asm and never accumulated in C;
 * presumably the accumulator is hardware state -- confirm against the ISA.
 */
void conv2_4bits_1ch(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][fil_dim[3] << 1], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
    int i, j, k, n, p, res, k1, k2, str1, str2, w, in_cnn, bias_val;

    for (i = 0; i < out_dim[2]; i++) { // output depth
        // Start one stride before the first window; the loop adds the stride back.
        str1 = -pad[0] - strides;
        for (j = 0; j < out_dim[0]; j++) { // output height
            str1 += strides;
            str2 = -pad[2] - strides;
            for (k = 0; k < out_dim[1]; k++) { // output width
                bias_val = bias[i];
                asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
                str2 += strides;
                for (p = 0; p < fil_dim[1]; p++) { // filters height
                    for (n = 0; n < fil_dim[2]; n++) { // filters width
                        k1 = str1 + p;
                        k2 = str2 + n;
                        // Skip taps that fall into the padding region.
                        if (k1 < in_dim[0] && k1 >= 0 && k2 >= 0 && k2 < in_dim[1]) {
                            in_cnn = inp[k1][k2][0];
                            w = fil[i][p][n][0];
                            asm volatile("nn_mac_4b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
                            w = fil[i][p][n][1];
                            asm volatile("nn_mac_4b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
                        }
                    }
                }
                asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
                out[j][k][i] = res;
            }
        }
    }
}
/*
 * 2-D convolution issued through the custom neur_init / nn_mac_2b / neur_res
 * instructions (inline asm).
 *
 * Same structure as conv2_8bits, but a single weight word fil[i][p][n][m] is
 * consumed per input word, so the innermost filter dimension is fil_dim[3]
 * words. Only pad[0] and pad[2] are read; taps outside the input are skipped.
 *
 * NOTE(review): `res` is overwritten by every asm and never accumulated in C;
 * presumably the accumulator is hardware state -- confirm against the ISA.
 */
void conv2_2bits(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][fil_dim[3]], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
int i, j, k, m, n, p, res, k1, k2, str1, str2, w, in_cnn, bias_val;
for (i = 0; i < out_dim[2]; i++) { // output depth
// Start one stride before the first window; the loop adds the stride back.
str1 = -pad[0] - strides;
for (j = 0; j < out_dim[0]; j++) { // output height
str1 += strides;
str2 = -pad[2] - strides;
for (k = 0; k < out_dim[1]; k++) { // output width
bias_val = bias[i];
asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
str2 += strides;
for (p = 0; p < fil_dim[1]; p++) { // filters height
for (n = 0; n < fil_dim[2]; n++) { // filters width
k1 = str1 + p;
k2 = str2 + n;
// Skip taps that fall into the padding region.
if (k1 < in_dim[0] && k1 >= 0 && k2 >= 0 && k2 < in_dim[1]) {
for (m = 0; m < fil_dim[3]; m++) { // filters depth
in_cnn = inp[k1][k2][m];
w = fil[i][p][n][m];
asm volatile("nn_mac_2b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
}
}
}
}
asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
out[j][k][i] = res;
}
}
}
}
/*
 * Variant of conv2_2bits for inputs and filters whose innermost dimension is
 * exactly one word (declared as [1] in the parameter types): one nn_mac_2b is
 * issued per in-bounds filter tap with inp[k1][k2][0] and fil[i][p][n][0].
 *
 * Per output element: neur_init with bias[i] and bias_shift_mode[i], the
 * nn_mac_2b sequence, then neur_res with quantized_multiplier and
 * out_shift_rl, whose result is stored in out[j][k][i]. Only pad[0] and pad[2]
 * are read.
 *
 * NOTE(review): `res` is overwritten by every asm and never accumulated in C;
 * presumably the accumulator is hardware state -- confirm against the ISA.
 */
void conv2_2bits_1ch(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][1], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][1], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
int i, j, k, n, p, res, k1, k2, str1, str2, w, in_cnn, bias_val;
for (i = 0; i < out_dim[2]; i++) { // output depth
// Start one stride before the first window; the loop adds the stride back.
str1 = -pad[0] -strides;
for (j = 0; j < out_dim[0]; j++) { // output height
str1 += strides;
str2 = -pad[2] - strides;
for (k = 0; k < out_dim[1]; k++) { // output width
bias_val = bias[i];
asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
str2 += strides;
for (p = 0; p < fil_dim[1]; p++) { // filters height
for (n = 0; n < fil_dim[2]; n++) { // filters width
k1 = str1 + p;
k2 = str2 + n;
// Skip taps that fall into the padding region.
if (k1 < in_dim[0] && k1 >= 0 && k2 >= 0 && k2 < in_dim[1]) {
in_cnn = inp[k1][k2][0];
w = fil[i][p][n][0];
asm volatile("nn_mac_2b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
}
}
}
asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
out[j][k][i] = res;
}
}
}
}
/*
 * Max pooling on words that each pack four unsigned 8-bit lanes.
 *
 * Every 32-bit element of inp is treated as four independent byte lanes. For
 * each pooling window (starting at (i * strides, j * strides), pool_size x
 * pool_size elements, positions outside the input ignored) the per-lane
 * maximum is taken and the four maxima are recombined into one output word.
 * Lanes are compared in place (masked, not shifted), which orders them the same
 * way as comparing the byte values.
 */
void maxpool2_compressed(int in_dim[3], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int pool_size, int strides) {
    static const uint32_t lane_mask[4] = {
        0xFF000000, 0x00FF0000, 0x0000FF00, 0x000000FF
    };

    for (int d = 0; d < out_dim[2]; d++) {
        for (int i = 0; i < out_dim[0]; i++) {
            const int row0 = i * strides;
            for (int j = 0; j < out_dim[1]; j++) {
                const int col0 = j * strides;
                uint32_t best[4] = {0, 0, 0, 0};

                for (int m = 0; m < pool_size; m++) {
                    const int r = row0 + m;
                    if (r < 0 || r >= in_dim[0]) {
                        continue;
                    }
                    for (int n = 0; n < pool_size; n++) {
                        const int c = col0 + n;
                        if (c < 0 || c >= in_dim[1]) {
                            continue;
                        }
                        const uint32_t word = (uint32_t)inp[r][c][d];
                        for (int lane = 0; lane < 4; lane++) {
                            const uint32_t v = word & lane_mask[lane];
                            if (v > best[lane]) {
                                best[lane] = v;
                            }
                        }
                    }
                }

                const uint32_t packed = best[0] | best[1] | best[2] | best[3];
                out[i][j][d] = packed;
            }
        }
    }
}
/*
 * Average pooling on words that each pack four unsigned 8-bit lanes.
 *
 * Every 32-bit element of inp is treated as four independent byte lanes. For
 * each pooling window (starting at (i * strides, j * strides), pool_size x
 * pool_size elements) the bytes of each lane are summed, the sum is divided by
 * pool_size * pool_size (integer division), and the four averages are packed
 * back into one output word. Positions outside the input add nothing to the
 * sums, but the divisor is always the full window size.
 *
 * All lane arithmetic is unsigned: packing a top-lane average >= 128 with a
 * signed `<< 24` would shift into the sign bit, which is undefined behaviour.
 * A pool_size of 0 yields all-zero output instead of dividing by zero.
 */
void avgpool2_compressed(int in_dim[3], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int pool_size, int strides) {
    const unsigned int window = (unsigned int)(pool_size * pool_size);

    for (int d = 0; d < out_dim[2]; d++) {
        for (int i = 0; i < out_dim[0]; i++) {
            const int row0 = i * strides;
            for (int j = 0; j < out_dim[1]; j++) {
                const int col0 = j * strides;
                unsigned int sum[4] = {0, 0, 0, 0};

                for (int m = 0; m < pool_size; m++) {
                    const int k1 = row0 + m;
                    if (k1 < 0 || k1 >= in_dim[0]) {
                        continue;
                    }
                    for (int n = 0; n < pool_size; n++) {
                        const int k2 = col0 + n;
                        if (k2 < 0 || k2 >= in_dim[1]) {
                            continue;
                        }
                        const unsigned int word = (unsigned int)inp[k1][k2][d];
                        sum[0] += (word >> 24) & 0xFFu;
                        sum[1] += (word >> 16) & 0xFFu;
                        sum[2] += (word >> 8) & 0xFFu;
                        sum[3] += word & 0xFFu;
                    }
                }

                unsigned int packed = 0;
                if (window != 0) {
                    packed = ((sum[0] / window) << 24)
                           | ((sum[1] / window) << 16)
                           | ((sum[2] / window) << 8)
                           | (sum[3] / window);
                }
                out[i][j][d] = (int)packed;
            }
        }
    }
}
/*
 * Flattens a tensor whose 32-bit words each pack four 8-bit channels.
 *
 * inp[row][col][word] is viewed as in_dim[2] * 4 byte channels, where channel
 * 4*w + b is byte b of word w counted from the most significant byte. The
 * bytes are emitted in channel-major order (channel outermost, then row, then
 * column) and repacked four at a time, first byte in the most significant
 * position, into out. out must hold in_dim[0] * in_dim[1] * in_dim[2] words.
 *
 * All input is read into a byte buffer before out is written, so the result
 * does not depend on whether out overlaps inp. The buffer is one byte per
 * packed channel value (the same size as the input). Packing uses unsigned
 * shifts because a signed `<< 24` of a byte >= 128 is undefined behaviour.
 */
void flatten(int in_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], int out[]){
    const int rows = in_dim[0];
    const int cols = in_dim[1];
    const int words = in_dim[2];
    const int total_bytes = (rows * cols * words) << 2;

    /* Nothing to do for an empty tensor (and a zero-length VLA is not allowed). */
    if (total_bytes <= 0) {
        return;
    }

    unsigned char bytes[total_bytes];
    int index = 0;

    for (int ch = 0; ch < (words << 2); ch++) {
        const int word = ch >> 2;
        const int shift = 24 - 8 * (ch & 3);
        for (int r = 0; r < rows; r++) {
            for (int c = 0; c < cols; c++) {
                bytes[index++] = (unsigned char)(((unsigned int)inp[r][c][word] >> shift) & 0xFFu);
            }
        }
    }

    for (int i = 0; i < (total_bytes >> 2); i++) {
        const unsigned int packed = ((unsigned int)bytes[4 * i] << 24)
                                  | ((unsigned int)bytes[4 * i + 1] << 16)
                                  | ((unsigned int)bytes[4 * i + 2] << 8)
                                  | (unsigned int)bytes[4 * i + 3];
        out[i] = (int)packed;
    }
}
#endif /* CONV2D_OPT_H */
+102
View File
@@ -0,0 +1,102 @@
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#include "simple_system_regs.h"
.section .text
/* Trampolines targeted by the vector table entries: each jumps (without
   linking, rd = x0) to the corresponding C handler. */
default_exc_handler:
jal x0, simple_exc_handler
timer_handler:
jal x0, simple_timer_handler
/* Reset entry: clears the register file, sets up the stack pointer, zeroes
   .bss, calls main(0, 0) and, when main returns, halts the simulation. */
reset_handler:
  /* set all registers to zero */
  mv  x1, x0
  mv  x2, x1
  mv  x3, x1
  mv  x4, x1
  mv  x5, x1
  mv  x6, x1
  mv  x7, x1
  mv  x8, x1
  mv  x9, x1
  mv x10, x1
  mv x11, x1
  mv x12, x1
  mv x13, x1
  mv x14, x1
  mv x15, x1
  mv x16, x1
  mv x17, x1
  mv x18, x1
  mv x19, x1
  mv x20, x1
  mv x21, x1
  mv x22, x1
  mv x23, x1
  mv x24, x1
  mv x25, x1
  mv x26, x1
  mv x27, x1
  mv x28, x1
  mv x29, x1
  mv x30, x1
  mv x31, x1

  /* stack initialization */
  la   x2, _stack_start

_start:
  .global _start

  /* clear BSS: zero every word in [_bss_start, _bss_end) */
  la x26, _bss_start
  la x27, _bss_end

  /* nothing to clear when the section is empty */
  bge x26, x27, zero_loop_end

zero_loop:
  sw x0, 0(x26)
  addi x26, x26, 4
  /* blt rather than ble: stop as soon as x26 reaches _bss_end, so that no
     word beyond the end of .bss is written */
  blt x26, x27, zero_loop
zero_loop_end:

main_entry:
  /* jump to main program entry point (argc = argv = 0) */
  addi x10, x0, 0
  addi x11, x0, 0
  jal x1, main

  /* Halt simulation */
  li x5, SIM_CTRL_BASE + SIM_CTRL_CTRL
  li x6, 1
  sw x6, 0(x5)

  /* If execution ends up here just put the core to sleep */
sleep_loop:
  wfi
  j sleep_loop
/* =================================================== [ exceptions ] === */
/* This section has to be down here, since we have to disable rvc for it */
/* Vector table. Compressed instructions are disabled so that every entry is
   one 4-byte jal: 7 default entries, the timer handler as entry 7, then 23
   more default entries (31 entries, offsets 0x00-0x7C), followed by the
   reset vector at offset 0x80. */
.section .vectors, "ax"
.option norvc;
// All unimplemented interrupts/exceptions go to the default_exc_handler.
.org 0x00
.rept 7
jal x0, default_exc_handler
.endr
/* entry 7 (offset 0x1C): timer interrupt */
jal x0, timer_handler
.rept 23
jal x0, default_exc_handler
.endr
// reset vector
.org 0x80
jal x0, reset_handler
+3
View File
@@ -0,0 +1,3 @@
/home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/crt0.o: \
/home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/crt0.S \
/home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/simple_system_regs.h
Binary file not shown.
+28
View File
@@ -0,0 +1,28 @@
#ifndef FULLY_CONNECTED_H
#define FULLY_CONNECTED_H
#include <stdint.h>
/*
 * Reference (plain C) quantized fully-connected layer.
 *
 * For every output neuron i:
 *   z         = bias[i] + sum over j of weights[i][j] * input[j]
 *   output[i] = clamp((quantized_multiplier * z + rounding) >> out_shift_rl,
 *                     0, 255)
 *
 * input                 num_inputs values
 * output                receives num_outputs values
 * weights               num_outputs rows of num_inputs weights
 * bias                  one accumulator start value per output neuron
 * bias_shift_mode       unused by this reference implementation
 * quantized_multiplier  scale applied to the accumulator
 * out_shift_rl          right shift applied after scaling (round to nearest);
 *                       a value <= 0 applies no shift
 */
void mlp_layer(int input[], int output[], int num_inputs, int num_outputs, const int weights[][num_inputs], const int bias[], const int bias_shift_mode, const int quantized_multiplier, const int out_shift_rl){
    /* 1 << (out_shift_rl - 1) is undefined for out_shift_rl == 0 (shift by -1),
     * so the rounding term is only formed when a real shift is requested. */
    const int rounding = (out_shift_rl > 0) ? (1 << (out_shift_rl - 1)) : 0;

    (void)bias_shift_mode;

    // Compute the output for each neuron
    for (int i = 0; i < num_outputs; i++) {
        int z = bias[i];
        for (int j = 0; j < num_inputs; j++) {
            z += weights[i][j] * input[j];
        }

        int quant_prod = quantized_multiplier * z;
        if (out_shift_rl > 0) {
            quant_prod = (quant_prod + rounding) >> out_shift_rl;
        }
        if (quant_prod < 0) {
            quant_prod = 0;
        }
        if (quant_prod > 255) {
            quant_prod = 255;
        }
        output[i] = quant_prod;
    }
}
#endif /* FULLY_CONNECTED_H */
@@ -0,0 +1,77 @@
#ifndef FULLY_CONNECTED_OPT_H
#define FULLY_CONNECTED_OPT_H
#include <stdint.h>
/*
 * Fully-connected layer issued through the custom neur_init / nn_mac_2b /
 * neur_res instructions (inline asm).
 *
 * For every output i: neur_init with bias[i] and bias_shift_mode[i]; one
 * nn_mac_2b per input word j with weights[i][j] and input[j]; then neur_res
 * with quantized_multiplier and out_shift_rl, whose result is stored in
 * output[i]. Each weight row holds num_inputs words.
 *
 * NOTE(review): the value returned by nn_mac_2b (`temp`) is never used in C;
 * presumably the accumulator is hardware state read back by neur_res --
 * confirm against the core's custom-ISA description.
 */
void mlp_layer_2bits(int input[], int output[], int num_inputs, int num_outputs, const int weights[][num_inputs], const int bias[], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
// Compute the output for each neuron
int z, bias_val, w, inp, temp;
for (int i = 0; i < num_outputs; i++) {
bias_val = bias[i];
asm volatile("neur_init %0, %1, %2\n":"=r"(z):"r"(bias_val),"r"(bias_shift_mode[i]):);
for (int j = 0; j < num_inputs; j++) {
w = weights[i][j];
inp = input[j];
asm volatile("nn_mac_2b %0, %1,%2\n":"=r"(temp):"r"(w),"r"(inp):);
}
asm volatile("neur_res %0, %1, %2\n":"=r"(z):"r"(quantized_multiplier),"r"(out_shift_rl):);
output[i] = z;
}
}
/*
 * Fully-connected layer issued through the custom neur_init / nn_mac_4b /
 * neur_res instructions (inline asm).
 *
 * For every output i: neur_init with bias[i] and bias_shift_mode[i]; two
 * nn_mac_4b per input word j, pairing input[j] with weights[i][2*j] and
 * weights[i][2*j+1]; then neur_res with quantized_multiplier and
 * out_shift_rl, whose result is stored in output[i]. Each weight row holds
 * num_inputs << 1 words.
 *
 * NOTE(review): the value returned by nn_mac_4b (`temp`) is never used in C;
 * presumably the accumulator is hardware state read back by neur_res --
 * confirm against the core's custom-ISA description.
 */
void mlp_layer_4bits(int input[], int output[], int num_inputs, int num_outputs, const int weights[][num_inputs << 1], const int bias[], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
// Compute the output for each neuron
int z, bias_val, w, inp, temp;
for (int i = 0; i < num_outputs; i++) {
bias_val = bias[i];
asm volatile("neur_init %0, %1, %2\n":"=r"(z):"r"(bias_val),"r"(bias_shift_mode[i]):);
for (int j = 0; j < num_inputs; j++) {
// Two weight words are consumed per input word.
w = weights[i][2*j];
inp = input[j];
asm volatile("nn_mac_4b %0, %1,%2\n":"=r"(temp):"r"(w),"r"(inp):);
w = weights[i][2*j+1];
asm volatile("nn_mac_4b %0, %1,%2\n":"=r"(temp):"r"(w),"r"(inp):);
}
asm volatile("neur_res %0, %1, %2\n":"=r"(z):"r"(quantized_multiplier),"r"(out_shift_rl):);
output[i] = z;
}
}
/*
 * Fully-connected layer issued through the custom neur_init / nn_mac_8b /
 * neur_res instructions (inline asm).
 *
 * For every output i: neur_init with bias[i] and bias_shift_mode[i]; four
 * nn_mac_8b per input word j, pairing input[j] with weights[i][4*j] ..
 * weights[i][4*j+3]; then neur_res with quantized_multiplier and
 * out_shift_rl, whose result is stored in output[i]. Each weight row holds
 * num_inputs << 2 words.
 *
 * NOTE(review): the value returned by nn_mac_8b (`temp`) is never used in C;
 * presumably the accumulator is hardware state read back by neur_res --
 * confirm against the core's custom-ISA description.
 */
void mlp_layer_8bits(int input[], int output[], int num_inputs, int num_outputs, const int weights[][num_inputs << 2], const int bias[], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
// Compute the output for each neuron
int z, bias_val, w, inp, temp;
for (int i = 0; i < num_outputs; i++) {
bias_val = bias[i];
asm volatile("neur_init %0, %1, %2\n":"=r"(z):"r"(bias_val),"r"(bias_shift_mode[i]):);
for (int j = 0; j < num_inputs; j++) {
// Four weight words are consumed per input word.
w = weights[i][4*j];
inp = input[j];
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(temp):"r"(w),"r"(inp):);
w = weights[i][4*j+1];
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(temp):"r"(w),"r"(inp):);
w = weights[i][4*j+2];
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(temp):"r"(w),"r"(inp):);
w = weights[i][4*j+3];
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(temp):"r"(w),"r"(inp):);
}
asm volatile("neur_res %0, %1, %2\n":"=r"(z):"r"(quantized_multiplier),"r"(out_shift_rl):);
output[i] = z;
}
}
#endif /* FULLY_CONNECTED_OPT_H */
+91
View File
@@ -0,0 +1,91 @@
/* Copyright lowRISC contributors.
Licensed under the Apache License, Version 2.0, see LICENSE for details.
SPDX-License-Identifier: Apache-2.0 */
OUTPUT_ARCH(riscv)
/* Change this if you'd like different sizes. Arty A7-100(35) has a maximum of 607.5KB(225KB)
BRAM space. Configuration below is for maximum BRAM capacity with Arty A7-35 while letting
CoreMark run (.vmem of 152.8KB).
*/
/* Two back-to-back regions: ram ends at 0x00100000 + 0x750000 = 0x00850000,
   which is exactly where stack begins.
   NOTE(review): 0x750000 bytes is 7488 KiB and 0x200000 bytes is 2048 KiB,
   far more than the BRAM sizes quoted in the comment above -- confirm these
   lengths are intended for the target (simulation vs. FPGA). */
MEMORY
{
ram : ORIGIN = 0x00100000, LENGTH = 0x750000
stack : ORIGIN = 0x00850000, LENGTH = 0x200000
}
/* Stack information variables */
_min_stack = 0x10000; /* 64 KiB (0x10000 bytes) - minimum stack space to reserve */
_stack_len = LENGTH(stack);
/* Initial stack pointer: the address just past the end of the stack region
   (crt0.S loads it into x2). */
_stack_start = ORIGIN(stack) + LENGTH(stack);
/* Execution starts at offset 0x80 of the vector table, where crt0.S places
   the reset vector. */
_entry_point = _vectors_start + 0x80;
ENTRY(_entry_point)
/* The tohost address is used by Spike for a magic "stop me now" message. This
is set to equal SIM_CTRL_CTRL (see simple_system_regs.h), which has that
effect in simple_system simulations. Note that it must be 8-byte aligned.
We don't read data back from Spike, so fromhost is set to some dummy value:
we place it just above the top of the stack.
*/
tohost = 0x20008;
fromhost = _stack_start + 0x10;
/* Output section layout. .vectors, .text, .rodata, .data and .bss are placed
   in ram in that order; .stack only reserves space in the stack region. */
SECTIONS
{
/* Vector table; KEEP stops the linker from discarding it even though
   nothing references it by symbol. */
.vectors :
{
. = ALIGN(4);
_vectors_start = .;
KEEP(*(.vectors))
_vectors_end = .;
} > ram
.text : {
. = ALIGN(4);
*(.text)
*(.text.*)
} > ram
.rodata : {
. = ALIGN(4);
/* Small RO data before large RO data */
*(.srodata)
*(.srodata.*)
*(.rodata);
*(.rodata.*)
} > ram
.data : {
. = ALIGN(4);
/* Small data before large data */
*(.sdata)
*(.sdata.*)
*(.data);
*(.data.*)
} > ram
/* _bss_start and _bss_end delimit the range that crt0.S zeroes at startup. */
.bss :
{
. = ALIGN(4);
_bss_start = .;
/* Small BSS before large BSS */
*(.sbss)
*(.sbss.*)
*(.bss)
*(.bss.*)
*(COMMON)
_bss_end = .;
} > ram
/* ensure there is enough room for stack */
/* Reserves _min_stack bytes at the start of the stack region; the link fails
   if the region cannot hold them. NOLOAD: nothing is loaded for this section. */
.stack (NOLOAD): {
. = ALIGN(4);
. = . + _min_stack ;
. = ALIGN(4);
stack = . ;
_stack = . ;
} > stack
}
@@ -0,0 +1,185 @@
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
#include "simple_system_common.h"
// Emits one character by writing its low byte to the simulator output
// register, then returns the argument unchanged.
int putchar(int c) {
  const unsigned char byte = (unsigned char)c;

  DEV_WRITE(SIM_CTRL_BASE + SIM_CTRL_OUT, byte);
  return c;
}
// Emits every character of a NUL-terminated string through putchar(). Unlike
// the C library function of the same name, no trailing newline is added.
// Always returns 0.
int puts(const char *str) {
  for (const char *cursor = str; *cursor != '\0'; ++cursor) {
    putchar(*cursor);
  }
  return 0;
}
// Prints h as exactly eight upper-case hexadecimal digits, most significant
// nibble first, through putchar().
void puthex(uint32_t h) {
  for (int shift = 28; shift >= 0; shift -= 4) {
    const int nibble = (int)((h >> shift) & 0xFu);

    if (nibble < 10) {
      putchar('0' + nibble);
    } else {
      putchar('A' - 10 + nibble);
    }
  }
}
// Requests simulation halt by writing 1 to the simulator control register.
// Declared with (void) so that the definition is a proper prototype.
void sim_halt(void) { DEV_WRITE(SIM_CTRL_BASE + SIM_CTRL_CTRL, 1); }
// Writes zero to every machine-mode performance counter CSR: minstret, mcycle
// and mhpmcounter3..31, first the low words and then the corresponding high
// (...h) words. Declared with (void) so that the definition is a proper
// prototype.
void pcount_reset(void) {
  asm volatile(
      "csrw minstret, x0\n"
      "csrw mcycle, x0\n"
      "csrw mhpmcounter3, x0\n"
      "csrw mhpmcounter4, x0\n"
      "csrw mhpmcounter5, x0\n"
      "csrw mhpmcounter6, x0\n"
      "csrw mhpmcounter7, x0\n"
      "csrw mhpmcounter8, x0\n"
      "csrw mhpmcounter9, x0\n"
      "csrw mhpmcounter10, x0\n"
      "csrw mhpmcounter11, x0\n"
      "csrw mhpmcounter12, x0\n"
      "csrw mhpmcounter13, x0\n"
      "csrw mhpmcounter14, x0\n"
      "csrw mhpmcounter15, x0\n"
      "csrw mhpmcounter16, x0\n"
      "csrw mhpmcounter17, x0\n"
      "csrw mhpmcounter18, x0\n"
      "csrw mhpmcounter19, x0\n"
      "csrw mhpmcounter20, x0\n"
      "csrw mhpmcounter21, x0\n"
      "csrw mhpmcounter22, x0\n"
      "csrw mhpmcounter23, x0\n"
      "csrw mhpmcounter24, x0\n"
      "csrw mhpmcounter25, x0\n"
      "csrw mhpmcounter26, x0\n"
      "csrw mhpmcounter27, x0\n"
      "csrw mhpmcounter28, x0\n"
      "csrw mhpmcounter29, x0\n"
      "csrw mhpmcounter30, x0\n"
      "csrw mhpmcounter31, x0\n"
      "csrw minstreth, x0\n"
      "csrw mcycleh, x0\n"
      "csrw mhpmcounter3h, x0\n"
      "csrw mhpmcounter4h, x0\n"
      "csrw mhpmcounter5h, x0\n"
      "csrw mhpmcounter6h, x0\n"
      "csrw mhpmcounter7h, x0\n"
      "csrw mhpmcounter8h, x0\n"
      "csrw mhpmcounter9h, x0\n"
      "csrw mhpmcounter10h, x0\n"
      "csrw mhpmcounter11h, x0\n"
      "csrw mhpmcounter12h, x0\n"
      "csrw mhpmcounter13h, x0\n"
      "csrw mhpmcounter14h, x0\n"
      "csrw mhpmcounter15h, x0\n"
      "csrw mhpmcounter16h, x0\n"
      "csrw mhpmcounter17h, x0\n"
      "csrw mhpmcounter18h, x0\n"
      "csrw mhpmcounter19h, x0\n"
      "csrw mhpmcounter20h, x0\n"
      "csrw mhpmcounter21h, x0\n"
      "csrw mhpmcounter22h, x0\n"
      "csrw mhpmcounter23h, x0\n"
      "csrw mhpmcounter24h, x0\n"
      "csrw mhpmcounter25h, x0\n"
      "csrw mhpmcounter26h, x0\n"
      "csrw mhpmcounter27h, x0\n"
      "csrw mhpmcounter28h, x0\n"
      "csrw mhpmcounter29h, x0\n"
      "csrw mhpmcounter30h, x0\n"
      "csrw mhpmcounter31h, x0\n");
}
// Returns the current value of the mepc CSR. Declared with (void) so that
// the definition is a proper prototype.
unsigned int get_mepc(void) {
  uint32_t result;
  __asm__ volatile("csrr %0, mepc;" : "=r"(result));
  return result;
}
// Returns the current value of the mcause CSR. Declared with (void) so that
// the definition is a proper prototype.
unsigned int get_mcause(void) {
  uint32_t result;
  __asm__ volatile("csrr %0, mcause;" : "=r"(result));
  return result;
}
// Returns the current value of the mtval CSR. Declared with (void) so that
// the definition is a proper prototype.
unsigned int get_mtval(void) {
  uint32_t result;
  __asm__ volatile("csrr %0, mtval;" : "=r"(result));
  return result;
}
// Default exception handler: prints a banner followed by the mepc, mcause and
// mtval CSR values in hex, asks the simulator to halt, then spins forever in
// case execution continues.
void simple_exc_handler(void) {
  puts("EXCEPTION!!!\n");
  puts("============\n");

  puts("MEPC: 0x");
  puthex(get_mepc());

  puts("\nMCAUSE: 0x");
  puthex(get_mcause());

  puts("\nMTVAL: 0x");
  puthex(get_mtval());

  putchar('\n');
  sim_halt();

  for (;;) {
  }
}
// Number of timer interrupts handled since timer_enable() last reset it to 0;
// incremented by simple_timer_handler(). volatile because it is modified from
// the interrupt handler.
volatile uint64_t time_elapsed;
// Tick interval passed to timer_enable(); the interrupt handler reuses it to
// schedule the next compare value.
uint64_t time_increment;
// Schedules the next timer compare value time_base ticks after the current
// mtime reading.
inline static void increment_timecmp(uint64_t time_base) {
  const uint64_t deadline = timer_read() + time_base;

  timecmp_update(deadline);
}
// Resets the elapsed-interrupt counter, remembers time_base as the interrupt
// period, programs the first compare value, and then sets bit 0x80 in mie
// (timer interrupt) and bit 0x8 in mstatus (global interrupts).
void timer_enable(uint64_t time_base) {
  time_increment = time_base;
  time_elapsed = 0;

  // Program the first deadline before any interrupt is enabled.
  increment_timecmp(time_base);

  // enable timer interrupt
  asm volatile("csrs mie, %0\n" : : "r"(0x80));

  // enable global interrupt
  asm volatile("csrs mstatus, %0\n" : : "r"(0x8));
}
// Clears bit 0x80 in mie, disabling the timer interrupt.
void timer_disable(void) {
  asm volatile("csrc mie, %0\n" : : "r"(0x80));
}
// Reads the 64-bit mtime counter through its two 32-bit registers. The high
// word is sampled before and after the low word; if it changed in between
// (the low word wrapped during the read) the whole read is repeated.
uint64_t timer_read(void) {
  uint32_t hi;
  uint32_t lo;

  for (;;) {
    hi = DEV_READ(TIMER_BASE + TIMER_MTIMEH, 0);
    lo = DEV_READ(TIMER_BASE + TIMER_MTIME, 0);
    if (hi == DEV_READ(TIMER_BASE + TIMER_MTIMEH, 0)) {
      break;
    }
  }

  return ((uint64_t)hi << 32) | lo;
}
// Writes a new 64-bit compare value using three 32-bit stores, in this order:
// low word = all ones, high word = new high, low word = new low.
// NOTE(review): the initial all-ones store presumably keeps the intermediate
// 64-bit compare value from being lower than intended while only one half has
// been updated -- confirm against the timer documentation.
void timecmp_update(uint64_t new_time) {
  const uint32_t hi = (uint32_t)(new_time >> 32);
  const uint32_t lo = (uint32_t)new_time;

  DEV_WRITE(TIMER_BASE + TIMER_MTIMECMP, -1);
  DEV_WRITE(TIMER_BASE + TIMER_MTIMECMPH, hi);
  DEV_WRITE(TIMER_BASE + TIMER_MTIMECMP, lo);
}
// Returns the number of timer interrupts counted so far.
// NOTE(review): time_elapsed is a 64-bit volatile updated from the interrupt
// handler; on a 32-bit core this read is presumably two loads and could
// observe a half-updated value -- confirm whether callers need protection.
uint64_t get_elapsed_time(void) {
  const uint64_t ticks = time_elapsed;

  return ticks;
}
// Timer interrupt service routine (compiled with the interrupt attribute):
// schedules the next compare value one period ahead and counts the interrupt.
__attribute__((interrupt)) void simple_timer_handler(void);

void simple_timer_handler(void) {
  increment_timecmp(time_increment);
  time_elapsed += 1;
}
@@ -0,0 +1,4 @@
/home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/simple_system_common.o: \
/home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/simple_system_common.c \
/home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/simple_system_common.h \
/home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/simple_system_regs.h
@@ -0,0 +1,99 @@
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
// Include guard. The #define is required: without it the #ifndef is always
// true and a second inclusion redefines pcount_enable().
#ifndef SIMPLE_SYSTEM_COMMON_H__
#define SIMPLE_SYSTEM_COMMON_H__
#include <stdint.h>
#include <stdio.h>
#include "simple_system_regs.h"
/**
 * Memory-mapped register access helpers.
 *
 * DEV_WRITE stores val as a 32-bit word at addr; DEV_READ loads a 32-bit word
 * from addr (its second argument is accepted but ignored). Both go through a
 * volatile pointer so the access is never optimised away. PCOUNT_READ reads
 * the CSR called name into dst.
 *
 * Every macro argument is parenthesised so that any expression can be passed.
 */
#define DEV_WRITE(addr, val) (*((volatile uint32_t *)(addr)) = (val))
#define DEV_READ(addr, val) (*((volatile uint32_t *)(addr)))
#define PCOUNT_READ(name, dst) asm volatile("csrr %0, " #name ";" : "=r"(dst))
/**
 * Writes character to simulator out log. Signature matches c stdlib function
 * of the same name. The character is converted to unsigned char and written
 * to the SIM_CTRL_OUT register.
 *
 * @param c Character to output
 * @returns Character output (never fails so no EOF ever returned)
 */
int putchar(int c);
/**
 * Writes string to simulator out log. Signature matches c stdlib function of
 * the same name, but unlike the stdlib function no trailing newline is
 * appended.
 *
 * @param str NUL-terminated string to output
 * @returns 0 always (never fails so no error)
 */
int puts(const char *str);
/**
 * Writes ASCII hex representation of number to simulator out log. Always
 * prints eight upper-case digits, most significant first, with no prefix.
 *
 * @param h Number to output in hex
 */
void puthex(uint32_t h);
/**
 * Immediately halts the simulation by writing 1 to the SIM_CTRL_CTRL register.
 */
void sim_halt(void);
/**
 * Enables/disables performance counters. This affects mcycle and minstret as
 * well as the mhpmcounterN counters.
 *
 * @param enable if non-zero enables, otherwise disables
 */
static inline void pcount_enable(int enable) {
  // An all-ones inhibit mask stops every counter (cycle included); zero lets
  // them all run.
  unsigned int inhibit_val;

  if (enable) {
    inhibit_val = 0x0;
  } else {
    inhibit_val = 0xFFFFFFFF;
  }

  // CSR 0x320 was `mucounteren` in privileged spec v1.9.1, was dropped in
  // v1.10, and came back in v1.11 as `mcountinhibit`. The binutils in use only
  // accepts the old name while LLVM only accepts the new one (trunk accepts
  // both), so the numeric address is used for maximum compatibility.
  asm volatile("csrw 0x320, %0\n" : : "r"(inhibit_val));
}
/**
 * Resets all performance counters. This affects mcycle and minstret as well
 * as the mhpmcounterN counters (both low and high words are written to zero).
 */
void pcount_reset(void);
/**
 * Enables timer interrupt. Also resets the elapsed-interrupt count returned by
 * get_elapsed_time() to 0, programs the first compare value time_base ticks
 * from now, and enables global interrupts in mstatus.
 *
 * @param time_base Number of time ticks to count before interrupt
 */
void timer_enable(uint64_t time_base);
/**
 * Returns current mtime value. The 64-bit value is assembled from two 32-bit
 * register reads, retried if the high word changes during the read.
 */
uint64_t timer_read(void);
/**
 * Set a new timer compare value (written to the MTIMECMP / MTIMECMPH
 * registers).
 *
 * @param new_time New 64-bit compare value
 */
void timecmp_update(uint64_t new_time);
/**
 * Disables timer interrupt (clears the timer bit in mie; global interrupt
 * enable in mstatus is left untouched).
 */
void timer_disable(void);
/**
 * Returns current global time value: the number of timer interrupts handled
 * since timer_enable() was last called.
 */
uint64_t get_elapsed_time(void);
#endif
Binary file not shown.
@@ -0,0 +1,18 @@
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
#ifndef SIMPLE_SYSTEM_REGS_H__
#define SIMPLE_SYSTEM_REGS_H__
// Simulator control block: base address plus register offsets.
#define SIM_CTRL_BASE 0x20000
// Character output register (written by putchar()).
#define SIM_CTRL_OUT 0x0
// Control register; software writes 1 here to halt the simulation.
#define SIM_CTRL_CTRL 0x8
// Timer block: base address plus register offsets. The 64-bit time and
// compare values are each split into a low word and a high (...H) word.
#define TIMER_BASE 0x30000
#define TIMER_MTIME 0x0
#define TIMER_MTIMEH 0x4
#define TIMER_MTIMECMP 0x8
#define TIMER_MTIMECMPH 0xC
#endif // SIMPLE_SYSTEM_REGS_H__