Adding files
This commit is contained in:
@@ -0,0 +1,77 @@
|
||||
# Copyright lowRISC contributors.
|
||||
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Directory that holds this makefile together with the shared runtime sources.
COMMON_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))

COMMON_SRCS = $(wildcard $(COMMON_DIR)/*.c)
INCS := -I$(COMMON_DIR)

#ARCH = rv32im # to disable compressed instructions
ARCH ?= rv32imc

# PROGRAM is optional: without it only the common objects are built.
ifdef PROGRAM
PROGRAM_C := $(PROGRAM).c
endif

SRCS = $(COMMON_SRCS) $(PROGRAM_C) $(EXTRA_SRCS)

C_SRCS = $(filter %.c, $(SRCS))
ASM_SRCS = $(filter %.S, $(SRCS))
# $(filter pattern,text) needs the comma; without it make sees one argument.
CPLUSPLUS = $(filter %.cpp, $(SRCS))

CC = riscv32-unknown-elf-gcc

# Binutils prefix derived from the compiler name (foo-gcc -> foo-).
CROSS_COMPILE = $(patsubst %-gcc,%-,$(CC))

OBJCOPY ?= $(CROSS_COMPILE)objcopy
OBJDUMP ?= $(CROSS_COMPILE)objdump

LINKER_SCRIPT ?= $(COMMON_DIR)/link.ld
CRT ?= $(COMMON_DIR)/crt0.S
CFLAGS ?= -march=$(ARCH) -mabi=ilp32 -static -mcmodel=medany -Wall -g -O3\
	-fvisibility=hidden -nostartfiles -ffreestanding $(PROGRAM_CFLAGS)

OBJS := ${C_SRCS:.c=.o} ${ASM_SRCS:.S=.o} ${CRT:.S=.o}

# Dependency files written by -MMD next to each object.
DEPS = $(OBJS:%.o=%.d)

ifdef PROGRAM
OUTFILES := $(PROGRAM).elf $(PROGRAM).vmem $(PROGRAM).bin
else
OUTFILES := $(OBJS)
endif

# These targets do not name files, so keep make from confusing them with any.
.PHONY: all clean distclean

all: $(OUTFILES)

ifdef PROGRAM
$(PROGRAM).elf: $(OBJS) $(LINKER_SCRIPT)
	$(CC) $(CFLAGS) -T $(LINKER_SCRIPT) $(OBJS) -o $@ $(LIBS)

.PHONY: disassemble
disassemble: $(PROGRAM).dis
endif

%.dis: %.elf
	$(OBJDUMP) -fhSD $^ > $@

# Note: this target requires the srecord package to be installed.
# XXX: This could be replaced by objcopy once
# https://sourceware.org/bugzilla/show_bug.cgi?id=19921
# is widely available.
%.vmem: %.bin
	srec_cat $^ -binary -offset 0x0000 -byte-swap 4 -o $@ -vmem

%.bin: %.elf
	$(OBJCOPY) -O binary $^ $@

%.o: %.c
	$(CC) $(CFLAGS) -MMD -c $(INCS) -o $@ $<

%.o: %.S
	$(CC) $(CFLAGS) -MMD -c $(INCS) -o $@ $<

clean:
	$(RM) -f $(OBJS) $(DEPS)

distclean: clean
	$(RM) -f $(OUTFILES)
|
||||
@@ -0,0 +1,109 @@
|
||||
#ifndef CONV2D_H
|
||||
#define CONV2D_H
|
||||
|
||||
/**
 * Plain-C 2-D convolution with fused requantisation.
 *
 * For every output element the bias and all in-range products
 * inp * fil are accumulated, the sum is scaled by quantized_multiplier,
 * rounded and shifted right by out_shift_rl, then saturated to [0, 255].
 *
 * @param in_dim   input extents; [0] and [1] bound the two spatial indices.
 * @param fil_dim  filter extents: [1] height, [2] width, [3] depth.
 * @param out_dim  output extents: [0] height, [1] width, [2] depth.
 * @param inp      input tensor, indexed [row][col][channel].
 * @param fil      filter tensor, indexed [out channel][row][col][channel].
 * @param bias     one bias per output channel.
 * @param out      output tensor, indexed [row][col][out channel].
 * @param strides  step between neighbouring windows (both directions).
 * @param pad      only pad[0] (row offset) and pad[2] (column offset) are read.
 * @param bias_shift_mode       unused by this implementation.
 * @param quantized_multiplier  scale applied to the accumulator.
 * @param out_shift_rl          right shift after scaling; values <= 0 mean
 *                              "no shift, no rounding".
 *
 * Positions that fall outside the input contribute nothing (zero padding).
 */
void conv2(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][fil_dim[3]], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode, const int quantized_multiplier, const int out_shift_rl){

  /* 1 << (out_shift_rl - 1) is undefined for out_shift_rl <= 0, so the
   * rounding term and the shift are only applied for a positive shift. */
  const int do_shift = out_shift_rl > 0;
  const int rounding = do_shift ? (1 << (out_shift_rl - 1)) : 0;

  (void)bias_shift_mode;  /* kept in the signature for its callers */

  for (int oc = 0; oc < out_dim[2]; oc++) {        // output depth
    for (int oy = 0; oy < out_dim[0]; oy++) {      // output height
      const int row0 = oy * strides - pad[0];
      for (int ox = 0; ox < out_dim[1]; ox++) {    // output width
        const int col0 = ox * strides - pad[2];
        int acc = bias[oc];

        for (int fy = 0; fy < fil_dim[1]; fy++) {  // filters height
          const int row = row0 + fy;
          if (row < 0 || row >= in_dim[0]) continue;
          for (int fx = 0; fx < fil_dim[2]; fx++) {  // filters width
            const int col = col0 + fx;
            if (col < 0 || col >= in_dim[1]) continue;
            for (int ch = 0; ch < fil_dim[3]; ch++) {  // filters depth
              acc += inp[row][col][ch] * fil[oc][fy][fx][ch];
            }
          }
        }

        int quant = quantized_multiplier * acc + rounding;
        if (do_shift) quant >>= out_shift_rl;

        /* saturate to the 8-bit activation range */
        if (quant < 0) quant = 0;
        if (quant > 255) quant = 255;
        out[oy][ox][oc] = quant;
      }
    }
  }
}
|
||||
|
||||
/**
 * 2-D max pooling over a pool_size x pool_size window.
 *
 * @param in_dim     input extents; [0] and [1] bound the spatial indices.
 * @param out_dim    output extents; [2] is the number of channels walked.
 * @param inp        input tensor, indexed [row][col][channel].
 * @param out        output tensor, indexed [row][col][channel].
 * @param pool_size  window edge length.
 * @param strides    step between neighbouring windows.
 *
 * The result is the largest in-range element of the window. Positions outside
 * the input are ignored; a window with no in-range element yields 0.
 * (Seeding the maximum with 0, as before, returned 0 for all-negative
 * windows.)
 */
void maxpool2(int in_dim[3], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int pool_size, int strides) {

  for (int d = 0; d < out_dim[2]; d++) {
    for (int i = 0; i < out_dim[0]; i++) {
      const int row0 = i * strides;
      for (int j = 0; j < out_dim[1]; j++) {
        const int col0 = j * strides;
        int best = 0;
        int seen = 0;  /* becomes 1 once the window has supplied a value */

        for (int m = 0; m < pool_size; m++) {
          const int row = row0 + m;
          if (row < 0 || row >= in_dim[0]) continue;
          for (int n = 0; n < pool_size; n++) {
            const int col = col0 + n;
            if (col < 0 || col >= in_dim[1]) continue;
            const int value = inp[row][col][d];
            if (!seen || value > best) {
              best = value;
              seen = 1;
            }
          }
        }
        out[i][j][d] = best;
      }
    }
  }
}
|
||||
|
||||
/**
 * 2-D average pooling over a pool_size x pool_size window.
 *
 * @param in_dim     input extents; [0] and [1] bound the spatial indices.
 * @param out_dim    output extents; [2] is the number of channels walked.
 * @param inp        input tensor, indexed [row][col][channel].
 * @param out        output tensor, indexed [row][col][channel].
 * @param pool_size  window edge length.
 * @param strides    step between neighbouring windows.
 *
 * The in-range elements of each window are summed and divided by the full
 * window area pool_size * pool_size (integer division), so positions outside
 * the input count as zeros. A zero window area yields 0 instead of dividing
 * by zero.
 */
void avgpool2(int in_dim[3], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int pool_size, int strides) {

  const int area = pool_size * pool_size;

  for (int d = 0; d < out_dim[2]; d++) {
    for (int i = 0; i < out_dim[0]; i++) {
      const int row0 = i * strides;
      for (int j = 0; j < out_dim[1]; j++) {
        const int col0 = j * strides;
        int sum = 0;

        for (int m = 0; m < pool_size; m++) {
          const int row = row0 + m;
          if (row < 0 || row >= in_dim[0]) continue;
          for (int n = 0; n < pool_size; n++) {
            const int col = col0 + n;
            if (col < 0 || col >= in_dim[1]) continue;
            sum += inp[row][col][d];
          }
        }
        /* area == 0 only when pool_size == 0; nothing was summed then */
        out[i][j][d] = (area != 0) ? sum / area : 0;
      }
    }
  }
}
|
||||
|
||||
/**
 * Copies a [row][col][channel] tensor into a flat array, channel by channel.
 *
 * All elements of channel 0 are emitted first (rows outermost, columns
 * innermost), then channel 1, and so on. out must hold
 * in_dim[0] * in_dim[1] * in_dim[2] elements.
 */
void flatten(int in_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], int out[]){

  const int rows = in_dim[0];
  const int cols = in_dim[1];
  const int depth = in_dim[2];
  int *dst = out;  /* next free slot of the flat output */

  for (int ch = 0; ch < depth; ch++) {
    for (int r = 0; r < rows; r++) {
      for (int c = 0; c < cols; c++) {
        *dst++ = inp[r][c][ch];
      }
    }
  }
}
|
||||
|
||||
#endif /* CONV2D_H */
|
||||
@@ -0,0 +1,358 @@
|
||||
#ifndef CONV2D_OPT_H
#define CONV2D_OPT_H

#include <stdint.h>
|
||||
|
||||
void conv2_8bits(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][fil_dim[3] << 2], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
|
||||
|
||||
int i, j, k, m, n, p, res, k1, k2, str1, str2, w, in_cnn, bias_val;
|
||||
|
||||
for (i = 0; i < out_dim[2]; i++) { // output depth
|
||||
str1 = -pad[0] - strides;
|
||||
for (j = 0; j < out_dim[0]; j++) { // output height
|
||||
str1 += strides;
|
||||
str2 = -pad[2] - strides;
|
||||
for (k = 0; k < out_dim[1]; k++) { // output width
|
||||
bias_val = bias[i];
|
||||
asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
|
||||
str2 += strides;
|
||||
for (p = 0; p < fil_dim[1]; p++) { // filters height
|
||||
for (n = 0; n < fil_dim[2]; n++) { // filters width
|
||||
k1 = str1 + p;
|
||||
k2 = str2 + n;
|
||||
|
||||
if (k1 < in_dim[0] && k1 >= 0 && k2 >= 0 && k2 < in_dim[1]) {
|
||||
for (m = 0; m < fil_dim[3]; m++) { // filters depth
|
||||
in_cnn = inp[k1][k2][m];
|
||||
w = fil[i][p][n][4*m];
|
||||
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
|
||||
w = fil[i][p][n][4*m+1];
|
||||
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
|
||||
w = fil[i][p][n][4*m+2];
|
||||
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
|
||||
w = fil[i][p][n][4*m+3];
|
||||
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
|
||||
out[j][k][i] = res;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void conv2_8bits_1ch(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][fil_dim[3] << 2], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
|
||||
|
||||
int i, j, k, n, p, res, k1, k2, str1, str2, w, in_cnn, bias_val;
|
||||
|
||||
for (i = 0; i < out_dim[2]; i++) { // output depth
|
||||
str1 = -pad[0] - strides;
|
||||
for (j = 0; j < out_dim[0]; j++) { // output height
|
||||
str1 += strides;
|
||||
str2 = -pad[2] - strides;
|
||||
for (k = 0; k < out_dim[1]; k++) { // output width
|
||||
bias_val = bias[i];
|
||||
asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
|
||||
str2 += strides;
|
||||
for (p = 0; p < fil_dim[1]; p++) { // filters height
|
||||
for (n = 0; n < fil_dim[2]; n++) { // filters width
|
||||
k1 = str1 + p;
|
||||
k2 = str2 + n;
|
||||
|
||||
if (k1 < in_dim[0] && k1 >= 0 && k2 >= 0 && k2 < in_dim[1]) {
|
||||
in_cnn = inp[k1][k2][0];
|
||||
w = fil[i][p][n][0];
|
||||
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
|
||||
w = fil[i][p][n][1];
|
||||
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
|
||||
w = fil[i][p][n][2];
|
||||
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
|
||||
w = fil[i][p][n][3];
|
||||
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
}
|
||||
}
|
||||
}
|
||||
asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
|
||||
out[j][k][i] = res;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void conv2_4bits(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][fil_dim[3] << 1], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
|
||||
|
||||
int i, j, k, m, n, p, res, k1, k2, str1, str2, w, in_cnn, bias_val;
|
||||
|
||||
for (i = 0; i < out_dim[2]; i++) { // output depth
|
||||
str1 = -pad[0];
|
||||
for (j = 0; j < out_dim[0]; j++) { // output height
|
||||
if (j != 0) str1 += strides;
|
||||
str2 = -pad[2];
|
||||
for (k = 0; k < out_dim[1]; k++) { // output width
|
||||
bias_val = bias[i];
|
||||
asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
|
||||
if (k != 0) str2 += strides;
|
||||
for (p = 0; p < fil_dim[1]; p++) { // filters height
|
||||
for (n = 0; n < fil_dim[2]; n++) { // filters width
|
||||
k1 = str1 + p;
|
||||
k2 = str2 + n;
|
||||
|
||||
if (k1 < in_dim[0] && k1 >= 0 && k2 >= 0 && k2 < in_dim[1]) {
|
||||
for (m = 0; m < fil_dim[3]; m++) { // filters depth
|
||||
in_cnn = inp[k1][k2][m];
|
||||
w = fil[i][p][n][2*m];
|
||||
asm volatile("nn_mac_4b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
|
||||
w = fil[i][p][n][2*m+1];
|
||||
asm volatile("nn_mac_4b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
|
||||
out[j][k][i] = res;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void conv2_4bits_1ch(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][fil_dim[3] << 1], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
|
||||
|
||||
int i, j, k, n, p, res, k1, k2, str1, str2, w, in_cnn, bias_val;
|
||||
|
||||
for (i = 0; i < out_dim[2]; i++) { // output depth
|
||||
str1 = -pad[0] - strides;
|
||||
for (j = 0; j < out_dim[0]; j++) { // output height
|
||||
str1 += strides;
|
||||
str2 = -pad[2] - strides;
|
||||
for (k = 0; k < out_dim[1]; k++) { // output width
|
||||
bias_val = bias[i];
|
||||
asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
|
||||
str2 += strides;
|
||||
for (p = 0; p < fil_dim[1]; p++) { // filters height
|
||||
for (n = 0; n < fil_dim[2]; n++) { // filters width
|
||||
k1 = str1 + p;
|
||||
k2 = str2 + n;
|
||||
|
||||
if (k1 < in_dim[0] && k1 >= 0 && k2 >= 0 && k2 < in_dim[1]) {
|
||||
in_cnn = inp[k1][k2][0];
|
||||
w = fil[i][p][n][0];
|
||||
asm volatile("nn_mac_2b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
|
||||
w = fil[i][p][n][1];
|
||||
asm volatile("nn_mac_2b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
}
|
||||
}
|
||||
}
|
||||
asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
|
||||
out[j][k][i] = res;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void conv2_2bits(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][fil_dim[3]], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
|
||||
|
||||
int i, j, k, m, n, p, res, k1, k2, str1, str2, w, in_cnn, bias_val;
|
||||
|
||||
for (i = 0; i < out_dim[2]; i++) { // output depth
|
||||
str1 = -pad[0] - strides;
|
||||
for (j = 0; j < out_dim[0]; j++) { // output height
|
||||
str1 += strides;
|
||||
str2 = -pad[2] - strides;
|
||||
for (k = 0; k < out_dim[1]; k++) { // output width
|
||||
bias_val = bias[i];
|
||||
asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
|
||||
str2 += strides;
|
||||
for (p = 0; p < fil_dim[1]; p++) { // filters height
|
||||
for (n = 0; n < fil_dim[2]; n++) { // filters width
|
||||
k1 = str1 + p;
|
||||
k2 = str2 + n;
|
||||
|
||||
if (k1 < in_dim[0] && k1 >= 0 && k2 >= 0 && k2 < in_dim[1]) {
|
||||
for (m = 0; m < fil_dim[3]; m++) { // filters depth
|
||||
in_cnn = inp[k1][k2][m];
|
||||
w = fil[i][p][n][m];
|
||||
asm volatile("nn_mac_2b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
|
||||
out[j][k][i] = res;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void conv2_2bits_1ch(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][1], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][1], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
|
||||
|
||||
int i, j, k, n, p, res, k1, k2, str1, str2, w, in_cnn, bias_val;
|
||||
|
||||
for (i = 0; i < out_dim[2]; i++) { // output depth
|
||||
str1 = -pad[0] -strides;
|
||||
for (j = 0; j < out_dim[0]; j++) { // output height
|
||||
str1 += strides;
|
||||
str2 = -pad[2] - strides;
|
||||
for (k = 0; k < out_dim[1]; k++) { // output width
|
||||
bias_val = bias[i];
|
||||
asm volatile("neur_init %0, %1, %2\n":"=r"(res):"r"(bias_val),"r"(bias_shift_mode[i]):);
|
||||
str2 += strides;
|
||||
for (p = 0; p < fil_dim[1]; p++) { // filters height
|
||||
for (n = 0; n < fil_dim[2]; n++) { // filters width
|
||||
k1 = str1 + p;
|
||||
k2 = str2 + n;
|
||||
|
||||
if (k1 < in_dim[0] && k1 >= 0 && k2 >= 0 && k2 < in_dim[1]) {
|
||||
in_cnn = inp[k1][k2][0];
|
||||
w = fil[i][p][n][0];
|
||||
asm volatile("nn_mac_2b %0, %1,%2\n":"=r"(res):"r"(w),"r"(in_cnn):);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
asm volatile("neur_res %0, %1, %2\n":"=r"(res):"r"(quantized_multiplier),"r"(out_shift_rl):);
|
||||
out[j][k][i] = res;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Max pooling on words that each pack four 8-bit lanes.
 *
 * Every input word is split with the masks 0xFF000000, 0x00FF0000,
 * 0x0000FF00 and 0x000000FF; the unsigned maximum of each lane over the
 * in-range part of the pool_size x pool_size window is kept in place (no
 * shifting) and the four lane maxima are OR-ed back into one output word.
 * Positions outside the input are ignored; an empty window yields 0.
 */
void maxpool2_compressed(int in_dim[3], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int pool_size, int strides) {

  static const uint32_t lane_mask[4] = {0xFF000000, 0x00FF0000, 0x0000FF00, 0x000000FF};

  for (int d = 0; d < out_dim[2]; d++) {
    for (int i = 0; i < out_dim[0]; i++) {
      const int row0 = i * strides;
      for (int j = 0; j < out_dim[1]; j++) {
        const int col0 = j * strides;
        uint32_t best[4] = {0, 0, 0, 0};

        for (int m = 0; m < pool_size; m++) {
          const int row = row0 + m;
          if (row < 0 || row >= in_dim[0]) continue;
          for (int n = 0; n < pool_size; n++) {
            const int col = col0 + n;
            if (col < 0 || col >= in_dim[1]) continue;
            const uint32_t word = (uint32_t)inp[row][col][d];
            for (int lane = 0; lane < 4; lane++) {
              const uint32_t value = word & lane_mask[lane];
              if (value > best[lane]) best[lane] = value;
            }
          }
        }

        const uint32_t packed = best[0] | best[1] | best[2] | best[3];
        out[i][j][d] = packed;
      }
    }
  }
}
|
||||
|
||||
/**
 * Average pooling on words that each pack four 8-bit lanes.
 *
 * For each lane (bits 31..24, 23..16, 15..8, 7..0) the in-range values of
 * the pool_size x pool_size window are summed and divided by the full window
 * area pool_size * pool_size (integer division), then the four lane averages
 * are packed back into one output word. Positions outside the input count as
 * zeros.
 *
 * The arithmetic is unsigned: shifting a lane average >= 128 left by 24 in a
 * signed int overflows, which is undefined behaviour. A zero window area
 * yields 0 instead of dividing by zero.
 */
void avgpool2_compressed(int in_dim[3], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int pool_size, int strides) {

  const int area = pool_size * pool_size;

  for (int d = 0; d < out_dim[2]; d++) {
    for (int i = 0; i < out_dim[0]; i++) {
      const int row0 = i * strides;
      for (int j = 0; j < out_dim[1]; j++) {
        const int col0 = j * strides;
        unsigned int sum[4] = {0u, 0u, 0u, 0u};  /* [0] is the top byte */

        for (int m = 0; m < pool_size; m++) {
          const int row = row0 + m;
          if (row < 0 || row >= in_dim[0]) continue;
          for (int n = 0; n < pool_size; n++) {
            const int col = col0 + n;
            if (col < 0 || col >= in_dim[1]) continue;
            const unsigned int word = (unsigned int)inp[row][col][d];
            sum[0] += (word >> 24) & 0xFFu;
            sum[1] += (word >> 16) & 0xFFu;
            sum[2] += (word >> 8) & 0xFFu;
            sum[3] += word & 0xFFu;
          }
        }

        unsigned int packed = 0u;
        if (area != 0) {
          const unsigned int div = (unsigned int)area;
          packed = ((sum[0] / div) << 24) | ((sum[1] / div) << 16) |
                   ((sum[2] / div) << 8) | (sum[3] / div);
        }
        out[i][j][d] = (int)packed;
      }
    }
  }
}
|
||||
|
||||
/**
 * Flattens a tensor whose words each pack four 8-bit values.
 *
 * Each input word inp[r][c][k] is treated as four byte-channels
 * 4*k .. 4*k+3 (most significant byte first). The bytes are emitted
 * byte-channel by byte-channel (rows outermost, columns innermost within a
 * channel) and every four consecutive bytes of that stream are packed,
 * first byte in the top position, into one word of out. out must hold
 * in_dim[0] * in_dim[1] * in_dim[2] words.
 *
 * The stream is repacked on the fly: no temporary arrays are put on the
 * stack, and the packing uses unsigned arithmetic because shifting a byte
 * value >= 128 left by 24 in a signed int is undefined. Empty input (any
 * extent <= 0) writes nothing.
 */
void flatten(int in_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], int out[]){

  const int rows = in_dim[0];
  const int cols = in_dim[1];
  const int words = in_dim[2];

  if (rows <= 0 || cols <= 0 || words <= 0) {
    return;
  }

  const int total = rows * cols * words;  /* number of output words */

  /* Cursor into the byte stream: byte-channel, row, column. */
  int ch = 0;
  int r = 0;
  int c = 0;

  for (int o = 0; o < total; o++) {
    unsigned int packed = 0u;

    for (int b = 0; b < 4; b++) {
      const unsigned int word = (unsigned int)inp[r][c][ch >> 2];
      /* byte-channel ch lives in byte (ch & 3) of its word, MSB first */
      const unsigned int byte = (word >> (8 * (3 - (ch & 3)))) & 0xFFu;
      packed = (packed << 8) | byte;

      /* advance: columns fastest, then rows, then byte-channels */
      if (++c == cols) {
        c = 0;
        if (++r == rows) {
          r = 0;
          ch++;
        }
      }
    }
    out[o] = (int)packed;
  }
}
|
||||
|
||||
#endif /* CONV2D_OPT_H */
|
||||
@@ -0,0 +1,102 @@
|
||||
# Copyright lowRISC contributors.
|
||||
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "simple_system_regs.h"
|
||||
|
||||
.section .text
|
||||
|
||||
default_exc_handler:
|
||||
jal x0, simple_exc_handler
|
||||
|
||||
timer_handler:
|
||||
jal x0, simple_timer_handler
|
||||
|
||||
reset_handler:
|
||||
/* set all registers to zero */
|
||||
mv x1, x0
|
||||
mv x2, x1
|
||||
mv x3, x1
|
||||
mv x4, x1
|
||||
mv x5, x1
|
||||
mv x6, x1
|
||||
mv x7, x1
|
||||
mv x8, x1
|
||||
mv x9, x1
|
||||
mv x10, x1
|
||||
mv x11, x1
|
||||
mv x12, x1
|
||||
mv x13, x1
|
||||
mv x14, x1
|
||||
mv x15, x1
|
||||
mv x16, x1
|
||||
mv x17, x1
|
||||
mv x18, x1
|
||||
mv x19, x1
|
||||
mv x20, x1
|
||||
mv x21, x1
|
||||
mv x22, x1
|
||||
mv x23, x1
|
||||
mv x24, x1
|
||||
mv x25, x1
|
||||
mv x26, x1
|
||||
mv x27, x1
|
||||
mv x28, x1
|
||||
mv x29, x1
|
||||
mv x30, x1
|
||||
mv x31, x1
|
||||
|
||||
/* stack initialization */
|
||||
la x2, _stack_start
|
||||
|
||||
_start:
  .global _start

  /* clear BSS: zero every word in [_bss_start, _bss_end) */
  la x26, _bss_start
  la x27, _bss_end

  /* addresses are unsigned, so compare them unsigned; skip an empty BSS */
  bgeu x26, x27, zero_loop_end

zero_loop:
  sw x0, 0(x26)
  addi x26, x26, 4
  /* stop once the cursor reaches _bss_end; a "less or equal" test would
     also clear the word located at _bss_end, which is outside the BSS */
  bltu x26, x27, zero_loop
zero_loop_end:
|
||||
|
||||
|
||||
main_entry:
|
||||
/* jump to main program entry point (argc = argv = 0) */
|
||||
addi x10, x0, 0
|
||||
addi x11, x0, 0
|
||||
jal x1, main
|
||||
|
||||
/* Halt simulation */
|
||||
li x5, SIM_CTRL_BASE + SIM_CTRL_CTRL
|
||||
li x6, 1
|
||||
sw x6, 0(x5)
|
||||
|
||||
/* If execution ends up here just put the core to sleep */
|
||||
sleep_loop:
|
||||
wfi
|
||||
j sleep_loop
|
||||
|
||||
/* =================================================== [ exceptions ] === */
|
||||
/* This section has to be down here, since we have to disable rvc for it */
|
||||
|
||||
.section .vectors, "ax"
|
||||
.option norvc;
|
||||
|
||||
// All unimplemented interrupts/exceptions go to the default_exc_handler.
|
||||
.org 0x00
|
||||
.rept 7
|
||||
jal x0, default_exc_handler
|
||||
.endr
|
||||
jal x0, timer_handler
|
||||
.rept 23
|
||||
jal x0, default_exc_handler
|
||||
.endr
|
||||
|
||||
// reset vector
|
||||
.org 0x80
|
||||
jal x0, reset_handler
|
||||
@@ -0,0 +1,3 @@
|
||||
/home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/crt0.o: \
|
||||
/home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/crt0.S \
|
||||
/home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/simple_system_regs.h
|
||||
Binary file not shown.
@@ -0,0 +1,28 @@
|
||||
#ifndef FULLY_CONNECTED_H
|
||||
#define FULLY_CONNECTED_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/**
 * Plain-C fully connected layer with fused requantisation.
 *
 * For each output i: z = bias[i] + sum_j weights[i][j] * input[j]; z is then
 * scaled by quantized_multiplier, rounded and shifted right by out_shift_rl,
 * and saturated to [0, 255].
 *
 * @param input        num_inputs activations.
 * @param output       receives num_outputs results.
 * @param weights      one row of num_inputs weights per output.
 * @param bias         one bias per output.
 * @param bias_shift_mode       unused by this implementation.
 * @param quantized_multiplier  scale applied to the accumulator.
 * @param out_shift_rl          right shift after scaling; values <= 0 mean
 *                              "no shift, no rounding".
 */
void mlp_layer(int input[], int output[], int num_inputs, int num_outputs, const int weights[][num_inputs], const int bias[], const int bias_shift_mode, const int quantized_multiplier, const int out_shift_rl){

  /* 1 << (out_shift_rl - 1) is undefined for out_shift_rl <= 0, so the
   * rounding term and the shift are only applied for a positive shift. */
  const int do_shift = out_shift_rl > 0;
  const int rounding = do_shift ? (1 << (out_shift_rl - 1)) : 0;

  (void)bias_shift_mode;  /* kept in the signature for its callers */

  // Compute the output for each neuron
  for (int i = 0; i < num_outputs; i++) {
    int z = bias[i];

    for (int j = 0; j < num_inputs; j++) {
      z += weights[i][j] * input[j];
    }

    int quant = quantized_multiplier * z + rounding;
    if (do_shift) quant >>= out_shift_rl;

    /* saturate to the 8-bit activation range */
    if (quant < 0) quant = 0;
    if (quant > 255) quant = 255;

    output[i] = quant;
  }
}
|
||||
#endif /* FULLY_CONNECTED_H */
|
||||
@@ -0,0 +1,77 @@
|
||||
#ifndef FULLY_CONNECTED_OPT_H
|
||||
#define FULLY_CONNECTED_OPT_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
void mlp_layer_2bits(int input[], int output[], int num_inputs, int num_outputs, const int weights[][num_inputs], const int bias[], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
|
||||
|
||||
// Compute the output for each neuron
|
||||
int z, bias_val, w, inp, temp;
|
||||
|
||||
for (int i = 0; i < num_outputs; i++) {
|
||||
bias_val = bias[i];
|
||||
asm volatile("neur_init %0, %1, %2\n":"=r"(z):"r"(bias_val),"r"(bias_shift_mode[i]):);
|
||||
|
||||
for (int j = 0; j < num_inputs; j++) {
|
||||
w = weights[i][j];
|
||||
inp = input[j];
|
||||
asm volatile("nn_mac_2b %0, %1,%2\n":"=r"(temp):"r"(w),"r"(inp):);
|
||||
}
|
||||
asm volatile("neur_res %0, %1, %2\n":"=r"(z):"r"(quantized_multiplier),"r"(out_shift_rl):);
|
||||
output[i] = z;
|
||||
}
|
||||
}
|
||||
|
||||
void mlp_layer_4bits(int input[], int output[], int num_inputs, int num_outputs, const int weights[][num_inputs << 1], const int bias[], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
|
||||
|
||||
// Compute the output for each neuron
|
||||
int z, bias_val, w, inp, temp;
|
||||
|
||||
for (int i = 0; i < num_outputs; i++) {
|
||||
bias_val = bias[i];
|
||||
asm volatile("neur_init %0, %1, %2\n":"=r"(z):"r"(bias_val),"r"(bias_shift_mode[i]):);
|
||||
|
||||
for (int j = 0; j < num_inputs; j++) {
|
||||
w = weights[i][2*j];
|
||||
inp = input[j];
|
||||
asm volatile("nn_mac_4b %0, %1,%2\n":"=r"(temp):"r"(w),"r"(inp):);
|
||||
|
||||
w = weights[i][2*j+1];
|
||||
asm volatile("nn_mac_4b %0, %1,%2\n":"=r"(temp):"r"(w),"r"(inp):);
|
||||
|
||||
}
|
||||
asm volatile("neur_res %0, %1, %2\n":"=r"(z):"r"(quantized_multiplier),"r"(out_shift_rl):);
|
||||
output[i] = z;
|
||||
}
|
||||
}
|
||||
|
||||
void mlp_layer_8bits(int input[], int output[], int num_inputs, int num_outputs, const int weights[][num_inputs << 2], const int bias[], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
|
||||
|
||||
// Compute the output for each neuron
|
||||
int z, bias_val, w, inp, temp;
|
||||
|
||||
for (int i = 0; i < num_outputs; i++) {
|
||||
bias_val = bias[i];
|
||||
asm volatile("neur_init %0, %1, %2\n":"=r"(z):"r"(bias_val),"r"(bias_shift_mode[i]):);
|
||||
|
||||
for (int j = 0; j < num_inputs; j++) {
|
||||
w = weights[i][4*j];
|
||||
inp = input[j];
|
||||
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(temp):"r"(w),"r"(inp):);
|
||||
|
||||
w = weights[i][4*j+1];
|
||||
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(temp):"r"(w),"r"(inp):);
|
||||
|
||||
w = weights[i][4*j+2];
|
||||
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(temp):"r"(w),"r"(inp):);
|
||||
|
||||
w = weights[i][4*j+3];
|
||||
asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(temp):"r"(w),"r"(inp):);
|
||||
}
|
||||
|
||||
asm volatile("neur_res %0, %1, %2\n":"=r"(z):"r"(quantized_multiplier),"r"(out_shift_rl):);
|
||||
output[i] = z;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* FULLY_CONNECTED_OPT_H */
|
||||
@@ -0,0 +1,91 @@
|
||||
/* Copyright lowRISC contributors.
|
||||
Licensed under the Apache License, Version 2.0, see LICENSE for details.
|
||||
SPDX-License-Identifier: Apache-2.0 */
|
||||
|
||||
OUTPUT_ARCH(riscv)
|
||||
|
||||
/* Change this if you'd like different sizes. Arty A7-100(35) has a maximum of 607.5KB(225KB)
|
||||
BRAM space. Configuration below is for maximum BRAM capacity with Arty A7-35 while letting
|
||||
CoreMark run (.vmem of 152.8KB).
|
||||
*/
|
||||
|
||||
MEMORY
|
||||
{
|
||||
ram : ORIGIN = 0x00100000, LENGTH = 0x750000
|
||||
stack : ORIGIN = 0x00850000, LENGTH = 0x200000
|
||||
}
|
||||
|
||||
/* Stack information variables */
|
||||
_min_stack = 0x10000; /* 64K - minimum stack space to reserve */
|
||||
_stack_len = LENGTH(stack);
|
||||
_stack_start = ORIGIN(stack) + LENGTH(stack);
|
||||
|
||||
_entry_point = _vectors_start + 0x80;
|
||||
ENTRY(_entry_point)
|
||||
|
||||
/* The tohost address is used by Spike for a magic "stop me now" message. This
|
||||
is set to equal SIM_CTRL_CTRL (see simple_system_regs.h), which has that
|
||||
effect in simple_system simulations. Note that it must be 8-byte aligned.
|
||||
|
||||
We don't read data back from Spike, so fromhost is set to some dummy value:
|
||||
we place it just above the top of the stack.
|
||||
*/
|
||||
tohost = 0x20008;
|
||||
fromhost = _stack_start + 0x10;
|
||||
|
||||
SECTIONS
|
||||
{
|
||||
.vectors :
|
||||
{
|
||||
. = ALIGN(4);
|
||||
_vectors_start = .;
|
||||
KEEP(*(.vectors))
|
||||
_vectors_end = .;
|
||||
} > ram
|
||||
|
||||
.text : {
|
||||
. = ALIGN(4);
|
||||
*(.text)
|
||||
*(.text.*)
|
||||
} > ram
|
||||
|
||||
.rodata : {
|
||||
. = ALIGN(4);
|
||||
/* Small RO data before large RO data */
|
||||
*(.srodata)
|
||||
*(.srodata.*)
|
||||
*(.rodata);
|
||||
*(.rodata.*)
|
||||
} > ram
|
||||
|
||||
.data : {
|
||||
. = ALIGN(4);
|
||||
/* Small data before large data */
|
||||
*(.sdata)
|
||||
*(.sdata.*)
|
||||
*(.data);
|
||||
*(.data.*)
|
||||
} > ram
|
||||
|
||||
.bss :
|
||||
{
|
||||
. = ALIGN(4);
|
||||
_bss_start = .;
|
||||
/* Small BSS before large BSS */
|
||||
*(.sbss)
|
||||
*(.sbss.*)
|
||||
*(.bss)
|
||||
*(.bss.*)
|
||||
*(COMMON)
|
||||
_bss_end = .;
|
||||
} > ram
|
||||
|
||||
/* ensure there is enough room for stack */
|
||||
.stack (NOLOAD): {
|
||||
. = ALIGN(4);
|
||||
. = . + _min_stack ;
|
||||
. = ALIGN(4);
|
||||
stack = . ;
|
||||
_stack = . ;
|
||||
} > stack
|
||||
}
|
||||
@@ -0,0 +1,185 @@
|
||||
// Copyright lowRISC contributors.
|
||||
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#include "simple_system_common.h"
|
||||
|
||||
int putchar(int c) {
|
||||
DEV_WRITE(SIM_CTRL_BASE + SIM_CTRL_OUT, (unsigned char)c);
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
// Writes every character of the NUL-terminated string through putchar().
// No newline is appended. Always returns 0.
int puts(const char *str) {
  for (const char *cursor = str; *cursor != '\0'; ++cursor) {
    putchar(*cursor);
  }

  return 0;
}
|
||||
|
||||
// Prints h as exactly eight upper-case hexadecimal digits, most significant
// digit first, through putchar().
void puthex(uint32_t h) {
  // Walk the eight 4-bit groups from bits 31..28 down to bits 3..0.
  for (int shift = 28; shift >= 0; shift -= 4) {
    const int nibble = (int)((h >> shift) & 0xFu);

    if (nibble >= 10) {
      putchar('A' - 10 + nibble);
    } else {
      putchar('0' + nibble);
    }
  }
}
|
||||
|
||||
void sim_halt() { DEV_WRITE(SIM_CTRL_BASE + SIM_CTRL_CTRL, 1); }
|
||||
|
||||
void pcount_reset() {
|
||||
asm volatile(
|
||||
"csrw minstret, x0\n"
|
||||
"csrw mcycle, x0\n"
|
||||
"csrw mhpmcounter3, x0\n"
|
||||
"csrw mhpmcounter4, x0\n"
|
||||
"csrw mhpmcounter5, x0\n"
|
||||
"csrw mhpmcounter6, x0\n"
|
||||
"csrw mhpmcounter7, x0\n"
|
||||
"csrw mhpmcounter8, x0\n"
|
||||
"csrw mhpmcounter9, x0\n"
|
||||
"csrw mhpmcounter10, x0\n"
|
||||
"csrw mhpmcounter11, x0\n"
|
||||
"csrw mhpmcounter12, x0\n"
|
||||
"csrw mhpmcounter13, x0\n"
|
||||
"csrw mhpmcounter14, x0\n"
|
||||
"csrw mhpmcounter15, x0\n"
|
||||
"csrw mhpmcounter16, x0\n"
|
||||
"csrw mhpmcounter17, x0\n"
|
||||
"csrw mhpmcounter18, x0\n"
|
||||
"csrw mhpmcounter19, x0\n"
|
||||
"csrw mhpmcounter20, x0\n"
|
||||
"csrw mhpmcounter21, x0\n"
|
||||
"csrw mhpmcounter22, x0\n"
|
||||
"csrw mhpmcounter23, x0\n"
|
||||
"csrw mhpmcounter24, x0\n"
|
||||
"csrw mhpmcounter25, x0\n"
|
||||
"csrw mhpmcounter26, x0\n"
|
||||
"csrw mhpmcounter27, x0\n"
|
||||
"csrw mhpmcounter28, x0\n"
|
||||
"csrw mhpmcounter29, x0\n"
|
||||
"csrw mhpmcounter30, x0\n"
|
||||
"csrw mhpmcounter31, x0\n"
|
||||
"csrw minstreth, x0\n"
|
||||
"csrw mcycleh, x0\n"
|
||||
"csrw mhpmcounter3h, x0\n"
|
||||
"csrw mhpmcounter4h, x0\n"
|
||||
"csrw mhpmcounter5h, x0\n"
|
||||
"csrw mhpmcounter6h, x0\n"
|
||||
"csrw mhpmcounter7h, x0\n"
|
||||
"csrw mhpmcounter8h, x0\n"
|
||||
"csrw mhpmcounter9h, x0\n"
|
||||
"csrw mhpmcounter10h, x0\n"
|
||||
"csrw mhpmcounter11h, x0\n"
|
||||
"csrw mhpmcounter12h, x0\n"
|
||||
"csrw mhpmcounter13h, x0\n"
|
||||
"csrw mhpmcounter14h, x0\n"
|
||||
"csrw mhpmcounter15h, x0\n"
|
||||
"csrw mhpmcounter16h, x0\n"
|
||||
"csrw mhpmcounter17h, x0\n"
|
||||
"csrw mhpmcounter18h, x0\n"
|
||||
"csrw mhpmcounter19h, x0\n"
|
||||
"csrw mhpmcounter20h, x0\n"
|
||||
"csrw mhpmcounter21h, x0\n"
|
||||
"csrw mhpmcounter22h, x0\n"
|
||||
"csrw mhpmcounter23h, x0\n"
|
||||
"csrw mhpmcounter24h, x0\n"
|
||||
"csrw mhpmcounter25h, x0\n"
|
||||
"csrw mhpmcounter26h, x0\n"
|
||||
"csrw mhpmcounter27h, x0\n"
|
||||
"csrw mhpmcounter28h, x0\n"
|
||||
"csrw mhpmcounter29h, x0\n"
|
||||
"csrw mhpmcounter30h, x0\n"
|
||||
"csrw mhpmcounter31h, x0\n");
|
||||
}
|
||||
|
||||
/**
 * Reads the `mepc` CSR with a single `csrr` instruction.
 *
 * @returns The 32-bit value currently held in `mepc`.
 */
unsigned int get_mepc() {
  uint32_t mepc_val;

  __asm__ volatile("csrr %0, mepc;" : "=r"(mepc_val));

  return mepc_val;
}
|
||||
|
||||
/**
 * Reads the `mcause` CSR with a single `csrr` instruction.
 *
 * @returns The 32-bit value currently held in `mcause`.
 */
unsigned int get_mcause() {
  uint32_t mcause_val;

  __asm__ volatile("csrr %0, mcause;" : "=r"(mcause_val));

  return mcause_val;
}
|
||||
|
||||
/**
 * Reads the `mtval` CSR with a single `csrr` instruction.
 *
 * @returns The 32-bit value currently held in `mtval`.
 */
unsigned int get_mtval() {
  uint32_t mtval_val;

  __asm__ volatile("csrr %0, mtval;" : "=r"(mtval_val));

  return mtval_val;
}
|
||||
|
||||
/**
 * Exception handler: prints a banner followed by the MEPC, MCAUSE and MTVAL
 * CSR values in hex, requests a simulation halt and then spins forever so
 * that it never returns to the faulting code.
 */
void simple_exc_handler(void) {
  // Banner.
  puts("EXCEPTION!!!\n");
  puts("============\n");

  // Each CSR is read right before it is printed.
  puts("MEPC: 0x");
  const unsigned int mepc = get_mepc();
  puthex(mepc);

  puts("\nMCAUSE: 0x");
  const unsigned int mcause = get_mcause();
  puthex(mcause);

  puts("\nMTVAL: 0x");
  const unsigned int mtval = get_mtval();
  puthex(mtval);

  putchar('\n');

  sim_halt();

  // Park the core in case execution continues after the halt request.
  for (;;) {
  }
}
|
||||
|
||||
// Count of timer interrupts serviced: cleared by timer_enable(), incremented
// by simple_timer_handler() and returned by get_elapsed_time().
volatile uint64_t time_elapsed;
|
||||
// Interval (in timer ticks) between interrupts: stored by timer_enable() and
// re-applied by simple_timer_handler() each time it re-arms the compare value.
uint64_t time_increment;
|
||||
|
||||
/**
 * Programs the timer compare value to `time_base` ticks past the current
 * timer reading.
 *
 * @param time_base Number of ticks from now at which the compare should hit
 */
inline static void increment_timecmp(uint64_t time_base) {
  const uint64_t now = timer_read();

  timecmp_update(now + time_base);
}
|
||||
|
||||
void timer_enable(uint64_t time_base) {
|
||||
time_elapsed = 0;
|
||||
time_increment = time_base;
|
||||
// Set timer values
|
||||
increment_timecmp(time_base);
|
||||
// enable timer interrupt
|
||||
asm volatile("csrs mie, %0\n" : : "r"(0x80));
|
||||
// enable global interrupt
|
||||
asm volatile("csrs mstatus, %0\n" : : "r"(0x8));
|
||||
}
|
||||
|
||||
/**
 * Disables the timer interrupt by clearing bit 7 (0x80) of `mie`.
 * `mstatus` is left unchanged.
 */
void timer_disable(void) {
  const int mie_timer_bit = 0x80;

  asm volatile("csrc mie, %0\n" : : "r"(mie_timer_bit));
}
|
||||
|
||||
uint64_t timer_read(void) {
|
||||
uint32_t current_timeh;
|
||||
uint32_t current_time;
|
||||
// check if time overflowed while reading and try again
|
||||
do {
|
||||
current_timeh = DEV_READ(TIMER_BASE + TIMER_MTIMEH, 0);
|
||||
current_time = DEV_READ(TIMER_BASE + TIMER_MTIME, 0);
|
||||
} while (current_timeh != DEV_READ(TIMER_BASE + TIMER_MTIMEH, 0));
|
||||
uint64_t final_time = ((uint64_t)current_timeh << 32) | current_time;
|
||||
return final_time;
|
||||
}
|
||||
|
||||
void timecmp_update(uint64_t new_time) {
|
||||
DEV_WRITE(TIMER_BASE + TIMER_MTIMECMP, -1);
|
||||
DEV_WRITE(TIMER_BASE + TIMER_MTIMECMPH, new_time >> 32);
|
||||
DEV_WRITE(TIMER_BASE + TIMER_MTIMECMP, new_time);
|
||||
}
|
||||
|
||||
/**
 * Returns the number of timer interrupts counted in `time_elapsed`.
 *
 * `time_elapsed` is a 64-bit value incremented from the timer interrupt
 * handler. On the 32-bit target a single read is performed as two word loads,
 * so an interrupt landing between them could yield a torn value. The counter
 * is therefore read twice and the read is repeated until both samples agree.
 *
 * @returns Consistent snapshot of the interrupt count
 */
uint64_t get_elapsed_time(void) {
  uint64_t first;
  uint64_t second;

  do {
    first = time_elapsed;
    second = time_elapsed;
  } while (first != second);

  return first;
}
|
||||
|
||||
void simple_timer_handler(void) __attribute__((interrupt));
|
||||
|
||||
void simple_timer_handler(void) {
|
||||
increment_timecmp(time_increment);
|
||||
time_elapsed++;
|
||||
}
|
||||
@@ -0,0 +1,4 @@
|
||||
/home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/simple_system_common.o: \
|
||||
/home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/simple_system_common.c \
|
||||
/home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/simple_system_common.h \
|
||||
/home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/simple_system_regs.h
|
||||
@@ -0,0 +1,99 @@
|
||||
// Copyright lowRISC contributors.
|
||||
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
// Include guard. The macro must be defined here, otherwise a second inclusion
// re-enters the header and redefines pcount_enable().
#ifndef SIMPLE_SYSTEM_COMMON_H__
#define SIMPLE_SYSTEM_COMMON_H__
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "simple_system_regs.h"
|
||||
|
||||
/**
 * Stores `val` as a 32-bit volatile write to address `addr`.
 */
#define DEV_WRITE(addr, val) (*((volatile uint32_t *)(addr)) = (val))

/**
 * Performs a 32-bit volatile read from address `addr`. The second argument is
 * unused and kept only so existing call sites keep compiling.
 */
#define DEV_READ(addr, val) (*((volatile uint32_t *)(addr)))

/**
 * Reads the CSR called `name` into the lvalue `dst`. Uses `__asm__` so the
 * macro also works when the including file is built in a strict ISO C mode,
 * where plain `asm` is not a keyword.
 */
#define PCOUNT_READ(name, dst) \
  __asm__ volatile("csrr %0, " #name ";" : "=r"(dst))
|
||||
|
||||
/**
|
||||
* Writes character to simulator out log. Signature matches c stdlib function
|
||||
* of the same name.
|
||||
*
|
||||
* @param c Character to output
|
||||
* @returns Character output (never fails so no EOF ever returned)
|
||||
*/
|
||||
int putchar(int c);
|
||||
|
||||
/**
|
||||
* Writes string to simulator out log. Signature matches c stdlib function of
|
||||
* the same name.
|
||||
*
|
||||
* @param str String to output
|
||||
* @returns 0 always (never fails so no error)
|
||||
*/
|
||||
int puts(const char *str);
|
||||
|
||||
/**
|
||||
* Writes ASCII hex representation of number to simulator out log.
|
||||
*
|
||||
* @param h Number to output in hex
|
||||
*/
|
||||
void puthex(uint32_t h);
|
||||
|
||||
/**
|
||||
* Immediately halts the simulation
|
||||
*/
|
||||
void sim_halt(void);
|
||||
|
||||
/**
|
||||
* Enables/disables performance counters. This affects mcycle and minstret as
|
||||
* well as the mhpmcounterN counters.
|
||||
*
|
||||
* @param enable if non-zero enables, otherwise disables
|
||||
*/
|
||||
static inline void pcount_enable(int enable) {
|
||||
// Note cycle is disabled with everything else
|
||||
unsigned int inhibit_val = enable ? 0x0 : 0xFFFFFFFF;
|
||||
// CSR 0x320 was called `mucounteren` in the privileged spec v1.9.1, it was
|
||||
// then dropped in v1.10, and then re-added in v1.11 with the name
|
||||
// `mcountinhibit`. Unfortunately, the version of binutils we use only allows
|
||||
// the old name, and LLVM only supports the new name (though this is changed
|
||||
// on trunk to support both), so we use the numeric value here for maximum
|
||||
// compatibility.
|
||||
asm volatile("csrw 0x320, %0\n" : : "r"(inhibit_val));
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets all performance counters. This affects mcycle and minstret as well
|
||||
* as the mhpmcounterN counters.
|
||||
*/
|
||||
void pcount_reset(void);
|
||||
|
||||
/**
|
||||
* Enables timer interrupt
|
||||
*
|
||||
* @param time_base Number of time ticks to count before interrupt
|
||||
*/
|
||||
void timer_enable(uint64_t time_base);
|
||||
|
||||
/**
|
||||
* Returns current mtime value
|
||||
*/
|
||||
uint64_t timer_read(void);
|
||||
|
||||
/**
|
||||
* Sets a new timer compare (mtimecmp) value
|
||||
*
|
||||
* @param new_time New 64-bit compare value
|
||||
*/
|
||||
void timecmp_update(uint64_t new_time);
|
||||
|
||||
/**
|
||||
* Disables timer interrupt
|
||||
*/
|
||||
void timer_disable(void);
|
||||
|
||||
/**
|
||||
* Returns the number of timer interrupts counted since timer_enable() was called
|
||||
*/
|
||||
uint64_t get_elapsed_time(void);
|
||||
|
||||
#endif
|
||||
Binary file not shown.
@@ -0,0 +1,18 @@
|
||||
// Copyright lowRISC contributors.
|
||||
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#ifndef SIMPLE_SYSTEM_REGS_H__
#define SIMPLE_SYSTEM_REGS_H__

// Simulator control block: base address and register byte offsets.
// NOTE(review): OUT is presumably the character-output register and CTRL the
// halt/control register - confirm against their users in
// simple_system_common.c.
#define SIM_CTRL_BASE 0x20000
#define SIM_CTRL_OUT 0x0
#define SIM_CTRL_CTRL 0x8

// Timer block: base address and byte offsets of the low/high words of the
// 64-bit time and compare registers.
#define TIMER_BASE 0x30000
#define TIMER_MTIME 0x0
#define TIMER_MTIMEH 0x4
#define TIMER_MTIMECMP 0x8
#define TIMER_MTIMECMPH 0xC

#endif  // SIMPLE_SYSTEM_REGS_H__
|
||||
Reference in New Issue
Block a user