Adding files

2024-07-19 13:30:31 +03:00
commit 08fb8ef728
7245 changed files with 3055662 additions and 0 deletions
@@ -0,0 +1,77 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+# Directory holding this makefile; the shared sources, linker script and
+# crt0 are looked up relative to it.
+COMMON_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
+
+COMMON_SRCS = $(wildcard $(COMMON_DIR)/*.c)
+INCS := -I$(COMMON_DIR)
+
+#ARCH = rv32im # to disable compressed instructions
+ARCH ?= rv32imc
+
+# PROGRAM is optional: when it is set, $(PROGRAM).c is compiled and linked
+# into $(PROGRAM).elf/.bin/.vmem; otherwise only the objects are built.
+ifdef PROGRAM
+PROGRAM_C := $(PROGRAM).c
+endif
+
+SRCS = $(COMMON_SRCS) $(PROGRAM_C) $(EXTRA_SRCS)
+
+C_SRCS = $(filter %.c, $(SRCS))
+ASM_SRCS = $(filter %.S, $(SRCS))
+# The pattern and the text are separate arguments of $(filter ...), so the
+# comma is required.
+CPLUSPLUS = $(filter %.cpp, $(SRCS))
+
+CC = riscv32-unknown-elf-gcc
+
+# Derive the binutils prefix from the compiler name (strip the "gcc" suffix).
+CROSS_COMPILE = $(patsubst %-gcc,%-,$(CC))
+
+OBJCOPY ?= $(CROSS_COMPILE)objcopy
+OBJDUMP ?= $(CROSS_COMPILE)objdump
+
+LINKER_SCRIPT ?= $(COMMON_DIR)/link.ld
+CRT ?= $(COMMON_DIR)/crt0.S
+CFLAGS ?= -march=$(ARCH) -mabi=ilp32 -static -mcmodel=medany -Wall -g -O3\
+	-fvisibility=hidden -nostartfiles -ffreestanding $(PROGRAM_CFLAGS)
+
+OBJS := ${C_SRCS:.c=.o} ${ASM_SRCS:.S=.o} ${CRT:.S=.o}
+
+# One dependency file per object, written by the -MMD flag in the compile rules.
+DEPS = $(OBJS:%.o=%.d)
+ifdef PROGRAM
+OUTFILES := $(PROGRAM).elf $(PROGRAM).vmem $(PROGRAM).bin
+else
+OUTFILES := $(OBJS)
+endif
+
+.PHONY: all clean distclean
+
+all: $(OUTFILES)
+
+ifdef PROGRAM
+$(PROGRAM).elf: $(OBJS) $(LINKER_SCRIPT)
+	$(CC) $(CFLAGS) -T $(LINKER_SCRIPT) $(OBJS) -o $@ $(LIBS)
+
+.PHONY: disassemble
+disassemble: $(PROGRAM).dis
+endif
+
+%.dis: %.elf
+	$(OBJDUMP) -fhSD $^ > $@
+
+# Note: this target requires the srecord package to be installed.
+# XXX: This could be replaced by objcopy once
+# https://sourceware.org/bugzilla/show_bug.cgi?id=19921
+# is widely available.
+%.vmem: %.bin
+	srec_cat $^ -binary -offset 0x0000 -byte-swap 4 -o $@ -vmem
+
+%.bin: %.elf
+	$(OBJCOPY) -O binary $^ $@
+
+%.o: %.c
+	$(CC) $(CFLAGS) -MMD -c $(INCS) -o $@ $<
+
+%.o: %.S
+	$(CC) $(CFLAGS) -MMD -c $(INCS) -o $@ $<
+
+# $(RM) already expands to "rm -f".
+clean:
+	$(RM) $(OBJS) $(DEPS)
+
+distclean: clean
+	$(RM) $(OUTFILES)
+
+# Pull in the generated header dependencies so that editing a header rebuilds
+# the objects that include it. The leading '-' ignores files that do not
+# exist yet (first build, or after "make clean").
+-include $(DEPS)
@@ -0,0 +1,109 @@
+#ifndef CONV2D_H
+#define CONV2D_H
+
+/*
+ * Plain-C quantized 2-D convolution.
+ *
+ * Layouts, as indexed below: inp[row][col][channel],
+ * fil[out_channel][filter_row][filter_col][channel], out[row][col][out_channel].
+ *
+ * in_dim / out_dim  {rows, cols, channels} of the input / output.
+ * fil_dim           {out_channels, filter_rows, filter_cols, channels}.
+ * bias              one accumulator start value per output channel.
+ * strides           step between neighbouring output positions, both axes.
+ * pad               pad[0] is the row offset and pad[2] the column offset
+ *                   subtracted from the window origin; pad[1] and pad[3]
+ *                   are not read.
+ * bias_shift_mode   not read by this implementation.
+ * quantized_multiplier, out_shift_rl
+ *                   each sum is stored as
+ *                   (quantized_multiplier * sum + round) >> out_shift_rl,
+ *                   clamped to [0, 255].
+ *
+ * Filter taps that fall outside the input contribute nothing.
+ * out_shift_rl must be in [0, 31]; a negative value is still an invalid
+ * shift count.
+ */
+void conv2(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][fil_dim[3]], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode, const int quantized_multiplier, const int out_shift_rl){
+
+    /* Round-to-nearest term. With a shift of 0 there is nothing to round,
+     * and 1 << (0 - 1) would be an undefined negative shift. */
+    const int rounding = (out_shift_rl > 0) ? (1 << (out_shift_rl - 1)) : 0;
+
+    for (int oc = 0; oc < out_dim[2]; oc++) {          /* output depth  */
+        for (int oy = 0; oy < out_dim[0]; oy++) {      /* output height */
+            const int y0 = oy * strides - pad[0];
+            for (int ox = 0; ox < out_dim[1]; ox++) {  /* output width  */
+                const int x0 = ox * strides - pad[2];
+                int acc = bias[oc];
+
+                for (int fy = 0; fy < fil_dim[1]; fy++) {
+                    const int iy = y0 + fy;
+                    if (iy < 0 || iy >= in_dim[0]) {
+                        continue;  /* whole filter row lies in the padding */
+                    }
+                    for (int fx = 0; fx < fil_dim[2]; fx++) {
+                        const int ix = x0 + fx;
+                        if (ix < 0 || ix >= in_dim[1]) {
+                            continue;
+                        }
+                        for (int c = 0; c < fil_dim[3]; c++) {
+                            acc += inp[iy][ix][c] * fil[oc][fy][fx][c];
+                        }
+                    }
+                }
+
+                int q = (quantized_multiplier * acc + rounding) >> out_shift_rl;
+                if (q < 0) {
+                    q = 0;
+                }
+                if (q > 255) {
+                    q = 255;
+                }
+                out[oy][ox][oc] = q;
+            }
+        }
+    }
+}
+
+/*
+ * Max pooling over pool_size x pool_size windows, one channel at a time.
+ *
+ * The window for out[oy][ox][ch] starts at input position
+ * (oy * strides, ox * strides). Window cells outside the input are ignored.
+ * The running maximum starts at 0, so the result is never negative.
+ */
+void maxpool2(int in_dim[3], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int pool_size, int strides) {
+
+    for (int ch = 0; ch < out_dim[2]; ch++) {
+        for (int oy = 0; oy < out_dim[0]; oy++) {
+            const int y0 = oy * strides;
+            for (int ox = 0; ox < out_dim[1]; ox++) {
+                const int x0 = ox * strides;
+                int best = 0;
+
+                for (int dy = 0; dy < pool_size; dy++) {
+                    const int iy = y0 + dy;
+                    for (int dx = 0; dx < pool_size; dx++) {
+                        const int ix = x0 + dx;
+                        if (iy < 0 || ix < 0 || iy >= in_dim[0] || ix >= in_dim[1]) {
+                            continue;
+                        }
+                        const int sample = inp[iy][ix][ch];
+                        if (sample > best) {
+                            best = sample;
+                        }
+                    }
+                }
+                out[oy][ox][ch] = best;
+            }
+        }
+    }
+}
+
+/*
+ * Average pooling over pool_size x pool_size windows, one channel at a time.
+ *
+ * The window for out[oy][ox][ch] starts at input position
+ * (oy * strides, ox * strides). Cells outside the input add nothing to the
+ * sum, but the divisor is always pool_size * pool_size (integer division).
+ */
+void avgpool2(int in_dim[3], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int pool_size, int strides) {
+
+    for (int ch = 0; ch < out_dim[2]; ch++) {
+        for (int oy = 0; oy < out_dim[0]; oy++) {
+            const int y0 = oy * strides;
+            for (int ox = 0; ox < out_dim[1]; ox++) {
+                const int x0 = ox * strides;
+                int sum = 0;
+
+                for (int dy = 0; dy < pool_size; dy++) {
+                    const int iy = y0 + dy;
+                    for (int dx = 0; dx < pool_size; dx++) {
+                        const int ix = x0 + dx;
+                        if (iy < 0 || ix < 0 || iy >= in_dim[0] || ix >= in_dim[1]) {
+                            continue;
+                        }
+                        sum += inp[iy][ix][ch];
+                    }
+                }
+                out[oy][ox][ch] = sum / (pool_size * pool_size);
+            }
+        }
+    }
+}
+
+/*
+ * Copies a [rows][cols][channels] tensor into a flat array, channel by
+ * channel: all of channel 0 in row-major order, then channel 1, and so on.
+ * out must have room for in_dim[0] * in_dim[1] * in_dim[2] values.
+ */
+void flatten(int in_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], int out[]){
+
+    int *dst = out;
+
+    for (int ch = 0; ch < in_dim[2]; ch++) {
+        for (int row = 0; row < in_dim[0]; row++) {
+            for (int col = 0; col < in_dim[1]; col++) {
+                *dst++ = inp[row][col][ch];
+            }
+        }
+    }
+}
+
+#endif  /* CONV2D_H */
@@ -0,0 +1,358 @@
+#ifndef CONV2D_OPT_H
+#define CONV2D_OPT_H
+
+/*
+ * Convolution layer issued through the custom neur_init / nn_mac_8b /
+ * neur_res instructions.
+ *
+ * For every output element out[row][col][oc] the function
+ *   1. executes neur_init with bias[oc] and bias_shift_mode[oc],
+ *   2. for every filter tap inside the input and every input word
+ *      inp[y][x][c], executes nn_mac_8b four times, pairing that word with
+ *      the weight words fil[oc][fy][fx][4*c + 0..3],
+ *   3. executes neur_res with quantized_multiplier and out_shift_rl and
+ *      stores the register that instruction writes.
+ * pad[0] / pad[2] are the row / column offsets of the window origin; taps
+ * outside the input are skipped.
+ * NOTE(review): the running sum is never passed back as an operand, so it
+ * presumably lives in hardware state between instructions - confirm against
+ * the instruction definitions.
+ */
+void conv2_8bits(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][fil_dim[3] << 2], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
+
+    int acc;  /* destination register of every custom instruction */
+
+    for (int oc = 0; oc < out_dim[2]; oc++) {          /* output depth  */
+        for (int oy = 0; oy < out_dim[0]; oy++) {      /* output height */
+            const int y0 = oy * strides - pad[0];
+            for (int ox = 0; ox < out_dim[1]; ox++) {  /* output width  */
+                const int x0 = ox * strides - pad[2];
+                const int bias_word = bias[oc];
+                asm volatile("neur_init %0, %1, %2\n":"=r"(acc):"r"(bias_word),"r"(bias_shift_mode[oc]):);
+
+                for (int fy = 0; fy < fil_dim[1]; fy++) {
+                    const int iy = y0 + fy;
+                    if (iy < 0 || iy >= in_dim[0]) {
+                        continue;
+                    }
+                    for (int fx = 0; fx < fil_dim[2]; fx++) {
+                        const int ix = x0 + fx;
+                        if (ix < 0 || ix >= in_dim[1]) {
+                            continue;
+                        }
+                        for (int c = 0; c < fil_dim[3]; c++) {
+                            const int in_word = inp[iy][ix][c];
+                            /* four weight words per input word */
+                            for (int part = 0; part < 4; part++) {
+                                const int weight_word = fil[oc][fy][fx][4 * c + part];
+                                asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(acc):"r"(weight_word),"r"(in_word):);
+                            }
+                        }
+                    }
+                }
+
+                asm volatile("neur_res %0, %1, %2\n":"=r"(acc):"r"(quantized_multiplier),"r"(out_shift_rl):);
+                out[oy][ox][oc] = acc;
+            }
+        }
+    }
+}
+
+/*
+ * Single-input-word variant of the nn_mac_8b convolution: only inp[y][x][0]
+ * and the weight words fil[oc][fy][fx][0..3] are read for each filter tap.
+ *
+ * Per output element: neur_init with bias[oc] and bias_shift_mode[oc], four
+ * nn_mac_8b per tap inside the input, then neur_res with
+ * quantized_multiplier and out_shift_rl, whose destination register is
+ * stored in out[row][col][oc]. pad[0] / pad[2] offset the window origin;
+ * taps outside the input are skipped.
+ */
+void conv2_8bits_1ch(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][fil_dim[3] << 2], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
+
+    int acc;  /* destination register of every custom instruction */
+
+    for (int oc = 0; oc < out_dim[2]; oc++) {          /* output depth  */
+        for (int oy = 0; oy < out_dim[0]; oy++) {      /* output height */
+            const int y0 = oy * strides - pad[0];
+            for (int ox = 0; ox < out_dim[1]; ox++) {  /* output width  */
+                const int x0 = ox * strides - pad[2];
+                const int bias_word = bias[oc];
+                asm volatile("neur_init %0, %1, %2\n":"=r"(acc):"r"(bias_word),"r"(bias_shift_mode[oc]):);
+
+                for (int fy = 0; fy < fil_dim[1]; fy++) {
+                    const int iy = y0 + fy;
+                    if (iy < 0 || iy >= in_dim[0]) {
+                        continue;
+                    }
+                    for (int fx = 0; fx < fil_dim[2]; fx++) {
+                        const int ix = x0 + fx;
+                        if (ix < 0 || ix >= in_dim[1]) {
+                            continue;
+                        }
+                        const int in_word = inp[iy][ix][0];
+                        for (int part = 0; part < 4; part++) {
+                            const int weight_word = fil[oc][fy][fx][part];
+                            asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(acc):"r"(weight_word),"r"(in_word):);
+                        }
+                    }
+                }
+
+                asm volatile("neur_res %0, %1, %2\n":"=r"(acc):"r"(quantized_multiplier),"r"(out_shift_rl):);
+                out[oy][ox][oc] = acc;
+            }
+        }
+    }
+}
+
+/*
+ * Convolution layer issued through neur_init / nn_mac_4b / neur_res.
+ *
+ * Per output element: neur_init with bias[oc] and bias_shift_mode[oc]; for
+ * every filter tap inside the input and every input word inp[y][x][c], two
+ * nn_mac_4b instructions pairing that word with fil[oc][fy][fx][2*c] and
+ * fil[oc][fy][fx][2*c + 1]; finally neur_res with quantized_multiplier and
+ * out_shift_rl, whose destination register is stored in out[row][col][oc].
+ * pad[0] / pad[2] offset the window origin; taps outside the input are
+ * skipped.
+ */
+void conv2_4bits(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][fil_dim[3] << 1], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
+
+    int acc;  /* destination register of every custom instruction */
+
+    for (int oc = 0; oc < out_dim[2]; oc++) {          /* output depth  */
+        for (int oy = 0; oy < out_dim[0]; oy++) {      /* output height */
+            const int y0 = oy * strides - pad[0];
+            for (int ox = 0; ox < out_dim[1]; ox++) {  /* output width  */
+                const int x0 = ox * strides - pad[2];
+                const int bias_word = bias[oc];
+                asm volatile("neur_init %0, %1, %2\n":"=r"(acc):"r"(bias_word),"r"(bias_shift_mode[oc]):);
+
+                for (int fy = 0; fy < fil_dim[1]; fy++) {
+                    const int iy = y0 + fy;
+                    if (iy < 0 || iy >= in_dim[0]) {
+                        continue;
+                    }
+                    for (int fx = 0; fx < fil_dim[2]; fx++) {
+                        const int ix = x0 + fx;
+                        if (ix < 0 || ix >= in_dim[1]) {
+                            continue;
+                        }
+                        for (int c = 0; c < fil_dim[3]; c++) {
+                            const int in_word = inp[iy][ix][c];
+                            /* two weight words per input word */
+                            for (int part = 0; part < 2; part++) {
+                                const int weight_word = fil[oc][fy][fx][2 * c + part];
+                                asm volatile("nn_mac_4b %0, %1,%2\n":"=r"(acc):"r"(weight_word),"r"(in_word):);
+                            }
+                        }
+                    }
+                }
+
+                asm volatile("neur_res %0, %1, %2\n":"=r"(acc):"r"(quantized_multiplier),"r"(out_shift_rl):);
+                out[oy][ox][oc] = acc;
+            }
+        }
+    }
+}
+
+/*
+ * Single-input-word variant of the nn_mac_4b convolution: only inp[y][x][0]
+ * and the weight words fil[oc][fy][fx][0] and fil[oc][fy][fx][1] are read
+ * for each filter tap.
+ *
+ * Per output element: neur_init with bias[oc] and bias_shift_mode[oc], two
+ * nn_mac_4b per tap inside the input, then neur_res with
+ * quantized_multiplier and out_shift_rl, whose destination register is
+ * stored in out[row][col][oc]. pad[0] / pad[2] offset the window origin;
+ * taps outside the input are skipped.
+ *
+ * The MAC instruction is nn_mac_4b, the same one conv2_4bits uses for this
+ * two-weight-words-per-input-word layout; nn_mac_2b belongs to the layout
+ * with a single weight word per input word.
+ * NOTE(review): confirm against the instruction definitions.
+ */
+void conv2_4bits_1ch(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][fil_dim[3] << 1], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
+
+    int acc;  /* destination register of every custom instruction */
+
+    for (int oc = 0; oc < out_dim[2]; oc++) {          /* output depth  */
+        for (int oy = 0; oy < out_dim[0]; oy++) {      /* output height */
+            const int y0 = oy * strides - pad[0];
+            for (int ox = 0; ox < out_dim[1]; ox++) {  /* output width  */
+                const int x0 = ox * strides - pad[2];
+                const int bias_word = bias[oc];
+                asm volatile("neur_init %0, %1, %2\n":"=r"(acc):"r"(bias_word),"r"(bias_shift_mode[oc]):);
+
+                for (int fy = 0; fy < fil_dim[1]; fy++) {
+                    const int iy = y0 + fy;
+                    if (iy < 0 || iy >= in_dim[0]) {
+                        continue;
+                    }
+                    for (int fx = 0; fx < fil_dim[2]; fx++) {
+                        const int ix = x0 + fx;
+                        if (ix < 0 || ix >= in_dim[1]) {
+                            continue;
+                        }
+                        const int in_word = inp[iy][ix][0];
+                        for (int part = 0; part < 2; part++) {
+                            const int weight_word = fil[oc][fy][fx][part];
+                            asm volatile("nn_mac_4b %0, %1,%2\n":"=r"(acc):"r"(weight_word),"r"(in_word):);
+                        }
+                    }
+                }
+
+                asm volatile("neur_res %0, %1, %2\n":"=r"(acc):"r"(quantized_multiplier),"r"(out_shift_rl):);
+                out[oy][ox][oc] = acc;
+            }
+        }
+    }
+}
+
+/*
+ * Convolution layer issued through neur_init / nn_mac_2b / neur_res.
+ *
+ * Per output element: neur_init with bias[oc] and bias_shift_mode[oc]; for
+ * every filter tap inside the input and every input word inp[y][x][c], one
+ * nn_mac_2b pairing that word with fil[oc][fy][fx][c]; finally neur_res with
+ * quantized_multiplier and out_shift_rl, whose destination register is
+ * stored in out[row][col][oc]. pad[0] / pad[2] offset the window origin;
+ * taps outside the input are skipped.
+ */
+void conv2_2bits(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][fil_dim[3]], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
+
+    int acc;  /* destination register of every custom instruction */
+
+    for (int oc = 0; oc < out_dim[2]; oc++) {          /* output depth  */
+        for (int oy = 0; oy < out_dim[0]; oy++) {      /* output height */
+            const int y0 = oy * strides - pad[0];
+            for (int ox = 0; ox < out_dim[1]; ox++) {  /* output width  */
+                const int x0 = ox * strides - pad[2];
+                const int bias_word = bias[oc];
+                asm volatile("neur_init %0, %1, %2\n":"=r"(acc):"r"(bias_word),"r"(bias_shift_mode[oc]):);
+
+                for (int fy = 0; fy < fil_dim[1]; fy++) {
+                    const int iy = y0 + fy;
+                    if (iy < 0 || iy >= in_dim[0]) {
+                        continue;
+                    }
+                    for (int fx = 0; fx < fil_dim[2]; fx++) {
+                        const int ix = x0 + fx;
+                        if (ix < 0 || ix >= in_dim[1]) {
+                            continue;
+                        }
+                        for (int c = 0; c < fil_dim[3]; c++) {
+                            const int in_word = inp[iy][ix][c];
+                            const int weight_word = fil[oc][fy][fx][c];
+                            asm volatile("nn_mac_2b %0, %1,%2\n":"=r"(acc):"r"(weight_word),"r"(in_word):);
+                        }
+                    }
+                }
+
+                asm volatile("neur_res %0, %1, %2\n":"=r"(acc):"r"(quantized_multiplier),"r"(out_shift_rl):);
+                out[oy][ox][oc] = acc;
+            }
+        }
+    }
+}
+
+
+/*
+ * Single-input-word variant of the nn_mac_2b convolution: the innermost
+ * dimension of both inp and fil is fixed at 1 and only index 0 is read.
+ *
+ * Per output element: neur_init with bias[oc] and bias_shift_mode[oc], one
+ * nn_mac_2b per filter tap inside the input, then neur_res with
+ * quantized_multiplier and out_shift_rl, whose destination register is
+ * stored in out[row][col][oc]. pad[0] / pad[2] offset the window origin;
+ * taps outside the input are skipped.
+ */
+void conv2_2bits_1ch(int in_dim[3], int fil_dim[4], int out_dim[3], int inp[in_dim[0]][in_dim[1]][1], const int fil[fil_dim[0]][fil_dim[1]][fil_dim[2]][1], const int bias[fil_dim[0]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int strides, int pad[4], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
+
+    int acc;  /* destination register of every custom instruction */
+
+    for (int oc = 0; oc < out_dim[2]; oc++) {          /* output depth  */
+        for (int oy = 0; oy < out_dim[0]; oy++) {      /* output height */
+            const int y0 = oy * strides - pad[0];
+            for (int ox = 0; ox < out_dim[1]; ox++) {  /* output width  */
+                const int x0 = ox * strides - pad[2];
+                const int bias_word = bias[oc];
+                asm volatile("neur_init %0, %1, %2\n":"=r"(acc):"r"(bias_word),"r"(bias_shift_mode[oc]):);
+
+                for (int fy = 0; fy < fil_dim[1]; fy++) {
+                    const int iy = y0 + fy;
+                    if (iy < 0 || iy >= in_dim[0]) {
+                        continue;
+                    }
+                    for (int fx = 0; fx < fil_dim[2]; fx++) {
+                        const int ix = x0 + fx;
+                        if (ix < 0 || ix >= in_dim[1]) {
+                            continue;
+                        }
+                        const int in_word = inp[iy][ix][0];
+                        const int weight_word = fil[oc][fy][fx][0];
+                        asm volatile("nn_mac_2b %0, %1,%2\n":"=r"(acc):"r"(weight_word),"r"(in_word):);
+                    }
+                }
+
+                asm volatile("neur_res %0, %1, %2\n":"=r"(acc):"r"(quantized_multiplier),"r"(out_shift_rl):);
+                out[oy][ox][oc] = acc;
+            }
+        }
+    }
+}
+
+/*
+ * Max pooling on words that each hold four independent 8-bit fields.
+ *
+ * For every output position the four byte fields of the input words inside
+ * the pool_size x pool_size window are maximised separately (as unsigned
+ * values, starting from 0) and recombined into one output word. The window
+ * for out[oy][ox][ch] starts at (oy * strides, ox * strides); cells outside
+ * the input are ignored.
+ */
+void maxpool2_compressed(int in_dim[3], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int pool_size, int strides) {
+
+    const uint32_t field_mask[4] = {0xFF000000u, 0x00FF0000u, 0x0000FF00u, 0x000000FFu};
+
+    for (int ch = 0; ch < out_dim[2]; ch++) {
+        for (int oy = 0; oy < out_dim[0]; oy++) {
+            const int y0 = oy * strides;
+            for (int ox = 0; ox < out_dim[1]; ox++) {
+                const int x0 = ox * strides;
+                uint32_t best[4] = {0, 0, 0, 0};
+
+                for (int dy = 0; dy < pool_size; dy++) {
+                    const int iy = y0 + dy;
+                    for (int dx = 0; dx < pool_size; dx++) {
+                        const int ix = x0 + dx;
+                        if (iy < 0 || ix < 0 || iy >= in_dim[0] || ix >= in_dim[1]) {
+                            continue;
+                        }
+                        const uint32_t word = (uint32_t)inp[iy][ix][ch];
+                        /* Fields are compared in place (unshifted); each
+                         * mask isolates one field, so order is preserved. */
+                        for (int f = 0; f < 4; f++) {
+                            const uint32_t field = word & field_mask[f];
+                            if (field > best[f]) {
+                                best[f] = field;
+                            }
+                        }
+                    }
+                }
+
+                const uint32_t packed = best[0] | best[1] | best[2] | best[3];
+                out[oy][ox][ch] = packed;
+            }
+        }
+    }
+}
+
+/*
+ * Average pooling on words that each hold four independent 8-bit fields.
+ *
+ * For every output position the four byte fields of the input words inside
+ * the pool_size x pool_size window are summed separately, each sum is
+ * divided by pool_size * pool_size (integer division) and the four averages
+ * are packed back into one word, most significant field first.
+ *
+ * The window for out[oy][ox][ch] starts at (oy * strides, ox * strides).
+ * Cells outside the input add nothing to the sums but the divisor stays
+ * pool_size * pool_size. pool_size must be non-zero.
+ */
+void avgpool2_compressed(int in_dim[3], int out_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], int out[out_dim[0]][out_dim[1]][out_dim[2]], int pool_size, int strides) {
+
+    for (int ch = 0; ch < out_dim[2]; ch++) {
+        for (int oy = 0; oy < out_dim[0]; oy++) {
+            const int y0 = oy * strides;
+            for (int ox = 0; ox < out_dim[1]; ox++) {
+                const int x0 = ox * strides;
+                int sum[4] = {0, 0, 0, 0};
+
+                for (int dy = 0; dy < pool_size; dy++) {
+                    const int iy = y0 + dy;
+                    for (int dx = 0; dx < pool_size; dx++) {
+                        const int ix = x0 + dx;
+                        if (iy < 0 || ix < 0 || iy >= in_dim[0] || ix >= in_dim[1]) {
+                            continue;
+                        }
+                        const unsigned int word = (unsigned int)inp[iy][ix][ch];
+                        sum[0] += (int)(word >> 24);
+                        sum[1] += (int)((word >> 16) & 0xFFu);
+                        sum[2] += (int)((word >> 8) & 0xFFu);
+                        sum[3] += (int)(word & 0xFFu);
+                    }
+                }
+
+                const int cells = pool_size * pool_size;
+                /* Pack as unsigned: shifting a signed average >= 128 left
+                 * by 24 would overflow into the sign bit, which is
+                 * undefined behaviour. */
+                const unsigned int packed =
+                    ((unsigned int)(sum[0] / cells) << 24) |
+                    ((unsigned int)(sum[1] / cells) << 16) |
+                    ((unsigned int)(sum[2] / cells) << 8) |
+                    (unsigned int)(sum[3] / cells);
+                out[oy][ox][ch] = (int)packed;
+            }
+        }
+    }
+}
+
+/*
+ * Flattens a tensor whose words each hold four 8-bit fields.
+ *
+ * Every input word inp[row][col][k] is treated as four byte lanes, most
+ * significant byte first, giving 4 * in_dim[2] lanes per position. The
+ * lanes are emitted lane by lane (all rows and columns of lane 0 in
+ * row-major order, then lane 1, ...) and every four consecutive bytes of
+ * that stream are packed, most significant first, into one word of out.
+ * out receives in_dim[0] * in_dim[1] * in_dim[2] words.
+ *
+ * The byte stream is staged in a local buffer before out is written, so
+ * the whole input is read before any output word is stored. The buffer
+ * lives on the stack and needs 4 bytes per input word.
+ */
+void flatten(int in_dim[3], int inp[in_dim[0]][in_dim[1]][in_dim[2]], int out[]){
+
+    const int words = in_dim[0] * in_dim[1] * in_dim[2];
+    if (words <= 0) {
+        return;  /* nothing to do; also avoids a zero-length array */
+    }
+
+    unsigned char stream[4 * words];
+    int pos = 0;
+
+    for (int lane = 0; lane < (in_dim[2] << 2); lane++) {
+        const int word_idx = lane >> 2;           /* which packed word  */
+        const int shift = 24 - 8 * (lane & 3);    /* which byte in it   */
+        for (int row = 0; row < in_dim[0]; row++) {
+            for (int col = 0; col < in_dim[1]; col++) {
+                const unsigned int word = (unsigned int)inp[row][col][word_idx];
+                stream[pos++] = (unsigned char)((word >> shift) & 0xFFu);
+            }
+        }
+    }
+
+    for (int i = 0; i < words; i++) {
+        /* Pack as unsigned: a top byte >= 0x80 shifted left by 24 in a
+         * signed int would be undefined behaviour. */
+        const unsigned int packed =
+            ((unsigned int)stream[4 * i] << 24) |
+            ((unsigned int)stream[4 * i + 1] << 16) |
+            ((unsigned int)stream[4 * i + 2] << 8) |
+            (unsigned int)stream[4 * i + 3];
+        out[i] = (int)packed;
+    }
+}
+
+#endif  /* CONV2D_OPT_H */
@@ -0,0 +1,102 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+#include "simple_system_regs.h"
+
+.section .text
+
+default_exc_handler:            /* trampoline targeted by the default vector-table entries */
+  jal x0, simple_exc_handler    /* rd = x0: plain jump, no return address saved; target is not defined in this file */
+
+timer_handler:                  /* trampoline targeted by vector-table entry 7 */
+  jal x0, simple_timer_handler  /* rd = x0: plain jump, no return address saved; target is not defined in this file */
+
+reset_handler:  /* target of the reset vector at offset 0x80; falls through into _start */
+  /* set all registers to zero */
+  mv  x1, x0   /* x1 becomes 0 and is the source for every copy below */
+  mv  x2, x1
+  mv  x3, x1
+  mv  x4, x1
+  mv  x5, x1
+  mv  x6, x1
+  mv  x7, x1
+  mv  x8, x1
+  mv  x9, x1
+  mv x10, x1
+  mv x11, x1
+  mv x12, x1
+  mv x13, x1
+  mv x14, x1
+  mv x15, x1
+  mv x16, x1
+  mv x17, x1
+  mv x18, x1
+  mv x19, x1
+  mv x20, x1
+  mv x21, x1
+  mv x22, x1
+  mv x23, x1
+  mv x24, x1
+  mv x25, x1
+  mv x26, x1
+  mv x27, x1
+  mv x28, x1
+  mv x29, x1
+  mv x30, x1
+  mv x31, x1
+
+  /* stack initialization */
+  la   x2, _stack_start   /* x2 (sp) = _stack_start, a symbol expected from the linker script */
+
+_start:
+  .global _start
+
+  /* clear BSS: zero every word in [_bss_start, _bss_end) */
+  la x26, _bss_start
+  la x27, _bss_end
+
+  /* Addresses are unsigned, so use the unsigned compares. */
+  bgeu x26, x27, zero_loop_end
+
+zero_loop:
+  sw x0, 0(x26)
+  addi x26, x26, 4
+  /* Strictly below: looping while x26 == _bss_end would store one extra
+     word just past the end of BSS. */
+  bltu x26, x27, zero_loop
+zero_loop_end:
+
+
+main_entry:
+  /* jump to main program entry point (argc = argv = 0) */
+  addi x10, x0, 0   /* a0 = 0 (first argument) */
+  addi x11, x0, 0   /* a1 = 0 (second argument) */
+  jal x1, main      /* call main; return address in x1 (ra) */
+
+  /* Halt simulation */
+  li x5, SIM_CTRL_BASE + SIM_CTRL_CTRL   /* address built from constants in simple_system_regs.h */
+  li x6, 1
+  sw x6, 0(x5)      /* store 1 to that control register */
+
+  /* If execution ends up here just put the core to sleep */
+sleep_loop:
+  wfi
+  j sleep_loop      /* loop forever around wfi */
+
+/* =================================================== [ exceptions ] === */
+/* This section has to be down here, since we have to disable rvc for it  */
+
+  .section .vectors, "ax"
+  .option norvc;   /* no compressed encodings: every jal below is 4 bytes, so entry N sits at offset 4*N */
+
+  // All unimplemented interrupts/exceptions go to the default_exc_handler.
+  .org 0x00
+  .rept 7                       /* entries 0..6 (offsets 0x00..0x18) */
+  jal x0, default_exc_handler
+  .endr
+  jal x0, timer_handler         /* entry 7 (offset 0x1C) */
+  .rept 23                      /* entries 8..30 (offsets 0x20..0x78) */
+  jal x0, default_exc_handler
+  .endr
+
+  // reset vector
+  .org 0x80                     /* placed at offset 0x80 from the start of this section */
+  jal x0, reset_handler
@@ -0,0 +1,3 @@
+/home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/crt0.o: \
+ /home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/crt0.S \
+ /home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/simple_system_regs.h
@@ -0,0 +1,28 @@
+#ifndef FULLY_CONNECTED_H
+#define FULLY_CONNECTED_H
+
+#include <stdint.h>
+
+/*
+ * Plain-C quantized fully connected layer.
+ *
+ * For each of the num_outputs neurons the function computes
+ *   sum = bias[i] + sum over j of weights[i][j] * input[j]
+ * and stores (quantized_multiplier * sum + round) >> out_shift_rl, clamped
+ * to [0, 255], in output[i].
+ *
+ * bias_shift_mode is not read by this implementation.
+ * out_shift_rl must be in [0, 31]; a negative value is still an invalid
+ * shift count.
+ */
+void mlp_layer(int input[], int output[], int num_inputs, int num_outputs, const int weights[][num_inputs], const int bias[], const int bias_shift_mode, const int quantized_multiplier, const int out_shift_rl){
+
+    /* Round-to-nearest term. With a shift of 0 there is nothing to round,
+     * and 1 << (0 - 1) would be an undefined negative shift. */
+    const int rounding = (out_shift_rl > 0) ? (1 << (out_shift_rl - 1)) : 0;
+
+    for (int neuron = 0; neuron < num_outputs; neuron++) {
+        int acc = bias[neuron];
+
+        for (int k = 0; k < num_inputs; k++) {
+            acc += weights[neuron][k] * input[k];
+        }
+
+        int q = (quantized_multiplier * acc + rounding) >> out_shift_rl;
+        if (q < 0) {
+            q = 0;
+        }
+        if (q > 255) {
+            q = 255;
+        }
+        output[neuron] = q;
+    }
+}
+#endif /* FULLY_CONNECTED_H */
@@ -0,0 +1,77 @@
+#ifndef FULLY_CONNECTED_OPT_H
+#define FULLY_CONNECTED_OPT_H
+
+#include <stdint.h>
+
+/*
+ * Fully connected layer issued through neur_init / nn_mac_2b / neur_res.
+ *
+ * Per neuron i: neur_init with bias[i] and bias_shift_mode[i]; one
+ * nn_mac_2b per input word, pairing weights[i][j] with input[j]; finally
+ * neur_res with quantized_multiplier and out_shift_rl, whose destination
+ * register is stored in output[i]. The register written by nn_mac_2b is
+ * not used by the C code.
+ */
+void mlp_layer_2bits(int input[], int output[], int num_inputs, int num_outputs, const int weights[][num_inputs], const int bias[], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
+
+    int result;   /* written by neur_init and neur_res */
+    int mac_out;  /* written by nn_mac_2b, never read  */
+
+    for (int neuron = 0; neuron < num_outputs; neuron++) {
+        const int bias_word = bias[neuron];
+        asm volatile("neur_init %0, %1, %2\n":"=r"(result):"r"(bias_word),"r"(bias_shift_mode[neuron]):);
+
+        for (int k = 0; k < num_inputs; k++) {
+            const int weight_word = weights[neuron][k];
+            const int in_word = input[k];
+            asm volatile("nn_mac_2b %0, %1,%2\n":"=r"(mac_out):"r"(weight_word),"r"(in_word):);
+        }
+
+        asm volatile("neur_res %0, %1, %2\n":"=r"(result):"r"(quantized_multiplier),"r"(out_shift_rl):);
+        output[neuron] = result;
+    }
+}
+
+/*
+ * Fully connected layer issued through neur_init / nn_mac_4b / neur_res.
+ *
+ * Per neuron i: neur_init with bias[i] and bias_shift_mode[i]; two
+ * nn_mac_4b per input word, pairing input[j] with weights[i][2*j] and
+ * weights[i][2*j + 1]; finally neur_res with quantized_multiplier and
+ * out_shift_rl, whose destination register is stored in output[i]. The
+ * register written by nn_mac_4b is not used by the C code.
+ */
+void mlp_layer_4bits(int input[], int output[], int num_inputs, int num_outputs, const int weights[][num_inputs << 1], const int bias[], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
+
+    int result;   /* written by neur_init and neur_res */
+    int mac_out;  /* written by nn_mac_4b, never read  */
+
+    for (int neuron = 0; neuron < num_outputs; neuron++) {
+        const int bias_word = bias[neuron];
+        asm volatile("neur_init %0, %1, %2\n":"=r"(result):"r"(bias_word),"r"(bias_shift_mode[neuron]):);
+
+        for (int k = 0; k < num_inputs; k++) {
+            const int in_word = input[k];
+            /* two weight words per input word */
+            for (int part = 0; part < 2; part++) {
+                const int weight_word = weights[neuron][2 * k + part];
+                asm volatile("nn_mac_4b %0, %1,%2\n":"=r"(mac_out):"r"(weight_word),"r"(in_word):);
+            }
+        }
+
+        asm volatile("neur_res %0, %1, %2\n":"=r"(result):"r"(quantized_multiplier),"r"(out_shift_rl):);
+        output[neuron] = result;
+    }
+}
+
+/*
+ * Fully connected layer issued through neur_init / nn_mac_8b / neur_res.
+ *
+ * Per neuron i: neur_init with bias[i] and bias_shift_mode[i]; four
+ * nn_mac_8b per input word, pairing input[j] with weights[i][4*j + 0..3];
+ * finally neur_res with quantized_multiplier and out_shift_rl, whose
+ * destination register is stored in output[i]. The register written by
+ * nn_mac_8b is not used by the C code.
+ */
+void mlp_layer_8bits(int input[], int output[], int num_inputs, int num_outputs, const int weights[][num_inputs << 2], const int bias[], const int bias_shift_mode[], const int quantized_multiplier, const int out_shift_rl){
+
+    int result;   /* written by neur_init and neur_res */
+    int mac_out;  /* written by nn_mac_8b, never read  */
+
+    for (int neuron = 0; neuron < num_outputs; neuron++) {
+        const int bias_word = bias[neuron];
+        asm volatile("neur_init %0, %1, %2\n":"=r"(result):"r"(bias_word),"r"(bias_shift_mode[neuron]):);
+
+        for (int k = 0; k < num_inputs; k++) {
+            const int in_word = input[k];
+            /* four weight words per input word */
+            for (int part = 0; part < 4; part++) {
+                const int weight_word = weights[neuron][4 * k + part];
+                asm volatile("nn_mac_8b %0, %1,%2\n":"=r"(mac_out):"r"(weight_word),"r"(in_word):);
+            }
+        }
+
+        asm volatile("neur_res %0, %1, %2\n":"=r"(result):"r"(quantized_multiplier),"r"(out_shift_rl):);
+        output[neuron] = result;
+    }
+}
+
+#endif /* FULLY_CONNECTED_OPT_H */
@@ -0,0 +1,91 @@
+/* Copyright lowRISC contributors.
+   Licensed under the Apache License, Version 2.0, see LICENSE for details.
+   SPDX-License-Identifier: Apache-2.0 */
+
+OUTPUT_ARCH(riscv)
+
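+/* Change this if you'd like different sizes. Arty A7-100(35) has a maximum of 607.5KB(225KB)
+   BRAM space. The configuration below was described as the maximum BRAM capacity of the
+   Arty A7-35 while letting CoreMark run (.vmem of 152.8KB).
+   Note: the region lengths in MEMORY below (0x750000 and 0x200000 bytes) are much larger
+   than those BRAM figures, so this description may be out of date.
+*/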
+
+MEMORY
+{
+    ram         : ORIGIN = 0x00100000, LENGTH = 0x750000 /* code and data: 0x00100000 .. 0x0084FFFF */
+    stack       : ORIGIN = 0x00850000, LENGTH = 0x200000  /* starts right after ram: 0x00850000 .. 0x00A4FFFF */
+}
+
+/* Stack information variables */
+_min_stack      = 0x10000;   /* 64 KiB (0x10000) - minimum stack space to reserve */
+_stack_len     = LENGTH(stack);
+_stack_start   = ORIGIN(stack) + LENGTH(stack);   /* one past the highest address of the stack region */
+
+_entry_point = _vectors_start + 0x80;   /* 0x80 bytes into the .vectors output section */
+ENTRY(_entry_point)
+
+/* The tohost address is used by Spike for a magic "stop me now" message. This
+   is set to equal SIM_CTRL_CTRL (see simple_system_regs.h), which has that
+   effect in simple_system simulations. Note that it must be 8-byte aligned.
+
+   We don't read data back from Spike, so fromhost is set to some dummy value:
+   we place it just above the top of the stack.
+ */
+tohost   = 0x20008;
+fromhost = _stack_start + 0x10;
+
+SECTIONS
+{
+    .vectors :   /* first output section, so it starts at ORIGIN(ram) */
+    {
+        . = ALIGN(4);
+		_vectors_start = .;
+        KEEP(*(.vectors))   /* KEEP: never garbage-collect the vector table */
+		_vectors_end = .;
+    } > ram
+
+    .text : {
+        . = ALIGN(4);
+        *(.text)
+        *(.text.*)
+    }  > ram
+
+    .rodata : {
+        . = ALIGN(4);
+        /* Small RO data before large RO data */
+        *(.srodata)
+        *(.srodata.*)
+        *(.rodata);
+        *(.rodata.*)
+    } > ram
+
+    .data : {
+        . = ALIGN(4);
+        /* Small data before large data */
+        *(.sdata)
+        *(.sdata.*)
+        *(.data);
+        *(.data.*)
+    } > ram
+
+    .bss :
+    {
+        . = ALIGN(4);
+        _bss_start = .;   /* word-aligned start of the zero-initialised area */
+        /* Small BSS before large BSS */
+        *(.sbss)
+        *(.sbss.*)
+        *(.bss)
+        *(.bss.*)
+        *(COMMON)
+        _bss_end = .;     /* first address after the area; not re-aligned to 4 here */
+    } > ram
+
+    /* ensure there is enough room for stack */
+    .stack (NOLOAD): {
+        . = ALIGN(4);
+        . = . + _min_stack ;   /* reserve _min_stack bytes in the stack region */
+        . = ALIGN(4);
+        stack = . ;            /* both symbols mark the end of the reserved area */
+        _stack = . ;
+    } > stack
+}
@@ -0,0 +1,185 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+#include "simple_system_common.h"
+
+// Sends one character to the simulator output register and returns the
+// argument unchanged.
+int putchar(int c) {
+  const unsigned char byte = (unsigned char)c;
+
+  DEV_WRITE(SIM_CTRL_BASE + SIM_CTRL_OUT, byte);
+  return c;
+}
+
+// Writes every character of str (up to, not including, the terminating NUL)
+// through putchar. No newline is appended. Always returns 0.
+int puts(const char *str) {
+  for (const char *cursor = str; *cursor != '\0'; ++cursor) {
+    putchar(*cursor);
+  }
+  return 0;
+}
+
+// Prints h as exactly eight upper-case hexadecimal digits, most significant
+// nibble first.
+void puthex(uint32_t h) {
+  for (int shift = 28; shift >= 0; shift -= 4) {
+    int nibble = (h >> shift) & 0xF;
+    putchar(nibble < 10 ? '0' + nibble : 'A' - 10 + nibble);
+  }
+}
+
+// Requests simulation stop by writing 1 to the simulator control register.
+void sim_halt() {
+  DEV_WRITE(SIM_CTRL_BASE + SIM_CTRL_CTRL, 1);
+}
+
+// Zeroes every performance counter by writing x0 to minstret, mcycle and
+// mhpmcounter3..mhpmcounter31, followed by the corresponding upper-half
+// ("h") CSRs of each.
+void pcount_reset() {
+  asm volatile(
+      // Lower 32 bits of each counter
+      "csrw minstret,       x0\n"
+      "csrw mcycle,         x0\n"
+      "csrw mhpmcounter3,   x0\n"
+      "csrw mhpmcounter4,   x0\n"
+      "csrw mhpmcounter5,   x0\n"
+      "csrw mhpmcounter6,   x0\n"
+      "csrw mhpmcounter7,   x0\n"
+      "csrw mhpmcounter8,   x0\n"
+      "csrw mhpmcounter9,   x0\n"
+      "csrw mhpmcounter10,  x0\n"
+      "csrw mhpmcounter11,  x0\n"
+      "csrw mhpmcounter12,  x0\n"
+      "csrw mhpmcounter13,  x0\n"
+      "csrw mhpmcounter14,  x0\n"
+      "csrw mhpmcounter15,  x0\n"
+      "csrw mhpmcounter16,  x0\n"
+      "csrw mhpmcounter17,  x0\n"
+      "csrw mhpmcounter18,  x0\n"
+      "csrw mhpmcounter19,  x0\n"
+      "csrw mhpmcounter20,  x0\n"
+      "csrw mhpmcounter21,  x0\n"
+      "csrw mhpmcounter22,  x0\n"
+      "csrw mhpmcounter23,  x0\n"
+      "csrw mhpmcounter24,  x0\n"
+      "csrw mhpmcounter25,  x0\n"
+      "csrw mhpmcounter26,  x0\n"
+      "csrw mhpmcounter27,  x0\n"
+      "csrw mhpmcounter28,  x0\n"
+      "csrw mhpmcounter29,  x0\n"
+      "csrw mhpmcounter30,  x0\n"
+      "csrw mhpmcounter31,  x0\n"
+      // Upper 32 bits of each counter
+      "csrw minstreth,      x0\n"
+      "csrw mcycleh,        x0\n"
+      "csrw mhpmcounter3h,  x0\n"
+      "csrw mhpmcounter4h,  x0\n"
+      "csrw mhpmcounter5h,  x0\n"
+      "csrw mhpmcounter6h,  x0\n"
+      "csrw mhpmcounter7h,  x0\n"
+      "csrw mhpmcounter8h,  x0\n"
+      "csrw mhpmcounter9h,  x0\n"
+      "csrw mhpmcounter10h, x0\n"
+      "csrw mhpmcounter11h, x0\n"
+      "csrw mhpmcounter12h, x0\n"
+      "csrw mhpmcounter13h, x0\n"
+      "csrw mhpmcounter14h, x0\n"
+      "csrw mhpmcounter15h, x0\n"
+      "csrw mhpmcounter16h, x0\n"
+      "csrw mhpmcounter17h, x0\n"
+      "csrw mhpmcounter18h, x0\n"
+      "csrw mhpmcounter19h, x0\n"
+      "csrw mhpmcounter20h, x0\n"
+      "csrw mhpmcounter21h, x0\n"
+      "csrw mhpmcounter22h, x0\n"
+      "csrw mhpmcounter23h, x0\n"
+      "csrw mhpmcounter24h, x0\n"
+      "csrw mhpmcounter25h, x0\n"
+      "csrw mhpmcounter26h, x0\n"
+      "csrw mhpmcounter27h, x0\n"
+      "csrw mhpmcounter28h, x0\n"
+      "csrw mhpmcounter29h, x0\n"
+      "csrw mhpmcounter30h, x0\n"
+      "csrw mhpmcounter31h, x0\n");
+}
+
+// Returns the current value of the mepc CSR.
+unsigned int get_mepc() {
+  uint32_t mepc;
+
+  __asm__ volatile("csrr %0, mepc;" : "=r"(mepc));
+
+  return mepc;
+}
+
+// Returns the current value of the mcause CSR.
+unsigned int get_mcause() {
+  uint32_t mcause;
+
+  __asm__ volatile("csrr %0, mcause;" : "=r"(mcause));
+
+  return mcause;
+}
+
+// Returns the current value of the mtval CSR.
+unsigned int get_mtval() {
+  uint32_t mtval;
+
+  __asm__ volatile("csrr %0, mtval;" : "=r"(mtval));
+
+  return mtval;
+}
+
+// Exception handler: prints a banner followed by MEPC, MCAUSE and MTVAL in
+// hex, asks the simulator to halt, and then never returns.
+void simple_exc_handler(void) {
+  // Banner
+  puts("EXCEPTION!!!\n");
+  puts("============\n");
+
+  // Faulting PC
+  puts("MEPC:   0x");
+  puthex(get_mepc());
+
+  // Trap cause
+  puts("\nMCAUSE: 0x");
+  puthex(get_mcause());
+
+  // Trap value
+  puts("\nMTVAL:  0x");
+  puthex(get_mtval());
+  putchar('\n');
+
+  sim_halt();
+
+  // Spin in case the halt request does not stop execution
+  for (;;) {
+  }
+}
+
+// Count of timer interrupts handled; cleared by timer_enable() and incremented
+// by simple_timer_handler(). volatile because it is modified from the
+// interrupt handler.
+volatile uint64_t time_elapsed;
+// Interval, in timer ticks, that simple_timer_handler() adds to the current
+// time when re-arming the compare register; set by timer_enable().
+uint64_t time_increment;
+
+// Arms the timer compare register to fire time_base ticks after the current
+// timer value.
+inline static void increment_timecmp(uint64_t time_base) {
+  timecmp_update(timer_read() + time_base);
+}
+
+// Starts periodic timer interrupts: clears the interrupt count, records the
+// period, arms the first compare event and unmasks the interrupt.
+void timer_enable(uint64_t time_base) {
+  const uint32_t timer_irq_enable = 0x80;   // set in mie: timer interrupt
+  const uint32_t global_irq_enable = 0x8;   // set in mstatus: global interrupt
+
+  time_elapsed = 0;
+  time_increment = time_base;
+
+  // Program the first compare value before any interrupt is unmasked
+  increment_timecmp(time_base);
+
+  asm volatile("csrs  mie, %0\n" : : "r"(timer_irq_enable));
+  asm volatile("csrs  mstatus, %0\n" : : "r"(global_irq_enable));
+}
+
+// Masks the timer interrupt by clearing the 0x80 bit in mie.
+void timer_disable(void) {
+  asm volatile("csrc  mie, %0\n" : : "r"(0x80));
+}
+
+// Returns the 64-bit timer value assembled from its two 32-bit registers.
+uint64_t timer_read(void) {
+  uint32_t hi;
+  uint32_t lo;
+
+  // Sample high, low, then high again. If the high word changed, the low word
+  // wrapped while we were reading, so take a fresh sample.
+  for (;;) {
+    hi = DEV_READ(TIMER_BASE + TIMER_MTIMEH, 0);
+    lo = DEV_READ(TIMER_BASE + TIMER_MTIME, 0);
+    if (hi == DEV_READ(TIMER_BASE + TIMER_MTIMEH, 0)) {
+      break;
+    }
+  }
+
+  return ((uint64_t)hi << 32) | lo;
+}
+
+// Programs the 64-bit timer compare value using three 32-bit register writes.
+// The order of the writes matters; do not rearrange them.
+void timecmp_update(uint64_t new_time) {
+  // Park the low word at all-ones first. NOTE(review): presumably this keeps
+  // the intermediate 64-bit compare value from dropping below the intended
+  // one between the two writes that follow (the update sequence suggested by
+  // the RISC-V privileged spec for 32-bit cores) - confirm.
+  DEV_WRITE(TIMER_BASE + TIMER_MTIMECMP, -1);
+  // High word, then the real low word (implicitly truncated to 32 bits)
+  DEV_WRITE(TIMER_BASE + TIMER_MTIMECMPH, new_time >> 32);
+  DEV_WRITE(TIMER_BASE + TIMER_MTIMECMP, new_time);
+}
+
+// Returns the number of timer interrupts counted so far.
+//
+// time_elapsed is 64 bits wide and is incremented from simple_timer_handler().
+// On a 32-bit core it is read as two separate word loads, so an interrupt
+// landing between them could produce a value mixing an old and a new half.
+// Take two snapshots and retry until they agree, which guarantees the value
+// returned was not torn by a concurrent increment.
+uint64_t get_elapsed_time(void) {
+  uint64_t first;
+  uint64_t second;
+
+  do {
+    first = time_elapsed;
+    second = time_elapsed;
+  } while (first != second);
+
+  return first;
+}
+
+// Timer interrupt service routine, declared with the compiler's `interrupt`
+// attribute.
+void simple_timer_handler(void) __attribute__((interrupt));
+
+void simple_timer_handler(void) {
+  // Re-arm the compare register one period ahead, then count this interrupt
+  increment_timecmp(time_increment);
+  time_elapsed = time_elapsed + 1;
+}
@@ -0,0 +1,4 @@
+/home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/simple_system_common.o: \
+ /home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/simple_system_common.c \
+ /home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/simple_system_common.h \
+ /home/alex/Desktop/ibex_tools/ibex/examples/sw/simple_system/common/simple_system_regs.h
@@ -0,0 +1,99 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// Include guard: the macro must be defined here, otherwise a second inclusion
+// would redefine the static inline function declared in this header.
+#ifndef SIMPLE_SYSTEM_COMMON_H__
+#define SIMPLE_SYSTEM_COMMON_H__
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include "simple_system_regs.h"
+
+// Stores val to the 32-bit memory-mapped register at address addr.
+#define DEV_WRITE(addr, val) (*((volatile uint32_t *)(addr)) = (val))
+// Loads the 32-bit memory-mapped register at address addr. The val argument
+// is unused; it is kept so existing two-argument call sites keep compiling.
+#define DEV_READ(addr, val) (*((volatile uint32_t *)(addr)))
+// Reads the CSR called name into the lvalue dst.
+#define PCOUNT_READ(name, dst) asm volatile("csrr %0, " #name ";" : "=r"(dst))
+
+/**
+ * Writes character to simulator out log. Signature matches c stdlib function
+ * of the same name.
+ *
+ * @param c Character to output
+ * @returns Character output (never fails so no EOF ever returned)
+ */
+int putchar(int c);
+
+/**
+ * Writes string to simulator out log.  Signature matches c stdlib function of
+ * the same name.
+ *
+ * @param str String to output
+ * @returns 0 always (never fails so no error)
+ */
+int puts(const char *str);
+
+/**
+ * Writes ASCII hex representation of number to simulator out log.
+ *
+ * @param h Number to output in hex
+ */
+void puthex(uint32_t h);
+
+/**
+ * Immediately halts the simulation
+ */
+void sim_halt();
+
+/**
+ * Enables/disables performance counters.  This affects mcycle and minstret as
+ * well as the mhpmcounterN counters.
+ *
+ * @param enable if non-zero enables, otherwise disables
+ */
+static inline void pcount_enable(int enable) {
+  // Default to inhibiting everything; note that cycle is inhibited along with
+  // all the other counters.
+  unsigned int inhibit_mask = 0xFFFFFFFF;
+  if (enable) {
+    inhibit_mask = 0x0;
+  }
+  // The CSR is addressed by number (0x320) rather than by name. It was called
+  // `mucounteren` in the privileged spec v1.9.1, was dropped in v1.10, and
+  // was re-added in v1.11 as `mcountinhibit`. The version of binutils we use
+  // only accepts the old name while LLVM only supports the new one (though
+  // this is changed on trunk to support both), so the numeric value gives
+  // maximum compatibility.
+  asm volatile("csrw  0x320, %0\n" : : "r"(inhibit_mask));
+}
+
+/**
+ * Resets all performance counters.  This affects mcycle and minstret as well
+ * as the mhpmcounterN counters.
+ */
+void pcount_reset();
+
+/**
+ * Enables timer interrupt
+ *
+ * @param time_base Number of time ticks to count before interrupt
+ */
+void timer_enable(uint64_t time_base);
+
+/**
+ * Returns current mtime value
+ */
+uint64_t timer_read(void);
+
+/**
+ * Set a new timer compare value (mtimecmp)
+ *
+ * @param new_time New 64-bit value for the timer compare register
+ */
+void timecmp_update(uint64_t new_time);
+
+/**
+ * Disables timer interrupt
+ */
+void timer_disable(void);
+
+/**
+ * Returns the number of timer interrupts counted since timer_enable() was
+ * last called
+ */
+uint64_t get_elapsed_time(void);
+
+#endif
@@ -0,0 +1,18 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef SIMPLE_SYSTEM_REGS_H__
+#define SIMPLE_SYSTEM_REGS_H__
+
+// Simulator control block: base address and register offsets.
+#define SIM_CTRL_BASE 0x20000
+// Character output register; putchar() writes each character here.
+#define SIM_CTRL_OUT 0x0
+// Control register; sim_halt() writes 1 here to stop the simulation.
+#define SIM_CTRL_CTRL 0x8
+
+// Timer block: base address and register offsets.
+#define TIMER_BASE 0x30000
+// Low and high 32-bit words of the 64-bit timer value (see timer_read()).
+#define TIMER_MTIME 0x0
+#define TIMER_MTIMEH 0x4
+// Low and high 32-bit words of the 64-bit compare value (see timecmp_update()).
+#define TIMER_MTIMECMP 0x8
+#define TIMER_MTIMECMPH 0xC
+
+#endif  // SIMPLE_SYSTEM_REGS_H__