upload

2020-09-16 11:50:53 -07:00
commit a61dda4612
97 changed files with 15410 additions and 0 deletions
@@ -0,0 +1,263 @@
+import acl
+import numpy as np
+import datetime
+from atlas_utils.utils import *
+from atlas_utils.acl_image import AclImage
+
+
+class Model(object):
+    def __init__(self, acl_resource, model_path):
+        self._run_mode = acl_resource.run_mode
+        self.model_path = model_path    # string
+        self.model_id = None            # pointer
+        self.input_dataset = None
+        self.output_dataset = None
+        self._output_info = []
+        self.model_desc = None          # pointer when using
+        self._init_resource()
+        
+
+    def __del__(self):
+        if self.input_dataset:
+            self._release_dataset(self.input_dataset)        
+        if self.output_dataset:
+            self._release_dataset(self.output_dataset)
+        if self.model_id:
+            ret = acl.mdl.unload(self.model_id)
+            if ret != ACL_ERROR_NONE:
+                print("acl.mdl.unload error:", ret)
+        if self.model_desc:
+            ret = acl.mdl.destroy_desc(self.model_desc)
+            if ret != ACL_ERROR_NONE:
+                print("acl.mdl.destroy_desc error:", ret)
+        print("Model release source success")
+
+    def _init_resource(self):
+        print("Init model resource")
+        # loading model file
+        self.model_id, ret = acl.mdl.load_from_file(self.model_path)
+        check_ret("acl.mdl.load_from_file", ret)
+        self.model_desc = acl.mdl.create_desc()
+        ret = acl.mdl.get_desc(self.model_desc, self.model_id)
+        check_ret("acl.mdl.get_desc", ret)
+        # obtain model outputs number
+        output_size = acl.mdl.get_num_outputs(self.model_desc)
+        # create model output dataset structure
+        self._gen_output_dataset(output_size)
+        print("[Model] class Model init resource stage success")
+        # obtain shape and type of each output of the model
+        self._get_output_desc(output_size)
+        # create a table to record memory for input data, which can be reused when allocating memory for the input
+        self._init_input_buffer()
+
+        return SUCCESS
+
+    def _get_output_desc(self, output_size):
+        for i in range(output_size):
+            # obtain shape and type of each output
+            dims = acl.mdl.get_output_dims(self.model_desc, i)
+            shape = tuple(dims[0]["dims"])
+            datatype = acl.mdl.get_output_data_type(self.model_desc, i)
+            size = acl.mdl.get_output_size_by_index(self.model_desc, i)
+
+            if datatype == ACL_FLOAT:
+                np_type = np.float32            
+            elif datatype == ACL_INT32:
+                np_type = np.int32
+            elif datatype == ACL_UINT32:
+                np_type = np.uint32
+            else:
+                print("Unspport model output datatype ", datatype)
+                return None
+            # create a numpy array corresponding to outputs, with the same datatype and shape of outputs        
+            output_tensor = np.zeros(size//4, dtype=np_type).reshape(shape)
+            if not output_tensor.flags['C_CONTIGUOUS']:
+                output_tensor = np.ascontiguousarray(output_tensor)
+
+            tensor_ptr = acl.util.numpy_to_ptr(output_tensor)           
+            self._output_info.append({"ptr": tensor_ptr,
+                                      "tensor": output_tensor})            
+
+    def _gen_output_dataset(self, size):
+        print("[Model] create model output dataset:")
+        dataset = acl.mdl.create_dataset()
+        for i in range(size):
+            # allocate device memory for each output
+            size = acl.mdl.get_output_size_by_index(self.model_desc, i)
+            buffer, ret = acl.rt.malloc(size, ACL_MEM_MALLOC_NORMAL_ONLY)
+            check_ret("acl.rt.malloc", ret)
+            # create output data buffer structure, fill allocated memory into the data buffer
+            dataset_buffer = acl.create_data_buffer(buffer, size)
+            #add data buffer to output dataset 
+            _, ret = acl.mdl.add_dataset_buffer(dataset, dataset_buffer)
+            print("malloc output %d, size %d"%(i, size))
+            if ret:
+                # release resource if failed 
+                acl.rt.free(buffer)
+                acl.destroy_data_buffer(dataset)
+                check_ret("acl.destroy_data_buffer", ret)
+        self.output_dataset = dataset
+        print("[Model] create model output dataset success")
+
+    def _init_input_buffer(self):
+        # create a table recording the input data memory allocated for users
+        # currently, the numpy data needs to be copied to device for memory allocation only when the input is numpy array
+        self._input_num = acl.mdl.get_num_inputs(self.model_desc)
+        self._input_buffer = []
+        for i in range(self._input_num):
+            # none of inputs is allocated memory initially 
+            item = {"addr":None, "size":0}
+            self._input_buffer.append(item)
+
+    def _gen_input_dataset(self, input_list):
+        # organize input dataset structure 
+        ret = SUCCESS
+        # return if the input number does not match model requirements
+        if len(input_list) != self._input_num:
+            print("Current input data num %d unequal to"
+                  " model input num %d"%(len(input_list), self._input_num))
+            return FAILED
+
+        self.input_dataset = acl.mdl.create_dataset()
+        for i in range(self._input_num):
+            item = input_list[i]
+            # parse input, currently supports AclImage type, Acl pointer and numpy array
+            data, size = self._parse_input_data(item, i)            
+            if (data is None) or (size == 0):
+                # not parse the remaining data when parsing data fails
+                ret = FAILED
+                print("The %d input is invalid"%(i))
+                break
+            # create input dataset buffer structure, fill in input data
+            dataset_buffer = acl.create_data_buffer(data, size)
+            # add dataset buffer to dataset 
+            _, ret = acl.mdl.add_dataset_buffer(self.input_dataset,
+                                                dataset_buffer)
+            if ret:
+                print("Add input dataset buffer failed")
+                acl.destroy_data_buffer(self.input_dataset)
+                ret = FAILED
+                break
+        if ret == FAILED:
+            # release dataset if fails 
+            self._release_dataset(self.input_dataset)
+
+        return ret
+
+    def _parse_input_data(self, input, index):
+        data = None
+        size = 0
+        if isinstance(input, AclImage):
+            # if input data is AclImage, directly return memory pointer and sieze of image
+            # defautly image memory is data on the device 
+            size = input.size
+            data = input.data()
+        elif isinstance(input, np.ndarray):
+            # if input is numpy data, allocate device memory for data and copy data to device
+            # allocated memory can be reused, no need to apply for it everytime
+            ptr = acl.util.numpy_to_ptr(input)
+            size = input.size * input.itemsize
+            data = self._copy_input_to_device(ptr, size, index)
+            if data == None:
+                size = 0
+                print("Copy input to device failed")
+        # if directly input memory pointer, structure should be dict like {"data":, "size":}, and default memory is on the device side
+        elif (isinstance(input, dict) and
+              input.has_key('data') and input.has_key('size')):
+            size = input['size']
+            data = input['data']
+        else:
+            print("Unsupport input")
+
+        return data, size
+
+    def _copy_input_to_device(self, input_ptr, size, index):
+        # allocate device memory for input, and copy data to this memory
+        buffer_item = self._input_buffer[index]
+        data = None
+        # according to the index of the data in the model input, check whether the input has already been allocated memory
+        if buffer_item['addr'] is None:
+            # if not, allocate memory, copy data and record memory for resue
+            data = copy_data_device_to_device(input_ptr, size)
+            if data is None:
+                print("Malloc memory and copy model %dth "
+                      "input to device failed"%(index))
+                return None
+            buffer_item['addr'] = data
+            buffer_item['size'] = size
+        elif size == buffer_item['size']:
+            # if memory has already been allocated for the input, and memory size is consistent with current input data
+            # copy data to this memory for inference 
+            ret = acl.rt.memcpy(buffer_item['addr'], size,
+                                input_ptr, size,
+                                ACL_MEMCPY_DEVICE_TO_DEVICE)
+            if ret != ACL_ERROR_NONE:
+                print("Copy model %dth input to device failed"%(index))
+                return None
+            data = buffer_item['addr']
+        else:
+            # if memory has already been allocated for the input, but memory size is not consistent with current input data
+            # it would be considered as exception. because size of each model input is fixed 
+            print("The model %dth input size %d is change,"
+                  " before is %d"%(index, size, buffer_item['size']))
+            return None
+
+        return data
+
+    def execute(self, input_list):
+        # create dataset object instance for offline model inference
+        ret = self._gen_input_dataset(input_list)
+        if ret == FAILED:
+            print("Gen model input dataset failed")
+            return None
+        # call execute inference data of offline model
+        start = datetime.datetime.now()
+        ret = acl.mdl.execute(self.model_id,
+                              self.input_dataset,
+                              self.output_dataset)
+        if ret != ACL_ERROR_NONE:
+            print("Execute model failed for acl.mdl.execute error ", ret)
+            return None
+        end = datetime.datetime.now()        
+        print("acl.mdl.execute exhaust ", end - start)
+        # release input dataset object instance without releasing input data memory 
+        #self._release_dataset(self.input_dataset)
+        # decode the binary data stream output from inference to numpy array, shape and datatype of the array are consistent with model outputs
+        return self._output_dataset_to_numpy()
+
+    def _output_dataset_to_numpy(self):
+        dataset = []
+        num = acl.mdl.get_dataset_num_buffers(self.output_dataset)
+        # iterative each output 
+        for i in range(num):
+            # obtain memory address from output buffer
+            buffer = acl.mdl.get_dataset_buffer(self.output_dataset, i)
+            data = acl.get_data_buffer_addr(buffer)
+            size = int(acl.get_data_buffer_size(buffer))
+            output_ptr = self._output_info[i]["ptr"]
+            output_tensor = self._output_info[i]["tensor"]
+            ret = acl.rt.memcpy(output_ptr, output_tensor.size*output_tensor.itemsize,                            
+                                data, size, ACL_MEMCPY_DEVICE_TO_DEVICE)
+            if ret != ACL_ERROR_NONE:
+                print("Memcpy inference output to local failed")
+                return None
+
+            dataset.append(output_tensor)
+
+        return dataset  
+
+    def _release_dataset(self, dataset):
+        if not dataset:
+            return
+        print("destroy dataset")
+        num = acl.mdl.get_dataset_num_buffers(dataset)
+        for i in range(num):
+            data_buf = acl.mdl.get_dataset_buffer(dataset, i)
+            if data_buf:
+                ret = acl.destroy_data_buffer(data_buf)
+                if ret != ACL_ERROR_NONE:
+                    print("Destroy data buffer error ", ret)
+        ret = acl.mdl.destroy_dataset(dataset)
+        if ret != ACL_ERROR_NONE:
+            print("Destroy data buffer error ", ret)
+