How to use udmabuf cooperatively with PL DMA and RAM in a Vivado 2018.2 block design?

Question

How to use udmabuf cooperatively with PL DMA and RAM in a Vivado 2018.2 block design?

honorpeter opened this issue 5 years ago · comments

I loaded udmabuf.ko with insmod on a ZCU102 board and it succeeded. But it only provides a DMA buffer; how can I use it cooperatively with PL DMA and RAM in a Vivado 2018.2 block design? Is there a simple and efficient method?

honorpeter · Answer 1 · Fri Jun 14 2019 15:49:19 GMT+0800 (China Standard Time)

By the way, can this be used directly? https://github.com/pulp-platform/udma_external_per

KAWAZOME Ichiro · Answer 2 · Fri Jun 14 2019 16:48:47 GMT+0800 (China Standard Time)

Here is a simple example using Vivado-HLS.

https://github.com/ikwzm/ZynqMP-FPGA-Linux-Example-2-Ultra96

In this example, the following C code is implemented in PL using Vivado-HLS.

/*
 * Element-wise negation kernel for Vivado HLS.
 *
 * Reads `size` ints from `in` and stores the negated value of each one at
 * the same index in `out`.  A `size` of zero or less leaves `out` untouched.
 * Always returns 0.
 *
 * The INTERFACE pragmas request AXI master ports for the two buffers and
 * AXI4-Lite slave registers for `size` and the return/control interface.
 * They refer to the parameters by name, so the parameter names must not
 * change.
 */
int negative(volatile int *in, volatile int *out, int size){
#pragma HLS INTERFACE m_axi depth=10 port=out offset=slave
#pragma HLS INTERFACE m_axi depth=10 port=in  offset=slave
#pragma HLS INTERFACE s_axilite port=size
#pragma HLS INTERFACE s_axilite port=return
    int idx;

    for (idx = 0; idx < size; ++idx){
        /* One read from the input buffer, one write to the output buffer. */
        const int value = in[idx];
        out[idx] = -value;
    }

    return 0;
}

From the PS side, it is driven by a script written in Python.

from udmabuf import Udmabuf
from uio     import Uio
import numpy as np
import time

if __name__ == '__main__':
    # Benchmark and self-check for the PL "negative" kernel:
    #   1. fill the input buffer (udmabuf4) with random words,
    #   2. run the kernel several times through its UIO register window,
    #      timing the setup / transfer / cleanup phases of each run,
    #   3. compare the output buffer (udmabuf5) with np.negative(input).
    uio1       = Uio('uio1')
    regs       = uio1.regs()
    udmabuf4   = Udmabuf('udmabuf4')
    udmabuf5   = Udmabuf('udmabuf5')
    test_dtype = np.uint32
    item_size  = np.dtype(test_dtype).itemsize
    # Number of whole elements that fit in the smaller of the two buffers.
    test_size  = int(min(udmabuf4.buf_size, udmabuf5.buf_size)) // item_size
    test_bytes = test_size * item_size

    # Draw random values over the full range of the buffer dtype, directly in
    # that dtype, instead of generating signed values and letting them wrap.
    rand_high  = np.iinfo(test_dtype).max + 1   # randint's upper bound is exclusive

    udmabuf4_array    = udmabuf4.memmap(dtype=test_dtype, shape=(test_size))
    udmabuf4_array[:] = np.random.randint(0, rand_high, test_size, dtype=test_dtype)
    udmabuf4.set_sync_to_device(0, test_bytes)

    # The output buffer is randomised too; presumably so that stale contents
    # from an earlier run cannot make the final comparison pass by accident.
    udmabuf5_array    = udmabuf5.memmap(dtype=test_dtype, shape=(test_size))
    udmabuf5_array[:] = np.random.randint(0, rand_high, test_size, dtype=test_dtype)
    udmabuf5.set_sync_to_cpu(   0, test_bytes)

    total_setup_time   = 0
    total_cleanup_time = 0
    total_xfer_time    = 0
    total_xfer_size    = 0   # bytes, so the throughput below really is MByte/sec
    count              = 0

    for _ in range(9):

        start_time  = time.time()
        udmabuf4.sync_for_device()
        udmabuf5.sync_for_device()
        # NOTE(review): the offsets below presumably follow the AXI4-Lite
        # register map generated by Vivado HLS for negative() (0x18 = in,
        # 0x20 = out, 0x28 = size, 0x04/0x08/0x0C = interrupt enable/status,
        # 0x00 = control/start) - confirm against the generated driver header.
        # Only the low 32 bits of each physical address are written.
        regs.write_word(0x18, udmabuf4.phys_addr & 0xFFFFFFFF)
        regs.write_word(0x20, udmabuf5.phys_addr & 0xFFFFFFFF)
        regs.write_word(0x28, test_size)
        regs.write_word(0x04, 0x000000001)
        regs.write_word(0x08, 0x000000001)
        regs.write_word(0x0C, 0x000000001)
        uio1.irq_on()
        phase0_time = time.time()
        regs.write_word(0x00, 0x000000001)
        uio1.wait_irq()

        phase1_time = time.time()
        regs.write_word(0x0C, 0x000000001)
        udmabuf4.sync_for_cpu()
        udmabuf5.sync_for_cpu()

        end_time     = time.time()
        setup_time   = phase0_time - start_time
        xfer_time    = phase1_time - phase0_time
        cleanup_time = end_time    - phase1_time
        total_time   = end_time    - start_time

        total_setup_time   = total_setup_time   + setup_time
        total_cleanup_time = total_cleanup_time + cleanup_time
        total_xfer_time    = total_xfer_time    + xfer_time
        # Bytes read from the input buffer per run (the same amount is written
        # to the output buffer); previously this accumulated element counts.
        total_xfer_size    = total_xfer_size    + test_bytes
        count              = count              + 1
        print ("total:{0:.3f}[msec] setup:{1:.3f}[msec] xfer:{2:.3f}[msec] cleanup:{3:.3f}[msec]".format(round(total_time*1000.0,3), round(setup_time*1000.0,3), round(xfer_time*1000.0,3), round(cleanup_time*1000.0,3)))


    print ("average_setup_time  :{0:.3f}".format(round((total_setup_time  /count)*1000.0,3)) + "[msec]")
    print ("average_cleanup_time:{0:.3f}".format(round((total_cleanup_time/count)*1000.0,3)) + "[msec]")
    print ("average_xfer_time   :{0:.3f}".format(round((total_xfer_time   /count)*1000.0,3)) + "[msec]")
    print ("throughput          :{0:.3f}".format(round(((total_xfer_size/total_xfer_time)/(1000*1000)),3)) + "[MByte/sec]")

    # Verify the hardware result against a software reference.
    udmabuf4_negative_array = np.negative(udmabuf4_array)
    if np.array_equal(udmabuf4_negative_array, udmabuf5_array):
         print("np.negative(udmabuf4) == udmabuf5 : OK")
    else:
         print("np.negative(udmabuf4) == udmabuf5 : NG")
         # Locate every mismatch in one vectorised pass, then report only the
         # first 15 in detail (same limit as before) plus the total count.
         mismatch_indices = np.flatnonzero(udmabuf4_negative_array != udmabuf5_array)
         for index in mismatch_indices[:15]:
             i = int(index)
             print("udmabuf4_negative_array[0x{0:08X}] = 0x{1:08X} udmabuf5_array[0x{0:08X}] = 0x{2:08X}".format(i, int(udmabuf4_negative_array[i]), int(udmabuf5_array[i])))
         print("NG Count:{0}".format(len(mismatch_indices)))

honorpeter · Answer 3 · Fri Jun 14 2019 17:00:45 GMT+0800 (China Standard Time)

@ikwzm Thanks, I will try it. But that example has no PL DMA to use cooperatively with udmabuf.