高位合成に用いたベンチマークのC++での実行

Vitisというソフトを用いてハードウェアアクセラレーションを行ったのですが，その比較として高位合成前のC++のコードを実行しようとしました．
用いたコードは以下のとおりです．

krnl_vadd.cpp
#define BUFFER_SIZE 256
#define DATA_SIZE 4194304
#define m_axi_port 128 // the only change is here
// TRIPCOUNT identifiers: referenced only by the LOOP_TRIPCOUNT pragmas below.
const unsigned int c_len = DATA_SIZE / BUFFER_SIZE;
const unsigned int c_size = BUFFER_SIZE;

/*
    Vector Addition Kernel Implementation
    Computes out_r[k] = in1[k] + in2[k] for every k in [0, size), working
    through the vectors in chunks of at most BUFFER_SIZE elements.

    Arguments:
        in1   (input)     --> Input Vector1
        in2   (input)     --> Input Vector2
        out_r (output)    --> Output Vector
        size  (input)     --> Size of Vector in Integer

    A size of zero or less leaves out_r untouched. The addition is done on
    unsigned int, so it wraps around on overflow.
*/

extern "C" {
void krnl_vadd(const unsigned int *in1, // Read-Only Vector 1
          const unsigned int *in2, // Read-Only Vector 2
          unsigned int *out_r,     // Output Result
          int size                 // Size in integer
) {
#pragma HLS INTERFACE m_axi port = in1 offset = slave bundle = gmem
#pragma HLS INTERFACE m_axi port = in2 offset = slave bundle = gmem
#pragma HLS INTERFACE m_axi port = out_r offset = slave bundle = gmem
#pragma HLS INTERFACE s_axilite port = in1 bundle = control
#pragma HLS INTERFACE s_axilite port = in2 bundle = control
#pragma HLS INTERFACE s_axilite port = out_r bundle = control
#pragma HLS INTERFACE s_axilite port = size bundle = control
#pragma HLS INTERFACE s_axilite port = return bundle = control

    unsigned int v1_buffer[BUFFER_SIZE];   // Local memory to store vector1

    // Per iteration of this loop perform up to BUFFER_SIZE vector additions.
    // The index advances by the chunk actually processed, so it never goes
    // past size and cannot overflow even when size is close to INT_MAX.
    for (int i = 0; i < size;) {
       #pragma HLS LOOP_TRIPCOUNT min=c_len max=c_len
        // Boundary check written as a subtraction: i < size holds here, so
        // size - i is positive and cannot overflow (i + BUFFER_SIZE could).
        const int remaining = size - i;
        const int chunk_size = (remaining < BUFFER_SIZE) ? remaining : BUFFER_SIZE;

        // Copy the current chunk of in1 into the local buffer.
        read1: for (int j = 0; j < chunk_size; j++) {
           #pragma HLS LOOP_TRIPCOUNT min=c_size max=c_size
           #pragma HLS PIPELINE II=1
            v1_buffer[j] = in1[i + j];
        }

        // Read the matching chunk of in2, add it to the buffered in1 values
        // and write the sums to out_r.
        vadd_writeC: for (int j = 0; j < chunk_size; j++) {
           #pragma HLS LOOP_TRIPCOUNT min=c_size max=c_size
           #pragma HLS PIPELINE II=1
            //perform vector addition
            out_r[i + j] = v1_buffer[j] + in2[i + j];
        }

        i += chunk_size;
    }
}
}

vadd.cpp
1#include <stdlib.h>
2#include <fstream>
3#include <iostream>
4#include "vadd.h"
5
6static const int DATA_SIZE = 4194304;
7
8static const std::string error_message =
9    "Error: Result mismatch:\n"
10    "i = %d CPU result = %d Device result = %d\n";
11
12int main(int argc, char* argv[]) {
13
14    //TARGET_DEVICE macro needs to be passed from gcc command line
15    if(argc != 2) {
16		std::cout << "Usage: " << argv[0] <<" <xclbin>" << std::endl;
17		return EXIT_FAILURE;
18	}
19
20    char* xclbinFilename = argv[1];
21    
22    // Compute the size of array in bytes
23    size_t size_in_bytes = DATA_SIZE * sizeof(int);
24    
25    // Creates a vector of DATA_SIZE elements with an initial value of 10 and 32
26    // using customized allocator for getting buffer alignment to 4k boundary
27    
28    std::vector<cl::Device> devices;
29    cl::Device device;
30    std::vector<cl::Platform> platforms;
31    bool found_device = false;
32
33    //traversing all Platforms To find Xilinx Platform and targeted
34    //Device in Xilinx Platform
35    cl::Platform::get(&platforms);
36    for(size_t i = 0; (i < platforms.size() ) & (found_device == false) ;i++){
37        cl::Platform platform = platforms[i];
38        std::string platformName = platform.getInfo<CL_PLATFORM_NAME>();
39        if ( platformName == "Xilinx"){
40            devices.clear();
41            platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices);
42	    if (devices.size()){
43		    device = devices[0];
44		    found_device = true;
45		    break;
46	    }
47        }
48    }
49    if (found_device == false){
50       std::cout << "Error: Unable to find Target Device " 
51           << device.getInfo<CL_DEVICE_NAME>() << std::endl;
52       return EXIT_FAILURE; 
53    }
54
55    // Creating Context and Command Queue for selected device
56    cl::Context context(device);
57    cl::CommandQueue q(context, device, CL_QUEUE_PROFILING_ENABLE);
58
59    // Load xclbin 
60    std::cout << "Loading: '" << xclbinFilename << "'\n";
61    std::ifstream bin_file(xclbinFilename, std::ifstream::binary);
62    bin_file.seekg (0, bin_file.end);
63    unsigned nb = bin_file.tellg();
64    bin_file.seekg (0, bin_file.beg);
65    char *buf = new char [nb];
66    bin_file.read(buf, nb);
67    
68    // Creating Program from Binary File
69    cl::Program::Binaries bins;
70    bins.push_back({buf,nb});
71    devices.resize(1);
72    cl::Program program(context, devices, bins);
73    
74    // This call will get the kernel object from program. A kernel is an 
75    // OpenCL function that is executed on the FPGA. 
76    cl::Kernel krnl_vector_add(program,"krnl_vadd");
77    
78    // These commands will allocate memory on the Device. The cl::Buffer objects can
79    // be used to reference the memory locations on the device. 
80    cl::Buffer buffer_a(context, CL_MEM_READ_ONLY, size_in_bytes);
81    cl::Buffer buffer_b(context, CL_MEM_READ_ONLY, size_in_bytes);
82    cl::Buffer buffer_result(context, CL_MEM_WRITE_ONLY, size_in_bytes);
83    
84    //set the kernel Arguments
85    int narg=0;
86    krnl_vector_add.setArg(narg++,buffer_a);
87    krnl_vector_add.setArg(narg++,buffer_b);
88    krnl_vector_add.setArg(narg++,buffer_result);
89    krnl_vector_add.setArg(narg++,DATA_SIZE);
90
91    //We then need to map our OpenCL buffers to get the pointers
92    int *ptr_a = (int *) q.enqueueMapBuffer (buffer_a , CL_TRUE , CL_MAP_WRITE , 0, size_in_bytes);
93    int *ptr_b = (int *) q.enqueueMapBuffer (buffer_b , CL_TRUE , CL_MAP_WRITE , 0, size_in_bytes);
94    int *ptr_result = (int *) q.enqueueMapBuffer (buffer_result , CL_TRUE , CL_MAP_READ , 0, size_in_bytes);
95
96    //setting input data
97    for(int i = 0 ; i< DATA_SIZE; i++){
98	    ptr_a[i] = 10;
99	    ptr_b[i] = 20;
100    }
101
102    // Data will be migrated to kernel space
103    q.enqueueMigrateMemObjects({buffer_a,buffer_b},0/* 0 means from host*/);
104
105    //Launch the Kernel
106    q.enqueueTask(krnl_vector_add);
107
108    // The result of the previous kernel execution will need to be retrieved in
109    // order to view the results. This call will transfer the data from FPGA to
110    // source_results vector
111    q.enqueueMigrateMemObjects({buffer_result},CL_MIGRATE_MEM_OBJECT_HOST);
112
113    q.finish();
114
115    //Verify the result
116    int match = 0;
117    for (int i = 0; i < DATA_SIZE; i++) {
118        int host_result = ptr_a[i] + ptr_b[i];
119        if (ptr_result[i] != host_result) {
120            printf(error_message.c_str(), i, host_result, ptr_result[i]);
121            match = 1;
122            break;
123        }
124    }
125
126    q.enqueueUnmapMemObject(buffer_a , ptr_a);
127    q.enqueueUnmapMemObject(buffer_b , ptr_b);
128    q.enqueueUnmapMemObject(buffer_result , ptr_result);
129    q.finish();
130
131    std::cout << "TEST " << (match ? "FAILED" : "PASSED") << std::endl; 
132    return (match ? EXIT_FAILURE :  EXIT_SUCCESS);
133
134}
135

vadd.h
1#pragma once
2
3#define CL_HPP_CL_1_2_DEFAULT_BUILD
4#define CL_HPP_TARGET_OPENCL_VERSION 120
5#define CL_HPP_MINIMUM_OPENCL_VERSION 120
6#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1
7
8#include <CL/cl2.hpp>
9
// Customized buffer allocation for 4K boundary alignment.
//
// Minimal stateless allocator whose allocate() returns storage aligned to
// 4096 bytes, obtained from posix_memalign and released with free. It can be
// used as the allocator argument of standard containers, e.g.
// std::vector<int, aligned_allocator<int>>.
template <typename T>
struct aligned_allocator
{
  using value_type = T;

  aligned_allocator() noexcept = default;

  // Converting constructor: lets a container rebind the allocator to another
  // element type. There is no state to copy.
  template <typename U>
  aligned_allocator(const aligned_allocator<U>&) noexcept {}

  // Allocates uninitialized storage for num objects of type T on a 4096-byte
  // boundary. Throws std::bad_alloc when the byte count would overflow
  // std::size_t or when posix_memalign reports failure.
  T* allocate(std::size_t num)
  {
    // Reject counts whose byte size would wrap around; otherwise a much
    // smaller buffer than requested would be returned.
    if (num > static_cast<std::size_t>(-1) / sizeof(T))
      throw std::bad_alloc();
    void* ptr = nullptr;
    if (posix_memalign(&ptr, 4096, num * sizeof(T)))
      throw std::bad_alloc();
    return static_cast<T*>(ptr);
  }

  // Releases storage previously returned by allocate(). The element count is
  // not needed by free() and is ignored.
  void deallocate(T* p, std::size_t /*num*/) noexcept
  {
    free(p);
  }
};

// All aligned_allocator instances are interchangeable: storage obtained from
// one can be released through any other.
template <typename T, typename U>
bool operator==(const aligned_allocator<T>&, const aligned_allocator<U>&) noexcept
{
  return true;
}

template <typename T, typename U>
bool operator!=(const aligned_allocator<T>&, const aligned_allocator<U>&) noexcept
{
  return false;
}

しかし，そのままコンパイルした場合は

PS C:\cpp\vadd_src> g++ krnl_vadd.cpp vadd.cpp -o vadd
In file included from vadd.cpp:47:
vadd.h:53:10: fatal error: CL/cl2.hpp: No such file or directory
 #include <CL/cl2.hpp>
          ^~~~~~~~~~~~
compilation terminated.

というエラー（CL/cl2.hppが見つからない）が発生してしまいます．

CL/cl2.hppを用いずにこのプログラムを動かす方法はありますでしょうか．

また，実行にかかった時間なども知りたいのでその方法も教えていただけると助かります．

ozwk

2022/02/01 07:28

Vitis HLS側のCシミュレーションではだめですか?

行動規範の内容に同意します

回答2件

ご回答ありがとうございます。
試してみた結果、以下のようなエラーとなりました。

vadd.cpp: In function ‘int main(int, char**)’:
vadd.cpp:71:10: error: ‘vector’ is not a member of ‘std’
     std::vector<cl::Device> devices;
          ^~~~~~
vadd.cpp:71:17: error: ‘cl’ has not been declared
     std::vector<cl::Device> devices;
                 ^~
vadd.cpp:71:29: error: ‘devices’ was not declared in this scope
     std::vector<cl::Device> devices;
                             ^~~~~~~
vadd.cpp:72:5: error: ‘cl’ has not been declared
     cl::Device device;
     ^~
vadd.cpp:73:10: error: ‘vector’ is not a member of ‘std’
     std::vector<cl::Platform> platforms;
          ^~~~~~
vadd.cpp:73:17: error: ‘cl’ has not been declared
     std::vector<cl::Platform> platforms;
                 ^~
vadd.cpp:73:31: error: ‘platforms’ was not declared in this scope
     std::vector<cl::Platform> platforms;
                               ^~~~~~~~~
vadd.cpp:78:5: error: ‘cl’ has not been declared
     cl::Platform::get(&platforms);
     ^~
vadd.cpp:80:9: error: ‘cl’ has not been declared
         cl::Platform platform = platforms[i];
         ^~
vadd.cpp:81:36: error: ‘platform’ was not declared in this scope
         std::string platformName = platform.getInfo<CL_PLATFORM_NAME>();
                                    ^~~~~~~~
vadd.cpp:81:36: note: suggested alternative: ‘platformName’
         std::string platformName = platform.getInfo<CL_PLATFORM_NAME>();
                                    ^~~~~~~~
                                    platformName
vadd.cpp:81:53: error: ‘CL_PLATFORM_NAME’ was not declared in this scope
         std::string platformName = platform.getInfo<CL_PLATFORM_NAME>();
                                                     ^~~~~~~~~~~~~~~~
vadd.cpp:81:71: error: expected primary-expression before ‘)’ token
         std::string platformName = platform.getInfo<CL_PLATFORM_NAME>();
                                                                       ^
vadd.cpp:84:33: error: ‘CL_DEVICE_TYPE_ACCELERATOR’ was not declared in this scope
             platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices);
                                 ^~~~~~~~~~~~~~~~~~~~~~~~~~
vadd.cpp:86:7: error: ‘device’ was not declared in this scope
       device = devices[0];
       ^~~~~~
vadd.cpp:86:7: note: suggested alternative: ‘dysize’
       device = devices[0];
       ^~~~~~
       dysize
vadd.cpp:94:15: error: ‘device’ was not declared in this scope
            << device.getInfo<CL_DEVICE_NAME>() << std::endl;
               ^~~~~~
vadd.cpp:94:15: note: suggested alternative: ‘dysize’
            << device.getInfo<CL_DEVICE_NAME>() << std::endl;
               ^~~~~~
               dysize
vadd.cpp:94:30: error: ‘CL_DEVICE_NAME’ was not declared in this scope
            << device.getInfo<CL_DEVICE_NAME>() << std::endl;
                              ^~~~~~~~~~~~~~
vadd.cpp:94:30: note: suggested alternative: ‘__LC_NAME’
            << device.getInfo<CL_DEVICE_NAME>() << std::endl;
                              ^~~~~~~~~~~~~~
                              __LC_NAME
vadd.cpp:94:46: error: expected primary-expression before ‘)’ token
            << device.getInfo<CL_DEVICE_NAME>() << std::endl;
                                              ^
vadd.cpp:99:5: error: ‘cl’ has not been declared
     cl::Context context(device);
     ^~
vadd.cpp:100:5: error: ‘cl’ has not been declared
     cl::CommandQueue q(context, device, CL_QUEUE_PROFILING_ENABLE);
     ^~
vadd.cpp:112:5: error: ‘cl’ has not been declared
     cl::Program::Binaries bins;
     ^~
vadd.cpp:113:5: error: ‘bins’ was not declared in this scope
     bins.push_back({buf,nb});
     ^~~~
vadd.cpp:113:5: note: suggested alternative: ‘uint’
     bins.push_back({buf,nb});
     ^~~~
     uint
vadd.cpp:115:5: error: ‘cl’ has not been declared
     cl::Program program(context, devices, bins);
     ^~
vadd.cpp:119:5: error: ‘cl’ has not been declared
     cl::Kernel krnl_vector_add(program,"krnl_vadd");
     ^~
vadd.cpp:123:5: error: ‘cl’ has not been declared
     cl::Buffer buffer_a(context, CL_MEM_READ_ONLY, size_in_bytes);
     ^~
vadd.cpp:124:5: error: ‘cl’ has not been declared
     cl::Buffer buffer_b(context, CL_MEM_READ_ONLY, size_in_bytes);
     ^~
vadd.cpp:125:5: error: ‘cl’ has not been declared
     cl::Buffer buffer_result(context, CL_MEM_WRITE_ONLY, size_in_bytes);
     ^~
vadd.cpp:129:5: error: ‘krnl_vector_add’ was not declared in this scope
     krnl_vector_add.setArg(narg++,buffer_a);
     ^~~~~~~~~~~~~~~
vadd.cpp:129:35: error: ‘buffer_a’ was not declared in this scope
     krnl_vector_add.setArg(narg++,buffer_a);
                                   ^~~~~~~~
vadd.cpp:130:35: error: ‘buffer_b’ was not declared in this scope
     krnl_vector_add.setArg(narg++,buffer_b);
                                   ^~~~~~~~
vadd.cpp:131:35: error: ‘buffer_result’ was not declared in this scope
     krnl_vector_add.setArg(narg++,buffer_result);
                                   ^~~~~~~~~~~~~
vadd.cpp:131:35: note: suggested alternative: ‘__codecvt_result’
     krnl_vector_add.setArg(narg++,buffer_result);
                                   ^~~~~~~~~~~~~
                                   __codecvt_result
vadd.cpp:135:26: error: ‘q’ was not declared in this scope
     int *ptr_a = (int *) q.enqueueMapBuffer (buffer_a , CL_TRUE , CL_MAP_WRITE , 0, size_in_bytes);
                          ^
vadd.cpp:135:57: error: ‘CL_TRUE’ was not declared in this scope
     int *ptr_a = (int *) q.enqueueMapBuffer (buffer_a , CL_TRUE , CL_MAP_WRITE , 0, size_in_bytes);
                                                         ^~~~~~~
vadd.cpp:135:67: error: ‘CL_MAP_WRITE’ was not declared in this scope
     int *ptr_a = (int *) q.enqueueMapBuffer (buffer_a , CL_TRUE , CL_MAP_WRITE , 0, size_in_bytes);
                                                                   ^~~~~~~~~~~~
vadd.cpp:137:77: error: ‘CL_MAP_READ’ was not declared in this scope
 *ptr_result = (int *) q.enqueueMapBuffer (buffer_result , CL_TRUE , CL_MAP_READ , 0, size_in_bytes);
                                                                     ^~~~~~~~~~~
vadd.cpp:137:77: note: suggested alternative: ‘CLONE_THREAD’
 *ptr_result = (int *) q.enqueueMapBuffer (buffer_result , CL_TRUE , CL_MAP_READ , 0, size_in_bytes);
                                                                     ^~~~~~~~~~~
                                                                             CLONE_THREAD
vadd.cpp:154:48: error: ‘CL_MIGRATE_MEM_OBJECT_HOST’ was not declared in this scope
     q.enqueueMigrateMemObjects({buffer_result},CL_MIGRATE_MEM_OBJECT_HOST);

投稿2022/02/01 05:16