0

我正在尝试编写一个互相关函数。在我的程序中,我编写了一个 Map 框架来封装 oneAPI 调用,并通过一个指定目标类型(CPU 或 GPU/加速器)的参数来隐藏硬件目标选择的细节。问题是,SYCL 部分的代码出现了一些编译错误,我无法解决。我的代码如下:

  <!-- language: c++ -->
    /**
     * Computes the sum of the element-wise products of two matrices.
     *
     * @param f First matrix; its rows (and each row's length) define which
     *          elements are visited.
     * @param g Second matrix; must provide an element at every position that
     *          exists in f (this is a precondition and is not checked).
     * @return The sum accumulated in double precision and converted to T;
     *         0 when f has no elements.
     *
     * The parameters are deliberately kept by value so that the function's
     * type stays the same for code that spells it out explicitly.
     */
    template<class T>
    T applyFilter(std::vector<std::vector<T>> f, std::vector<std::vector<T>> g) {
        double sum = 0;
        for (std::size_t i = 0; i < f.size(); ++i) {
            // Use each row's own length instead of f[0].size(): reading f[0]
            // on an empty matrix is undefined behaviour.
            for (std::size_t j = 0; j < f[i].size(); ++j) {
                sum += f[i][j] * g[i][j];
            }
        }
        return static_cast<T>(sum);
    }
    ;
    
    /**
     * Prints a matrix to std::cout, one row per line.
     *
     * Every element is followed by a single space and every row is terminated
     * by a newline. An empty matrix prints nothing.
     *
     * @param matrix Matrix to print; taken by const reference so printing does
     *               not copy the data.
     */
    template<class T>
    void print_matrix(const std::vector<std::vector<T>> &matrix) {
        // Iterate each row over its own length; the previous version read
        // matrix[0] unconditionally, which is undefined for an empty matrix.
        for (const auto &row : matrix) {
            for (const auto &value : row) {
                std::cout << value << ' ';
            }
            std::cout << "\n";
        }
    }
    
    /**
     * Extracts a rectangular window from a matrix.
     *
     * @param mat Source matrix (read only; taken by const reference so that the
     *            whole matrix is not copied on every call).
     * @param i   Index of the first source row of the window.
     * @param j   Index of the first source column of the window.
     * @param r   Number of rows in the window.
     * @param c   Number of columns in the window.
     * @return A new r x c matrix holding mat[i..i+r-1][j..j+c-1]; empty when r
     *         is not positive.
     *
     * Precondition (not checked): the window lies entirely inside mat.
     */
    template<class T>
    std::vector<std::vector<T>> slice_matrix(
            const std::vector<std::vector<T>> &mat, int i, int j, int r,
            int c) {
        std::vector<std::vector<T>> out;
        if (r > 0) {
            out.reserve(static_cast<std::size_t>(r));
        }

        for (int k = 0; k < r; ++k) {
            // Build each output row directly from the source range instead of
            // zero-filling it first and then overwriting it with a temporary.
            const auto first = mat[i + k].begin() + j;
            out.emplace_back(first, first + c);
        }

        return out;
    }
    
    // Overwrites every element of `row`, in order, with rand() % 100,
    // i.e. a pseudo-random value in the range [0, 99].
    template<class T>
    void rand_fill_row(std::vector<T> &row) {
        for (auto &&cell : row) {
            cell = rand() % 100;
        }
    }
    
    // Fills the matrix with pseudo-random values by running rand_fill_row on
    // each row, from the first row to the last.
    template<class T>
    void rand_fill_matrix(std::vector<std::vector<T>> &mat) {
        for (std::vector<T> &row : mat) {
            rand_fill_row<T>(row);
        }
    }
 //Definition of Map Skeleton
        template<class Tin, class Tout, class Function>
        class Map {
        private:
            Function fun;
        public:
            Map() {
            }
            Map(Function f) :
                    fun(f) {
            }
            //Overriding () operator
            std::vector<std::vector<Tout>> operator()(bool use_tbb,
                    std::vector<std::vector<Tin>> &img,
                    std::vector<std::vector<Tin>> &ker) {
                int img_row = img.size();
                int img_col = img[0].size();
                int filt_row = ker.size();
                int filt_col = ker[0].size();
                int out_row = img_row - filt_row;
                int out_col = img_col - filt_col;
                std::vector<std::vector<Tout>> out;
        
                if (use_tbb) {
                    uTimer *timer = new uTimer("Executing Code On CPU");
                    tbb::parallel_for(
                            tbb::blocked_range2d<int, int>(0, out_row, 0, out_col),
                            [&](tbb::blocked_range2d<int, int> &t) {
                                for (int n = t.rows().begin(); n < t.rows().end();
                                        ++n) {
                                    for (int m = t.cols().begin(); m < t.cols().end();
                                            ++m) {
                                        out[n][m] = fun(
                                                slice_matrix(img, n, m, filt_row,
                                                        filt_col), ker);
                                    }
                                }
                            });
                    timer->~uTimer();
                    return out;
                } else {
        
                    /* A 2D std::vector<std::vector<T>>
                     * does not have elements stored contiguously in the memory.
                     * Thus I define a vector<T> and operate on them as contiguous blocks.*/
        
                    //Define Buffer for
                sycl::buffer<Tin, 1> img_buffer(img.data(), img.size());
                sycl::buffer<Tin, 1> ker_buffer(ker.data(), ker.size());
                sycl::buffer<Tin, 2> out_buffer(out.data(), { out_row, out_col });
    
                //Profiling GPU
    
                // Initialize property list with profiling information
                sycl::property_list propList {
                        sycl::property::queue::enable_profiling() };
                // Build the command queue (constructed to handle event profling)
                sycl::queue gpuQueue = cl::sycl::queue(sycl::gpu_selector(),
                        propList);
                // print out the device information used for the kernel code
                std::cout << "Device: "
                        << gpuQueue.get_device().get_info<sycl::info::device::name>()
                        << std::endl;
    
                std::cout << "Compute Units: "
                        << gpuQueue.get_device().get_info<
                                sycl::info::device::max_compute_units>()
                        << std::endl;
    
                auto start_overall = std::chrono::system_clock::now();
    
                auto event = gpuQueue.submit(
                        [&](sycl::handler &h) {
                            //local copy of fun
                            auto f = fun;
                            sycl::accessor img_accessor(img_buffer, h,
                                    sycl::read_only);
                            sycl::accessor ker_accessor(ker_buffer, h,
                                    sycl::read_only);
                            sycl::accessor out_accessor(out_buffer, h,
                                    sycl::write_only);
                                    
    h.parallel_for(sycl::range<2> { out_row, out_col },
                                    [=](sycl::id<2> index) {
                                        int row = index[0];
                                        int col = index[1];
                        
    out_accessor[row][col] = f(slice_matrix(img_accessor, row, col,filt_row, filt_col)
                                            , ker_accessor);
        
                                        });
        
                            });
                    event.wait();
                    auto end_overall = std::chrono::system_clock::now();
                    cl_ulong submit_time = event.template get_profiling_info<
                            cl::sycl::info::event_profiling::command_submit>();
                    cl_ulong start_time = event.template get_profiling_info<
                            cl::sycl::info::event_profiling::command_start>();
                    cl_ulong end_time = event.template get_profiling_info<
                            cl::sycl::info::event_profiling::command_end>();
                    auto submission_time = (start_time - submit_time) / 1000000.0f;
                    std::cout << "Submit Time: " << submission_time << " ms"
                            << std::endl;
                    auto execution_time = (end_time - start_time) / 1000000.0f;
                    std::cout << "Execution Time: " << execution_time << " ms"
                            << std::endl;
                    auto execution_overall = std::chrono::duration_cast<
                            std::chrono::milliseconds>(end_overall - start_overall);
                    std::cout << "Overall Execution Time: " << execution_overall.count()
                            << " ms" << std::endl;
                }
                ;
                return out;
            }
        
        };
             
//The main part
 // Factory for Map: Tin and Tout are given explicitly, while Function is
 // deduced from the argument, which is forwarded to Map's constructor.
 template<class Tin, class Tout, class Function>
        Map<Tin, Tout, Function> make_map(Function f) {
            using Skeleton = Map<Tin, Tout, Function>;
            return Skeleton(f);
        }
        
        int main(int argc, char *argv[]) {
        
            std::cout << "The Exutable File! " << argv[0] << std::endl;
            std::cout << "The Device Is! " << argv[1] << std::endl;
            std::cout << "The Fist Vector Size! " << argv[2] << std::endl;
            std::cout << "The Second Vector Size! " << argv[3] << std::endl;
            //The Device
            std::string device = argv[1];
            // Image's row count
            int m = std::stoi(argv[2]);
            // Image's col count
            int n = std::stoi(argv[3]);
        
            std::vector<std::vector<double>> img(m, std::vector<double>(n, 0));
        
            // Filter's row count
            int k = std::stoi(argv[4]);
            // Filter's row count
            int l = std::stoi(argv[5]);
        
            std::vector<std::vector<double>> ker(k, std::vector<double>(l, 0));
        
            //std::vector<std::vector<T>> out(r, std::vector<T>(c, 0));
        
            rand_fill_matrix(img);
            rand_fill_matrix(ker);
    /*Error is : no matching function for call to 'make_map'*/
        <!-- language: lang-js -->
            auto m1 = make_map<double, double>(applyFilter);
    <!-- language: lang-js -->
            std::vector<std::vector<double>> r = m1(true, img, ker);
            //print the result
            //for (auto &e : r) {
            //std::cout << e << " ";
            //}
            return 0;
        }

错误是:

'sycl::buffer' 的初始化没有匹配的构造函数

//Define Buffer for
            sycl::buffer<Tin, 1> img_buffer(&img[0], img.size());
            sycl::buffer<Tin, 1> ker_buffer(&ker[0], ker.size());
            sycl::buffer<Tin, 2> out_buffer(out.data(), sycl::range<2>{ out_row, out_col });

==================================================== ====

non-constant-expression cannot be narrowed from type 'int' to 'size_t' (aka 'unsigned long') in initializer list [-Wc++11-narrowing]
h.parallel_for(sycl::range<2> { out_row, out_col },
                                [=](sycl::id<2> index) {
                                    int row = index[0];
                                    int col = index[1];

===============================================

  Invalid arguments '
    Candidates are:
    std::vector<std::vector<#0,std::allocator<#0>>,std::allocator<std::vector<#0,std::allocator<#0>>>> slice_matrix(std::vector<std::vector<#0,std::allocator<#0>>,std::allocator<std::vector<#0,std::allocator<#0>>>>, int, int, int, int)

'
  out_accessor[row][col] = f(slice_matrix(img_accessor, row, col,
                                                            filt_row, filt_col),
                                                    ker_accessor);  }); });

==================================================== =

no matching function for call to 'make_map'
auto m1 = make_map<double, double>(applyFilter);
4

1 回答 1

0

通过匹配类型可以修复第一个错误——只需将 img_row、img_col、filt_row、filt_col、out_row 和 out_col 的声明从 int 改为 size_t。

对于第二个错误 - 编译器是否也发出了有关问题的提示?我不得不根据你的片段做出一些假设,但我最终得到:

错误:没有用于调用 'make_map' 的匹配函数。注意:候选模板被忽略:无法推断模板参数 'Function'。

这告诉我,除了 Tin 和 Tout(<double, double>)之外,我们还需要指定函数的类型。例如:

auto m1 = make_map<double, double, double(std::vector<std::vector<double>>, std::vector<std::vector<double>>)>(applyFilter);

不过在我对你的代码的模拟中,这样写并不完全可行。你可以沿着这个思路继续尝试。

如果您仍有问题 - 请提供完整的代码示例,我们可以尝试编译。如果你修复它 - 请将你发现的内容发回这里,以便我们一起学习。

于 2021-07-12T19:45:23.697 回答