testocl.cc
testocl.cc
—
C++ source code,
5 KB (5241 bytes)
ファイルコンテンツ
#include <cstdio> #include <cstdlib> #include <cstring> #include <fstream> #include <string> #include <time.h> #include <CL/cl.h> #include <exception> #include <functional> using namespace std; static void checkResult(const cl_int code, const string &method, function<bool()> hook = []{ return false; } ) { if (code == CL_SUCCESS) return; if (hook()) return; fprintf(stderr, "Failed: %s, code: %d\n", method.c_str(), code); throw new exception(); } struct ProgramContext { static constexpr size_t NUM_ELEM = 3; const uint num; string krnl_add_src; cl_platform_id platform_id; cl_uint num_platforms; float *mem[NUM_ELEM]; cl_mem dev[NUM_ELEM]; ProgramContext(const size_t n) : num(n) { memForeach([&](float *&m) { m = new float[n]; }); initInput(); } virtual ~ProgramContext() { for (int i = 0; i < NUM_ELEM; i++) { delete [] mem[i]; clReleaseMemObject(dev[i]); } } void memForeach(function<void (float *&)> fn) { for (auto &m: mem) fn(m); } cl_mem &outDev(void) { return dev[2]; } float *&outMem(void) { return mem[2]; } void devForeach(function<void (cl_mem &)> fn) { for (auto &d: dev) fn(d); } void memDevForeach(function<void (float *&mem, cl_mem &dev)> fn) { for (int i = 0; i < NUM_ELEM; i++) fn(mem[i], dev[i]); } void readKernelSource(void) { const char *filename = "add.cl"; ifstream ifs(filename); if (!ifs) { fprintf(stderr, "Failed to read kernel source: %s\n", filename); throw new exception(); } istreambuf_iterator<char> it(ifs); istreambuf_iterator<char> last; krnl_add_src = string(it, last); } void initInput(void) { srand(time(NULL)); for (size_t i = 0; i < num; ++i) { mem[0][i] = (float)random() / RAND_MAX; mem[1][i] = (float)random() / RAND_MAX; mem[2][i] = 0; // for output } } void showResult(void) { for (size_t i = 0; i < num; ++i) { printf("i: %zd, %f + %f => %f\n", i, mem[0][i], mem[1][i], mem[2][i]); } } }; int main(void) { cl_int ret; ProgramContext pcx(10); pcx.readKernelSource(); ret = clGetPlatformIDs(1, &pcx.platform_id, &pcx.num_platforms); checkResult(ret, "clGetPlatformIDs"); printf("Platfrom ID: %p\n", pcx.platform_id); cl_uint num_devices; cl_device_id device_id; ret = clGetDeviceIDs(pcx.platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &num_devices); checkResult(ret, "clGetDeviceIDs"); printf("Device ID: %p\n", device_id); cl_context ctx = clCreateContext(NULL, 1u, &device_id, NULL, NULL, &ret); checkResult(ret, "clCreateContext"); #ifdef CL_EXT_PREFIX__VERSION_2_0_DEPRECATED cl_command_queue cmdq = clCreateCommandQueueWithProperties( ctx, device_id, NULL, &ret); #else cl_command_queue cmdq = clCreateCommandQueue( ctx, device_id, 0, &ret); #endif checkResult(ret, "clCreateCommandQueueWithProperties"); const char *src = pcx.krnl_add_src.c_str(); const size_t size = pcx.krnl_add_src.size(); cl_program prog = clCreateProgramWithSource(ctx, 1, &src, &size, &ret); checkResult(ret, "clCreateProgramWithSource"); ret = clBuildProgram(prog, 1, &device_id, NULL, NULL, NULL); auto showBuildInfo = [&] { const size_t bufsize = 0x1000; char buf[bufsize]; size_t actual_size; cl_int code = clGetProgramBuildInfo( prog, device_id, CL_PROGRAM_BUILD_LOG, bufsize, buf, &actual_size); fprintf(stderr, "[size:%ld] %s", actual_size, buf); checkResult(code, "clGetProgramBuildInfo"); return false; }; checkResult(ret, "clBuildProgram", showBuildInfo); cl_kernel krnl = clCreateKernel(prog, "add", &ret); checkResult(ret, "clCreateKernel"); pcx.devForeach([&](cl_mem &dev) { dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE, pcx.num * sizeof(float), NULL, &ret); checkResult(ret, "clCreateKernel"); }); pcx.memDevForeach([&](float *&mem, cl_mem &dev) { ret = clEnqueueWriteBuffer(cmdq, dev, CL_TRUE, 0, pcx.num * sizeof(float), mem, 0, NULL, NULL); checkResult(ret, "clEnqueueWriteBuffer"); }); int idx = 0; clSetKernelArg(krnl, idx++, sizeof(pcx.num), &pcx.num); pcx.devForeach([&](cl_mem &dev) { clSetKernelArg(krnl, idx++, sizeof(cl_mem), &dev); }); size_t global_work_size[3] = {pcx.num, 0, 0}; size_t local_work_size[3] = {pcx.num, 0, 0}; ret = clEnqueueNDRangeKernel(cmdq, krnl, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); checkResult(ret, "clEnqueueNDRangeKernel"); ret = clFinish(cmdq); checkResult(ret, "clFinish"); ret = clEnqueueReadBuffer(cmdq, pcx.outDev(), CL_TRUE, 0, pcx.num * sizeof(float), pcx.outMem(), 0, NULL, NULL); checkResult(ret, "clEnqueueReadBuffer"); pcx.showResult(); checkResult(clReleaseKernel(krnl), "clRleaseKernel"); checkResult(clReleaseProgram(prog), "clRelaseProgram"); checkResult(clReleaseCommandQueue(cmdq), "clReleaseCommandQueue"); checkResult(clReleaseContext(ctx), "clRelaseContext"); return EXIT_SUCCESS; }