testocl.cc

C++ source code icon testocl.cc — C++ source code, 5 KB (5241 bytes)

ファイルコンテンツ

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <string>
#include <time.h>
#include <CL/cl.h>
#include <exception>
#include <functional>
using namespace std;

/**
 * Verify the status code returned by an OpenCL call.
 *
 * @param code   Status returned by the call. CL_SUCCESS means there is
 *               nothing to do and the function returns immediately.
 * @param method Name of the call; used only in the error message.
 * @param hook   Invoked only when code is not CL_SUCCESS, before anything is
 *               reported. If it returns true the failure is treated as
 *               handled: nothing is printed and nothing is thrown. The
 *               default hook always returns false.
 * @throws std::exception when code is not CL_SUCCESS and hook() returned
 *         false. The failure is written to stderr first.
 */
static void checkResult(const cl_int code, const std::string &method,
                        std::function<bool()> hook = []{ return false; } )
{
	if (code == CL_SUCCESS)
		return;
	if (hook())
		return;
	fprintf(stderr, "Failed: %s, code: %d\n", method.c_str(), code);
	// Throw by value: `throw new exception()` leaks the heap object and is
	// not matched by a `catch (const std::exception &)` handler.
	throw std::exception();
}

struct ProgramContext {
	static constexpr size_t NUM_ELEM = 3;
	const uint     num;
	string         krnl_add_src;
	cl_platform_id platform_id;
	cl_uint        num_platforms;
	float         *mem[NUM_ELEM];
	cl_mem         dev[NUM_ELEM];

	ProgramContext(const size_t n)
	: num(n)
	{
		memForeach([&](float *&m) { m = new float[n]; });
		initInput();
	}

	virtual ~ProgramContext()
	{
		for (int i = 0; i < NUM_ELEM; i++) {
			delete [] mem[i];
			clReleaseMemObject(dev[i]);
		}
	}

	void memForeach(function<void (float *&)> fn)
	{
		for (auto &m: mem)
			fn(m);
	}

	cl_mem &outDev(void)
	{
		return dev[2];
	}

	float *&outMem(void)
	{
		return mem[2];
	}

	void devForeach(function<void (cl_mem &)> fn)
	{
		for (auto &d: dev)
			fn(d);
	}

	void memDevForeach(function<void (float *&mem, cl_mem &dev)> fn)
	{
		for (int i = 0; i < NUM_ELEM; i++)
			fn(mem[i], dev[i]);
	}

	void readKernelSource(void)
	{
		const char *filename = "add.cl";
		ifstream ifs(filename);
		if (!ifs) {
			fprintf(stderr, "Failed to read kernel source: %s\n",
			        filename);
			throw new exception();
		}

		istreambuf_iterator<char> it(ifs);
		istreambuf_iterator<char> last;
		krnl_add_src = string(it, last);
	}

	void initInput(void)
	{
		srand(time(NULL));
		for (size_t i = 0; i < num; ++i) {
			mem[0][i] = (float)random() / RAND_MAX;
			mem[1][i] = (float)random() / RAND_MAX;
			mem[2][i] = 0; // for output
		}
	}

	void showResult(void)
	{
		for (size_t i = 0; i < num; ++i) {
			printf("i: %zd, %f + %f => %f\n",
			       i, mem[0][i], mem[1][i], mem[2][i]);
		}
	}
};

int main(void)
{
	cl_int ret;
	ProgramContext pcx(10);
	pcx.readKernelSource();

	ret = clGetPlatformIDs(1, &pcx.platform_id, &pcx.num_platforms);
	checkResult(ret, "clGetPlatformIDs");
	printf("Platfrom ID: %p\n", pcx.platform_id);

	cl_uint num_devices;
	cl_device_id device_id;
	ret  = clGetDeviceIDs(pcx.platform_id, CL_DEVICE_TYPE_DEFAULT,
	                      1, &device_id, &num_devices);
	checkResult(ret, "clGetDeviceIDs");
	printf("Device ID: %p\n", device_id);

	cl_context ctx = clCreateContext(NULL, 1u,
	                                 &device_id, NULL, NULL, &ret);
	checkResult(ret, "clCreateContext");

#ifdef CL_EXT_PREFIX__VERSION_2_0_DEPRECATED
	cl_command_queue cmdq = clCreateCommandQueueWithProperties(
	                          ctx, device_id, NULL, &ret);
#else
	cl_command_queue cmdq = clCreateCommandQueue(
	                          ctx, device_id, 0, &ret);
#endif
	checkResult(ret, "clCreateCommandQueueWithProperties");

	const char *src = pcx.krnl_add_src.c_str();
	const size_t size = pcx.krnl_add_src.size();
	cl_program prog = clCreateProgramWithSource(ctx, 1, &src, &size, &ret);
	checkResult(ret, "clCreateProgramWithSource");

	ret = clBuildProgram(prog, 1, &device_id, NULL, NULL, NULL);
	auto showBuildInfo = [&] {
		const size_t bufsize = 0x1000;
		char buf[bufsize];
		size_t actual_size;
		cl_int code = clGetProgramBuildInfo(
		  prog, device_id, CL_PROGRAM_BUILD_LOG,
		  bufsize, buf, &actual_size);
		fprintf(stderr, "[size:%ld] %s", actual_size, buf);
		checkResult(code, "clGetProgramBuildInfo");
		return false;
	};
	checkResult(ret, "clBuildProgram", showBuildInfo);

	cl_kernel krnl = clCreateKernel(prog, "add", &ret);
	checkResult(ret, "clCreateKernel");

	pcx.devForeach([&](cl_mem &dev) {
		dev = clCreateBuffer(ctx, CL_MEM_READ_WRITE,
		                     pcx.num * sizeof(float), NULL, &ret);
		checkResult(ret, "clCreateKernel");
	});

	pcx.memDevForeach([&](float *&mem, cl_mem &dev) {
		ret = clEnqueueWriteBuffer(cmdq, dev, CL_TRUE,
		                           0, pcx.num * sizeof(float), mem,
		                           0, NULL, NULL);
		checkResult(ret, "clEnqueueWriteBuffer");
	});

	int idx = 0;
	clSetKernelArg(krnl, idx++, sizeof(pcx.num), &pcx.num);
	pcx.devForeach([&](cl_mem &dev) {
		clSetKernelArg(krnl, idx++, sizeof(cl_mem), &dev);
	});

	size_t global_work_size[3] = {pcx.num, 0, 0};
	size_t local_work_size[3] = {pcx.num, 0, 0};
	ret = clEnqueueNDRangeKernel(cmdq, krnl, 1, NULL,
	                             global_work_size, local_work_size,
	                             0, NULL, NULL);
	checkResult(ret, "clEnqueueNDRangeKernel");

	ret = clFinish(cmdq);
	checkResult(ret, "clFinish");

	ret = clEnqueueReadBuffer(cmdq, pcx.outDev(), CL_TRUE, 0,
	                          pcx.num * sizeof(float),
	                          pcx.outMem(), 0, NULL, NULL);
	checkResult(ret, "clEnqueueReadBuffer");
	pcx.showResult();

	checkResult(clReleaseKernel(krnl),       "clRleaseKernel");
	checkResult(clReleaseProgram(prog),      "clRelaseProgram");
	checkResult(clReleaseCommandQueue(cmdq), "clReleaseCommandQueue");
	checkResult(clReleaseContext(ctx),       "clRelaseContext");

	return EXIT_SUCCESS;
}