我对clEnqueueWriteBuffer有一个很好奇的问题。在我当前的项目中,我想将约500张图像(1GB)复制到图形卡上,并平均一些像素。图像存储在一个大的double *数组中(大小:width * height * nImages)。如果我将300张图像复制到VRAM中并使用clEnqueueReadBuffer读出,我将得到存储在RAM中的确切信息:

内存:14450,5006076793 14450,5006076793 14456,8079379383 14455,2294939826 14444,7361060619

VRAM:14450,5006076793 14450,5006076793 14456,8079379383 14455,2294939826 14444,7361060619

但是,如果加载的图像超过350张,则cl_mem对象的内容已损坏:

RAM:14450,5006076793 14450,5006076793 14456,8079379383 14455,2294939826 14444,7361060619

VRAM:-6,2​​7743856220419E + 66 -6,27743856220419E + 66 -6,27743856220419E + 66 -6,27743856220419E + 66 -6,27743856220419E + 66

如果您能帮助我,我将非常高兴!
这是我的代码:

private: System::Void button7_Click(System::Object^  sender, System::EventArgs^  e) {
         std::string text;
         text = StringConvA(maskedTextBox1->Text);
         textBox1->Text += "You want a bin size of " + atoi(text.c_str()) + ". You have "+ nforegroundImages+" images.\r\n";
         binWidth = atoi(text.c_str());
         nbins = (int)ceil((double)nforegroundImages / (double)binWidth);
         textBox1->Text += "That is going to give you "+nbins+" bins\r\n";

         //create context and cmd_queue

         context = clCreateContext(NULL, nDevices, &deviceID[0], NULL, NULL, &err);
         cmd_queue = clCreateCommandQueue(context, deviceID[0], NULL, &err);


         //allocate result memory
         //each result image will have width*height double entries. res_im is an array of pointer to double.


         res_im = (double*)malloc(width*height*sizeof(double)*nbins);


         cl_mem imageData_mem, result_mem, nWavenumber_mem, binSize_mem, imageSizeInPixels_mem, nbins_mem;
         imageData_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, width * height * sizeof(double)*nforegroundImages, NULL, NULL);
         result_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, width * height * sizeof(double)*nbins, NULL, NULL);
         nWavenumber_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int), NULL, NULL);
         binSize_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int), NULL, NULL);
         imageSizeInPixels_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int), NULL, NULL);
         nbins_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int), NULL, NULL);

         clFinish(cmd_queue);

         int imageSizeInPixels = width*height;
         err = clEnqueueWriteBuffer(cmd_queue, imageData_mem, CL_TRUE, 0, width*height*sizeof(double)*nforegroundImages, (void*)images, 0, NULL, NULL); //this is where the images are copied into VRAM. If nforegroundImages>300, the data in VRAM is wrong, otherwise it is the same as in the images array
         err = clEnqueueWriteBuffer(cmd_queue, nWavenumber_mem, CL_TRUE, 0, sizeof(int), (void*)&nforegroundImages, 0, NULL, NULL);
         err = clEnqueueWriteBuffer(cmd_queue, binSize_mem, CL_TRUE, 0, sizeof(int), (void*)&binWidth, 0, NULL, NULL);
         err = clEnqueueWriteBuffer(cmd_queue, imageSizeInPixels_mem, CL_TRUE, 0, sizeof(int), (void*)&imageSizeInPixels, 0, NULL, NULL);
         err = clEnqueueWriteBuffer(cmd_queue, nbins_mem, CL_TRUE, 0, sizeof(int), (void*)&nbins, 0, NULL, NULL);

         clFinish(cmd_queue);

         //read the content of imageData_mem and store it in test array
         double * test = (double*)malloc(width*height*sizeof(double)*nforegroundImages);
         err = clEnqueueReadBuffer(cmd_queue, imageData_mem, CL_TRUE, 0, width*height*sizeof(double)*nforegroundImages,
             test, 0, NULL, NULL);

         clFinish(cmd_queue);

         //compare original value from the images array to the value retrieved from the VRAM
         textBox1->Text += images[1] + "\t" + images[1] + "\t" + images[10] + "\t" + images[100] + "\t" + images[1000] + "\t\r\n"; //original data
         textBox1->Text += test[1] + "\t" + test[1] + "\t" + test[10] + "\t" + test[100] + "\t" + test[1000] + "\t\r\n"; //retrieved from imageData_mem

         free(test);

         //build the program from the source file and print the program build log
         cl_program program[2];
         cl_kernel kernel[2];
         const char * filename = "addKernel.c";
         char *program_source = load_program_source(filename);
         program[0] = clCreateProgramWithSource(context, 1, (const char**)&program_source,
             NULL, &err);
         if (err == CL_OUT_OF_HOST_MEMORY){
             textBox1->Text += "Error: out of Host Memory!\r\n";
         }
         else if (err == CL_INVALID_CONTEXT){
             textBox1->Text += "Error: invalid Context!\r\n";
         }
         else if (err == CL_INVALID_VALUE){
             textBox1->Text += "Error: invalid Value!\r\n";
         }



         err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);
         textBox1->Text += "Program build error: " + err + "\r\n";
         cl_build_status status;
         size_t logSize;
         clGetProgramBuildInfo(program[0], deviceID[0], CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL);
         clGetProgramBuildInfo(program[0], deviceID[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);

         char* programLog;
         programLog = (char*)calloc(logSize + 1, sizeof(char));
         clGetProgramBuildInfo(program[0], deviceID[0], CL_PROGRAM_BUILD_LOG, logSize + 1, programLog, NULL);
         this->textBox1->Text += "Program build info: error=" + err + ", status=" + status + ", programLog:\r\n" + *programLog + "\r\n" + "In case of an error please make sure that openCL has been initialized\r\n";

         kernel[0] = clCreateKernel(program[0], "filterSpectrum", &err);

         //(__global double *imageData, __global double *result, __constant int *nWavenumbers, __constant int *binSize, __constant int *imageSizeInPixels,__constant int * nbins)
         // Now setup the arguments to our kernel
         err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), &imageData_mem);
         err |= clSetKernelArg(kernel[0], 1, sizeof(cl_mem), &result_mem);
         err |= clSetKernelArg(kernel[0], 2, sizeof(cl_mem), &nWavenumber_mem);
         err |= clSetKernelArg(kernel[0], 3, sizeof(cl_mem), &binSize_mem);
         err |= clSetKernelArg(kernel[0], 4, sizeof(cl_mem), &imageSizeInPixels_mem);
         err |= clSetKernelArg(kernel[0], 5, sizeof(cl_mem), &nbins_mem);

         size_t local_work_size = 32;

         // Run the calculation by enqueuing it and forcing the
         // command queue to complete the task
         size_t global_work_size = width*height;
         err = clEnqueueNDRangeKernel(cmd_queue, kernel[0], 1, NULL,&global_work_size, &local_work_size, 0, NULL, NULL);
         clFinish(cmd_queue);

         // Once finished read back the results from the answer
         // array into the results array
         err = clEnqueueReadBuffer(cmd_queue, result_mem, CL_TRUE, 0, width*height*sizeof(double)*nbins,
             res_im, 0, NULL, NULL);


         clFinish(cmd_queue);
         textBox1->Text += "result values " + res_im[1] + "\t" + res_im[100] + "\t" + res_im[1000] + "\t" + res_im[10000] + "\t" + res_im[100000] + "\t" + res_im[1000000] + "\r\n";

         hScrollBar2->Maximum = nbins+3;

         clReleaseMemObject(imageSizeInPixels_mem);
         clReleaseMemObject(imageData_mem);
         clReleaseMemObject(result_mem);
         clReleaseMemObject(nWavenumber_mem);
         clReleaseMemObject(binSize_mem);
         clReleaseMemObject(nbins_mem);

         clReleaseCommandQueue(cmd_queue);
         clReleaseContext(context);



}

最佳答案

您最有可能要求的内存大于驱动程序在单个分配中所允许的内存。看来您没有检查OpenCL运行时函数返回的大多数错误代码;这样做使诊断OpenCL程序的问题变得更加容易。您确实应该为每个API调用执行此操作。

您可以使用以下代码片段找出设备支持的最大单个内存分配:

cl_ulong maxMemAlloc;
clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &maxMemAlloc, NULL);
textBox1->Text += "Maximum memory allocation size is " + maxMemAlloc + " bytes\r\n";


通常情况下,最大的内存分配远远小于GPU内存的总大小。 OpenCL规范仅要求它至少为最大大小的1/4,或至少为128 MB。

10-08 11:56