CUDA：纹理内存入门到精通--纹理对象

纹理对象是CUDA针对纹理参考缺点而提出的升级版，其作用和纹理参考完全一致，但是使用方法更加灵活。

与纹理参考相比，CUDA对其进行了多方面的升级：一方面可以在代码中动态创建和销毁，另一方面可以作为设备函数的参数传入，从而满足一些特殊的需求。

使用纹理对象主要包括纹理对象创建、纹理访问和纹理对象销毁。

纹理对象创建之前首先要分别对纹理资源和纹理对象属性进行确定，分别对应cudaResourceDesc和cudaTextureDesc；然后即可利用cudaCreateTextureObject来创建纹理对象。本文通过代码来解释这个过程：

    // Texture resource: describes the device memory that the texture object reads from.
    struct cudaResourceDesc resDesc;
    memset(&resDesc, 0, sizeof(resDesc));
    // resType selects the form of the backing device memory. The main options are:
    //   cudaResourceTypeArray          (2D texture memory, 2D texture object)
    //   cudaResourceTypeMipmappedArray (rarely used)
    //   cudaResourceTypeLinear         (1D linear memory, 1D texture object)
    //   cudaResourceTypePitch2D        (1D pitched linear memory, 2D texture object)
    resDesc.resType = cudaResourceTypeArray;
    // res carries one sub-structure per resource type (it is a union in the CUDA
    // headers, not an enum); fill in the member that matches resType:
    //   cudaResourceTypeArray          -> res.array.array
    //   cudaResourceTypeMipmappedArray -> res.mipmap.mipmap
    //   cudaResourceTypeLinear         -> res.linear.devPtr
    //                                     (also set res.linear.sizeInBytes and res.linear.desc)
    //   cudaResourceTypePitch2D        -> res.pitch2D.devPtr
    //                                     (also set res.pitch2D.pitchInBytes, res.pitch2D.width,
    //                                      res.pitch2D.height and res.pitch2D.desc)
    resDesc.res.array.array = cuArray; // the 2D texture memory to bind; cuArray is defined outside this snippet

    // Texture object attributes.
    // They mirror the texture-reference settings, so they are not explained again here.
    struct cudaTextureDesc texDesc;
    memset(&texDesc, 0, sizeof(texDesc));
    texDesc.addressMode[0]   = cudaAddressModeWrap;
    texDesc.addressMode[1]   = cudaAddressModeWrap;
    texDesc.filterMode       = cudaFilterModeLinear;
    texDesc.readMode         = cudaReadModeElementType;
    texDesc.normalizedCoords = 1;

    // Create the texture object from the two descriptors above.
    // NOTE(review): the returned cudaError_t is discarded here; real code should check it.
    cudaTextureObject_t texObj = 0;
    cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);

纹理对象的纹理访问也和纹理参考一样，也是使用tex1D或tex2D等函数进行操作。

// 1D texture fetch: reads the texture object texObj at coordinate x.
template<class T> T tex1D(cudaTextureObject_t texObj, float x);

// 2D texture fetch: reads the texture object texObj at coordinate (x, y).
template<class T> T tex2D(cudaTextureObject_t texObj, float x, float y);

纹理对象的销毁直接使用cudaDestroyTextureObject即可。值得注意的是应该先销毁对象，然后再释放对应的设备内存。

#include <cuda_runtime.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Stops the program with file/line and the CUDA error text when a runtime call fails.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t status_ = (call);                                      \
        if (status_ != cudaSuccess) {                                      \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(status_));                          \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

// Kernel: samples the texture at coordinates rotated by theta around the image
// centre and writes one float per pixel into output.
//
// Launch layout: a 2D grid of 2D blocks with one thread per output pixel; the
// grid may overhang the image because out-of-range threads return early.
//   output  - device buffer of width * height floats (row-major)
//   texObj  - texture object read with normalized coordinates (tu, tv)
//   width   - image width in pixels
//   height  - image height in pixels
//   theta   - rotation angle in radians
__global__ void transformKernel(float* output,
                                cudaTextureObject_t texObj,
                                int width, int height,
                                float theta)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is rounded up to whole blocks, so threads beyond the image
    // edge must not touch output (they would write out of bounds).
    if (x >= (unsigned int)width || y >= (unsigned int)height)
    {
        return;
    }

    // Normalized coordinates in [0, 1).
    float u = x / (float)width;
    float v = y / (float)height;

    // Rotate around the image centre (0.5, 0.5).
    u -= 0.5f;
    v -= 0.5f;
    float tu = u * cosf(theta) - v * sinf(theta) + 0.5f;
    float tv = v * cosf(theta) + u * sinf(theta) + 0.5f;

    // Fetch from the texture and store the result.
    output[y * width + x] = tex2D<float>(texObj, tu, tv);
}

// Host driver: uploads a 10x10 ramp image into a CUDA array, wraps it in a
// texture object, runs transformKernel on it and releases every resource.
int main()
{
    // Test data. The sizes are compile-time constants so h_data is a plain
    // array rather than a variable-length array.
    const int width = 10;
    const int height = 10;
    float h_data[width * height];

    for (int y = 0; y < height; y++)
    {
        for (int x = 0; x < width; x++)
        {
            h_data[y * width + x] = (float)(x + y);
        }
    }

    // Rotation angle in radians passed to the kernel.
    const float angle = 0.5f;

    // One 32-bit float channel per texel.
    cudaChannelFormatDesc channelDesc =
        cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    cudaArray* cuArray;
    CUDA_CHECK(cudaMallocArray(&cuArray, &channelDesc, width, height));

    // The host rows are tightly packed, so the source pitch equals the row size.
    const size_t rowBytes = width * sizeof(float);
    CUDA_CHECK(cudaMemcpy2DToArray(cuArray, 0, 0, h_data, rowBytes, rowBytes,
                                   height, cudaMemcpyHostToDevice));

    // Describe the resource backing the texture object.
    struct cudaResourceDesc resDesc;
    memset(&resDesc, 0, sizeof(resDesc));
    resDesc.resType = cudaResourceTypeArray;
    resDesc.res.array.array = cuArray;

    // Describe how the texture is sampled.
    struct cudaTextureDesc texDesc;
    memset(&texDesc, 0, sizeof(texDesc));
    texDesc.addressMode[0]   = cudaAddressModeWrap;
    texDesc.addressMode[1]   = cudaAddressModeWrap;
    texDesc.filterMode       = cudaFilterModeLinear;
    texDesc.readMode         = cudaReadModeElementType;
    texDesc.normalizedCoords = 1;

    cudaTextureObject_t texObj = 0;
    CUDA_CHECK(cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL));

    float* output;
    CUDA_CHECK(cudaMalloc(&output, width * height * sizeof(float)));

    // Launch the kernel with enough 16x16 blocks to cover the image.
    dim3 dimBlock(16, 16);
    dim3 dimGrid((width  + dimBlock.x - 1) / dimBlock.x,
                 (height + dimBlock.y - 1) / dimBlock.y);
    transformKernel<<<dimGrid, dimBlock>>>(output,
                                           texObj, width, height,
                                           angle);
    CUDA_CHECK(cudaGetLastError());
    // The launch is asynchronous: wait for the kernel before tearing down
    // the texture object it reads from.
    CUDA_CHECK(cudaDeviceSynchronize());

    // Destroy the texture object first ...
    CUDA_CHECK(cudaDestroyTextureObject(texObj));

    // ... then release the device memory behind it.
    CUDA_CHECK(cudaFreeArray(cuArray));
    CUDA_CHECK(cudaFree(output));

    return 0;
}

在使用纹理对象时还有一点需要特别注意。通常而言，使用纹理对象的目的就是同时使用多个纹理对象，因此会申请一个cudaTextureObject_t数组。注意：如果以指针形式把该数组传给核函数，必须先将其拷贝到device端才能正常使用。下边给出一个示例。

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

// Number of texture objects summed by kernel_set_value and created in main.
enum { NUM_TEXTURES = 4 };

// Prints message to stderr and terminates the program when status is an error.
static void check_cuda(cudaError_t status, const char *message)
{
    if(status != cudaSuccess)
    {
        fprintf(stderr,"%s",message);
        exit( EXIT_FAILURE );
    }
}

// Kernel: for every pixel (x, y) sums the texel read from each of the
// NUM_TEXTURES texture objects and stores the sum in dev_result.
//
// Launch layout: a 2D grid of 2D blocks with one thread per pixel; the grid may
// overhang the image because out-of-range threads return early.
//   texObj     - pointer to NUM_TEXTURES texture object handles; main passes a
//                device-memory copy of the host array
//   dev_result - device buffer of width * height floats (row-major)
//   width      - image width in pixels
//   height     - image height in pixels
__global__ void kernel_set_value(const cudaTextureObject_t *texObj,float *dev_result,int width,int height)
{
    int x = threadIdx.x + blockIdx.x*blockDim.x;
    int y = threadIdx.y + blockIdx.y*blockDim.y;

    // Valid pixels are 0..width-1 and 0..height-1. The comparison must be >=,
    // otherwise x == width or y == height slips through and writes out of bounds.
    if(x<0 || x>=width || y<0 || y>=height)
    {
        return;
    }

    float sum = 0.0f;
    for(int i=0;i<NUM_TEXTURES;i++)
    {
        // The textures in main use unnormalized coordinates with point filtering.
        sum += tex2D<float>(texObj[i],(float)x,(float)y);
    }

    int pos = y*width + x;
    dev_result[pos]= sum;
}

// Host driver: builds NUM_TEXTURES identical 10x10 textures filled with 1,
// copies their handles to the device, sums them per pixel on the GPU and
// prints the result.
int main()
{
    const int array_size_width = 10;
    const int array_size_height = 10;
    float random_array[array_size_width*array_size_height];
    for(int i=0;i<array_size_width*array_size_height;i++)
    {
        random_array[i] = 1;
    }

    //error status
    cudaError_t cuda_status;

    //only chose one GPU
    cuda_status = cudaSetDevice(0);
    if(cuda_status != cudaSuccess)
    {
        fprintf(stderr,"cudaSetDevice failed! Do you have a CUDA-Capable GPU installed?");
        return 1;
    }

    // The host rows are tightly packed, so the source pitch equals the row size.
    const size_t row_bytes = sizeof(float)*array_size_width;

    cudaArray *dev_random_array[NUM_TEXTURES];
    cudaTextureObject_t texObj[NUM_TEXTURES];
    for(int i=0;i<NUM_TEXTURES;i++)
    {
        cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
        //allocate memory on the GPU
        check_cuda(cudaMallocArray(&dev_random_array[i],
                                   &channelDesc,
                                   array_size_width,
                                   array_size_height),
                   "cudaMallocArray Failed");

        // cudaMemcpyToArray is deprecated; copy row by row with the 2D variant.
        check_cuda(cudaMemcpy2DToArray(dev_random_array[i],
                                       0,
                                       0,
                                       random_array,
                                       row_bytes,
                                       row_bytes,
                                       array_size_height,
                                       cudaMemcpyHostToDevice),
                   "cudaMemcpy2DToArray Failed");

        // Specify texture
        struct cudaResourceDesc resDesc;
        memset(&resDesc, 0, sizeof(resDesc));
        resDesc.resType         = cudaResourceTypeArray;
        resDesc.res.array.array = dev_random_array[i];

        // Specify texture object parameters
        struct cudaTextureDesc texDesc;
        memset(&texDesc, 0, sizeof(texDesc));
        texDesc.addressMode[0]   = cudaAddressModeWrap;
        texDesc.addressMode[1]   = cudaAddressModeWrap;
        texDesc.filterMode       = cudaFilterModePoint;
        texDesc.readMode         = cudaReadModeElementType;
        texDesc.normalizedCoords = 0;

        check_cuda(cudaCreateTextureObject(&texObj[i], &resDesc, &texDesc, NULL),
                   "cudaCreateTextureObject Failed");
    }

    // Copy the texture object handles to the device: the kernel receives them
    // through a pointer, so the array itself must live in device memory.
    cudaTextureObject_t *dev_texObj;
    check_cuda(cudaMalloc((void**)&dev_texObj,sizeof(cudaTextureObject_t)*NUM_TEXTURES),
               "cudaMalloc Failed");
    check_cuda(cudaMemcpy(dev_texObj,texObj,sizeof(cudaTextureObject_t)*NUM_TEXTURES,cudaMemcpyHostToDevice),
               "cudaMemcpy Failed");

    float *dev_result;
    check_cuda(cudaMalloc((void**)&dev_result,sizeof(float)*array_size_height*array_size_width),
               "cudaMalloc Failed");

    dim3 threads(16,16);
    dim3 grid((array_size_width+threads.x-1)/threads.x,(array_size_height+threads.y-1)/threads.y);

    kernel_set_value<<<grid,threads>>>(dev_texObj,dev_result,array_size_width,array_size_height);
    check_cuda(cudaGetLastError(),"kernel_set_value Failed");

    // This blocking copy also waits for the kernel, so execution errors surface here.
    check_cuda(cudaMemcpy(random_array,dev_result,sizeof(float)*array_size_width*array_size_height,cudaMemcpyDeviceToHost),
               "cudaMemcpy Failed");

    for(int i=0;i<array_size_width*array_size_height;i++)
    {
        printf("%f\n",random_array[i]);
    }

    //free
    for(int i=0;i<NUM_TEXTURES;i++)
    {
        // Destroy the texture object before releasing the array it is bound to.
        check_cuda(cudaDestroyTextureObject(texObj[i]),"cudaDestroyTextureObject Failed");
        check_cuda(cudaFreeArray(dev_random_array[i]),"cudaFreeArray Failed");
    }
    check_cuda(cudaFree(dev_texObj),"cudaFree Failed");
    check_cuda(cudaFree(dev_result),"cudaFree Failed");

    return 0;
}

文章来源: CUDA：纹理内存入门到精通--纹理对象

标签

sizeof

dev

cuda

random