The different addressing modes of CUDA textures

旧街凉风 提交于 2020-01-22 07:30:56

问题


I am using a CUDA texture in border addressing mode (cudaAddressModeBorder). I am reading texture coordinates using tex2D<float>(). When the texture coordinates fall outside the texture, tex2D<float>() returns 0.

How can I change this returned border value from 0 to something else? I could check the texture coordinate manually and set the border value myself. I was wondering if there was CUDA API where I can set such a border value.


回答1:


As mentioned by sgarizvi, CUDA supports only four, non-customizable address modes, namely, clamp, border, wrap and mirror, which are described in Section 3.2.11.1. of the CUDA programming guide.

The former two work in both unnormalized and normalized coordinates, while the latter two in normalized coordinates only.

To describe the first two, let us consider the unnormalized coordinates case and consider 1D signals, for the sake of simplicity. In this case, the input sequence is c[k], with k=0,...,M-1.

cudaAddressModeClamp

The signal c[k] is continued outside k=0,...,M-1 so that c[k] = c[0] for k < 0, and c[k] = c[M-1] for k >= M.

cudaAddressModeBorder

The signal c[k] is continued outside k=0,...,M-1 so that c[k] = 0 for k < 0and for k >= M.

Now, to describe the last two address modes, we are forced to consider normalized coordinates, so that the 1D input signal samples are assumed to be c[k / M], with k=0,...,M-1.

cudaAddressModeWrap

The signal c[k / M] is continued outside k=0,...,M-1 so that it is periodic with period equal to M. In other words, c[(k + p * M) / M] = c[k / M] for any (positive, negative or vanishing) integer p.

cudaAddressModeMirror

The signal c[k / M] is continued outside k=0,...,M-1 so that it is periodic with period equal to 2 * M - 2. In other words, c[l / M] = c[k / M] for any l and k such that (l + k)mod(2 * M - 2) = 0.

The following code illustrates all the four available address modes

#include <stdio.h>

texture<float, 1, cudaReadModeElementType> texture_clamp;
texture<float, 1, cudaReadModeElementType> texture_border;
texture<float, 1, cudaReadModeElementType> texture_wrap;
texture<float, 1, cudaReadModeElementType> texture_mirror;

/********************/
/* CUDA ERROR CHECK */
/********************/
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
    if (code != cudaSuccess) 
    {
        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}

/******************************/
/* CUDA ADDRESS MODE CLAMPING */
/******************************/
__global__ void Test_texture_clamping(const int M) {

    printf("Texture clamping - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_clamp, -(float)threadIdx.x));
    printf("Texture clamping - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_clamp, (float)(M + threadIdx.x)));

}

/****************************/
/* CUDA ADDRESS MODE BORDER */
/****************************/
__global__ void Test_texture_border(const int M) {

    printf("Texture border - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_border, -(float)threadIdx.x));
    printf("Texture border - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_border, (float)(M + threadIdx.x)));

}

/**************************/
/* CUDA ADDRESS MODE WRAP */
/**************************/
__global__ void Test_texture_wrap(const int M) {

    printf("Texture wrap - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_wrap, -(float)threadIdx.x/(float)M));
    printf("Texture wrap - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_wrap, (float)(M + threadIdx.x)/(float)M));

}

/****************************/
/* CUDA ADDRESS MODE MIRROR */
/****************************/
__global__ void Test_texture_mirror(const int M) {

    printf("Texture mirror - i = %i; value = %f\n", -threadIdx.x, tex1D(texture_mirror, -(float)threadIdx.x/(float)M));
    printf("Texture mirror - i = %i; value = %f\n", M + threadIdx.x, tex1D(texture_mirror, (float)(M + threadIdx.x)/(float)M));

}

/********/
/* MAIN */
/********/
void main(){

    const int M = 4;

    // --- Host side memory allocation and initialization
    float *h_data = (float*)malloc(M * sizeof(float));

    for (int i=0; i<M; i++) h_data[i] = (float)i;

    // --- Texture clamping
    cudaArray* d_data_clamping = NULL; gpuErrchk(cudaMallocArray(&d_data_clamping, &texture_clamp.channelDesc, M, 1)); 
    gpuErrchk(cudaMemcpyToArray(d_data_clamping, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); 
    cudaBindTextureToArray(texture_clamp, d_data_clamping); 
    texture_clamp.normalized = false; 
    texture_clamp.addressMode[0] = cudaAddressModeClamp;

    dim3 dimBlock(2 * M, 1); dim3 dimGrid(1, 1);
    Test_texture_clamping<<<dimGrid,dimBlock>>>(M);

    printf("\n\n\n");

    // --- Texture border
    cudaArray* d_data_border = NULL; gpuErrchk(cudaMallocArray(&d_data_border, &texture_border.channelDesc, M, 1)); 
    gpuErrchk(cudaMemcpyToArray(d_data_border, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); 
    cudaBindTextureToArray(texture_border, d_data_border); 
    texture_border.normalized = false; 
    texture_border.addressMode[0] = cudaAddressModeBorder;

    Test_texture_border<<<dimGrid,dimBlock>>>(M);

    printf("\n\n\n");

    // --- Texture wrap
    cudaArray* d_data_wrap = NULL; gpuErrchk(cudaMallocArray(&d_data_wrap, &texture_wrap.channelDesc, M, 1)); 
    gpuErrchk(cudaMemcpyToArray(d_data_wrap, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); 
    cudaBindTextureToArray(texture_wrap, d_data_wrap); 
    texture_wrap.normalized = true; 
    texture_wrap.addressMode[0] = cudaAddressModeWrap;

    Test_texture_wrap<<<dimGrid,dimBlock>>>(M);

    printf("\n\n\n");

    // --- Texture mirror
    cudaArray* d_data_mirror = NULL; gpuErrchk(cudaMallocArray(&d_data_mirror, &texture_mirror.channelDesc, M, 1)); 
    gpuErrchk(cudaMemcpyToArray(d_data_mirror, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice)); 
    cudaBindTextureToArray(texture_mirror, d_data_mirror); 
    texture_mirror.normalized = true ; 
    texture_mirror.addressMode[0] = cudaAddressModeMirror;

    Test_texture_mirror<<<dimGrid,dimBlock>>>(M);

    printf("\n\n\n");
}

Those are the outputs

index                  -7  -6  -5  -4  -3  -2  -1  0  1  2  3  4  5  6  7  8  9  10  11
clamp                   0   0   0   0   0   0   0  0  1  2  3  3  3  3  3  3  3   3   3
border                  0   0   0   0   0   0   0  0  1  2  3  0  0  0  0  0  0   0   0
wrap                    1   2   3   0   1   2   3  0  1  2  3  0  1  2  3  0  1   2   3
mirror                  1   2   3   3   2   1   0  0  1  2  3  3  2  1  0  0  1   2   3



回答2:


As of now (CUDA 5.5), the CUDA texture fetch behavior is not customizable. Only 1 of the 4 automatic built-in modes (i.e. Border, Clamp, Wrap and Mirror) can be utilized for out of range texture fetch.



来源:https://stackoverflow.com/questions/19020963/the-different-addressing-modes-of-cuda-textures

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!