Bank Conflicts

Example: Accessing Row-Major versus Column-Major

Row-major write/column-major read:

/*
 * Writes each thread's block-local linear index into a shared-memory tile in
 * row order, then reads the tile back with the two indices swapped and stores
 * the value to out.
 *
 * out: destination array; each thread writes the single element at
 *      threadIdx.y * blockDim.x + threadIdx.x. blockIdx is not used, so every
 *      block of a launch targets the same range of out.
 *
 * NOTE(review): the name says "ReadRow", but the load below is transposed
 * (column read). Confirm which of the two is intended.
 * NOTE(review): the transposed load stays inside the tile only when
 * threadIdx.x < BDIMY and threadIdx.y < BDIMX -- presumably launched with a
 * square BDIMX x BDIMY block; confirm against the launch site.
 */
__global__ void setRowReadRow(int *out) {
    // statically sized scratch tile shared by the threads of one block
    __shared__ int tile[BDIMY][BDIMX];

    const unsigned int row = threadIdx.y;
    const unsigned int col = threadIdx.x;

    // linear position of this thread inside its block; doubles as the
    // offset into out
    const unsigned int linear = row * blockDim.x + col;

    // row-order store: neighbouring threadIdx.x values touch neighbouring
    // words (conflict-free)
    tile[row][col] = linear;

    // the load below reads elements stored by other threads, so every
    // store in the block has to be finished first
    __syncthreads();

    // transposed load: neighbouring threadIdx.x values are a full tile row
    // apart (bank conflict)
    out[linear] = tile[col][row];
}

Padding Statically Declared Shared Memory

/*
 * Row-order store / transposed load through a shared-memory tile whose inner
 * dimension is widened by IPAD extra columns.
 *
 * out: destination array; each thread writes the single element at
 *      threadIdx.y * blockDim.x + threadIdx.x. blockIdx is not used, so every
 *      block of a launch targets the same range of out.
 *
 * The padding columns are never written or read here; they only lengthen the
 * row pitch of the tile. Presumably IPAD is chosen so that the transposed
 * load no longer maps a warp onto a single bank -- confirm against the
 * definitions of BDIMX and IPAD.
 *
 * NOTE(review): the transposed load stays inside the tile only when
 * threadIdx.x < BDIMY and threadIdx.y < BDIMX + IPAD; confirm against the
 * launch configuration.
 */
__global__ void setRowReadColPad(int *out) {
    // statically sized scratch tile; each row carries IPAD padding words
    __shared__ int tile[BDIMY][BDIMX + IPAD];

    const unsigned int ty = threadIdx.y;
    const unsigned int tx = threadIdx.x;

    // offset of this thread's element in out (block-local linear index)
    const unsigned int offset = ty * blockDim.x + tx;

    // store the linear index at this thread's own (row, column) slot
    tile[ty][tx] = offset;

    // barrier: the load below reads a slot stored by a different thread
    __syncthreads();

    // load with the indices swapped and write the result out
    const int value = tile[tx][ty];
    out[offset] = value;
}

Conflict-free | Conflict-free | Conflict-free | Conflict-free (broadcast) | Bank conflict