# Source code for pyfr.backends.cuda.generator
# -*- coding: utf-8 -*-
from pyfr.backends.base.generator import BaseKernelGenerator
class CUDAKernelGenerator(BaseKernelGenerator):
    """Render point-wise PyFR kernels as CUDA ``__global__`` functions.

    The base class is assumed to provide ``self.ndim``, ``self.name``,
    ``self.body``, ``self._dims``, ``self.scalargs``, ``self.vectargs``
    and ``self.needs_ldim`` — TODO confirm against BaseKernelGenerator.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Specialise the thread-index and bounds-check snippets to the
        # dimensionality of the kernel: one flat index for 1D kernels,
        # a (x, y) pair for 2D kernels
        if self.ndim == 1:
            self._ix = 'int _x = blockIdx.x*blockDim.x + threadIdx.x;'
            self._limits = 'if (_x < _nx)'
        else:
            self._ix = ('int _x = blockIdx.x*blockDim.x + threadIdx.x;'
                        'int _y = blockIdx.y*blockDim.y + threadIdx.y;')
            self._limits = 'if (_x < _nx && _y < _ny)'

    def render(self):
        """Return the complete CUDA source for the kernel as a string.

        The emitted function computes the thread indices, defines the
        ``X_IDX``/``X_IDX_AOSOA`` indexing macros around the kernel body
        and guards the body with a bounds check against ``_nx``/``_ny``.
        """
        # Kernel spec (signature)
        spec = self._render_spec()

        # Iteration indices and limits
        ix, limits = self._ix, self._limits

        # Combine; the macros are #undef'd afterwards so that multiple
        # kernels can be concatenated into a single translation unit
        return '''{spec}
{{
    {ix}
    #define X_IDX (_x)
    #define X_IDX_AOSOA(v, nv) SOA_IX(X_IDX, v, nv)
    {limits}
    {{
        {body}
    }}
    #undef X_IDX
    #undef X_IDX_AOSOA
}}'''.format(spec=spec, ix=ix, limits=limits, body=self.body)

    def _render_spec(self):
        """Return the ``__global__`` function signature for the kernel.

        Argument order: dimension extents, scalar arguments, then the
        vector (view/array) arguments with any associated index,
        row-stride and leading-dimension parameters.
        """
        # We first need the argument list; starting with the dimensions
        kargs = ['int ' + d for d in self._dims]

        # Now add any scalar arguments
        kargs.extend('{0.dtype} {0.name}'.format(sa) for sa in self.scalargs)

        # Finally, add the vector arguments
        for va in self.vectargs:
            # Views: a base pointer plus an index array and, for
            # two-dimensional views, a row-stride array
            if va.isview:
                kargs.append('{0.dtype}* __restrict__ {0.name}_v'.format(va))
                kargs.append('const int* __restrict__ {0.name}_vix'
                             .format(va))

                if va.ncdim == 2:
                    kargs.append('const int* __restrict__ {0.name}_vrstri'
                                 .format(va))
            # Arrays
            else:
                # Intent in arguments should be marked constant
                const = 'const' if va.intent == 'in' else ''

                # .strip() removes the leading space when const == ''
                kargs.append('{0} {1.dtype}* __restrict__ {1.name}_v'
                             .format(const, va).strip())

                # Leading dimension, if required for this argument
                if self.needs_ldim(va):
                    kargs.append('int ld{0.name}'.format(va))

        return '__global__ void {0}({1})'.format(self.name, ', '.join(kargs))