// Source listing: forked from BilalY/Rasagar (147 lines, 4.7 KiB, plaintext).
#pragma kernel CSVFXIndirectArgs
|
|
|
|
#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch glcore gles3 webgpu
|
|
|
|
#include "HLSLSupport.cginc"
|
|
|
|
// Per-dispatch constants.
CBUFFER_START(Uniform)
    uint maxNb;  // Exclusive upper bound on the entry index; entries at or past it are skipped by the kernel.
    uint offset; // Added to the dispatch thread id to obtain the entry index.
CBUFFER_END

// Thread-group width of the kernel; also the group size used when converting
// an element count into an indirect-dispatch group count.
#define NB_THREADS_PER_GROUP 64
|
|
|
|
// One entry of inputCpuBuffer, describing how the indirect arguments of an
// entry must be built.
// TODO use generic name (indexCount isn't always what it expects to be)
struct InputData
{
    // Primitive selector read by the kernel:
    // 0 point, 1 lines, 2 quads, 3 hexahedron, 4 meshes, 5 triangles,
    // 6 octagon, 0xffffffff indirect dispatch.
    uint type;
    // Copied to the first argument (IndexCountPerInstance) for meshes only.
    uint indexCount;
    // Copied to the third argument (startIndex) for meshes only.
    uint startIndex;
    // Copied to the fourth argument (baseVertex) for meshes only.
    uint baseVertex;
};
|
|
|
|
// Raw buffer holding one 32-bit element count per entry (read at byte offset index * 4).
ByteAddressBuffer inputGpuBuffer;

// Per-entry description (primitive type and mesh parameters), indexed like inputGpuBuffer.
StructuredBuffer<InputData> inputCpuBuffer;

// Destination for the generated arguments: the kernel addresses 10 uints per entry.
#if SHADER_API_WEBGPU
// WebGPU does not support "texel buffers" yet.
RWStructuredBuffer<uint> outputIndirectArgs;
#else
RWBuffer<uint> outputIndirectArgs;
#endif
|
|
|
|
// Returns (IndexCountPerInstance, InstanceCount) for a procedurally indexed primitive.
// Up to and including singleDrawLimit elements everything is drawn as a single
// instance; above that limit (16-bit indices cannot address more vertices) the
// draw is split into instances of elementsPerInstance elements, rounding the
// instance count up.
uint2 VFXSplitIntoInstances(uint elementCount, uint indicesPerElement, uint singleDrawLimit, uint elementsPerInstance)
{
    if (elementCount > singleDrawLimit)
    {
        uint instanceCount = (elementCount + (elementsPerInstance - 1)) / elementsPerInstance;
        return uint2(elementsPerInstance * indicesPerElement, instanceCount);
    }
    return uint2(elementCount * indicesPerElement, 1);
}

// Fills the indirect draw / dispatch arguments of one entry per thread.
// Each entry owns 10 consecutive uints in outputIndirectArgs:
//   [0..3] regular arguments, [5..8] the same arguments for XR single-pass
//   instancing; [4] and [9] (StartInstance) are not written here, they are
//   set to 0 at initialization.
// Entries whose type matches none of the cases below get all-zero arguments.
[numthreads(NB_THREADS_PER_GROUP,1,1)]
void CSVFXIndirectArgs(uint3 id : SV_DispatchThreadID)
{
    uint entry = id.x + offset;
    if (entry >= maxNb)
        return; // Thread has no entry to process.

    uint count = inputGpuBuffer.Load(entry << 2); // One uint per entry, byte addressed.
    InputData desc = inputCpuBuffer[entry];

    uint4 drawArgs = (uint4)0;

    switch (desc.type)
    {
        case 0: // point
            drawArgs.xy = uint2(count, 1);
            break;

        case 1: // lines
            drawArgs.xy = uint2(count * 2, 1);
            break;

        case 2: // quads: 6 indices each, split above 16384 (16bits indices, 4 vertices per element)
            drawArgs.xy = VFXSplitIntoInstances(count, 6, 16384, 2048);
            break;

        case 3: // hexahedron: 36 indices each, split above 8192 (16bits indices, 8 vertices per element)
            drawArgs.xy = VFXSplitIntoInstances(count, 36, 8192, 1024);
            break;

        case 4: // meshes: one instance per element, mesh parameters come from the CPU description
            drawArgs = uint4(desc.indexCount, count, desc.startIndex, desc.baseVertex);
            break;

        case 5: // triangles
            drawArgs.xy = uint2(count * 3, 1);
            break;

        case 6: // octagon: 18 indices each, split above 8192 (16bits indices, 8 vertices per element)
            drawArgs.xy = VFXSplitIntoInstances(count, 18, 8192, 1024);
            break;

        case 0xffffffff: // dispatch
        {
            // Clamp indirect dispatch to the maximum dispatch width of DX11 : 65535 (D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION)
            uint clampedCount = min(count, 65535 * NB_THREADS_PER_GROUP);
            uint groupCount = (clampedCount + NB_THREADS_PER_GROUP - 1) / NB_THREADS_PER_GROUP;
            drawArgs.xyz = uint3(groupCount, 1u, 1u);
            break;
        }
    }

    // XR single-pass instancing draws every instance once per view (2 views);
    // dispatch arguments are left untouched.
    uint xrInstanceCount = drawArgs.y;
    if (desc.type != 0xffffffff)
        xrInstanceCount *= 2;

    uint base = entry * 10;

    // Regular arguments.
    outputIndirectArgs[base + 0] = drawArgs.x; // IndexCountPerInstance or ThreadGroupCountX
    outputIndirectArgs[base + 1] = drawArgs.y; // InstanceCount or ThreadGroupCountY
    outputIndirectArgs[base + 2] = drawArgs.z; // startIndex or ThreadGroupCountZ
    outputIndirectArgs[base + 3] = drawArgs.w; // baseVertex
    // base + 4 (StartInstance) set to 0 at initialization

    // XR single-pass instancing variant (same data, instance count multiplied by the number of instanced views).
    outputIndirectArgs[base + 5] = drawArgs.x;
    outputIndirectArgs[base + 6] = xrInstanceCount;
    outputIndirectArgs[base + 7] = drawArgs.z;
    outputIndirectArgs[base + 8] = drawArgs.w;
    // base + 9 (StartInstance) set to 0 at initialization
}
|