Rasagar/Library/PackageCache/com.unity.visualeffectgraph/Shaders/VFXFillIndirectArgs.compute
2024-08-26 23:07:20 +03:00

147 lines
4.7 KiB
Plaintext

// Declares CSVFXIndirectArgs as the compute kernel entry point of this file.
#pragma kernel CSVFXIndirectArgs
// Restricts compilation to the graphics APIs listed here.
#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch glcore gles3 webgpu
#include "HLSLSupport.cginc"
// Constants read by CSVFXIndirectArgs.
CBUFFER_START(Uniform)
uint maxNb; // exclusive upper bound on the entry index; threads at or past it write nothing
uint offset; // added to the dispatch thread id (x) to obtain the entry index
CBUFFER_END
// Thread-group width of the kernel. The same value is used as the group size when an
// element count is converted into a thread-group count for "dispatch" entries.
#define NB_THREADS_PER_GROUP 64
// Per-entry description read by CSVFXIndirectArgs together with the entry's element count.
// `type` selects how the element count is turned into indirect arguments:
//   0 point, 1 lines, 2 quads, 3 hexahedron, 4 meshes, 5 triangles, 6 octagon,
//   0xffffffff dispatch. Any other value leaves all four arguments at zero.
struct InputData //TODO use generic name (indexCount isn't always what it expects to be)
{
uint type; // primitive / dispatch selector (see list above)
uint indexCount; // read only for type 4 (meshes): written as the first argument
uint startIndex; // read only for type 4 (meshes): written as the third argument
uint baseVertex; // read only for type 4 (meshes): written as the fourth argument
};
// Raw buffer holding one 32-bit element count per entry; the kernel loads it at
// byte offset (entry index * 4).
// NOTE(review): the Gpu/Cpu naming suggests who fills each input buffer - confirm against the caller.
ByteAddressBuffer inputGpuBuffer;
// One InputData record per entry, indexed with the same entry index.
StructuredBuffer<InputData> inputCpuBuffer;
// Destination of the generated arguments: 10 uints per entry. The kernel writes
// slots 0-3 and 5-8 of each entry and never touches slots 4 and 9.
#if SHADER_API_WEBGPU
// WebGPU does not support "texel buffers" yet.
RWStructuredBuffer<uint> outputIndirectArgs;
#else
RWBuffer<uint> outputIndirectArgs;
#endif
// Returns (index count per instance, instance count) for a primitive type whose draw is
// issued as a single instance while elementCount <= singleInstanceLimit, and is otherwise
// split into instances of elementsPerInstance elements (instance count rounded up).
uint2 VFXSplitDrawIntoInstances(uint elementCount, uint indicesPerElement, uint singleInstanceLimit, uint elementsPerInstance)
{
    uint2 counts;
    if (elementCount <= singleInstanceLimit)
    {
        counts.x = elementCount * indicesPerElement;
        counts.y = 1;
    }
    else
    {
        counts.x = elementsPerInstance * indicesPerElement;
        counts.y = (elementCount + (elementsPerInstance - 1)) / elementsPerInstance;
    }
    return counts;
}

// Converts one (element count, InputData) pair per thread into indirect arguments.
// Each entry owns 10 consecutive uints in outputIndirectArgs: slots 0-3 hold the
// regular arguments, slots 5-8 the XR single-pass instancing variant. Slots 4 and 9
// (StartInstance) are never written here; they are set to 0 at initialization.
[numthreads(NB_THREADS_PER_GROUP,1,1)]
void CSVFXIndirectArgs(uint3 id : SV_DispatchThreadID)
{
    const uint entry = id.x + offset;
    if (entry >= maxNb)
        return;

    const uint UINTS_PER_ENTRY = 10;
    const uint XR_ARGS_OFFSET = 5;
    const uint XR_VIEW_COUNT = 2;

    InputData desc = inputCpuBuffer[entry];
    uint count = inputGpuBuffer.Load(entry * 4); // one uint per entry

    uint4 args = uint4(0, 0, 0, 0);
    switch (desc.type)
    {
        case 0: // point
            args.x = count;
            args.y = 1;
            break;

        case 1: // lines
            args.x = count * 2;
            args.y = 1;
            break;

        case 2: // quads - limit due to 16bits indices (4 vertices per element)
            args.xy = VFXSplitDrawIntoInstances(count, 6, 16384, 2048);
            break;

        case 3: // hexahedron - limit due to 16bits indices (8 vertices per element)
            args.xy = VFXSplitDrawIntoInstances(count, 36, 8192, 1024);
            break;

        case 4: // meshes
            args.x = desc.indexCount;
            args.y = count;
            args.z = desc.startIndex;
            args.w = desc.baseVertex;
            break;

        case 5: // triangles
            args.x = count * 3;
            args.y = 1;
            break;

        case 6: // octagon - limit due to 16bits indices (8 vertices per element)
            args.xy = VFXSplitDrawIntoInstances(count, 18, 8192, 1024);
            break;

        case 0xffffffff: // dispatch
        {
            // Clamp indirect dispatch to the maximum dispatch width of DX11 : 65535
            // (D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION)
            const uint maxDispatchThreads = 65535 * NB_THREADS_PER_GROUP;
            count = min(count, maxDispatchThreads);
            args.x = (count + NB_THREADS_PER_GROUP - 1) / NB_THREADS_PER_GROUP;
            args.y = 1u;
            args.z = 1u;
            break;
        }
    }

    // The XR variant multiplies the instance count by the number of instanced views,
    // except for dispatch entries, which keep their thread-group counts unchanged.
    uint xrInstanceCount = args.y;
    if (desc.type != 0xffffffff)
        xrInstanceCount *= XR_VIEW_COUNT;

    const uint baseSlot = entry * UINTS_PER_ENTRY;
    outputIndirectArgs[baseSlot + 0] = args.x; // IndexCountPerInstance or ThreadGroupCountX
    outputIndirectArgs[baseSlot + 1] = args.y; // InstanceCount or ThreadGroupCountY
    outputIndirectArgs[baseSlot + 2] = args.z; // startIndex or ThreadGroupCountZ
    outputIndirectArgs[baseSlot + 3] = args.w; // baseVertex

    const uint xrSlot = baseSlot + XR_ARGS_OFFSET;
    outputIndirectArgs[xrSlot + 0] = args.x;
    outputIndirectArgs[xrSlot + 1] = xrInstanceCount;
    outputIndirectArgs[xrSlot + 2] = args.z;
    outputIndirectArgs[xrSlot + 3] = args.w;
}