Rasagar/Library/PackageCache/com.unity.visualeffectgraph/Shaders/VFXFillIndirectArgs.compute

#pragma kernel CSVFXIndirectArgs

#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch glcore gles3 webgpu

#include "HLSLSupport.cginc"

// Constants consumed by CSVFXIndirectArgs, declared through the CBUFFER_START/CBUFFER_END macros.
CBUFFER_START(Uniform)
    uint maxNb;  // Exclusive upper bound on the entry index: the kernel only processes entries with index < maxNb.
    uint offset; // Added to the dispatch thread id (x) to obtain the index of the entry a thread processes.
CBUFFER_END

// Thread-group width (x) of the CSVFXIndirectArgs kernel. The same value is used by the kernel to
// convert an element count into a thread-group count for entries of the "dispatch" type.
#define NB_THREADS_PER_GROUP 64

// Per-entry description read from inputCpuBuffer; tells the kernel how to turn an element count
// into indirect arguments.
struct InputData //TODO use generic name (indexCount isn't always what it expects to be)
{
    uint type;       // Selects the argument layout: 0 point, 1 lines, 2 quads, 3 hexahedron, 4 meshes, 5 triangles, 6 octagon, 0xffffffff dispatch.
    uint indexCount; // Written as-is to the first argument for the "meshes" type; not read for the other types.
    uint startIndex; // Written as-is to the third argument for the "meshes" type; not read for the other types.
    uint baseVertex; // Written as-is to the fourth argument for the "meshes" type; not read for the other types.
};

// Element counts: the kernel loads one uint per entry at byte offset (index << 2).
ByteAddressBuffer inputGpuBuffer;
// Per-entry description (see InputData), indexed with the same entry index as inputGpuBuffer.
StructuredBuffer<InputData> inputCpuBuffer;
// Destination of the generated arguments. Each entry owns 10 consecutive uints: the kernel writes
// slots 0-3 (regular arguments) and 5-8 (XR single-pass instancing variant) and leaves slots 4 and 9 untouched.
#if SHADER_API_WEBGPU
// WebGPU does not support "texel buffers" yet.
RWStructuredBuffer<uint> outputIndirectArgs;
#else
RWBuffer<uint> outputIndirectArgs;
#endif

// Computes (index count per instance, instance count) for primitives drawn with 16-bit indices.
// While particleCount does not exceed singleInstanceLimit, a single instance draws every particle.
// Above that limit the draw is split into instances of particlesPerInstance particles each,
// with the instance count rounded up so that every particle is covered.
uint2 VFXComputeBatchedDrawCounts(uint particleCount, uint indicesPerParticle, uint singleInstanceLimit, uint particlesPerInstance)
{
    uint2 counts;
    if (particleCount > singleInstanceLimit)
    {
        counts.x = particlesPerInstance * indicesPerParticle;
        counts.y = (particleCount + (particlesPerInstance - 1)) / particlesPerInstance;
    }
    else
    {
        counts.x = particleCount * indicesPerParticle;
        counts.y = 1;
    }
    return counts;
}

// Fills the indirect argument entries: one thread handles one entry.
// For each entry, the element count read from inputGpuBuffer is converted, according to the
// entry type stored in inputCpuBuffer, into draw arguments (or thread-group counts for the
// dispatch type) and written twice to outputIndirectArgs: once as-is and once for XR
// single-pass instancing.
[numthreads(NB_THREADS_PER_GROUP,1,1)]
void CSVFXIndirectArgs(uint3 id : SV_DispatchThreadID)
{
    const uint entry = offset + id.x;
    if (entry >= maxNb)
        return; // Nothing to do for threads past the last entry.

    uint count = inputGpuBuffer.Load(entry << 2); // One uint (4 bytes) per entry.
    InputData desc = inputCpuBuffer[entry];

    // Unknown types leave every argument at zero.
    uint4 drawArgs = uint4(0, 0, 0, 0);

    switch (desc.type)
    {
        case 0: // point
            drawArgs.x = count;
            drawArgs.y = 1;
            break;

        case 1: // lines
            drawArgs.x = count * 2;
            drawArgs.y = 1;
            break;

        case 2: // quads - 16bits indices, 4 vertices per element
            drawArgs.xy = VFXComputeBatchedDrawCounts(count, 6, 16384, 2048);
            break;

        case 3: // hexahedron - 16bits indices, 8 vertices per element
            drawArgs.xy = VFXComputeBatchedDrawCounts(count, 36, 8192, 1024);
            break;

        case 4: // meshes - one instance per element, geometry range comes from the entry description
            drawArgs = uint4(desc.indexCount, count, desc.startIndex, desc.baseVertex);
            break;

        case 5: // triangles
            drawArgs.x = count * 3;
            drawArgs.y = 1;
            break;

        case 6: // octagon - 16bits indices, 8 vertices per element
            drawArgs.xy = VFXComputeBatchedDrawCounts(count, 18, 8192, 1024);
            break;

        case 0xffffffff: // dispatch
            // Clamp indirect dispatch to the maximum dispatch width of DX11 : 65535 (D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION)
            count = min(count, 65535 * NB_THREADS_PER_GROUP);
            drawArgs.x = (count + NB_THREADS_PER_GROUP - 1) / NB_THREADS_PER_GROUP;
            drawArgs.y = 1u;
            drawArgs.z = 1u;
            break;
    }

    // Each entry owns 10 uints: [0..4] regular arguments, [5..9] XR single-pass instancing arguments.
    const uint regularBase = entry * 10;
    const uint xrBase = regularBase + 5;

    // Dispatch entries are not instanced per view; draw entries get their instance count doubled.
    const uint xrInstanceFactor = (desc.type != 0xffffffff) ? 2 : 1;

    outputIndirectArgs[regularBase + 0] = drawArgs.x; // IndexCountPerInstance or ThreadGroupCountX
    outputIndirectArgs[regularBase + 1] = drawArgs.y; // InstanceCount or ThreadGroupCountY
    outputIndirectArgs[regularBase + 2] = drawArgs.z; // startIndex or ThreadGroupCountZ
    outputIndirectArgs[regularBase + 3] = drawArgs.w; // baseVertex
    // regularBase + 4 (StartInstance) is set to 0 at initialization

    // XR single-pass instancing support (same data as above but instanceCount is multiplied by the number of instanced views)
    outputIndirectArgs[xrBase + 0] = drawArgs.x;
    outputIndirectArgs[xrBase + 1] = drawArgs.y * xrInstanceFactor;
    outputIndirectArgs[xrBase + 2] = drawArgs.z;
    outputIndirectArgs[xrBase + 3] = drawArgs.w;
    // xrBase + 4 (StartInstance) is set to 0 at initialization
}