forked from BilalY/Rasagar
117 lines
4.8 KiB
C#
117 lines
4.8 KiB
C#
|
|
||
|
namespace UnityEngine.Rendering.RadeonRays
|
||
|
{
|
||
|
internal class Scan
|
||
|
{
|
||
|
private ComputeShader shaderScan;
|
||
|
private int kernelScan;
|
||
|
|
||
|
private ComputeShader shaderReduce;
|
||
|
private int kernelReduce;
|
||
|
|
||
|
const uint kKeysPerThread = 4u;
|
||
|
const uint kGroupSize = 256u;
|
||
|
const uint kKeysPerGroup = kKeysPerThread * kGroupSize;
|
||
|
|
||
|
public Scan(RadeonRaysShaders shaders)
|
||
|
{
|
||
|
shaderScan = shaders.blockScan;
|
||
|
kernelScan = shaderScan.FindKernel("BlockScanAdd");
|
||
|
|
||
|
shaderReduce = shaders.blockReducePart;
|
||
|
kernelReduce = shaderReduce.FindKernel("BlockReducePart");
|
||
|
}
|
||
|
public void Execute(CommandBuffer cmd, GraphicsBuffer buffer, uint inputKeysOffset, uint outputKeysOffset, uint scratchDataOffset, uint size)
|
||
|
{
|
||
|
if (size > kKeysPerGroup)
|
||
|
{
|
||
|
var num_groups_level_1 = Common.CeilDivide(size, kKeysPerGroup);
|
||
|
|
||
|
// Do first round of part sum reduction.
|
||
|
SetState(cmd, shaderReduce, kernelReduce, size, buffer,
|
||
|
inputKeysOffset,
|
||
|
scratchDataOffset,
|
||
|
outputKeysOffset);
|
||
|
cmd.DispatchCompute(shaderReduce, kernelReduce, (int)num_groups_level_1, 1, 1);
|
||
|
|
||
|
if (num_groups_level_1 > kKeysPerGroup)
|
||
|
{
|
||
|
var num_groups_level_2 = Common.CeilDivide(num_groups_level_1, kKeysPerGroup);
|
||
|
|
||
|
// Do second round of part sum reduction.
|
||
|
SetState(cmd, shaderReduce, kernelReduce, num_groups_level_1, buffer,
|
||
|
scratchDataOffset,
|
||
|
scratchDataOffset + num_groups_level_1,
|
||
|
scratchDataOffset);
|
||
|
cmd.DispatchCompute(shaderReduce, kernelReduce, (int)num_groups_level_2, 1, 1);
|
||
|
|
||
|
// Scan level 2 inplace.
|
||
|
Common.EnableKeyword(cmd, shaderScan, "ADD_PART_SUM", false);
|
||
|
SetState(cmd, shaderScan, kernelScan, num_groups_level_2, buffer,
|
||
|
scratchDataOffset + num_groups_level_1,
|
||
|
scratchDataOffset,
|
||
|
scratchDataOffset + num_groups_level_1);
|
||
|
cmd.DispatchCompute(shaderScan, kernelScan, 1, 1, 1);
|
||
|
}
|
||
|
|
||
|
// Scan and add level 2 back to level 1.
|
||
|
{
|
||
|
Common.EnableKeyword(cmd, shaderScan, "ADD_PART_SUM", num_groups_level_1 > kKeysPerGroup);
|
||
|
SetState(cmd, shaderScan, kernelScan, num_groups_level_1, buffer,
|
||
|
scratchDataOffset,
|
||
|
scratchDataOffset + num_groups_level_1,
|
||
|
scratchDataOffset);
|
||
|
var num_groups_scan_level_1 = Common.CeilDivide(num_groups_level_1, kKeysPerGroup);
|
||
|
cmd.DispatchCompute(shaderScan, kernelScan, (int)num_groups_scan_level_1, 1, 1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Scan and add level 1 back.
|
||
|
{
|
||
|
Common.EnableKeyword(cmd, shaderScan, "ADD_PART_SUM", size > kKeysPerGroup);
|
||
|
SetState(cmd, shaderScan, kernelScan, size, buffer,
|
||
|
inputKeysOffset,
|
||
|
scratchDataOffset,
|
||
|
outputKeysOffset);
|
||
|
var num_groups_scan_level_0 = Common.CeilDivide(size, kKeysPerGroup);
|
||
|
cmd.DispatchCompute(shaderScan, kernelScan, (int)num_groups_scan_level_0, 1, 1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void SetState(
|
||
|
CommandBuffer cmd,
|
||
|
ComputeShader shader, int kernelIndex, uint size, GraphicsBuffer buffer,
|
||
|
uint inputKeysOffset, uint scratchDataOffset, uint outputKeysOffset)
|
||
|
{
|
||
|
cmd.SetComputeIntParam(shader, SID.g_constants_num_keys, (int)size);
|
||
|
|
||
|
cmd.SetComputeIntParam(shader, SID.g_constants_input_keys_offset, (int)inputKeysOffset);
|
||
|
cmd.SetComputeIntParam(shader, SID.g_constants_part_sums_offset, (int)scratchDataOffset);
|
||
|
cmd.SetComputeIntParam(shader, SID.g_constants_output_keys_offset, (int)outputKeysOffset);
|
||
|
|
||
|
cmd.SetComputeBufferParam(shader, kernelIndex, SID.g_buffer, buffer);
|
||
|
}
|
||
|
|
||
|
public ulong GetScratchDataSizeInDwords(uint size)
|
||
|
{
|
||
|
if (size <= kKeysPerGroup)
|
||
|
{
|
||
|
return 0;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
var sizeLevel1 = Common.CeilDivide(size, kKeysPerGroup);
|
||
|
if (sizeLevel1 <= kKeysPerGroup)
|
||
|
{
|
||
|
return sizeLevel1;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
var sizeLevel2 = Common.CeilDivide(sizeLevel1, kKeysPerGroup);
|
||
|
return (sizeLevel1 + sizeLevel2);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|