forked from BilalY/Rasagar
276 lines
14 KiB
C#
276 lines
14 KiB
C#
|
using Unity.Mathematics;
|
||
|
|
||
|
namespace UnityEngine.Rendering.RadeonRays
|
||
|
{
|
||
|
internal class HlbvhBuilder
|
||
|
{
|
||
|
// Compute shader holding the BVH build kernels. The kernel indices below are
// looked up once in the constructor with FindKernel and never reassigned,
// so they are declared readonly.
private readonly ComputeShader shaderBuildHlbvh;
private readonly int kernelInit;
private readonly int kernelCalculateAabb;
private readonly int kernelCalculateMortonCodes;
private readonly int kernelInitClusters;
private readonly int kernelMergeClusters;
private readonly int kernelFindPreferredNeighbor;
private readonly int kernelWriteLeafNodes;
private readonly int kernelBuildTreeBottomUp;
private readonly int kernelClearUpdateFlags;

// Compute shader and kernels used only by ReorderIndexBuffer.
private readonly ComputeShader shaderReorderTriangleIndices;
private readonly int kernelOrderIndices;
private readonly int kernelCopyOrderedIndicesBack;

// Helper passes invoked from Execute (key/value sort and the scan over ClusterValidity).
// NOTE(review): intentionally left non-readonly. Their declarations are not visible
// in this file; if either were a mutable struct, readonly would make calls operate
// on a defensive copy.
private RadixSort radixSort;
private Scan scan;

// Dispatch sizing: group counts are obtained by ceil-dividing the triangle count by
// kGroupSize or by kTrianglesPerGroup (= kTrianglesPerThread * kGroupSize).
// NOTE(review): presumably these must match the thread-group size and per-thread
// workload declared in the compute shaders - confirm before changing.
private const uint kTrianglesPerThread = 8u;
private const uint kGroupSize = 256u;
private const uint kTrianglesPerGroup = kTrianglesPerThread * kGroupSize;
|
||
|
|
||
|
/// <summary>
/// Creates a builder from the given shader collection: stores the two compute
/// shaders it needs, resolves the index of every kernel by name, and creates
/// the RadixSort and Scan helpers.
/// </summary>
/// <param name="shaders">
/// Source of <c>buildHlbvh</c> and <c>reorderTriangleIndices</c>; also passed on
/// to the RadixSort and Scan constructors.
/// </param>
public HlbvhBuilder(RadeonRaysShaders shaders)
{
    // --- BVH build shader -------------------------------------------------
    ComputeShader buildShader = shaders.buildHlbvh;
    shaderBuildHlbvh = buildShader;

    // Kernels that run for every build.
    kernelInit = buildShader.FindKernel("Init");
    kernelCalculateAabb = buildShader.FindKernel("CalculateAabb");
    kernelCalculateMortonCodes = buildShader.FindKernel("CalculateMortonCodes");
    kernelWriteLeafNodes = buildShader.FindKernel("WriteLeafNodes");
    kernelBuildTreeBottomUp = buildShader.FindKernel("BuildTreeBottomUp");

    // Kernels for the optional clustering path (and its no-reduction fallback).
    kernelInitClusters = buildShader.FindKernel("InitClusters");
    kernelFindPreferredNeighbor = buildShader.FindKernel("FindPreferredNeighbor");
    kernelMergeClusters = buildShader.FindKernel("MergeClusters");
    kernelClearUpdateFlags = buildShader.FindKernel("ClearUpdateFlags");

    // --- Index reordering shader -------------------------------------------
    ComputeShader reorderShader = shaders.reorderTriangleIndices;
    shaderReorderTriangleIndices = reorderShader;
    kernelOrderIndices = reorderShader.FindKernel("OrderIndices");
    kernelCopyOrderedIndicesBack = reorderShader.FindKernel("CopyOrderedIndicesBack");

    // --- Helper passes -------------------------------------------------------
    radixSort = new RadixSort(shaders);
    scan = new Scan(shaders);
}
|
||
|
|
||
|
/// <summary>
/// Returns the total size of the scratch buffer layout computed for
/// <paramref name="triangleCount"/> triangles.
/// </summary>
/// <param name="triangleCount">Number of triangles the build will process.</param>
/// <returns>The <c>TotalSize</c> of the layout produced by GetScratchBufferLayout.</returns>
public uint GetScratchDataSizeInDwords(uint triangleCount)
{
    return GetScratchBufferLayout(triangleCount).TotalSize;
}
|
||
|
|
||
|
/// <summary>
/// Computes <c>2 * leafCount - 1</c>, the node count of a binary tree with
/// <paramref name="leafCount"/> leaves.
/// </summary>
/// <param name="leafCount">Number of leaves.</param>
/// <returns>
/// <c>2 * leafCount - 1</c> in unsigned 32-bit arithmetic.
/// NOTE(review): with <c>leafCount == 0</c> the subtraction wraps in an unchecked
/// context - confirm callers never rely on that case.
/// </returns>
public static uint GetBvhNodeCount(uint leafCount)
{
    uint twiceLeaves = 2 * leafCount;
    return twiceLeaves - 1;
}
|
||
|
|
||
|
/// <summary>
/// Estimates a node count as 80% of <paramref name="leafCount"/> (truncated
/// toward zero) plus a fixed margin of 10.
/// </summary>
/// <param name="leafCount">Number of leaves (triangles) before any reduction.</param>
/// <returns><c>(uint)(leafCount * 0.8) + 10</c>.</returns>
public static uint GetBvhNodeCountPrediction(uint leafCount)
{
    double scaledLeafCount = (double)leafCount * 0.8;
    uint truncated = (uint)scaledLeafCount;
    return truncated + 10;
}
|
||
|
|
||
|
/// <summary>
/// Returns the size of the result buffer needed for
/// <paramref name="triangleCount"/> triangles: one 16-unit slot per node
/// reported by GetBvhNodeCount, plus one extra slot for the header.
/// </summary>
/// <param name="triangleCount">Number of triangles (leaves).</param>
/// <returns><c>(GetBvhNodeCount(triangleCount) + 1) * 16</c>.</returns>
public uint GetResultDataSizeInDwords(uint triangleCount)
{
    const uint nodeSize = 16;

    // The header occupies one additional node-sized slot.
    uint slotCount = GetBvhNodeCount(triangleCount) + 1;
    return slotCount * nodeSize;
}
|
||
|
|
||
|
/// <summary>
/// Same computation as GetResultDataSizeInDwords, but based on the estimate
/// from GetBvhNodeCountPrediction instead of the exact node count.
/// </summary>
/// <param name="triangleCount">Number of triangles (leaves).</param>
/// <returns><c>(GetBvhNodeCountPrediction(triangleCount) + 1) * 16</c>.</returns>
public uint GetResultDataSizeInDwordsPrediction(uint triangleCount)
{
    const uint nodeSize = 16;

    // The header occupies one additional node-sized slot.
    uint slotCount = GetBvhNodeCountPrediction(triangleCount) + 1;
    return slotCount * nodeSize;
}
|
||
|
|
||
|
/// <summary>
/// Offsets of the regions inside the shared scratch buffer, as filled in by
/// GetScratchBufferLayout. Several regions alias the same memory; the aliasing
/// noted per field is the one set up by GetScratchBufferLayout.
/// </summary>
private struct ScratchBufferOffsets
{
    // Offset 0; GetScratchBufferLayout reserves 6 units for it.
    public uint Aabb;
    // Outputs of the radix sort; triangleCount entries each.
    public uint SortedPrimitiveRefs;
    public uint SortedMortonCodes;

    // Inputs of the radix sort; triangleCount entries each, followed by the
    // sort's own scratch memory. This whole range is aliased by the
    // cluster / tree-building regions below.
    public uint PrimitiveRefs;
    public uint MortonCodes;
    public uint SortMemory;

    // NOTE(review): TempBvh and EnabledNodes are never assigned by
    // GetScratchBufferLayout in this file, so they keep their default of 0.
    // They look like leftovers - confirm before relying on them.
    public uint TempBvh;
    public uint EnabledNodes;
    // Set equal to Deltas.
    public uint ScanScratch;

    // Set equal to PrimitiveRefs.
    public uint ClusterValidity;
    // PrimitiveRefs + triangleCount.
    public uint ClusterRange;
    // Set equal to ClusterToNodeIndex.
    public uint PreferredNeighbor;
    // PrimitiveRefs + 2 * triangleCount.
    public uint ClusterToNodeIndex;
    // PrimitiveRefs + 3 * triangleCount.
    public uint Deltas;
    // Set equal to ClusterValidity.
    public uint InternalNodeRange;

    // Size of the whole scratch allocation (returned by GetScratchDataSizeInDwords).
    public uint TotalSize;
}
|
||
|
|
||
|
/// <summary>
/// Records into <paramref name="cmd"/> every compute dispatch of a BVH build:
/// init, AABB and Morton code calculation, radix sort, then either the
/// clustering path or a flag clear, and finally the bottom-up tree build.
/// Nothing is executed here; commands are only appended to the command buffer.
/// </summary>
/// <param name="cmd">Command buffer receiving the commands.</param>
/// <param name="vertices">Bound to the kernels as <c>g_vertices</c>.</param>
/// <param name="verticesOffset">Passed to the shader as <c>g_vertices_offset</c>.</param>
/// <param name="vertexStride">Passed to the shader as <c>g_constants_vertex_stride</c>.</param>
/// <param name="indices">Bound to the kernels as <c>g_indices</c>.</param>
/// <param name="indicesOffset">Passed to the shader as <c>g_indices_offset</c>.</param>
/// <param name="triangleCount">Triangle count; drives the scratch layout and every dispatch size.</param>
/// <param name="scratch">Scratch buffer, laid out according to GetScratchBufferLayout.</param>
/// <param name="result">Bound to the kernels as <c>g_bvh</c>.</param>
/// <param name="resultOffset">Passed to the shader as <c>g_bvh_offset</c>.</param>
/// <param name="resultSizeInNodes">
/// Capacity of the result; the shader receives this value minus one as <c>g_bvh_max_node_count</c>.
/// </param>
/// <param name="reduceMemoryIterations">
/// Number of FindPreferredNeighbor/MergeClusters rounds. Zero skips clustering
/// and enables the <c>NO_REDUCTION</c> shader keyword.
/// </param>
public void Execute(
    CommandBuffer cmd,
    GraphicsBuffer vertices, int verticesOffset, uint vertexStride,
    GraphicsBuffer indices, int indicesOffset, uint triangleCount,
    GraphicsBuffer scratch, GraphicsBuffer result, uint resultOffset, uint resultSizeInNodes,
    uint reduceMemoryIterations = 2)
{
    bool useClustering = reduceMemoryIterations != 0;

    Common.EnableKeyword(cmd, shaderBuildHlbvh, "TOP_LEVEL", false);
    Common.EnableKeyword(cmd, shaderBuildHlbvh, "NO_REDUCTION", !useClustering);
    ScratchBufferOffsets layout = GetScratchBufferLayout(triangleCount);

    // Binds the buffers for one kernel, then dispatches enough groups to cover
    // triangleCount items at the given number of items per group.
    void BindAndDispatch(int kernel, bool useSortedCodes, uint itemsPerGroup)
    {
        BindKernelArguments(cmd, kernel, vertices, indices, scratch, layout, result, useSortedCodes);
        cmd.DispatchCompute(shaderBuildHlbvh, kernel, (int)Common.CeilDivide(triangleCount, itemsPerGroup), 1, 1);
    }

    // Constants for the build shader.
    cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_indices_offset, indicesOffset);
    cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_vertices_offset, verticesOffset);
    cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_constants_vertex_stride, (int)vertexStride);
    cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_constants_triangle_count, (int)triangleCount);
    cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_bvh_offset, (int)resultOffset);
    cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_bvh_max_node_count, (int)resultSizeInNodes - 1);
    cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_cluster_validity_offset, (int)layout.ClusterValidity);
    cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_cluster_range_offset, (int)layout.ClusterRange);
    cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_neighbor_offset, (int)layout.PreferredNeighbor);
    cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_cluster_to_node_offset, (int)layout.ClusterToNodeIndex);
    cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_deltas_offset, (int)layout.Deltas);
    cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_internal_node_range_offset, (int)layout.InternalNodeRange);

    // Init always runs as a single thread group.
    BindKernelArguments(cmd, kernelInit, vertices, indices, scratch, layout, result, false);
    cmd.DispatchCompute(shaderBuildHlbvh, kernelInit, 1, 1, 1);

    // These two kernels are bound to the unsorted Morton code / primitive ref regions.
    BindAndDispatch(kernelCalculateAabb, false, kTrianglesPerGroup);
    BindAndDispatch(kernelCalculateMortonCodes, false, kTrianglesPerGroup);

    // Sort (Morton code, primitive ref) pairs into the Sorted* regions.
    radixSort.Execute(cmd, scratch,
        layout.MortonCodes, layout.SortedMortonCodes,
        layout.PrimitiveRefs, layout.SortedPrimitiveRefs,
        layout.SortMemory, triangleCount);

    if (useClustering)
    {
        // The original RadeonRays implementation stores only one triangle per leaf node.
        // This optional path first agglomerates multiple triangles per node before
        // the BVH tree construction starts.
        // Based on the PLOC paper ("Parallel Locally-Ordered Clustering for Bounding Volume Hierarchy Construction").
        BindAndDispatch(kernelInitClusters, true, kGroupSize);

        for (uint iteration = 0; iteration < reduceMemoryIterations; ++iteration)
        {
            BindAndDispatch(kernelFindPreferredNeighbor, true, kGroupSize);
            BindAndDispatch(kernelMergeClusters, true, kGroupSize);
        }

        scan.Execute(cmd, scratch, layout.ClusterValidity, layout.ClusterToNodeIndex, layout.ScanScratch, triangleCount);

        BindAndDispatch(kernelWriteLeafNodes, true, kGroupSize);
    }
    else
    {
        BindAndDispatch(kernelClearUpdateFlags, true, kTrianglesPerGroup);
    }

    // In RadeonRays, HLBVH construction was based on the paper
    // "Maximizing Parallelism in the Construction of BVHs, Octrees, and k-d Trees".
    // It is replaced here by an implementation of "Fast and Simple Agglomerative LBVH Construction",
    // which does everything in a single bottom-up pass.
    BindAndDispatch(kernelBuildTreeBottomUp, true, kTrianglesPerGroup);
}
|
||
|
|
||
|
// Single-entry cache for GetScratchBufferLayout: the last computed layout and
// the triangle count it was computed for.
private ScratchBufferOffsets cachedScratchOffsets;
private uint cachedTriangleCount = 0;
// True once cachedScratchOffsets holds a computed layout. Without this flag the
// initial cachedTriangleCount of 0 would be mistaken for a valid cache entry, and
// a first call with triangleCount == 0 would return an all-zero default layout.
private bool hasCachedScratchOffsets = false;

/// <summary>
/// Computes (or returns the cached) layout of the scratch buffer for
/// <paramref name="triangleCount"/> triangles.
/// </summary>
/// <param name="triangleCount">Number of triangles the build will process.</param>
/// <returns>
/// Offsets of every scratch region plus <c>TotalSize</c>. The TempBvh and
/// EnabledNodes fields are not assigned and stay 0.
/// </returns>
ScratchBufferOffsets GetScratchBufferLayout(uint triangleCount)
{
    if (hasCachedScratchOffsets && cachedTriangleCount == triangleCount)
    {
        return cachedScratchOffsets;
    }

    var result = new ScratchBufferOffsets();

    // Sequentially reserved regions: AABB (6 units), the sort outputs, the sort
    // inputs, then the radix sort's own scratch memory.
    uint offset = 0;
    result.Aabb = offset;
    offset += 6;
    result.SortedPrimitiveRefs = offset;
    offset += triangleCount;
    result.SortedMortonCodes = offset;
    offset += triangleCount;

    result.PrimitiveRefs = offset;
    offset += triangleCount;
    result.MortonCodes = offset;
    offset += triangleCount;
    result.SortMemory = offset;
    offset += (uint)radixSort.GetScratchDataSizeInDwords(triangleCount);
    result.TotalSize = offset;

    // The regions below alias the sort inputs / sort scratch: they start at
    // PrimitiveRefs and are laid out in triangleCount-sized steps.

    // used by kernelWriteLeafNodes
    result.ClusterValidity = result.PrimitiveRefs;
    result.ClusterRange = result.PrimitiveRefs + triangleCount;
    result.ClusterToNodeIndex = result.PrimitiveRefs + 2 * triangleCount;
    result.Deltas = result.PrimitiveRefs + 3 * triangleCount;

    result.ScanScratch = result.Deltas;

    // used by Clustering
    result.PreferredNeighbor = result.ClusterToNodeIndex;

    // used by kernelBuildTreeBottomUp
    result.InternalNodeRange = result.ClusterValidity;

    // The aliased regions end at Deltas + triangleCount, which can extend past
    // the end of the sort scratch; the allocation must cover whichever is larger.
    result.TotalSize = math.max(result.TotalSize, result.Deltas + triangleCount);

    cachedScratchOffsets = result;
    cachedTriangleCount = triangleCount;
    hasCachedScratchOffsets = true;

    return result;
}
|
||
|
|
||
|
/// <summary>
/// Binds the four buffers a build kernel uses and points the shader's Morton
/// code / primitive reference offsets at either the sorted or the unsorted
/// scratch regions.
/// </summary>
/// <param name="cmd">Command buffer receiving the commands.</param>
/// <param name="kernel">Kernel index within the build shader.</param>
/// <param name="vertices">Bound as <c>g_vertices</c>.</param>
/// <param name="indices">Bound as <c>g_indices</c>.</param>
/// <param name="scratch">Bound as <c>g_scratch_buffer</c>.</param>
/// <param name="scratchLayout">Layout supplying the region offsets.</param>
/// <param name="result">Bound as <c>g_bvh</c>.</param>
/// <param name="setSortedCodes">
/// True to use the SortedMortonCodes/SortedPrimitiveRefs offsets, false to use
/// MortonCodes/PrimitiveRefs.
/// </param>
private void BindKernelArguments(
    CommandBuffer cmd,
    int kernel,
    GraphicsBuffer vertices,
    GraphicsBuffer indices,
    GraphicsBuffer scratch,
    ScratchBufferOffsets scratchLayout,
    GraphicsBuffer result,
    bool setSortedCodes)
{
    cmd.SetComputeBufferParam(shaderBuildHlbvh, kernel, SID.g_vertices, vertices);
    cmd.SetComputeBufferParam(shaderBuildHlbvh, kernel, SID.g_indices, indices);
    cmd.SetComputeBufferParam(shaderBuildHlbvh, kernel, SID.g_scratch_buffer, scratch);
    cmd.SetComputeBufferParam(shaderBuildHlbvh, kernel, SID.g_bvh, result);

    // Select the pre-sort or post-sort regions.
    uint mortonCodesOffset = setSortedCodes ? scratchLayout.SortedMortonCodes : scratchLayout.MortonCodes;
    uint primitiveRefsOffset = setSortedCodes ? scratchLayout.SortedPrimitiveRefs : scratchLayout.PrimitiveRefs;

    cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_morton_codes_offset, (int)mortonCodesOffset);
    cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_primitive_refs_offset, (int)primitiveRefsOffset);
}
|
||
|
/// <summary>
/// Records the two index reordering dispatches: OrderIndices followed by
/// CopyOrderedIndicesBack. The PrimitiveRefs scratch region is passed to the
/// shader as its temporary index storage (<c>g_temp_indices_offset</c>).
/// NOTE(review): nothing in this class calls this method - confirm whether
/// it is still needed.
/// </summary>
/// <param name="cmd">Command buffer receiving the commands.</param>
/// <param name="indices">Bound to both kernels as <c>g_indices</c>.</param>
/// <param name="indicesOffset">Passed to the shader as <c>g_indices_offset</c>.</param>
/// <param name="triangleCount">Triangle count; also determines the dispatch size.</param>
/// <param name="scratch">Bound to both kernels as <c>g_scratch_buffer</c>.</param>
/// <param name="scratchLayout">Layout supplying the SortedPrimitiveRefs and PrimitiveRefs offsets.</param>
private void ReorderIndexBuffer(
    CommandBuffer cmd,
    GraphicsBuffer indices, int indicesOffset, uint triangleCount,
    GraphicsBuffer scratch, ScratchBufferOffsets scratchLayout)
{
    ComputeShader reorderShader = shaderReorderTriangleIndices;

    // Binds both buffers to one kernel and dispatches it over all triangles.
    void RunKernel(int kernel)
    {
        cmd.SetComputeBufferParam(reorderShader, kernel, SID.g_indices, indices);
        cmd.SetComputeBufferParam(reorderShader, kernel, SID.g_scratch_buffer, scratch);
        cmd.DispatchCompute(reorderShader, kernel, (int)Common.CeilDivide(triangleCount, kTrianglesPerGroup), 1, 1);
    }

    // Constants used by both kernels.
    cmd.SetComputeIntParam(reorderShader, SID.g_indices_offset, indicesOffset);
    cmd.SetComputeIntParam(reorderShader, SID.g_constants_triangle_count, (int)triangleCount);
    cmd.SetComputeIntParam(reorderShader, SID.g_sorted_prim_refs_offset, (int)scratchLayout.SortedPrimitiveRefs);
    cmd.SetComputeIntParam(reorderShader, SID.g_temp_indices_offset, (int)scratchLayout.PrimitiveRefs);

    RunKernel(kernelOrderIndices);
    RunKernel(kernelCopyOrderedIndicesBack);
}
|
||
|
}
|
||
|
}
|