#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/TextureXR.hlsl"

#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch

#pragma kernel ComputeMaxZ MAX_Z_DOWNSAMPLE=1
#pragma kernel ComputeFinalMask FINAL_MASK=1
#pragma kernel DilateMask DILATE_MASK=1

#pragma multi_compile _ PLANAR_OBLIQUE_DEPTH

// In some cases we might want to avoid stopping integrating volumetric even if > max distance if the gradient is very big.
// Realistically, with the dilation step this was never seen as necessary.
#define GENERATE_GRADIENT 0

#if GENERATE_GRADIENT
// This must match the component count defined in VolumetricLighting.cs
#define OUT_MASK_FORMAT float2
#define MASK_SWIZZLE xy
#else
#define OUT_MASK_FORMAT float
#define MASK_SWIZZLE x
#endif

#ifdef PLATFORM_LANE_COUNT
#define WAVE_SIZE PLATFORM_LANE_COUNT
#else
#define WAVE_SIZE 64
#endif

// ---------------------------------

CBUFFER_START(cb)
float4 _SrcOffsetAndLimit;
float _DilationWidth;
CBUFFER_END

// xy: coordinate limit used to clamp source reads, zw: offset added to the pixel coordinate by GetMinDepth.
#define _SrcLimit _SrcOffsetAndLimit.xy
#define _DepthMipOffset _SrcOffsetAndLimit.zw

// ---------------------------------

#ifdef MAX_Z_DOWNSAMPLE

#define GROUP_SIZE 8

#ifndef PLATFORM_SUPPORTS_WAVE_INTRINSICS
// Scratch storage for the group-wide max reduction, one slot per thread of the group.
groupshared float gs_maxDepth[GROUP_SIZE * GROUP_SIZE];
#endif

RW_TEXTURE2D_X(float, _OutputTexture);

// Loads the camera depth at pixCoord and converts it to a value suitable for a max reduction.
// Pixels whose raw depth equals UNITY_RAW_FAR_CLIP_VALUE return 1e10; otherwise the result is the
// view-space Z (PLANAR_OBLIQUE_DEPTH) or the linear eye depth of the sample.
float GetDepthToDownsample(uint2 pixCoord)
{
    float deviceDepth = LoadCameraDepth(pixCoord);
    float outputDepth = 0;

    if (deviceDepth == UNITY_RAW_FAR_CLIP_VALUE)
    {
        outputDepth = 1e10f;
    }
    else
    {
#ifdef PLANAR_OBLIQUE_DEPTH
        outputDepth = ComputeViewSpacePosition(float2(pixCoord) * _ScreenSize.zw, deviceDepth, UNITY_MATRIX_I_P).z;
#else
        // Reuse the depth fetched above instead of loading the same texel a second time.
        outputDepth = LinearEyeDepth(deviceDepth, _ZBufferParams);
#endif
    }

    return outputDepth;
}

// Identity pass-through; the depth is returned unchanged.
float PrepareDepthForOutput(float depth)
{
    return depth;
}

// Important! This function assumes that a max operation is carried so if using reversed Z the depth must be passed as 1-depth or must be linear.
// GetDepthToDownsample should be used to enforce the right depth is used.
//
// Returns the maximum of `depth` across the participating threads.
//   gid       - unused.
//   threadIdx - flattened index of the thread inside the group; indexes gs_maxDepth.
//   depth     - this thread's contribution.
// NOTE(review): the wave-intrinsic path only reduces across the current wave. It matches the
// groupshared path only if the whole GROUP_SIZE * GROUP_SIZE group runs as a single wave - confirm
// for every platform that defines PLATFORM_SUPPORTS_WAVE_INTRINSICS.
float ParallelReduction(uint gid, uint threadIdx, float depth)
{
#ifdef PLATFORM_SUPPORTS_WAVE_INTRINSICS
    return WaveActiveMax(depth);
#else
    gs_maxDepth[threadIdx] = depth;
    GroupMemoryBarrierWithGroupSync();

    // Tree reduction: each step folds the upper half of the active range into the lower half.
    UNITY_UNROLL
    for (uint s = (GROUP_SIZE * GROUP_SIZE) / 2u; s > 0u; s >>= 1u)
    {
        if (threadIdx < s)
        {
            gs_maxDepth[threadIdx] = max(gs_maxDepth[threadIdx], gs_maxDepth[threadIdx + s]);
        }

        // Every thread reaches the barrier, including those that did not write this step.
        GroupMemoryBarrierWithGroupSync();
    }

    return gs_maxDepth[0];
#endif
}

// Writes, for each GROUP_SIZE x GROUP_SIZE tile of the screen, the maximum converted depth of the tile
// to _OutputTexture at the tile (group) coordinate.
[numthreads(GROUP_SIZE, GROUP_SIZE, 1)]
void ComputeMaxZ(uint3 dispatchThreadId : SV_DispatchThreadID, uint gid : SV_GroupIndex, uint2 groupThreadId : SV_GroupThreadID, uint3 groupID : SV_GroupID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);

    uint threadIdx = groupThreadId.y * GROUP_SIZE + groupThreadId.x;

    // Threads outside the screen contribute -1 to the max reduction.
    float currDepth = -1;
    if (all((float2)(dispatchThreadId.xy) < _ScreenSize.xy))
        currDepth = GetDepthToDownsample(dispatchThreadId.xy);

    float maxDepth = ParallelReduction(gid, threadIdx, currDepth);

    // Race condition which is fine, but errors on some platforms.
    if (threadIdx == 0)
    {
        _OutputTexture[COORD_TEXTURE2D_X(groupID.xy)] = maxDepth;
    }
}

#elif FINAL_MASK

TEXTURE2D_X(_InputTexture);
RW_TEXTURE2D_X(OUT_MASK_FORMAT, _OutputTexture);

// Outputs the maximum of the four samples of a 2x2 quad.
void DownsampleDepth(float s0, float s1, float s2, float s3, out float maxDepth)
{
    maxDepth = max(Max3(s0, s1, s2), s3);
}

// Loads the camera depth at (_DepthMipOffset + pixCoord) and returns it as linear eye depth.
float GetMinDepth(uint2 pixCoord)
{
    uint2 minDepthLocation = _DepthMipOffset + pixCoord;
    float minDepth = LoadCameraDepth(minDepthLocation);
    return LinearEyeDepth(minDepth, _ZBufferParams);
}

// Returns the absolute difference between the two depths, relative to maxDepth.
float GetDepthGradient(float minDepth, float maxDepth)
{
    //return minDepth;
    return abs(maxDepth - minDepth) / maxDepth;
}

// Downsamples _InputTexture by 2x2 with a max filter and writes the result (plus the depth gradient
// when GENERATE_GRADIENT is enabled) to _OutputTexture.
[numthreads(8, 8, 1)]
void ComputeFinalMask(uint3 dispatchThreadId : SV_DispatchThreadID, uint gid : SV_GroupIndex, uint2 groupThreadId : SV_GroupThreadID, uint3 groupID : SV_GroupID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);

    // Upper-left pixel coordinate of quad that this thread will read
    uint2 srcPixelUL = (dispatchThreadId.xy << 1);

    // Convert the limit once so the clamps below are plain integer operations.
    uint2 srcLimit = (uint2)(_SrcLimit);

    float s0 = _InputTexture[COORD_TEXTURE2D_X(min(srcPixelUL + uint2(0u, 0u), srcLimit))].x;
    float s1 = _InputTexture[COORD_TEXTURE2D_X(min(srcPixelUL + uint2(1u, 0u), srcLimit))].x;
    float s2 = _InputTexture[COORD_TEXTURE2D_X(min(srcPixelUL + uint2(0u, 1u), srcLimit))].x;
    float s3 = _InputTexture[COORD_TEXTURE2D_X(min(srcPixelUL + uint2(1u, 1u), srcLimit))].x;

    float maxDepth;
    DownsampleDepth(s0, s1, s2, s3, maxDepth);

#if GENERATE_GRADIENT
    float minDepth = GetMinDepth(dispatchThreadId.xy);
    float gradient = GetDepthGradient(minDepth, maxDepth);
    _OutputTexture[COORD_TEXTURE2D_X(dispatchThreadId.xy)] = float2(maxDepth, gradient);
#else
    _OutputTexture[COORD_TEXTURE2D_X(dispatchThreadId.xy)] = maxDepth;
#endif
}

#elif DILATE_MASK

TEXTURE2D_X(_InputTexture);
RW_TEXTURE2D_X(OUT_MASK_FORMAT, _OutputTexture);

// Component-wise maximum of the running value and a new sample.
OUT_MASK_FORMAT DilateValue(OUT_MASK_FORMAT currMax, OUT_MASK_FORMAT currSample)
{
#if GENERATE_GRADIENT
    return float2(max(currMax.x, currSample.x), max(currMax.y, currSample.y));
#else
    return max(currMax, currSample);
#endif
}

// Max-dilates _InputTexture with a square kernel of (2 * _DilationWidth + 1) taps per side and writes
// the result to _OutputTexture. Taps are clamped to [0, _SrcLimit - 1].
[numthreads(8, 8, 1)]
void DilateMask(uint3 dispatchThreadId : SV_DispatchThreadID, uint gid : SV_GroupIndex, uint2 groupThreadId : SV_GroupThreadID, uint3 groupID : SV_GroupID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);

    int dilationWidth = (int)_DilationWidth;

    // Make sure to convert the coordinate to signed for the kernel taps
    int2 currentCoord = (int2)dispatchThreadId.xy;

    // Loop-invariant upper bound for the taps, converted to integer once.
    int2 maxCoord = (int2)(_SrcLimit) - 1;

    OUT_MASK_FORMAT dilatedMaxVals = -1;

    for (int i = -dilationWidth; i <= dilationWidth; ++i)
    {
        for (int j = -dilationWidth; j <= dilationWidth; ++j)
        {
            // Evaluate the tap coordinate and clamp it to the texture
            int2 tapCoordinate = clamp(currentCoord + int2(i, j), int2(0, 0), maxCoord);
            OUT_MASK_FORMAT s = _InputTexture[COORD_TEXTURE2D_X(tapCoordinate)].MASK_SWIZZLE;
            dilatedMaxVals = DilateValue(dilatedMaxVals, s);
        }
    }

    _OutputTexture[COORD_TEXTURE2D_X(dispatchThreadId.xy)] = dilatedMaxVals;
}

#endif