// Depth-of-field gather pass (compute shader).
// This section pulls in the shared shader libraries, declares the kernel variants and
// keywords, and declares the textures and samplers read and written by the kernel.
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/DepthOfFieldCommon.hlsl"

#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch

// Both kernels are compiled from the same MAIN body: one with NEAR defined, one with FAR.
#pragma kernel KMainNear MAIN=KMainNear NEAR
#pragma kernel KMainFar MAIN=KMainFar FAR

// LOW_RESOLUTION selects unfiltered texture loads (see the note below), USE_TILES selects
// the tile-list driven dispatch, ENABLE_ALPHA additionally accumulates and writes alpha.
#pragma multi_compile _ LOW_RESOLUTION
#pragma multi_compile _ USE_TILES
#pragma multi_compile _ ENABLE_ALPHA

#include "Packages/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/PostProcessDefines.hlsl"

// Color source read for every gather tap.
TEXTURE2D_X(_InputTexture);
// CoC read for every gather tap; also the center-pixel CoC in the FAR kernel.
TEXTURE2D_X(_InputCoCTexture);
// Center-pixel CoC in the NEAR kernel.
TEXTURE2D_X(_InputDilatedCoCTexture);

// Gathered color (plus alpha when ENABLE_ALPHA is set).
RW_TEXTURE2D_X(CTYPE, _OutputTexture);
// Written by the NEAR kernel only.
RW_TEXTURE2D_X(float, _OutputAlphaTexture);

SAMPLER(sampler_LinearClamp);
SAMPLER(sampler_TrilinearClamp);

// [NOTE-FORCE-DOF-TEX-LOAD]: Only enable sampler usage on higher resolutions.
// On low resolutions with hardware DRS (quarter res), a bilinear sampler can add dynamic
// resolution artifacts due to the rounding of pixels during the DoF pyramid downsample.
// The proper fix would be to constrain the available DRS resolutions to multiples of 8, so
// that a good quarter-res downsample can be performed without losing the extra column.
// To avoid that complexity, we just use a Load (no filter) so we grab the closest sample.
// Using a filter would aggravate this artifact and cause jitter in the DoF blur result.
// Filtered sampling is only used at full resolution; the LOW_RESOLUTION variant reads the
// inputs with unfiltered texture loads instead. See: [NOTE-FORCE-DOF-TEX-LOAD]
#if LOW_RESOLUTION
#define USE_SAMPLER 0
#else
#define USE_SAMPLER 1
#endif

// The FAR kernel samples at a fractional mip level, the NEAR kernel always samples mip 0.
#if FAR
#define SamplerTap sampler_TrilinearClamp
#else
#define SamplerTap sampler_LinearClamp
#endif

// Gather kernel taps, decoded one at a time through UnpackKernelCoord().
// NOTE(review): HLSL requires an element type here and the declaration had none. `uint` is
// assumed - confirm it matches the UnpackKernelCoord() parameter in DepthOfFieldCommon.hlsl.
StructuredBuffer<uint> _BokehKernel;

#if USE_TILES
// List of tiles to process; each thread group consumes exactly one entry.
ConsumeStructuredBuffer<TileData> _TileList;

// Tile coordinates extracted from _TileList for the thread group
groupshared uint2 gs_tileCoord;
#endif

CBUFFER_START(cb0)
    float4 _Params1;
    float4 _Params2;
    float4 _TexelSize;  // xy: bounds used to reject out-of-range threads, zw: passed to GetPositionInput()
CBUFFER_END

#define SampleCount     _Params1.x  // Taps per kernel axis; the loop runs SampleCount^2 taps
#define ScaledRadius    _Params1.y  // Radius term used for mip selection (FAR only)
#define BarrelClipping  _Params1.z  // Strength of the barrel clipping approximation
#define Radius          _Params1.w  // Radius term used to scale the tap offsets
#define MaxMips         _Params2.x  // Upper bound for the sampled mip level (FAR only)

#define GROUP_RES   8u
#define GROUP_SIZE  (GROUP_RES * GROUP_RES)

// Gather kernel, compiled as KMainNear (NEAR) and KMainFar (FAR).
//
// For each pixel, reads the pixel's own CoC, then accumulates SampleCount^2 taps of
// _InputTexture / _InputCoCTexture at offsets taken from _BokehKernel and scaled by that CoC.
// The weighted result is written to _OutputTexture; the NEAR variant also writes a coverage
// value to _OutputAlphaTexture.
//
// With USE_TILES each 64-thread group consumes one tile from _TileList and covers an
// 8x8 pixel area starting at the unpacked tile coordinate; otherwise the kernel is
// dispatched as a regular 8x8 grid.
#if USE_TILES
[numthreads(GROUP_SIZE, 1, 1)]
void MAIN(uint groupThreadId : SV_GroupThreadID)
#else
[numthreads(GROUP_RES, GROUP_RES, 1)]
void MAIN(uint3 dispatchThreadId : SV_DispatchThreadID)
#endif
{
#if USE_TILES
    // First thread of the group is responsible for grabbing the tile coordinates
    if (groupThreadId == 0u)
    {
        TileData tileData = _TileList.Consume();
        gs_tileCoord = UnpackTileCoord(tileData);
    }

    // Make gs_tileCoord visible to the whole group before anyone reads it
    GroupMemoryBarrierWithGroupSync();

    // Compute the actual pixel coordinate we're working on
    uint2 dispatchThreadId = gs_tileCoord + uint2(groupThreadId % GROUP_RES, groupThreadId / GROUP_RES);
#else
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);
#endif

    if (any(dispatchThreadId.xy >= uint2(_TexelSize.xy)))
        return; // Out of bounds, discard

    PositionInputs posInputs = GetPositionInput(float2(dispatchThreadId.xy), _TexelSize.zw, uint2(GROUP_RES, GROUP_RES));
    float2 uv = posInputs.positionNDC;
    float2 barrelUV = (uv * 2.0 - 1.0) * BarrelClipping;

    // Current pixel CoC
#if NEAR
    float samp0CoC = LOAD_TEXTURE2D_X(_InputDilatedCoCTexture, posInputs.positionSS).x;
#else // FAR
    float samp0CoC = LOAD_TEXTURE2D_X(_InputCoCTexture, posInputs.positionSS).x;
#endif

    // Per-tap UV offset scale: grows with the center pixel's CoC
    float2 sampDist = _PostProcessScreenSize.zw * samp0CoC * Radius;

    // Mip level to read the inputs from: larger CoC reads a coarser mip (FAR only)
#if NEAR
    float mip = 0.0;
#else
    float mip = min(MaxMips, (1.0 / (SampleCount - 1.5)) * samp0CoC * ScaledRadius);
#endif

    uint mipCeiled = ceil(mip);
    float texelsToClamp = (1u << mipCeiled) + 1;

    float4 acc = 0.0;
    float nearWeightAcc = 0.0;
    float accAlpha = 0.0;
    float TotalSampleCount = SampleCount * SampleCount;

    // Loop-invariant values, computed once up front
    const uint tapCount = uint(TotalSampleCount);
#if !USE_SAMPLER
    // See: [NOTE-FORCE-DOF-TEX-LOAD]
    // Scale that turns a UV into a texel coordinate w.r.t. the mip level for a texture load.
    const float2 loadScale = _TexelSize.xy / (1u << (uint)mip);
#endif

    // Gather samples
    UNITY_LOOP
    for (uint i = 0u; i < tapCount; i++)
    {
        float2 sampTap = UnpackKernelCoord(_BokehKernel, i);

        // Note: this barrel clipping / optical vignetting trick isn't correct as we're symmetrically
        // shifting samples instead of actually clipping them but due to the low number of samples
        // we can't do a proper implementation so this will do for now and it looks close-enough at
        // low clipping values
        float clipping = 1.0 - abs(dot(sampTap, barrelUV));
        sampTap = sampTap * sampDist * clipping + uv;

#if USE_SAMPLER
        sampTap = ClampAndScaleUVPostProcessTexture(sampTap, _PostProcessScreenSize.zw, texelsToClamp);
        CTYPE sampColor = SAMPLE_TEXTURE2D_X_LOD(_InputTexture, SamplerTap, sampTap, mip).CTYPE_SWIZZLE;
        float sampCoC = SAMPLE_TEXTURE2D_X_LOD(_InputCoCTexture, SamplerTap, sampTap, mip).x;
#else
        // See: [NOTE-FORCE-DOF-TEX-LOAD]
        // Need to manually compute a coordinate w.r.t. the mip level for a texture load.
        sampTap *= loadScale;
        CTYPE sampColor = LOAD_TEXTURE2D_X_LOD(_InputTexture, sampTap, mip).CTYPE_SWIZZLE;
        float sampCoC = LOAD_TEXTURE2D_X_LOD(_InputCoCTexture, sampTap, mip).x;
#endif

#if NEAR
        {
            // Taps whose CoC exceeds the center CoC are attenuated
            float weight = saturate(1.0 - (sampCoC - samp0CoC));
            // acc.w accumulates the weight of taps that have a non-zero CoC (used for alpha below)
            acc += float4(sampColor.xyz, sampCoC > 0.0) * weight;
            nearWeightAcc += weight;

        #ifdef ENABLE_ALPHA
            accAlpha += sampColor.w * weight;
        #endif
        }
#else // FAR
        {
            // Weight & pre-multiply to limit bleeding on the focused area
            float weight = saturate(1.0 - (samp0CoC - sampCoC));
            // acc.w accumulates the CoC-scaled weight used as the normalization term below
            acc += float4(sampColor.xyz, sampCoC) * weight;

        #ifdef ENABLE_ALPHA
            accAlpha += sampColor.w * weight;
        #endif
        }
#endif
    }

#if NEAR
    // Coverage derived from the accumulated weight of taps with a non-zero CoC
    float alpha = saturate(sqrt((acc.w / TotalSampleCount) * PI));
    alpha = smoothstep(0.0, 1.0, alpha);

    #ifdef ENABLE_ALPHA
    _OutputTexture[COORD_TEXTURE2D_X(posInputs.positionSS)] = float4(acc.xyz, accAlpha) / (nearWeightAcc + 1e-5); // zero-div guard
    #else
    _OutputTexture[COORD_TEXTURE2D_X(posInputs.positionSS)] = acc.xyz / (nearWeightAcc + 1e-5); // zero-div guard
    #endif
    _OutputAlphaTexture[COORD_TEXTURE2D_X(posInputs.positionSS)] = alpha;
#else // FAR
    #ifdef ENABLE_ALPHA
    _OutputTexture[COORD_TEXTURE2D_X(posInputs.positionSS)] = float4(acc.xyz, accAlpha) / (acc.w + 1e-5); // zero-div guard
    #else
    _OutputTexture[COORD_TEXTURE2D_X(posInputs.positionSS)] = acc.xyz / (acc.w + 1e-5); // zero-div guard
    #endif
#endif
}