Rasagar/Library/PackageCache/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/DepthOfFieldGather.compute

187 lines
6.5 KiB
Plaintext
Raw Normal View History

2024-08-26 13:07:20 -07:00
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/DepthOfFieldCommon.hlsl"
#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch
#pragma kernel KMainNear MAIN=KMainNear NEAR
#pragma kernel KMainFar MAIN=KMainFar FAR
#pragma multi_compile _ LOW_RESOLUTION
#pragma multi_compile _ USE_TILES
#pragma multi_compile _ ENABLE_ALPHA
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/PostProcessDefines.hlsl"
// ---- Resources used by the gather kernels ----
// Color source for every gather tap.
TEXTURE2D_X(_InputTexture);
// Per-pixel circle of confusion: read for every tap, and for the center pixel in the FAR kernel.
TEXTURE2D_X(_InputCoCTexture);
// Dilated circle of confusion: read only for the center pixel in the NEAR kernel.
TEXTURE2D_X(_InputDilatedCoCTexture);
// Gathered color (and alpha when ENABLE_ALPHA is set) for the layer being processed.
RW_TEXTURE2D_X(CTYPE, _OutputTexture);
// Layer alpha; only written by the NEAR kernel.
RW_TEXTURE2D_X(float, _OutputAlphaTexture);
SAMPLER(sampler_LinearClamp);
SAMPLER(sampler_TrilinearClamp);
// [NOTE-FORCE-DOF-TEX-LOAD]: Only enable sampler usage on higher resolutions.
// On low resolutions with hardware DRS (quarter res), a bilinear sampler can add dynamic resolution
// artifacts due to the rounding of pixels during the DoF pyramid downsample.
// The proper fix would be to constrain the available DRS resolutions to multiples of 8, so we can
// perform a good quarter res downsample and not lose the extra column.
// To avoid complexity, we just use a Load (no filter) so we grab the closest sample. Using a filter
// would aggravate this artifact and cause jitter in the DoF blur result.
#if LOW_RESOLUTION
#define USE_SAMPLER 0
#else
#define USE_SAMPLER 1
#endif
// Sampler used for the gather taps when USE_SAMPLER is 1: trilinear for the FAR kernel (which picks a
// fractional mip level per pixel), bilinear otherwise (the NEAR kernel always samples mip 0).
#if FAR
#define SamplerTap sampler_TrilinearClamp
#else
#define SamplerTap sampler_LinearClamp
#endif
// Packed tap offsets of the bokeh kernel, decoded per tap with UnpackKernelCoord().
StructuredBuffer<uint> _BokehKernel;
#if USE_TILES
// List of tiles to process; one entry is consumed per thread group.
ConsumeStructuredBuffer<TileData> _TileList;
// Tile coordinates extracted from _TileList for the thread group
groupshared uint2 gs_tileCoord;
#endif
// Per-dispatch constants. The layout must stay in sync with whatever fills this buffer on the host side.
CBUFFER_START(cb0)
// x: SampleCount, y: ScaledRadius, z: BarrelClipping, w: Radius (see the aliases below)
float4 _Params1;
// x: MaxMips; yzw are not read in this file
float4 _Params2;
// xy: used as the output bounds and to turn UVs into pixel coordinates on the Load path.
// zw: forwarded to GetPositionInput() (presumably the reciprocal of xy - confirm against the caller).
float4 _TexelSize;
CBUFFER_END
// Taps per kernel axis; the gather loop runs SampleCount * SampleCount iterations.
#define SampleCount _Params1.x
// Scales the center CoC when selecting the mip level in the FAR kernel.
#define ScaledRadius _Params1.y
// Strength of the barrel clipping / optical vignetting approximation applied to the tap offsets.
#define BarrelClipping _Params1.z
// Scales the center CoC into the per-tap UV offset.
#define Radius _Params1.w
// Upper bound for the mip level selected in the FAR kernel.
#define MaxMips _Params2.x
// Thread groups cover GROUP_RES x GROUP_RES pixels (GROUP_SIZE threads in total).
#define GROUP_RES 8u
#define GROUP_SIZE (GROUP_RES * GROUP_RES)
// Depth-of-field gather kernel, compiled as KMainNear (NEAR) and KMainFar (FAR).
//
// For each output pixel, SampleCount * SampleCount taps from _BokehKernel are scaled by the pixel's
// circle of confusion (CoC) and accumulated:
//   - NEAR: taps are weighted by how close their CoC is to the (dilated) center CoC. The color is
//           normalized by the accumulated weight and a coverage alpha is written to _OutputAlphaTexture.
//   - FAR:  taps are weighted and pre-multiplied by their own CoC so that in-focus pixels do not bleed
//           into the blur. The color is normalized by the accumulated CoC-weighted sum.
//
// With USE_TILES the group is dispatched as GROUP_SIZE x 1 x 1 threads and picks its tile from
// _TileList; otherwise it is a regular GROUP_RES x GROUP_RES dispatch over the target.
#if USE_TILES
[numthreads(GROUP_SIZE, 1, 1)]
void MAIN(uint groupThreadId : SV_GroupThreadID)
#else
[numthreads(GROUP_RES, GROUP_RES, 1)]
void MAIN(uint3 dispatchThreadId : SV_DispatchThreadID)
#endif
{
#if USE_TILES
    // First thread of the group is responsible for grabbing the tile coordinates
    if (groupThreadId == 0u)
    {
        TileData tileData = _TileList.Consume();
        gs_tileCoord = UnpackTileCoord(tileData);
    }

    // Make the tile coordinates visible to the whole group before anyone reads them
    GroupMemoryBarrierWithGroupSync();

    // Compute the actual pixel coordinate we're working on: the flat thread index is unrolled
    // row by row into the GROUP_RES x GROUP_RES tile
    uint2 dispatchThreadId = gs_tileCoord + uint2(groupThreadId % GROUP_RES, groupThreadId / GROUP_RES);
#else
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);
#endif

    if (any(dispatchThreadId.xy >= uint2(_TexelSize.xy)))
        return; // Out of bounds, discard

    PositionInputs posInputs = GetPositionInput(float2(dispatchThreadId.xy), _TexelSize.zw, uint2(GROUP_RES, GROUP_RES));
    float2 uv = posInputs.positionNDC;

    // UV remapped to [-1, 1] and scaled by the clipping strength; used to shift taps in the loop below
    float2 barrelUV = (uv * 2.0 - 1.0) * BarrelClipping;

    // Current pixel CoC
#if NEAR
    float samp0CoC = LOAD_TEXTURE2D_X(_InputDilatedCoCTexture, posInputs.positionSS).x;
#else // FAR
    float samp0CoC = LOAD_TEXTURE2D_X(_InputCoCTexture, posInputs.positionSS).x;
#endif

    // UV-space scale applied to each kernel tap
    float2 sampDist = _PostProcessScreenSize.zw * samp0CoC * Radius;

    // The near layer always reads mip 0; the far layer picks a mip that grows with the center CoC
#if NEAR
    float mip = 0.0;
#else
    float mip = min(MaxMips, (1.0 / (SampleCount - 1.5)) * samp0CoC * ScaledRadius);
#endif

    // Border (in texels) passed to the UV clamp on the sampler path, sized for the coarsest mip touched
    uint mipCeiled = ceil(mip);
    float texelsToClamp = (1u << mipCeiled) + 1;

    float4 acc = 0.0;           // xyz: weighted color sum, w: NEAR coverage / FAR normalization weight
    float nearWeightAcc = 0.0;  // NEAR only: sum of the tap weights
    float accAlpha = 0.0;       // ENABLE_ALPHA only: weighted alpha sum
    float TotalSampleCount = SampleCount * SampleCount;

    // Gather samples
    UNITY_LOOP
    for (uint i = 0u; i < uint(TotalSampleCount); i++)
    {
        float2 sampTap = UnpackKernelCoord(_BokehKernel, i);

        // Note: this barrel clipping / optical vignetting trick isn't correct as we're symmetrically
        // shifting samples instead of actually clipping them but due to the low number of samples
        // we can't do a proper implementation so this will do for now and it looks close-enough at
        // low clipping values
        float clipping = 1.0 - abs(dot(sampTap, barrelUV));
        sampTap = sampTap * sampDist * clipping + uv;

#if USE_SAMPLER
        sampTap = ClampAndScaleUVPostProcessTexture(sampTap, _PostProcessScreenSize.zw, texelsToClamp);
        CTYPE sampColor = SAMPLE_TEXTURE2D_X_LOD(_InputTexture, SamplerTap, sampTap, mip).CTYPE_SWIZZLE;
        float sampCoC = SAMPLE_TEXTURE2D_X_LOD(_InputCoCTexture, SamplerTap, sampTap, mip).x;
#else
        // See: [NOTE-FORCE-DOF-TEX-LOAD]
        // Need to manually compute a coordinate w.r.t. the mip level for a texture load.
        sampTap *= _TexelSize.xy / (1u << (uint)mip);
        CTYPE sampColor = LOAD_TEXTURE2D_X_LOD(_InputTexture, sampTap, mip).CTYPE_SWIZZLE;
        float sampCoC = LOAD_TEXTURE2D_X_LOD(_InputCoCTexture, sampTap, mip).x;
#endif

#if NEAR
        {
            // Taps whose CoC exceeds the center CoC are faded out
            float weight = saturate(1.0 - (sampCoC - samp0CoC));
            // w counts (weighted) taps that have a non-zero CoC; it drives the layer alpha below
            acc += float4(sampColor.xyz, sampCoC > 0.0) * weight;
            nearWeightAcc += weight;
#ifdef ENABLE_ALPHA
            accAlpha += sampColor.w * weight;
#endif
        }
#else // FAR
        {
            // Weight & pre-multiply to limit bleeding on the focused area.
            // Color, alpha and the normalization term (acc.w) must all carry the same
            // `sampCoC * weight` factor, otherwise the division after the loop is not a weighted
            // average and the result is over-brightened (unbounded when the gathered CoCs are ~0).
            float weight = saturate(1.0 - (samp0CoC - sampCoC));
            acc += float4(sampColor.xyz, 1.0) * sampCoC * weight;
#ifdef ENABLE_ALPHA
            accAlpha += sampColor.w * sampCoC * weight;
#endif
        }
#endif
    }

#if NEAR
    // Coverage of the near layer, derived from the fraction of taps that had a non-zero CoC
    float alpha = saturate(sqrt((acc.w / TotalSampleCount) * PI));
    alpha = smoothstep(0.0, 1.0, alpha);

#ifdef ENABLE_ALPHA
    _OutputTexture[COORD_TEXTURE2D_X(posInputs.positionSS)] = float4(acc.xyz, accAlpha) / (nearWeightAcc + 1e-5); // zero-div guard
#else
    _OutputTexture[COORD_TEXTURE2D_X(posInputs.positionSS)] = acc.xyz / (nearWeightAcc + 1e-5); // zero-div guard
#endif
    _OutputAlphaTexture[COORD_TEXTURE2D_X(posInputs.positionSS)] = alpha;
#else // FAR
    // acc.w holds the sum of `sampCoC * weight`, i.e. the same factor applied to color and alpha
#ifdef ENABLE_ALPHA
    _OutputTexture[COORD_TEXTURE2D_X(posInputs.positionSS)] = float4(acc.xyz, accAlpha) / (acc.w + 1e-5); // zero-div guard
#else
    _OutputTexture[COORD_TEXTURE2D_X(posInputs.positionSS)] = acc.xyz / (acc.w + 1e-5); // zero-div guard
#endif
#endif
}