178 lines
6.7 KiB
Plaintext
178 lines
6.7 KiB
Plaintext
|
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/ExposureCommon.hlsl"
|
||
|
|
||
|
#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch
|
||
|
|
||
|
#pragma kernel KFixedExposure
|
||
|
#pragma kernel KManualCameraExposure
|
||
|
#pragma kernel KPrePass
|
||
|
#pragma kernel KReduction
|
||
|
#pragma kernel KReset
|
||
|
|
||
|
// Source texture for the reduction: KReduction reads the .xy channels of 2x2 texel quads from it.
TEXTURE2D(_InputTexture);
|
||
|
|
||
|
// Edge length used by KPrePass: its reciprocal is handed to GetPositionInput and the value itself
// (replicated to both components) is handed to WeightSample as the size argument.
#define PREPASS_TEX_SIZE 1024.0
|
||
|
// Half of PREPASS_TEX_SIZE. NOTE(review): not referenced by any kernel visible in this file.
#define PREPASS_TEX_HALF_SIZE 512.0
|
||
|
//#pragma enable_d3d11_debug_symbols
|
||
|
|
||
|
//
|
||
|
// Fixed exposure
|
||
|
// Doesn't do anything fancy, simply copies the exposure & clamp values set in the volume system
|
||
|
//
|
||
|
[numthreads(1,1,1)]
void KFixedExposure(uint2 dispatchThreadId : SV_DispatchThreadID)
{
    // Start from the fixed EV100 parameter and remove the exposure compensation.
    const float compensatedEV100 = ParamEV100 - ParamExposureCompensation;

    // x: exposure derived from the compensated EV100, y: the compensated EV100 itself.
    const float exposure = ConvertEV100ToExposure(compensatedEV100, LensImperfectionExposureScale);
    _OutputTexture[dispatchThreadId] = float2(exposure, compensatedEV100);
}
|
||
|
|
||
|
//
|
||
|
// Manual camera exposure
|
||
|
// Converts aperture / shutter speed / iso / compensation to EV100
|
||
|
//
|
||
|
[numthreads(1,1,1)]
void KManualCameraExposure(uint2 dispatchThreadId : SV_DispatchThreadID)
{
    // EV100 computed from the camera parameters (aperture, shutter speed, ISO)...
    const float cameraEV100 = ComputeEV100(ParamAperture, ParamShutterSpeed, ParamISO);

    // ...with the exposure compensation subtracted afterwards.
    const float compensatedEV100 = cameraEV100 - ParamExposureCompensation;

    // x: exposure derived from the compensated EV100, y: the compensated EV100 itself.
    const float exposure = ConvertEV100ToExposure(compensatedEV100, LensImperfectionExposureScale);
    _OutputTexture[dispatchThreadId] = float2(exposure, compensatedEV100);
}
|
||
|
|
||
|
//
|
||
|
// Average luminance pre-pass
|
||
|
// Transforms the input to log luminance in a square-POT target
|
||
|
//
|
||
|
[numthreads(8,8,1)]
void KPrePass(uint2 dispatchThreadId : SV_DispatchThreadID)
{
    // For XR, interleave single-pass views in a checkerboard pattern
    UNITY_XR_ASSIGN_VIEW_INDEX((dispatchThreadId.x + dispatchThreadId.y) % _XRViewCount)

    // Lower bound applied to the luminance before it is converted to EV100.
    const float kMinLuminance = 1e-4;

    // Position data for this thread; the tile size matches the 8x8 thread group.
    PositionInputs pixel = GetPositionInput(float2(dispatchThreadId), rcp(PREPASS_TEX_SIZE), uint2(8u, 8u));

    // Luminance sampled at this thread's clamped/scaled NDC position.
    const float sampledLuma = SampleLuminance(ClampAndScaleUVForBilinear(pixel.positionNDC));

    // The weight is evaluated from the raw (unclamped) luminance.
    const float sampleWeight = WeightSample(dispatchThreadId, float2(PREPASS_TEX_SIZE, PREPASS_TEX_SIZE), sampledLuma);

    // x: EV100 of the floored luminance, y: weight of this sample.
    const float sampleEV100 = ComputeEV100FromAvgLuminance(max(sampledLuma, kMinLuminance), MeterCalibrationConstant);
    _OutputTexture[pixel.positionSS] = float2(sampleEV100, sampleWeight);
}
|
||
|
|
||
|
//
|
||
|
// Average luminance 2nd & 3rd pass + Evaluation
|
||
|
// - 2nd: Reduction 1024 -> 32
|
||
|
// - 3rd: Reduction 32 -> 1
|
||
|
//
|
||
|
// Edge length of the square thread group used by KReduction.
#define REDUCTION_GROUP_SIZE 16
// Threads per group (16 * 16 = 256); also the element count of the groupshared arrays below.
#define REDUCTION_TOTAL_THREADS (REDUCTION_GROUP_SIZE * REDUCTION_GROUP_SIZE)

// One slot per thread: the four weighted values of the thread's 2x2 quad...
groupshared float4 gs_luminances[REDUCTION_TOTAL_THREADS];
// ...and the sum of the four weights of that quad.
groupshared float gs_weights[REDUCTION_TOTAL_THREADS];
|
||
|
|
||
|
// This kernel runs twice, and as the final output, produces the average normalized luminance of the texture produced by
|
||
|
// the pre-pass.
|
||
|
//
|
||
|
// Let's work through the math, but with a simplified example. Instead of a 2D texture, let's assume we have a 1D
|
||
|
// texture. And instead of a 1024 -> 32 -> 1 reduction, let's assume we have a 4 -> 2 -> 1 reduction.
|
||
|
//
|
||
|
// Say the input texture has the following four pixels: (a, A), (b, B), (c, C), (d, D). The first channel of each pixel
|
||
|
// is the log luminance, and the second channel is the weight.
|
||
|
//
|
||
|
// The first pass combines two pixels per thread, and outputs the following two-pixel two-channel intermediate texture:
|
||
|
// ((a*A + b*B) / (A + B), (A + B)), ((c*C + d*D) / (C + D), (C + D))
|
||
|
// The second pass calculates exposure as follows:
|
||
|
// ((a*A + b*B) / (A + B) * (A + B) + (c*C + d*D) / (C + D) * (C + D)) / (A + B + C + D)
|
||
|
// which simplifies to:
|
||
|
// (a*A + b*B + c*C + d*D) / (A + B + C + D)
|
||
|
// which is the normalized weighted average of the log luminances. We can thus work with weights that don't have to sum
|
||
|
// up to 1.
|
||
|
//
|
||
|
// Notice that the division by (A + B) in the first pass is cancelled out by the multiplication in the second pass. This is done for two reasons:
|
||
|
// It enables parallel reduction, and it keeps the values of the intermediate texture in a reasonable range to fit in
|
||
|
// the fp16 data format. We spend a bit more ALU, but we avoid fp16 quantization artifacts.
|
||
|
[numthreads(REDUCTION_GROUP_SIZE,REDUCTION_GROUP_SIZE,1)]
void KReduction(uint2 groupId : SV_GroupID, uint2 groupThreadId : SV_GroupThreadID)
{
    // Flattened index of this thread inside its group; also its slot in the groupshared arrays.
    const uint slot = groupThreadId.y * REDUCTION_GROUP_SIZE + groupThreadId.x;

    // Top-left texel of the 2x2 input quad handled by this thread.
    const uint2 quadOrigin = (groupId.xy * REDUCTION_GROUP_SIZE + groupThreadId.xy) * 2u;

    // Fetch the quad: .x holds the value, .y holds its weight.
    const float2 texel00 = _InputTexture[quadOrigin + uint2(0u, 0u)].xy;
    const float2 texel10 = _InputTexture[quadOrigin + uint2(1u, 0u)].xy;
    const float2 texel01 = _InputTexture[quadOrigin + uint2(0u, 1u)].xy;
    const float2 texel11 = _InputTexture[quadOrigin + uint2(1u, 1u)].xy;

    const float4 quadValues  = float4(texel00.x, texel10.x, texel01.x, texel11.x);
    const float4 quadWeights = float4(texel00.y, texel10.y, texel01.y, texel11.y);

    // Stash the weighted values and the summed weights in groupshared memory.
    gs_luminances[slot] = quadValues * quadWeights;
    gs_weights[slot] = dot(quadWeights, 1.0);

    GroupMemoryBarrierWithGroupSync();

    // Parallel reduction: halve the number of active slots each step until slot 0 holds the totals.
    UNITY_UNROLL
    for (uint stride = REDUCTION_TOTAL_THREADS / 2u; stride > 0u; stride >>= 1u)
    {
        if (slot < stride)
        {
            gs_luminances[slot] += gs_luminances[slot + stride];
            gs_weights[slot] += gs_weights[slot + stride];
        }

        // Every thread reaches this barrier, including the ones that did not accumulate.
        GroupMemoryBarrierWithGroupSync();
    }

    // Only the first thread of the group evaluates and writes the result; no barrier follows.
    if (slot != 0u)
        return;

    const float totalWeight = gs_weights[0];
    float avgLuminance = dot(gs_luminances[0], 0.25);

    // Replace an invalid accumulated value with 1.0 before normalizing.
    if (IsNaN(avgLuminance) || IsInf(avgLuminance))
    {
        avgLuminance = 1.0;
    }

    // Normalize by the accumulated weight (skipped when there is no positive weight).
    if (totalWeight > 0.0)
    {
        avgLuminance /= (totalWeight * 0.25);
    }

    // Default: no evaluation, pass the average and the accumulated weight on to the next pass.
    // This is only used when going from 1024 to 32.
    float2 result = float2(avgLuminance, totalWeight);

    UNITY_BRANCH
    switch (ParamEvaluateMode)
    {
        case 1u:
        {
            // Automatic
            float adaptedEV = AdaptExposure(avgLuminance - ParamExposureCompensation);
            adaptedEV = clamp(adaptedEV, ParamExposureLimitMin, ParamExposureLimitMax);
            result = float2(ConvertEV100ToExposure(adaptedEV, LensImperfectionExposureScale), adaptedEV);
            break;
        }
        case 2u:
        {
            // Curve remapping
            // The limits are kept in locals and handed to CurveRemap, then reused for the clamp.
            // NOTE(review): CurveRemap's signature is not visible here; it may update them - confirm.
            float limitLow = ParamExposureLimitMin;
            float limitHigh = ParamExposureLimitMax;
            float remappedEV = CurveRemap(avgLuminance, limitLow, limitHigh);
            remappedEV = AdaptExposure(remappedEV - ParamExposureCompensation);
            remappedEV = clamp(remappedEV, limitLow, limitHigh);
            result = float2(ConvertEV100ToExposure(remappedEV, LensImperfectionExposureScale), remappedEV);
            break;
        }
        default:
        {
            // Keep the passthrough value assigned above.
            break;
        }
    }

    _OutputTexture[groupId.xy] = result;
}
|
||
|
|
||
|
//
|
||
|
// Reset the exposure texture to a default state (1,0)
|
||
|
//
|
||
|
[numthreads(1, 1, 1)]
void KReset(uint2 dispatchThreadId : SV_DispatchThreadID)
{
    // Default state written to the exposure texture: (1, 0).
    const float2 defaultState = float2(1.0, 0.0);
    _OutputTexture[dispatchThreadId] = defaultState;
}
|