178 lines
6.7 KiB
178 lines
6.7 KiB
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/ExposureCommon.hlsl"
#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch
#pragma kernel KFixedExposure
#pragma kernel KManualCameraExposure
#pragma kernel KPrePass
#pragma kernel KReduction
#pragma kernel KReset
#define PREPASS_TEX_SIZE 1024.0
//#pragma enable_d3d11_debug_symbols
//
// Fixed exposure
// Doesn't do anything fancy: takes the EV100 set in the volume system, applies the exposure
// compensation and writes the result out.
//
// Output layout (per texel of _OutputTexture):
//   x = exposure multiplier derived from the compensated EV100
//   y = the compensated EV100 itself
//
// NOTE(review): compute entry points require a [numthreads] attribute. A single-thread group is
// used here to match KReset - confirm against the dispatch size used on the C# side.
[numthreads(1, 1, 1)]
void KFixedExposure(uint2 dispatchThreadId : SV_DispatchThreadID)
{
    // Compensation is subtracted in EV space before converting to a linear exposure value.
    float ev100 = ParamEV100;
    ev100 -= ParamExposureCompensation;

    _OutputTexture[dispatchThreadId] = float2(ConvertEV100ToExposure(ev100, LensImperfectionExposureScale), ev100);
}
//
// Manual camera exposure
// Converts aperture / shutter speed / iso to EV100, applies the exposure compensation and writes
// the result out.
//
// Output layout (per texel of _OutputTexture):
//   x = exposure multiplier derived from the compensated EV100
//   y = the compensated EV100 itself
//
// NOTE(review): compute entry points require a [numthreads] attribute. A single-thread group is
// used here to match KReset - confirm against the dispatch size used on the C# side.
[numthreads(1, 1, 1)]
void KManualCameraExposure(uint2 dispatchThreadId : SV_DispatchThreadID)
{
    // Physical camera settings -> EV100, then compensation is subtracted in EV space.
    float ev100 = ComputeEV100(ParamAperture, ParamShutterSpeed, ParamISO);
    ev100 -= ParamExposureCompensation;

    _OutputTexture[dispatchThreadId] = float2(ConvertEV100ToExposure(ev100, LensImperfectionExposureScale), ev100);
}
//
// Average luminance pre-pass
// Transforms the input to log luminance in a square-POT target (PREPASS_TEX_SIZE on each side).
//
// Output layout (per texel of _OutputTexture):
//   x = log luminance of the sample, expressed as EV100
//   y = metering weight of the sample
//
// NOTE(review): compute entry points require a [numthreads] attribute. The 8x8 group size is
// chosen to match the uint2(8u, 8u) tile size handed to GetPositionInput below - confirm against
// the dispatch size used on the C# side.
[numthreads(8, 8, 1)]
void KPrePass(uint2 dispatchThreadId : SV_DispatchThreadID)
{
    // For XR, interleave single-pass views in a checkerboard pattern
    UNITY_XR_ASSIGN_VIEW_INDEX((dispatchThreadId.x + dispatchThreadId.y) % _XRViewCount)

    // Map the thread onto the pre-pass target and sample the source luminance at the matching UV.
    PositionInputs posInputs = GetPositionInput(float2(dispatchThreadId), rcp(PREPASS_TEX_SIZE), uint2(8u, 8u));
    float2 uv = ClampAndScaleUVForBilinear(posInputs.positionNDC);
    float luma = SampleLuminance(uv);

    float weight = WeightSample(dispatchThreadId, PREPASS_TEX_SIZE.xx, luma);

    // The luminance is floored at 1e-4 before the conversion so that a black pixel cannot feed a
    // zero into it.
    float logLuma = ComputeEV100FromAvgLuminance(max(luma, 1e-4), MeterCalibrationConstant);
    _OutputTexture[posInputs.positionSS] = float2(logLuma, weight);
}
// Average luminance 2nd & 3rd pass + Evaluation
// - 2nd: Reduction 1024 -> 32
// - 3rd: Reduction 32 -> 1
//
// Group-shared scratch storage for the reduction kernel, one slot per thread of the group.
// REDUCTION_TOTAL_THREADS is not defined in this file (presumably it comes from the included
// ExposureCommon.hlsl - TODO confirm).

// Per thread: the four log luminances it fetched, each already multiplied by its weight.
groupshared float4 gs_luminances[REDUCTION_TOTAL_THREADS];
// Per thread: the sum of the four weights it fetched.
groupshared float gs_weights[REDUCTION_TOTAL_THREADS];
// This kernel runs twice, and as the final output, produces the average normalized luminance of the texture produced by
// the pre-pass.
//
// Let's work through the math, but with a simplified example. Instead of a 2D texture, let's assume we have a 1D
// texture. And instead of a 1024 -> 32 -> 1 reduction, let's assume we have a 4 -> 2 -> 1 reduction.
//
// Say the input texture has the following four pixels: (a, A), (b, B), (c, C), (d, D). The first channel of each pixel
// is the log luminance, and the second channel is the weight.
//
// The first pass combines two pixels per thread, and outputs the following two-pixel two-channel intermediate texture:
//   ((a*A + b*B) / (A + B), (A + B)), ((c*C + d*D) / (C + D), (C + D))
//
// The second pass calculates exposure as follows:
//   ((a*A + b*B) / (A + B) * (A + B) + (c*C + d*D) / (C + D) * (C + D)) / (A + B + C + D)
// which simplifies to:
//   (a*A + b*B + c*C + d*D) / (A + B + C + D)
// which is the normalized weighted average of the log luminances. We can thus work with weights that don't have to sum
// up to 1.
//
// Notice that the division by (A + B) in the first pass is cancelled out by the multiplication in the second pass.
// This is done for two reasons: it enables parallel reduction, and it keeps the values of the intermediate texture in
// a reasonable range to fit in the fp16 data format. We spend a bit more ALU, but we avoid fp16 quantization artifacts.
//
// Output (one texel per thread group, written by group thread 0 only):
//   ParamEvaluateMode == 1 : (exposure multiplier, adapted & clamped EV100) - automatic mode
//   ParamEvaluateMode == 2 : (exposure multiplier, adapted & clamped EV100) - curve remapping mode
//   anything else          : (weighted average log luminance, summed weight) - input for the next reduction pass
//
// The group is REDUCTION_GROUP_SIZE x REDUCTION_GROUP_SIZE threads, which is what the flattened thread index below
// assumes.
[numthreads(REDUCTION_GROUP_SIZE, REDUCTION_GROUP_SIZE, 1)]
void KReduction(uint2 groupId : SV_GroupID, uint2 groupThreadId : SV_GroupThreadID)
{
    uint threadIdx = groupThreadId.y * REDUCTION_GROUP_SIZE + groupThreadId.x;
    uint2 sampleIdx = (groupId.xy * REDUCTION_GROUP_SIZE + groupThreadId.xy) * 2u;

    // Store 4 pixels & their weights in the lds
    float2 p1 = _InputTexture[sampleIdx + uint2(0u, 0u)].xy;
    float2 p2 = _InputTexture[sampleIdx + uint2(1u, 0u)].xy;
    float2 p3 = _InputTexture[sampleIdx + uint2(0u, 1u)].xy;
    float2 p4 = _InputTexture[sampleIdx + uint2(1u, 1u)].xy;

    float4 smp = float4(p1.x, p2.x, p3.x, p4.x);
    float4 weights = float4(p1.y, p2.y, p3.y, p4.y);

    gs_luminances[threadIdx] = smp * weights;
    gs_weights[threadIdx] = dot(weights, 1.0);

    // Every thread's slot must be visible to the whole group before any thread reads a neighbour's slot.
    GroupMemoryBarrierWithGroupSync();

    // Parallel reduction of luminances & weights
    for (uint s = REDUCTION_TOTAL_THREADS / 2u; s > 0u; s >>= 1u)
    {
        if (threadIdx < s)
        {
            gs_luminances[threadIdx] += gs_luminances[threadIdx + s];
            gs_weights[threadIdx] += gs_weights[threadIdx + s];
        }

        // The next step reads what this step wrote, so the whole group has to finish this step first.
        // The barrier sits outside the branch so that every thread of the group reaches it.
        GroupMemoryBarrierWithGroupSync();
    }

    // Evaluate on group thread 0
    if (threadIdx == 0u)
    {
        // Slot 0 now holds the four partial sums of (log luminance * weight) for the whole group.
        float avgLuminance = dot(gs_luminances[0], 0.25);

        if (IsNaN(avgLuminance) || IsInf(avgLuminance))
            avgLuminance = 1.0;

        // Normalize by the summed weight; the two 0.25 factors cancel out. Skipped when the weight is zero
        // to avoid a division by zero.
        if (gs_weights[0] > 0.0)
            avgLuminance /= (gs_weights[0] * 0.25);

        // Each case has its own scope (both declare 'exposure') and ends with a break so that exactly one
        // write to _OutputTexture happens per group.
        switch (ParamEvaluateMode)
        {
            case 1u:
            {
                // Automatic
                float exposure = AdaptExposure(avgLuminance - ParamExposureCompensation);
                exposure = clamp(exposure, ParamExposureLimitMin, ParamExposureLimitMax);
                _OutputTexture[groupId.xy] = float2(ConvertEV100ToExposure(exposure, LensImperfectionExposureScale), exposure);
                break;
            }
            case 2u:
            {
                // Curve remapping
                // NOTE(review): the limits are kept in locals and handed to CurveRemap - presumably it can
                // adjust them (out/inout parameters); confirm against its declaration.
                float minExposure = ParamExposureLimitMin;
                float maxExposure = ParamExposureLimitMax;
                float exposure = CurveRemap(avgLuminance, minExposure, maxExposure);
                exposure = AdaptExposure(exposure - ParamExposureCompensation);
                exposure = clamp(exposure, minExposure, maxExposure);
                _OutputTexture[groupId.xy] = float2(ConvertEV100ToExposure(exposure, LensImperfectionExposureScale), exposure);
                break;
            }
            default:
            {
                // No evaluate - passthrough to next pass
                // This is only used when going from 1024 to 32
                _OutputTexture[groupId.xy] = float2(avgLuminance, gs_weights[0]);
                break;
            }
        }
    }
}
//
// Reset
// Puts the addressed texel of the exposure texture back into its default state: (1, 0).
//
[numthreads(1, 1, 1)]
void KReset(uint2 dispatchThreadId : SV_DispatchThreadID)
{
    const float2 defaultState = float2(1.0, 0.0);
    _OutputTexture[dispatchThreadId] = defaultState;
}