Rasagar/Library/PackageCache/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/Exposure.compute

#include "Packages/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/ExposureCommon.hlsl"

#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch

#pragma kernel KFixedExposure
#pragma kernel KManualCameraExposure
#pragma kernel KPrePass
#pragma kernel KReduction
#pragma kernel KReset

// Source texture read by KReduction (the .xy channels of each texel are used there).
TEXTURE2D(_InputTexture);

// Size value handed to GetPositionInput (as its reciprocal) and to WeightSample in KPrePass.
#define PREPASS_TEX_SIZE 1024.0
// Half of PREPASS_TEX_SIZE.
// NOTE(review): not referenced by any kernel in this file - confirm whether it is still needed.
#define PREPASS_TEX_HALF_SIZE 512.0
// Uncomment the pragma below to compile the kernels with D3D11 debug symbols.
//#pragma enable_d3d11_debug_symbols

//
// Fixed exposure
// Doesn't do anything fancy, simply copies the exposure & clamp values set in the volume system
//
[numthreads(1,1,1)]
void KFixedExposure(uint2 dispatchThreadId : SV_DispatchThreadID)
{
    // Start from the EV100 parameter and subtract the exposure compensation.
    const float compensatedEV = ParamEV100 - ParamExposureCompensation;

    // x: result of ConvertEV100ToExposure for that EV100, y: the EV100 value itself.
    float2 result;
    result.x = ConvertEV100ToExposure(compensatedEV, LensImperfectionExposureScale);
    result.y = compensatedEV;

    _OutputTexture[dispatchThreadId] = result;
}

//
// Manual camera exposure
// Converts aperture / shutter speed / iso / compensation to EV100
//
[numthreads(1,1,1)]
void KManualCameraExposure(uint2 dispatchThreadId : SV_DispatchThreadID)
{
    // EV100 computed from the aperture / shutter speed / ISO parameters.
    const float cameraEV = ComputeEV100(ParamAperture, ParamShutterSpeed, ParamISO);

    // Subtract the exposure compensation before converting.
    const float compensatedEV = cameraEV - ParamExposureCompensation;

    // x: result of ConvertEV100ToExposure for that EV100, y: the EV100 value itself.
    const float exposureValue = ConvertEV100ToExposure(compensatedEV, LensImperfectionExposureScale);
    _OutputTexture[dispatchThreadId] = float2(exposureValue, compensatedEV);
}

//
// Average luminance pre-pass
// Transforms the input to log luminance in a square-POT target
//
[numthreads(8,8,1)]
void KPrePass(uint2 dispatchThreadId : SV_DispatchThreadID)
{
    // XR: pick the view from the thread coordinates so that single-pass views
    // are interleaved in a checkerboard pattern.
    UNITY_XR_ASSIGN_VIEW_INDEX((dispatchThreadId.x + dispatchThreadId.y) % _XRViewCount)

    // Resolve this thread's position and the matching UV in the source.
    PositionInputs pixel = GetPositionInput(float2(dispatchThreadId), rcp(PREPASS_TEX_SIZE), uint2(8u, 8u));
    float2 sourceUV = ClampAndScaleUVForBilinear(pixel.positionNDC);

    // Luminance at that UV, and the metering weight for this sample.
    float luminance = SampleLuminance(sourceUV);
    float sampleWeight = WeightSample(dispatchThreadId, float2(PREPASS_TEX_SIZE, PREPASS_TEX_SIZE), luminance);

    // Luminance is floored at 1e-4 before being converted to EV100.
    float sampleEV = ComputeEV100FromAvgLuminance(max(luminance, 1e-4), MeterCalibrationConstant);

    // x: log luminance (EV100), y: weight
    float2 result;
    result.x = sampleEV;
    result.y = sampleWeight;
    _OutputTexture[pixel.positionSS] = result;
}

//
// Average luminance 2nd & 3rd pass + Evaluation
//   - 2nd: Reduction 1024 -> 32
//   - 3rd: Reduction 32 -> 1
//
// Thread-group edge length used by KReduction (numthreads is GROUP_SIZE x GROUP_SIZE x 1).
#define REDUCTION_GROUP_SIZE 16
// Number of threads per group; must stay equal to REDUCTION_GROUP_SIZE * REDUCTION_GROUP_SIZE
// because KReduction indexes the arrays below with (y * REDUCTION_GROUP_SIZE + x).
#define REDUCTION_TOTAL_THREADS 256

// Per-thread scratch storage for KReduction's parallel reduction:
// - gs_luminances: the four (log luminance * weight) products loaded by each thread
// - gs_weights:    the sum of the four weights loaded by each thread
groupshared float4 gs_luminances[REDUCTION_TOTAL_THREADS];
groupshared float gs_weights[REDUCTION_TOTAL_THREADS];

// This kernel runs twice, and as the final output, produces the average normalized luminance of the texture produced by
// the pre-pass.
//
// Let's work through the math, but with a simplified example. Instead of a 2D texture, let's assume we have a 1D
// texture. And instead of a 1024 -> 32 -> 1 reduction, let's assume we have a 4 -> 2 -> 1 reduction.
//
// Say the input texture has the following four pixels: (a, A), (b, B), (c, C), (d, D). The first channel of each pixel
// is the log luminance, and the second channel is the weight.
//
// The first pass combines two pixels per thread, and outputs the following two-pixel two-channel intermediate texture:
// ((a*A + b*B) / (A + B), (A + B)), ((c*C + d*D) / (C + D), (C + D))
// The second pass calculates exposure as follows:
// ((a*A + b*B) / (A + B) * (A + B) + (c*C + d*D) / (C + D) * (C + D)) / (A + B + C + D)
// which simplifies to:
// (a*A + b*B + c*C + d*D) / (A + B + C + D)
// which is the normalized weighted average of the log luminances. We can thus work with weights that don't have to sum
// up to 1.
//
// Notice that (A + B) multiplied in the first pass is cancelled out in the second pass. This is done for two reasons:
// It enables parallel reduction, and it keeps the values of the intermediate texture in a reasonable range to fit in
// the fp16 data format. We spend a bit more ALU, but we avoid fp16 quantization artifacts.
// Reduces one block of _InputTexture per thread group into a single texel of _OutputTexture.
// Each thread loads a 2x2 footprint, so a 16x16 group covers 32x32 input texels.
//   groupId       - index of the thread group; also the output texel written by the group
//   groupThreadId - 2D index of the thread inside its group
[numthreads(REDUCTION_GROUP_SIZE,REDUCTION_GROUP_SIZE,1)]
void KReduction(uint2 groupId : SV_GroupID, uint2 groupThreadId : SV_GroupThreadID)
{
    // Flattened thread index inside the group; addresses the groupshared arrays.
    uint threadIdx = groupThreadId.y * REDUCTION_GROUP_SIZE + groupThreadId.x;
    // Top-left texel of the 2x2 footprint this thread is responsible for.
    uint2 sampleIdx = (groupId.xy * REDUCTION_GROUP_SIZE + groupThreadId.xy) * 2u;

    // Store 4 pixels & their weights in the lds
    // (.x of each texel is the log luminance, .y is its weight)
    float2 p1 = _InputTexture[sampleIdx + uint2(0u, 0u)].xy;
    float2 p2 = _InputTexture[sampleIdx + uint2(1u, 0u)].xy;
    float2 p3 = _InputTexture[sampleIdx + uint2(0u, 1u)].xy;
    float2 p4 = _InputTexture[sampleIdx + uint2(1u, 1u)].xy;

    float4 smp = float4(p1.x, p2.x, p3.x, p4.x);
    float4 weights = float4(p1.y, p2.y, p3.y, p4.y);

    // Weighted log luminances are kept as four separate lanes; the weights are pre-summed.
    gs_luminances[threadIdx] = smp * weights;
    gs_weights[threadIdx] = dot(weights, 1.0);

    // All threads must have written their slot before any thread reads a neighbour's slot.
    GroupMemoryBarrierWithGroupSync();

    // Parallel reduction of luminances & weights
    // Each step halves the active range: thread i accumulates slot (i + s) into slot i.
    UNITY_UNROLL
    for(uint s = REDUCTION_TOTAL_THREADS / 2u; s > 0u; s >>= 1u)
    {
        if(threadIdx < s)
        {
            gs_luminances[threadIdx] += gs_luminances[threadIdx + s];
            gs_weights[threadIdx] += gs_weights[threadIdx + s];
        }

        // Barrier is outside the branch so every thread of the group reaches it.
        GroupMemoryBarrierWithGroupSync();
    }

    // Evaluate on group thread 0
    // (slot 0 now holds the group totals)
    if(threadIdx == 0u)
    {
        // Sum of the four lanes scaled by 0.25; the same 0.25 is applied to the weight
        // in the division below, so the factor cancels whenever the total weight is positive.
        float avgLuminance = dot(gs_luminances[0], 0.25);

        // Replace a non-finite weighted sum with 1.0.
        // NOTE(review): this fallback is still divided by the total weight below when the weight is > 0.
        if (IsNaN(avgLuminance) || IsInf(avgLuminance))
            avgLuminance = 1.0;

        // Normalize by the total weight; skipped when the weight is not positive to avoid dividing by zero.
        if (gs_weights[0] > 0.0)
            avgLuminance /= (gs_weights[0] * 0.25);

        UNITY_BRANCH
        switch (ParamEvaluateMode)
        {
            case 1u:
            {
                // Automatic
                // Compensation is subtracted before adaptation; the adapted value is clamped to the limits.
                float exposure = AdaptExposure(avgLuminance - ParamExposureCompensation);
                exposure = clamp(exposure, ParamExposureLimitMin, ParamExposureLimitMax);
                _OutputTexture[groupId.xy] = float2(ConvertEV100ToExposure(exposure, LensImperfectionExposureScale), exposure);
                break;
            }
            case 2u:
            {
                // Curve remapping
                // minExposure / maxExposure are passed to CurveRemap and reused for the final clamp.
                float minExposure = ParamExposureLimitMin;
                float maxExposure = ParamExposureLimitMax;
                float exposure = CurveRemap(avgLuminance, minExposure, maxExposure);
                exposure = AdaptExposure(exposure - ParamExposureCompensation);
                exposure = clamp(exposure, minExposure, maxExposure);
                _OutputTexture[groupId.xy] = float2(ConvertEV100ToExposure(exposure, LensImperfectionExposureScale), exposure);
                break;
            }
            default:
            {
                // No evaluate - passthrough to next pass
                // This is only used when going from 1024 to 32
                // Output keeps the (log luminance, weight) layout so the next reduction can consume it.
                _OutputTexture[groupId.xy] = float2(avgLuminance, gs_weights[0]);
                break;
            }
        }
    }
}

//
// Reset the exposure texture to a default state (1,0)
//
[numthreads(1, 1, 1)]
void KReset(uint2 dispatchThreadId : SV_DispatchThreadID)
{
    // Default state written to the exposure texture: (1, 0).
    const float2 defaultState = float2(1.0, 0.0);
    _OutputTexture[dispatchThreadId] = defaultState;
}
deneme 2024-08-26 13:07:20 -07:00			`#include "Packages/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/ExposureCommon.hlsl"`

			`#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch`

			`#pragma kernel KFixedExposure`
			`#pragma kernel KManualCameraExposure`
			`#pragma kernel KPrePass`
			`#pragma kernel KReduction`
			`#pragma kernel KReset`

			`TEXTURE2D(_InputTexture);`

			`#define PREPASS_TEX_SIZE 1024.0`
			`#define PREPASS_TEX_HALF_SIZE 512.0`
			`//#pragma enable_d3d11_debug_symbols`

			`//`
			`// Fixed exposure`
			`// Doesn't do anything fancy, simply copies the exposure & clamp values set in the volume system`
			`//`
			`[numthreads(1,1,1)]`
			`void KFixedExposure(uint2 dispatchThreadId : SV_DispatchThreadID)`
			`{`
			`float ev100 = ParamEV100;`
			`ev100 -= ParamExposureCompensation;`
			`_OutputTexture[dispatchThreadId] = float2(ConvertEV100ToExposure(ev100, LensImperfectionExposureScale), ev100);`
			`}`

			`//`
			`// Manual camera exposure`
			`// Converts aperture / shutter speed / iso / compensation to EV100`
			`//`
			`[numthreads(1,1,1)]`
			`void KManualCameraExposure(uint2 dispatchThreadId : SV_DispatchThreadID)`
			`{`
			`float ev100 = ComputeEV100(ParamAperture, ParamShutterSpeed, ParamISO);`
			`ev100 -= ParamExposureCompensation;`
			`_OutputTexture[dispatchThreadId] = float2(ConvertEV100ToExposure(ev100, LensImperfectionExposureScale), ev100);`
			`}`

			`//`
			`// Average luminance pre-pass`
			`// Transforms the input to log luminance in a square-POT target`
			`//`
			`[numthreads(8,8,1)]`
			`void KPrePass(uint2 dispatchThreadId : SV_DispatchThreadID)`
			`{`
			`// For XR, interleave single-pass views in a checkerboard pattern`
			`UNITY_XR_ASSIGN_VIEW_INDEX((dispatchThreadId.x + dispatchThreadId.y) % _XRViewCount)`

			`PositionInputs posInputs = GetPositionInput(float2(dispatchThreadId), rcp(PREPASS_TEX_SIZE), uint2(8u, 8u));`
			`float2 uv = ClampAndScaleUVForBilinear(posInputs.positionNDC);`
			`float luma = SampleLuminance(uv);`

			`float weight = WeightSample(dispatchThreadId, PREPASS_TEX_SIZE.xx, luma);`

			`float logLuma = ComputeEV100FromAvgLuminance(max(luma, 1e-4), MeterCalibrationConstant);`
			`_OutputTexture[posInputs.positionSS] = float2(logLuma, weight);`
			`}`

			`//`
			`// Average luminance 2nd & 3rd pass + Evaluation`
			`// - 2nd: Reduction 1024 -> 32`
			`// - 3rd: Reduction 32 -> 1`
			`//`
			`#define REDUCTION_GROUP_SIZE 16`
			`#define REDUCTION_TOTAL_THREADS 256`

			`groupshared float4 gs_luminances[REDUCTION_TOTAL_THREADS];`
			`groupshared float gs_weights[REDUCTION_TOTAL_THREADS];`

			`// This kernel runs twice, and as the final output, produces the average normalized luminance of the texture produced by`
			`// the pre-pass.`
			`//`
			`// Let's work through the math, but with a simplified example. Instead of a 2D texture, let's assume we have a 1D`
			`// texture. And instead of a 1024 -> 32 -> 1 reduction, let's assume we have a 4 -> 2 -> 1 reduction.`
			`//`
			`// Say the input texture has the following four pixels: (a, A), (b, B), (c, C), (d, D). The first channel of each pixel`
			`// is the log luminance, and the second channel is the weight.`
			`//`
			`// The first pass combines two pixels per thread, and outputs the following two-pixel two-channel intermediate texture:`
			`// ((a*A + b*B) / (A + B), (A + B)), ((c*C + d*D) / (C + D), (C + D))`
			`// The second pass calculates exposure as follows:`
			`// ((a*A + b*B) / (A + B) * (A + B) + (c*C + d*D) / (C + D) * (C + D)) / (A + B + C + D)`
			`// which simplifies to:`
			`// (a*A + b*B + c*C + d*D) / (A + B + C + D)`
			`// which is the normalized weighted average of the log luminances. We can thus work with weights that don't have to sum`
			`// up to 1.`
			`//`
			`// Notice that (A + B) multiplied in the first pass is cancelled out in the second pass. This is done for two reasons:`
			`// It enables parallel reduction, and it keeps the values of the intermediate texture in a reasonable range to fit in`
			`// the fp16 data format. We spend a bit more ALU, but we avoid fp16 quantization artifacts.`
			`[numthreads(REDUCTION_GROUP_SIZE,REDUCTION_GROUP_SIZE,1)]`
			`void KReduction(uint2 groupId : SV_GroupID, uint2 groupThreadId : SV_GroupThreadID)`
			`{`
			`uint threadIdx = groupThreadId.y * REDUCTION_GROUP_SIZE + groupThreadId.x;`
			`uint2 sampleIdx = (groupId.xy * REDUCTION_GROUP_SIZE + groupThreadId.xy) * 2u;`

			`// Store 4 pixels & their weights in the lds`
			`float2 p1 = _InputTexture[sampleIdx + uint2(0u, 0u)].xy;`
			`float2 p2 = _InputTexture[sampleIdx + uint2(1u, 0u)].xy;`
			`float2 p3 = _InputTexture[sampleIdx + uint2(0u, 1u)].xy;`
			`float2 p4 = _InputTexture[sampleIdx + uint2(1u, 1u)].xy;`

			`float4 smp = float4(p1.x, p2.x, p3.x, p4.x);`
			`float4 weights = float4(p1.y, p2.y, p3.y, p4.y);`

			`gs_luminances[threadIdx] = smp * weights;`
			`gs_weights[threadIdx] = dot(weights, 1.0);`

			`GroupMemoryBarrierWithGroupSync();`

			`// Parallel reduction of luminances & weights`
			`UNITY_UNROLL`
			`for(uint s = REDUCTION_TOTAL_THREADS / 2u; s > 0u; s >>= 1u)`
			`{`
			`if(threadIdx < s)`
			`{`
			`gs_luminances[threadIdx] += gs_luminances[threadIdx + s];`
			`gs_weights[threadIdx] += gs_weights[threadIdx + s];`
			`}`

			`GroupMemoryBarrierWithGroupSync();`
			`}`

			`// Evaluate on group thread 0`
			`if(threadIdx == 0u)`
			`{`
			`float avgLuminance = dot(gs_luminances[0], 0.25);`

			`if (IsNaN(avgLuminance) || IsInf(avgLuminance))`
			`avgLuminance = 1.0;`

			`if (gs_weights[0] > 0.0)`
			`avgLuminance /= (gs_weights[0] * 0.25);`

			`UNITY_BRANCH`
			`switch (ParamEvaluateMode)`
			`{`
			`case 1u:`
			`{`
			`// Automatic`
			`float exposure = AdaptExposure(avgLuminance - ParamExposureCompensation);`
			`exposure = clamp(exposure, ParamExposureLimitMin, ParamExposureLimitMax);`
			`_OutputTexture[groupId.xy] = float2(ConvertEV100ToExposure(exposure, LensImperfectionExposureScale), exposure);`
			`break;`
			`}`
			`case 2u:`
			`{`
			`// Curve remapping`
			`float minExposure = ParamExposureLimitMin;`
			`float maxExposure = ParamExposureLimitMax;`
			`float exposure = CurveRemap(avgLuminance, minExposure, maxExposure);`
			`exposure = AdaptExposure(exposure - ParamExposureCompensation);`
			`exposure = clamp(exposure, minExposure, maxExposure);`
			`_OutputTexture[groupId.xy] = float2(ConvertEV100ToExposure(exposure, LensImperfectionExposureScale), exposure);`
			`break;`
			`}`
			`default:`
			`{`
			`// No evaluate - passthrough to next pass`
			`// This is only used when going from 1024 to 32`
			`_OutputTexture[groupId.xy] = float2(avgLuminance, gs_weights[0]);`
			`break;`
			`}`
			`}`
			`}`
			`}`

			`//`
			`// Reset the exposure texture to a default state (1,0)`
			`//`
			`[numthreads(1, 1, 1)]`
			`void KReset(uint2 dispatchThreadId : SV_DispatchThreadID)`
			`{`
			`_OutputTexture[dispatchThreadId] = float2(1.0, 0.0);`
			`}`