Files
maszyna/betterRenderer/renderer/source/XeGTAO.h

263 lines
16 KiB
C++

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2016-2021, Intel Corporation
//
// SPDX-License-Identifier: MIT
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//
// XeGTAO is based on GTAO/GTSO "Jimenez et al. / Practical Real-Time Strategies for Accurate Indirect Occlusion",
// https://www.activision.com/cdn/research/Practical_Real_Time_Strategies_for_Accurate_Indirect_Occlusion_NEW%20VERSION_COLOR.pdf
//
// Implementation: Filip Strugar (filip.strugar@intel.com), Steve Mccalla <stephen.mccalla@intel.com> (\_/)
// Version: 1.02 (='.'=)
// Details: https://github.com/GameTechDev/XeGTAO (")_(")
//
// Version history:
// 1.00 (2021-08-09): Initial release
// 1.01 (2021-09-02): Fix for depth going to inf for 'far' depth buffer values that are out of fp16 range
// 1.02 (2021-09-03): More fast_acos use and made final horizon cos clamping optional (off by default): 3-4% perf boost
// 1.10 (2021-09-03): Added a couple of heuristics to combat over-darkening errors in certain scenarios
// 1.20 (2021-09-06): Optional normal from depth generation is now a standalone pass: no longer integrated into
// main XeGTAO pass to reduce complexity and allow reuse; also quality of generated normals improved
// 1.21 (2021-09-28): Replaced 'groupshared'-based denoiser with a slightly slower multi-pass one where a 2-pass new
// equals 1-pass old. However, 1-pass new is faster than the 1-pass old and enough when TAA enabled.
// 1.22 (2021-09-28): Added 'XeGTAO_' prefix to all local functions to avoid name clashes with various user codebases.
// 1.30 (2021-10-10): Added support for directional component (bent normals).
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#ifndef __XE_GTAO_TYPES_H__
#define __XE_GTAO_TYPES_H__
#ifdef __cplusplus
#include <cmath>
namespace XeGTAO
{
// cpp<->hlsl mapping
struct Matrix4x4 { float m[16]; };
struct Vector3 { float x,y,z; };
struct Vector2 { float x,y; };
struct Vector2i { int x,y; };
typedef unsigned int uint;
#else // #ifdef __cplusplus
// cpp<->hlsl mapping
#define Matrix4x4 float4x4
#define Vector3 float3
#define Vector2 float2
#define Vector2i int2
#endif
// Global consts that need to be visible from both shader and cpp side
#define XE_GTAO_DEPTH_MIP_LEVELS 5 // this one is hard-coded to 5 for now
#define XE_GTAO_NUMTHREADS_X 8 // these can be changed
#define XE_GTAO_NUMTHREADS_Y 8 // these can be changed
struct GTAOConstants
{
Vector2i ViewportSize;
Vector2 ViewportPixelSize; // .zw == 1.0 / ViewportSize.xy
Vector2 DepthUnpackConsts;
Vector2 CameraTanHalfFOV;
Vector2 NDCToViewMul;
Vector2 NDCToViewAdd;
Vector2 NDCToViewMul_x_PixelSize;
float EffectRadius; // world (viewspace) maximum size of the shadow
float EffectFalloffRange;
float RadiusMultiplier;
float Padding0;
float FinalValuePower;
float DenoiseBlurBeta;
float SampleDistributionPower;
float ThinOccluderCompensation;
float DepthMIPSamplingOffset;
int NoiseIndex; // frameIndex % 64 if using TAA or 0 otherwise
};
// This is used only for the development (ray traced ground truth).
struct ReferenceRTAOConstants
{
float TotalRaysLength ; // similar to Radius from GTAO
float Albedo ; // the assumption on the average material albedo
int MaxBounces ; // how many rays to recurse before stopping
int AccumulatedFrames ; // how many frames have we accumulated so far (after resetting/clearing). If 0 - this is the first.
int AccumulateFrameMax ; // how many frames are we aiming to accumulate; stop when we hit!
int Padding0;
int Padding1;
int Padding2;
#ifdef __cplusplus
ReferenceRTAOConstants( ) { TotalRaysLength = 1.0f; Albedo = 0.0f; MaxBounces = 1; AccumulatedFrames = 0; AccumulateFrameMax = 0; }
#endif
};
#ifndef XE_GTAO_USE_DEFAULT_CONSTANTS
#define XE_GTAO_USE_DEFAULT_CONSTANTS 1
#endif
// some constants reduce performance if provided as dynamic values; if these constants are not required to be dynamic and they match default values,
// set XE_GTAO_USE_DEFAULT_CONSTANTS and the code will compile into a more efficient shader
#define XE_GTAO_DEFAULT_RADIUS_MULTIPLIER (1.457f ) // allows us to use different value as compared to ground truth radius to counter inherent screen space biases
#define XE_GTAO_DEFAULT_FALLOFF_RANGE (0.615f ) // distant samples contribute less
#define XE_GTAO_DEFAULT_SAMPLE_DISTRIBUTION_POWER (2.0f ) // small crevices more important than big surfaces
#define XE_GTAO_DEFAULT_THIN_OCCLUDER_COMPENSATION (0.0f ) // the new 'thickness heuristic' approach
#define XE_GTAO_DEFAULT_FINAL_VALUE_POWER (2.2f ) // modifies the final ambient occlusion value using power function - this allows some of the above heuristics to do different things
#define XE_GTAO_DEFAULT_DEPTH_MIP_SAMPLING_OFFSET (3.30f ) // main trade-off between performance (memory bandwidth) and quality (temporal stability is the first affected, thin objects next)
#define XE_GTAO_OCCLUSION_TERM_SCALE (1.5f) // for packing in UNORM (because raw, pre-denoised occlusion term can overshoot 1 but will later average out to 1)
// From https://www.shadertoy.com/view/3tB3z3 - except we're using R2 here
#define XE_HILBERT_LEVEL 6U
#define XE_HILBERT_WIDTH ( (1U << XE_HILBERT_LEVEL) )
#define XE_HILBERT_AREA ( XE_HILBERT_WIDTH * XE_HILBERT_WIDTH )
inline uint HilbertIndex( uint posX, uint posY )
{
uint index = 0U;
for( uint curLevel = XE_HILBERT_WIDTH/2U; curLevel > 0U; curLevel /= 2U )
{
uint regionX = ( posX & curLevel ) > 0U;
uint regionY = ( posY & curLevel ) > 0U;
index += curLevel * curLevel * ( (3U * regionX) ^ regionY);
if( regionY == 0U )
{
if( regionX == 1U )
{
posX = uint( (XE_HILBERT_WIDTH - 1U) ) - posX;
posY = uint( (XE_HILBERT_WIDTH - 1U) ) - posY;
}
uint temp = posX;
posX = posY;
posY = temp;
}
}
return index;
}
#ifdef __cplusplus
struct GTAOSettings
{
int QualityLevel = 2; // 0: low; 1: medium; 2: high; 3: ultra
int DenoisePasses = 1; // 0: disabled; 1: sharp; 2: medium; 3: soft
float Radius = 0.5f; // [0.0, ~ ] World (view) space size of the occlusion sphere.
// auto-tune-d settings
float RadiusMultiplier = XE_GTAO_DEFAULT_RADIUS_MULTIPLIER;
float FalloffRange = XE_GTAO_DEFAULT_FALLOFF_RANGE;
float SampleDistributionPower = XE_GTAO_DEFAULT_SAMPLE_DISTRIBUTION_POWER;
float ThinOccluderCompensation = XE_GTAO_DEFAULT_THIN_OCCLUDER_COMPENSATION;
float FinalValuePower = XE_GTAO_DEFAULT_FINAL_VALUE_POWER;
float DepthMIPSamplingOffset = XE_GTAO_DEFAULT_DEPTH_MIP_SAMPLING_OFFSET;
};
template<class T> inline T clamp( T const & v, T const & min, T const & max ) { assert( max >= min ); if( v < min ) return min; if( v > max ) return max; return v; }
// If using TAA then set noiseIndex to frameIndex % 64 - otherwise use 0
inline void GTAOUpdateConstants( XeGTAO::GTAOConstants& consts, int viewportWidth, int viewportHeight, const XeGTAO::GTAOSettings & settings, const glm::mat4& projMatrix, bool rowMajor, unsigned int frameCounter )
{
consts.ViewportSize = { viewportWidth, viewportHeight };
consts.ViewportPixelSize = { 1.0f / (float)viewportWidth, 1.0f / (float)viewportHeight };
float depthLinearizeMul = -projMatrix[3][2]; // float depthLinearizeMul = ( clipFar * clipNear ) / ( clipFar - clipNear );
float depthLinearizeAdd = -projMatrix[2][2]; // float depthLinearizeAdd = clipFar / ( clipFar - clipNear );
// correct the handedness issue. need to make sure this below is correct, but I think it is.
if( depthLinearizeMul * depthLinearizeAdd < 0 )
depthLinearizeAdd = -depthLinearizeAdd;
consts.DepthUnpackConsts = { depthLinearizeMul, depthLinearizeAdd };
float tanHalfFOVY = 1.0f / projMatrix[1][1]; // = tanf( drawContext.Camera.GetYFOV( ) * 0.5f );
float tanHalfFOVX = 1.0F / projMatrix[0][0]; // = tanHalfFOVY * drawContext.Camera.GetAspect( );
consts.CameraTanHalfFOV = { tanHalfFOVX, tanHalfFOVY };
consts.NDCToViewMul = { consts.CameraTanHalfFOV.x * 2.0f, consts.CameraTanHalfFOV.y * -2.0f };
consts.NDCToViewAdd = { consts.CameraTanHalfFOV.x * -1.0f, consts.CameraTanHalfFOV.y * 1.0f };
consts.NDCToViewMul_x_PixelSize = { consts.NDCToViewMul.x * consts.ViewportPixelSize.x, consts.NDCToViewMul.y * consts.ViewportPixelSize.y };
consts.EffectRadius = settings.Radius;
consts.EffectFalloffRange = settings.FalloffRange;
consts.DenoiseBlurBeta = (settings.DenoisePasses==0)?(1e4f):(1.2f); // high value disables denoise - more elegant & correct way would be do set all edges to 0
consts.RadiusMultiplier = settings.RadiusMultiplier;
consts.SampleDistributionPower = settings.SampleDistributionPower;
consts.ThinOccluderCompensation = settings.ThinOccluderCompensation;
consts.FinalValuePower = settings.FinalValuePower;
consts.DepthMIPSamplingOffset = settings.DepthMIPSamplingOffset;
consts.NoiseIndex = (settings.DenoisePasses>0)?(frameCounter % 64):(0);
consts.Padding0 = 0;
}
#ifdef IMGUI_API
inline bool GTAOImGuiSettings( XeGTAO::GTAOSettings & settings )
{
bool hadChanges = false;
ImGui::PushItemWidth( 120.0f );
ImGui::Text( "Performance/quality settings:" );
ImGui::Combo( "Quality Level", &settings.QualityLevel, "Low\0Medium\0High\0Ultra\00");
if( ImGui::IsItemHovered( ) ) ImGui::SetTooltip( "Higher quality settings use more samples per pixel but are slower" );
settings.QualityLevel = clamp( settings.QualityLevel , 0, 3 );
ImGui::Combo( "Denoising level", &settings.DenoisePasses, "Disabled\0Sharp\0Medium\0Soft\00");
if( ImGui::IsItemHovered( ) ) ImGui::SetTooltip( "The amount of edge-aware spatial denoise applied" );
settings.DenoisePasses = clamp( settings.DenoisePasses , 0, 3 );
ImGui::Text( "Visual settings:" );
settings.Radius = clamp( settings.Radius, 0.0f, 100000.0f );
hadChanges |= ImGui::InputFloat( "Effect radius", &settings.Radius , 0.05f, 0.0f, "%.2f" );
if( ImGui::IsItemHovered( ) ) ImGui::SetTooltip( "World (viewspace) effect radius\nExpected range: depends on the scene & requirements, anything from 0.01 to 1000+" );
settings.Radius = clamp( settings.Radius , 0.0f, 10000.0f );
if( ImGui::CollapsingHeader( "Auto-tuned settings (heuristics)" ) )
{
hadChanges |= ImGui::InputFloat( "Radius multiplier", &settings.RadiusMultiplier , 0.05f, 0.0f, "%.2f" );
if( ImGui::IsItemHovered( ) ) ImGui::SetTooltip( "Multiplies the 'Effect Radius' - used by the auto-tune to best match raytraced ground truth\nExpected range: [0.3, 3.0], defaults to %.3f", XE_GTAO_DEFAULT_RADIUS_MULTIPLIER );
settings.RadiusMultiplier = clamp( settings.RadiusMultiplier , 0.3f, 3.0f );
hadChanges |= ImGui::InputFloat( "Falloff range", &settings.FalloffRange , 0.05f, 0.0f, "%.2f" );
if( ImGui::IsItemHovered( ) ) ImGui::SetTooltip( "Gently reduce sample impact as it gets out of 'Effect radius' bounds\nExpected range: [0.0, 1.0], defaults to %.3f", XE_GTAO_DEFAULT_FALLOFF_RANGE );
settings.FalloffRange = clamp( settings.FalloffRange , 0.0f, 1.0f );
hadChanges |= ImGui::InputFloat( "Sample distribution power", &settings.SampleDistributionPower , 0.05f, 0.0f, "%.2f" );
if( ImGui::IsItemHovered( ) ) ImGui::SetTooltip( "Make samples on a slice equally distributed (1.0) or focus more towards the center (>1.0)\nExpected range: [1.0, 3.0], 2defaults to %.3f", XE_GTAO_DEFAULT_SAMPLE_DISTRIBUTION_POWER );
settings.SampleDistributionPower = clamp( settings.SampleDistributionPower , 1.0f, 3.0f );
hadChanges |= ImGui::InputFloat( "Thin occluder compensation", &settings.ThinOccluderCompensation, 0.05f, 0.0f, "%.2f" );
if( ImGui::IsItemHovered( ) ) ImGui::SetTooltip( "Slightly reduce impact of samples further back to counter the bias from depth-based (incomplete) input scene geometry data\nExpected range: [0.0, 0.7], defaults to %.3f", XE_GTAO_DEFAULT_THIN_OCCLUDER_COMPENSATION );
settings.ThinOccluderCompensation = clamp( settings.ThinOccluderCompensation , 0.0f, 0.7f );
hadChanges |= ImGui::InputFloat( "Final power", &settings.FinalValuePower, 0.05f, 0.0f, "%.2f" );
if( ImGui::IsItemHovered( ) ) ImGui::SetTooltip( "Applies power function to the final value: occlusion = pow( occlusion, finalPower )\nExpected range: [0.5, 5.0], defaults to %.3f", XE_GTAO_DEFAULT_FINAL_VALUE_POWER );
settings.FinalValuePower = clamp( settings.FinalValuePower , 0.5f, 5.0f );
hadChanges |= ImGui::InputFloat( "Depth MIP sampling offset", &settings.DepthMIPSamplingOffset, 0.05f, 0.0f, "%.2f" );
if( ImGui::IsItemHovered( ) ) ImGui::SetTooltip( "Mainly performance (texture memory bandwidth) setting but as a side-effect reduces overshadowing by thin objects and increases temporal instability\nExpected range: [2.0, 6.0], defaults to %.3f", XE_GTAO_DEFAULT_DEPTH_MIP_SAMPLING_OFFSET );
settings.DepthMIPSamplingOffset = clamp( settings.DepthMIPSamplingOffset , 0.0f, 30.0f );
}
ImGui::PopItemWidth( );
return hadChanges;
}
#endif // IMGUI_API
} // close the namespace
#endif // #ifdef __cplusplus
#endif // __XE_GTAO_TYPES_H__