mirror of
https://github.com/MaSzyna-EU07/maszyna.git
synced 2026-03-22 06:55:02 +01:00
use gfx queue when loading dynamic textures fix possible timeout in light culling exclude lods & transparencies from lod test add cpu-side submodel structure for ray testing
436 lines
16 KiB
HLSL
436 lines
16 KiB
HLSL
// Shamelessly stolen from https://www.3dgep.com/forward-plus/
|
|
|
|
#include "light.hlsli"
|
|
#include "primitives.hlsli"
|
|
|
|
// Edge length of a thread group: used for both the X and Y numthreads dimensions of the
// kernels in this file, and as the scale applied to thread IDs when CS_ComputeFrustums
// builds the screen-space corners of a grid cell.
#define FORWARD_PLUS_TILE_SIZE 8
|
|
// Capacity of the group-shared o_LightList / t_LightList arrays, i.e. the maximum number of
// light indices a single thread group can record.
#define FORWARD_PLUS_LIGHTS_PER_TILE 64
|
|
|
|
// System-generated values handed to every compute shader invocation in this file.
struct ComputeShaderInput
{
    uint3 groupID          : SV_GroupID;          // Index of the thread group within the dispatch.
    uint3 groupThreadID    : SV_GroupThreadID;    // Index of the thread within its group.
    uint3 dispatchThreadID : SV_DispatchThreadID; // Index of the thread within the whole dispatch.
    uint  groupIndex       : SV_GroupIndex;       // Flattened index of the thread within its group.
};
|
|
|
|
/* ---------------------------------------------------------------------------------------------- */
|
|
/* Constant Buffers */
|
|
/* ---------------------------------------------------------------------------------------------- */
|
|
|
|
// Dispatch dimensions supplied by the host (HLSL exposes neither as a system value).
cbuffer DispatchParams : register(b4)
{
    // Number of thread groups dispatched.
    uint3 numThreadGroups;

    // Total number of threads that should produce output. This may be smaller than the
    // number of threads actually executed when the screen size is not evenly divisible
    // by the block size, so kernels bounds-check against it.
    uint3 numThreads;
};
|
|
|
|
// Camera parameters used to move between screen, clip, view and world space.
cbuffer ScreenToViewParams : register( b3 )
{
    float4x4 InverseProjection; // Applied to clip-space positions in ClipToView().
    float4x4 WorldToViewSpace;  // Applied to light origins and directions in CS_CullLights.
    float2   ScreenDimensions;  // Divisor that normalises screen coordinates in ScreenToView().
    uint     NumLights;         // Number of entries of the Lights buffer to test.
}
|
|
|
|
/* ---------------------------------------------------------------------------------------------- */
|
|
/* Shader Resources */
|
|
/* ---------------------------------------------------------------------------------------------- */
|
|
|
|
// Depth buffer, sampled once per thread in CS_CullLights to find the group's depth range.
Texture2D<float> DepthTextureVS : register(t3);

// Packed light records; expanded with UnpackLight() while culling.
StructuredBuffer<PackedLight> Lights : register(t8);

// Precomputed frustums for the grid, one per thread group of CS_CullLights.
StructuredBuffer<Frustum> in_Frustums : register(t9);
|
|
|
|
/* ---------------------------------------------------------------------------------------------- */
|
|
/* Unordered Access views */
|
|
/* ---------------------------------------------------------------------------------------------- */
|
|
|
|
// Output of CS_ComputeFrustums: one view-space frustum per grid cell.
RWStructuredBuffer<Frustum> out_Frustums : register(u0);

// Naming convention for everything below:
//   "o_" = light lists for opaque geometry,
//   "t_" = light lists for transparent geometry.

// Global cursors into the light index lists; element 0 is bumped atomically by each
// thread group to reserve a contiguous range.
globallycoherent RWStructuredBuffer<uint> o_LightIndexCounter : register(u1);
globallycoherent RWStructuredBuffer<uint> t_LightIndexCounter : register(u2);

// Flat lists of light indices, written range-by-range by the thread groups.
globallycoherent RWStructuredBuffer<uint> o_LightIndexList : register(u3);
globallycoherent RWStructuredBuffer<uint> t_LightIndexList : register(u4);

// Per-group (start offset, light count) pairs addressing the lists above.
globallycoherent RWTexture2D<uint2> o_LightGrid : register(u5);
globallycoherent RWTexture2D<uint2> t_LightGrid : register(u6);
|
|
|
|
/* ---------------------------------------------------------------------------------------------- */
|
|
/* Group shared globals */
|
|
/* ---------------------------------------------------------------------------------------------- */
|
|
|
|
// Depth range of the group, accumulated with integer atomics on the raw float bits.
groupshared uint uMinDepth;
groupshared uint uMaxDepth;

// Frustum of the grid cell handled by this group (loaded once by thread 0).
groupshared Frustum GroupFrustum;

// Lights affecting opaque geometry: running count, reserved offset into
// o_LightIndexList, and the indices collected so far.
groupshared uint o_LightCount;
groupshared uint o_LightIndexStartOffset;
groupshared uint o_LightList[FORWARD_PLUS_LIGHTS_PER_TILE];

// Same bookkeeping for lights affecting transparent geometry.
groupshared uint t_LightCount;
groupshared uint t_LightIndexStartOffset;
groupshared uint t_LightList[FORWARD_PLUS_LIGHTS_PER_TILE];
|
|
|
|
/* ---------------------------------------------------------------------------------------------- */
|
|
/* Utility functions */
|
|
/* ---------------------------------------------------------------------------------------------- */
|
|
|
|
// Forward declarations; the definitions live at the end of this file.

// Records a light index in the group's opaque-geometry list.
void o_AppendLight(uint lightIndex);

// Records a light index in the group's transparent-geometry list.
void t_AppendLight(uint lightIndex);

// Clip space -> view space (includes the perspective divide).
float4 ClipToView(float4 clip);

// Screen space -> view space.
float4 ScreenToView(float4 screen);
|
|
|
|
/* ---------------------------------------------------------------------------------------------- */
|
|
/* Frustum computation */
|
|
/* ---------------------------------------------------------------------------------------------- */
|
|
|
|
// Builds one view-space frustum per grid cell and stores it in out_Frustums.
// Each thread handles the cell whose grid coordinate equals its dispatch thread ID;
// a cell covers FORWARD_PLUS_TILE_SIZE x FORWARD_PLUS_TILE_SIZE screen units.
[numthreads(FORWARD_PLUS_TILE_SIZE, FORWARD_PLUS_TILE_SIZE, 1)]
void CS_ComputeFrustums(in ComputeShaderInput input)
{
    const uint2 cell = input.dispatchThreadID.xy;

    // Threads that fall outside the grid have nothing to store.
    if (cell.x >= numThreads.x || cell.y >= numThreads.y)
    {
        return;
    }

    // The eye sits at the view-space origin, so every side plane passes through it.
    const float3 eye = (float3)0.;

    // Screen-space sample points of the cell: four corners plus the centre.
    float4 screenPts[5];
    screenPts[0] = float4(cell * FORWARD_PLUS_TILE_SIZE, -1.0f, 1.0f);                                   // top left
    screenPts[1] = float4(float2(cell.x + 1, cell.y) * FORWARD_PLUS_TILE_SIZE, -1.0f, 1.0f);             // top right
    screenPts[2] = float4(float2(cell.x, cell.y + 1) * FORWARD_PLUS_TILE_SIZE, -1.0f, 1.0f);             // bottom left
    screenPts[3] = float4(float2(cell.x + 1, cell.y + 1) * FORWARD_PLUS_TILE_SIZE, -1.0f, 1.0f);         // bottom right
    screenPts[4] = float4(float2(cell.x + .5, cell.y + .5) * FORWARD_PLUS_TILE_SIZE, -1.0f, 1.0f);       // centre (view ray)

    // Bring the sample points into view space (the result is negated, as in the
    // plane/ray conventions used by the culling pass).
    float3 viewPts[5];
    for (int k = 0; k < 5; k++)
    {
        viewPts[k] = -ScreenToView(screenPts[k]).xyz;
    }

    Frustum frustum;
    frustum.planes[0] = ComputePlane(eye, viewPts[2], viewPts[0]); // left
    frustum.planes[1] = ComputePlane(eye, viewPts[1], viewPts[3]); // right
    frustum.planes[2] = ComputePlane(eye, viewPts[0], viewPts[1]); // top
    frustum.planes[3] = ComputePlane(eye, viewPts[3], viewPts[2]); // bottom

    // Cone approximation of the cell: central ray plus the tangent of the angle
    // between that ray and the top-left corner ray.
    const float3 centreRay = normalize(viewPts[4]);
    const float cosHalfAngle = dot(normalize(viewPts[0]), centreRay);
    const float sinHalfAngle = sqrt(1. - saturate(cosHalfAngle * cosHalfAngle));

    frustum.m_view_ray = centreRay;
    frustum.m_tan_frustum_angle = sinHalfAngle / cosHalfAngle;

    out_Frustums[cell.x + (cell.y * numThreads.x)] = frustum;
}
|
|
|
|
/* ---------------------------------------------------------------------------------------------- */
|
|
/* Light culling */
|
|
/* ---------------------------------------------------------------------------------------------- */
|
|
|
|
// Signed distance from `pos` to a sphere of radius `s` centred at the origin
// (negative inside the sphere).
float sdSphere(float3 pos, float s)
{
    const float centreDistance = length(pos);
    return centreDistance - s;
}
|
|
|
|
// Distance-like measure from `pos` to a spherical sector ("solid angle") with its apex at
// the origin and its axis along +Y.
//   c  - 2D direction of the sector's edge; CS_CullLights passes (sin, cos) of the cone angle.
//   ra - radius of the bounding sphere of the sector.
// The shape is symmetric around Y, so the test is done in the (radial, y) half-plane.
float sdSolidAngle(float3 pos, float2 c, float ra)
{
    const float2 q = float2(length(pos.xz), pos.y);

    // Distance to the bounding sphere.
    const float sphereDist = length(q) - ra;

    // Distance to the edge segment of the sector, signed by which side of the edge q lies on.
    const float along = clamp(dot(q, c), 0.0, ra);
    const float edgeDist = length(q - c * along);
    const float side = sign(c.y * q.x - c.x * q.y);

    return max(sphereDist, edgeDist * side);
}
|
|
|
|
// Variant of sdSolidAngle() with the sign of the edge term flipped, i.e. it measures the
// part of the sphere of radius `ra` that lies on the other side of the sector's edge.
//   c  - 2D direction of the sector's edge.
//   ra - radius of the bounding sphere.
float sdSolidAngleOutside(float3 pos, float2 c, float ra)
{
    const float2 q = float2(length(pos.xz), pos.y);

    // Distance to the bounding sphere.
    const float sphereDist = length(q) - ra;

    // Distance to the edge segment, with the side test inverted relative to sdSolidAngle().
    const float along = clamp(dot(q, c), 0.0, ra);
    const float edgeDist = length(q - c * along);
    const float side = sign(c.y * q.x - c.x * q.y);

    return max(sphereDist, -edgeDist * side);
}
|
|
|
|
|
|
// Upper bound on distance-field steps per march, so one light can never stall the dispatch.
static const int CullLights_MaxMarchSteps = 100;

// Marches along the group's view ray `rd` (starting at the eye) against a sphere of `radius`
// centred at `lightPosVS`, beginning at ray parameter `dist` and giving up at `maxDist` or
// after CullLights_MaxMarchSteps steps. The ray is widened into a cone: a step counts as a
// hit once the sphere is closer than `dist * tanAngle`.
// Returns true on a hit; `dist` is then the ray parameter at which the hit was detected.
bool CullLights_MarchSphere(inout float dist, float maxDist, float3 rd, float tanAngle,
                            float3 lightPosVS, float radius)
{
    bool hit = false;
    for (int step = 0; step < CullLights_MaxMarchSteps && dist < maxDist; ++step)
    {
        const float surfaceDist = sdSphere(dist * rd - lightPosVS, radius);
        if (surfaceDist <= dist * tanAngle)
        {
            hit = true;
            break;
        }
        dist += surfaceDist;
    }
    return hit;
}

// Same march as CullLights_MarchSphere(), but against a spot light's spherical sector.
//   coneBasis  - rows (tangent, light direction, bitangent): rotates view space so that the
//                light direction becomes +Y, as sdSolidAngle() expects.
//   coneSinCos - (sin, cos) of the light's outer cone angle.
bool CullLights_MarchCone(inout float dist, float maxDist, float3 rd, float tanAngle,
                          float3 lightPosVS, float3x3 coneBasis, float2 coneSinCos, float radius)
{
    bool hit = false;
    for (int step = 0; step < CullLights_MaxMarchSteps && dist < maxDist; ++step)
    {
        const float surfaceDist = sdSolidAngle(mul(coneBasis, dist * rd - lightPosVS), coneSinCos, radius);
        if (surfaceDist <= dist * tanAngle)
        {
            hit = true;
            break;
        }
        dist += surfaceDist;
    }
    return hit;
}

// Light culling: one thread group per grid cell.
//  1. The group finds the depth range of its pixels.
//  2. Its threads split the light list between them and march the cell's view cone
//     against each light's volume, collecting hits in group-shared lists
//     (t_ = anything in front of the farthest depth, o_ = reaching the opaque depth range).
//  3. The group reserves a range in the global index lists, publishes (offset, count)
//     in the light grids and copies its indices out.
[numthreads(FORWARD_PLUS_TILE_SIZE, FORWARD_PLUS_TILE_SIZE, 1)]
void CS_CullLights(in ComputeShaderInput input)
{
    const uint threadsPerGroup = FORWARD_PLUS_TILE_SIZE * FORWARD_PLUS_TILE_SIZE;

    // ---- 1. Depth range of the group ------------------------------------------------------
    int2 texCoord = input.dispatchThreadID.xy;
    float fDepth = DepthTextureVS[texCoord].r;

    // The float bits are compared as integers by the atomics below.
    uint uDepth = asuint(fDepth);

    if (!input.groupIndex) // A single thread initialises the shared state.
    {
        uMinDepth = 0xffffffff;
        uMaxDepth = 0;
        o_LightCount = 0;
        t_LightCount = 0;
        GroupFrustum = in_Frustums[input.groupID.x + (input.groupID.y * numThreadGroups.x)];
    }

    // Shared state must be initialised before any thread accumulates into it.
    GroupMemoryBarrierWithGroupSync();

    InterlockedMin(uMinDepth, uDepth);
    InterlockedMax(uMaxDepth, uDepth);

    // All depths must be accumulated before the range is read back.
    GroupMemoryBarrierWithGroupSync();

    // The largest stored depth is used as the nearest one and the near plane is
    // reconstructed from z = 1 below. NOTE(review): this matches a reversed-Z depth
    // buffer - confirm against the projection setup.
    const float nearestDepth  = asfloat(uMaxDepth);
    const float farthestDepth = asfloat(uMinDepth);

    // Convert depth values to view space.
    const float minDepthVS = ClipToView(float4(0, 0, nearestDepth, 1)).z;
    const float maxDepthVS = ClipToView(float4(0, 0, farthestDepth, 1)).z;
    const float nearClipVS = ClipToView(float4(0, 0, 1, 1)).z;

    // ---- 2. Cull lights against the group's view cone -------------------------------------
    // Ray setup is identical for every light, so it is computed once.
    const float3 rd            = GroupFrustum.m_view_ray;
    const float  tanAngle      = GroupFrustum.m_tan_frustum_angle;
    const float  nearDist      = nearClipVS / rd.z;  // ray parameter at the near plane
    const float  maxDist       = maxDepthVS / rd.z;  // ray parameter at the farthest depth
    const float  minDistOpaque = minDepthVS / rd.z;  // ray parameter at the nearest depth

    for (uint lightIndex = input.groupIndex; lightIndex < NumLights; lightIndex += threadsPerGroup)
    {
        Light light = UnpackLight(Lights[lightIndex]);
        const float3 lightPosVS = mul(WorldToViewSpace, float4(light.m_origin, 1.)).xyz;

        bool intersection = false; // The light volume touches the cell somewhere before maxDist.
        bool is_opaque = false;    // The light volume reaches the opaque depth range.
        float dist = nearDist;

        // A cosine below -1.5 cannot be a real angle; such lights are culled as plain
        // spheres. NOTE(review): presumably the marker UnpackLight uses for point lights.
        if (light.m_cos_outer < -1.5)
        {
            intersection = CullLights_MarchSphere(dist, maxDist, rd, tanAngle, lightPosVS, light.m_radius);
            if (intersection)
            {
                is_opaque = dist > minDistOpaque;
                if (!is_opaque)
                {
                    // The first hit lies in front of all opaque pixels; march again from just
                    // before the nearest opaque depth to see whether the volume extends that far.
                    // (Skipped when there was no hit at all: the result would be unused.)
                    dist = minDistOpaque - 1.e-2;
                    is_opaque = CullLights_MarchSphere(dist, maxDist, rd, tanAngle, lightPosVS, light.m_radius);
                }
            }
        }
        else
        {
            // Assumes light.m_direction is unit length and WorldToViewSpace has no scale.
            const float3 lightDirVS = mul((float3x3)WorldToViewSpace, light.m_direction);

            // Build an orthonormal basis around the light direction. The helper axis must not
            // be (anti-)parallel to the direction, otherwise the projection below collapses to
            // zero and normalize() yields NaN - hence abs(): both +Z and -Z switch to the Y axis.
            float3 tang = select(abs(lightDirVS.z) < .999, float3(0., 0., 1.), float3(0., 1., 0.));
            tang = normalize(tang - lightDirVS * dot(tang, lightDirVS));
            const float3 bitang = cross(lightDirVS, tang);
            const float3x3 coneBasis = {tang, lightDirVS, bitang};

            // (sin, cos) of the outer cone angle; saturate() guards the sqrt against a cosine
            // that is marginally outside [-1, 1].
            const float2 coneSinCos = {sqrt(saturate(1. - light.m_cos_outer * light.m_cos_outer)), light.m_cos_outer};

            intersection = CullLights_MarchCone(dist, maxDist, rd, tanAngle, lightPosVS, coneBasis, coneSinCos, light.m_radius);
            if (intersection)
            {
                is_opaque = dist > minDistOpaque;
                if (!is_opaque)
                {
                    // See the sphere branch: re-test starting at the opaque depth range.
                    dist = minDistOpaque - 1.e-2;
                    is_opaque = CullLights_MarchCone(dist, maxDist, rd, tanAngle, lightPosVS, coneBasis, coneSinCos, light.m_radius);
                }
            }
        }

        if (intersection)
        {
            // Every intersecting light can affect transparent geometry...
            t_AppendLight(lightIndex);

            // ...but only those reaching the opaque depth range affect opaque geometry.
            if (is_opaque)
            {
                o_AppendLight(lightIndex);
            }
        }
    }

    // Wait till all threads in the group have finished appending.
    GroupMemoryBarrierWithGroupSync();

    // ---- 3. Publish the results -----------------------------------------------------------
    // Only thread 0 reserves space and updates the light grids.
    if (!input.groupIndex)
    {
        // The append helpers keep counting past the capacity of the shared lists while
        // dropping the extra indices. Clamp so that we neither reserve/publish entries that
        // were never stored nor read beyond the end of the shared arrays in the copy below.
        o_LightCount = min(o_LightCount, (uint)FORWARD_PLUS_LIGHTS_PER_TILE);
        t_LightCount = min(t_LightCount, (uint)FORWARD_PLUS_LIGHTS_PER_TILE);

        // Opaque geometry.
        InterlockedAdd(o_LightIndexCounter[0], o_LightCount, o_LightIndexStartOffset);
        o_LightGrid[input.groupID.xy] = uint2(o_LightIndexStartOffset, o_LightCount);

        // Transparent geometry.
        InterlockedAdd(t_LightIndexCounter[0], t_LightCount, t_LightIndexStartOffset);
        t_LightGrid[input.groupID.xy] = uint2(t_LightIndexStartOffset, t_LightCount);
    }

    // Counts and start offsets must be visible to the whole group before copying.
    GroupMemoryBarrierWithGroupSync();

    // All threads cooperate in copying the shared lists to the global index lists.
    for (uint o = input.groupIndex; o < o_LightCount; o += threadsPerGroup)
    {
        o_LightIndexList[o_LightIndexStartOffset + o] = o_LightList[o];
    }

    for (uint t = input.groupIndex; t < t_LightCount; t += threadsPerGroup)
    {
        t_LightIndexList[t_LightIndexStartOffset + t] = t_LightList[t];
    }
}
|
|
|
|
/* ---------------------------------------------------------------------------------------------- */
|
|
/* Function implementations */
|
|
/* ---------------------------------------------------------------------------------------------- */
|
|
|
|
// Records `lightIndex` in the group's light list for opaque geometry.
// A slot is claimed atomically, so any thread of the group may call this concurrently.
// Note: o_LightCount is incremented even when the list is already full (the index is then
// dropped), so the counter can end up larger than FORWARD_PLUS_LIGHTS_PER_TILE.
void o_AppendLight(uint lightIndex)
{
    uint slot; // Position claimed in the visible lights array.
    InterlockedAdd(o_LightCount, 1, slot);

    if (slot >= FORWARD_PLUS_LIGHTS_PER_TILE)
    {
        return; // List is full.
    }

    o_LightList[slot] = lightIndex;
}
|
|
|
|
// Records `lightIndex` in the group's light list for transparent geometry.
// A slot is claimed atomically, so any thread of the group may call this concurrently.
// Note: t_LightCount is incremented even when the list is already full (the index is then
// dropped), so the counter can end up larger than FORWARD_PLUS_LIGHTS_PER_TILE.
void t_AppendLight(uint lightIndex)
{
    uint slot; // Position claimed in the visible lights array.
    InterlockedAdd(t_LightCount, 1, slot);

    if (slot >= FORWARD_PLUS_LIGHTS_PER_TILE)
    {
        return; // List is full.
    }

    t_LightList[slot] = lightIndex;
}
|
|
|
|
// Transforms a clip-space position to view space: applies InverseProjection and then
// divides by w to undo the perspective projection.
float4 ClipToView(float4 clip)
{
    const float4 unprojected = mul(InverseProjection, clip);
    return unprojected / unprojected.w;
}
|
|
|
|
// Transforms a screen-space position to view space.
// screen.xy are screen coordinates in the units of ScreenDimensions; screen.zw are passed
// through unchanged as the clip-space z and w.
float4 ScreenToView(float4 screen)
{
    // Normalise to [0, 1] texture coordinates.
    const float2 uv = screen.xy / ScreenDimensions;

    // Flip Y and remap to the [-1, 1] clip-space range.
    const float2 ndc = float2(uv.x, 1.0f - uv.y) * 2.0f - 1.0f;

    return ClipToView(float4(ndc, screen.z, screen.w));
}