Skip to content

Commit 8a975d7

Browse files
committed
GITechDemo:
* shadow tuning * small shader optimizations
1 parent 04fdfff commit 8a975d7

File tree

10 files changed

+49
-50
lines changed

10 files changed

+49
-50
lines changed

GITechDemo/Code/AppMain/GITechDemo/RenderScheme/ShadowMapDirectionalLightPass.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ namespace GITechDemoApp
3131

3232
extern const Vec<unsigned int, 2> SHADOW_MAP_SIZE = Vec<unsigned int, 2>(4096, 4096);
3333

34-
float DEPTH_BIAS[NUM_CASCADES] = { 0.004f, 0.002f, 0.003f, 0.002f };
35-
float SLOPE_SCALED_DEPTH_BIAS[NUM_CASCADES] = { 5.f, 5.5f, 6.f, 5.5f };
34+
float DEPTH_BIAS[NUM_CASCADES] = { 0.002f, 0.002f, 0.0015f, 0.001f };
35+
float SLOPE_SCALED_DEPTH_BIAS[NUM_CASCADES] = { 2.f, 2.5f, 2.5f, 1.5f };
3636

3737
AABoxf SceneAABB;
3838
AABoxf SceneLightSpaceAABB;

GITechDemo/Data/shaders/BokehDoF.hlsl

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -157,9 +157,9 @@ const float fVignFade; // F-stops until vignette fades
157157
#define DEPTH_BLUR_SAMPLE_COUNT (9)
158158
static const float fKernelWeight[DEPTH_BLUR_SAMPLE_COUNT] =
159159
{
160-
1.f / 16.f, 2.f / 16.f, 1.f / 16.f,
161-
2.f / 16.f, 4.f / 16.f, 2.f / 16.f,
162-
1.f / 16.f, 2.f / 16.f, 1.f / 16.f
160+
0.0625f, 0.125f, 0.0625f, // 1/16, 2/16, 1/16 as float literals: int / int would truncate every weight to 0
161+
0.125f, 0.25f, 0.125f, // 2/16, 4/16, 2/16
162+
0.0625f, 0.125f, 0.0625f // 1/16, 2/16, 1/16 (all nine weights sum to 1)
163163
};
164164
static const float2 f2KernelOffset[DEPTH_BLUR_SAMPLE_COUNT] =
165165
{
@@ -214,8 +214,8 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
214214
if (bManualDof)
215215
{
216216
const float fFocalPlane = fLinearDepth - fFocalPoint;
217-
const float fFarPlane = (fFocalPlane - fFarDofStart) / fFarDofFalloff;
218-
const float fNearPlane = (-fFocalPlane - fNearDofStart) / fNearDofFalloff;
217+
const float fFarPlane = (fFocalPlane - fFarDofStart) * rcp(fFarDofFalloff);
218+
const float fNearPlane = (-fFocalPlane - fNearDofStart) * rcp(fNearDofFalloff);
219219
//fDofBlurFactor = (fFocalPlane > 0.f) ? fFarPlane : fNearPlane;
220220
fDofBlurFactor = lerp(fNearPlane, fFarPlane, fFocalPlane > 0.f); // Avoid dynamic branching
221221
}
@@ -248,8 +248,7 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
248248
// Since fDofBlurFactor >= 0.05f is coherent across the screen //
249249
// (i.e. the focal plane has a coherent coverage), dynamic branching //
250250
// isn't such a big deal when compared to a bunch of sin() and cos(). //
251-
// NB: See note about dynamic branching in DeferredLightDir.hlsl //
252-
// NB2: Yes, I have tested it both ways: dynamic branching version //
251+
// NB: I have tested it both ways and the dynamic branching version //
253252
// is faster than extra ALU by about 13% on AMD Mobility Radeon HD 5650 //
254253
//----------------------------------------------------------------------//
255254
if (fDofBlurFactor >= 0.05f)
@@ -258,18 +257,18 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
258257

259258
DOF_UNROLL_LV1 for (int i = 1; i <= nRingCount; i++)
260259
{
261-
const int fRingSampleCount = i * nSampleCount;
260+
const int nRingSampleCount = i * nSampleCount;
262261

263-
DOF_UNROLL_LV2 for (int j = 0; j < fRingSampleCount; j++)
262+
DOF_UNROLL_LV2 for (int j = 0; j < nRingSampleCount; j++)
264263
{
265264
// Distribute the samples across the ring's edge evenly
266-
const float fStep = PI * 2.f / float(fRingSampleCount);
267-
const float fAngle = float(j) * fStep;
268-
const float2 f2RingPattern = float2(cos(fAngle), sin(fAngle)) * float(i);
265+
const float fStep = PI * 2.f * rcp(nRingSampleCount);
266+
const float fAngle = j * fStep;
267+
const float2 f2RingPattern = float2(cos(fAngle), sin(fAngle)) * i;
269268

270269
// Shift sampling weights toward bokeh edge according to the value of 'fBokehBias'
271-
float3 f3ColorAdd = CalculateColor(input.f2TexCoord + f2RingPattern * f2BlurStepFactor, fDofBlurFactor) * lerp(1.f, (float(i)) / (float(nRingCount)), fBokehBias);
272-
float fSampleDivAdd = lerp(1.f, (float(i)) / (float(nRingCount)), fBokehBias);
270+
float3 f3ColorAdd = CalculateColor(input.f2TexCoord + f2RingPattern * f2BlurStepFactor, fDofBlurFactor) * lerp(1.f, i * rcp(nRingCount), fBokehBias);
271+
float fSampleDivAdd = lerp(1.f, i * rcp(nRingCount), fBokehBias);
273272

274273
// Optionally, use pentagon shape for bokeh
275274
if (bPentagonBokeh)
@@ -284,7 +283,7 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
284283
}
285284
}
286285

287-
f3Color /= fSampleDiv;
286+
f3Color *= rcp(fSampleDiv);
288287
}
289288

290289
// Apply vignetting
@@ -302,7 +301,7 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
302301
// Generate pentagon pattern
303302
float GeneratePentagon(float2 f2Coords)
304303
{
305-
const float fScale = float(nRingCount) - 1.3f;
304+
const float fScale = nRingCount - 1.3f;
306305

307306
const float4 f4HS0 = float4( 1.f, 0.f, 0.f, 1.f);
308307
const float4 f4HS1 = float4( 0.309016994f, 0.951056516f, 0.f, 1.f);
@@ -343,7 +342,7 @@ float BlurDepth(float2 f2Coords)
343342
// overlapping in and out of focus objects would be bigger as the resolution got lower.
344343
// Just maintain a proper aspect ratio so that the kernel is always square.
345344
//float2 f2KernelSize = f2TexelSize * fDepthBlurSize;
346-
const float2 f2KernelSize = float2(fDepthBlurSize, f2TexelSize.y * fDepthBlurSize / f2TexelSize.x);
345+
const float2 f2KernelSize = float2(fDepthBlurSize, f2TexelSize.y * fDepthBlurSize * rcp(f2TexelSize.x));
347346

348347
float fDepth = 0.f;
349348
UNROLL for (int i = 0; i < DEPTH_BLUR_SAMPLE_COUNT; i++)
@@ -416,6 +415,6 @@ float3 DebugFocus(float3 f3Color, float fDofBlurFactor, float fLinearDepth)
416415
// Calculate vignetting effect factor of a pixel
417416
float CalculateVignetting(float2 f2Coords)
418417
{
419-
const float fFadeFactor = fFStop / fVignFade;
418+
const float fFadeFactor = fFStop * rcp(fVignFade);
420419
return smoothstep(fVignOut + fFadeFactor, fVignIn + fFadeFactor, distance(f2Coords, float2(0.5f, 0.5f)));
421420
}

GITechDemo/Data/shaders/DeferredLightDir.hlsl

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ static const float fCascadeNormSize = rcp(nCascadesPerRow); // Normalized siz
6565
// Conditional PCF shadow sampling for different sampling methods for each cascade
6666
// NB: PCF_SAMPLE0 corresponds to most detailed cascade (highest resolution),
6767
// whereas PCF_SAMPLE3 corresponds to the least detailed cascade (lowest resolution)
68-
#define USE_CONDITIONAL_PCF (1)
68+
#define USE_CONDITIONAL_PCF (0)
6969
#if USE_CONDITIONAL_PCF
7070
#define PCF_SAMPLE0 PCF4x4Poisson
7171
#define PCF_SAMPLE1 PCF12TapPoisson
@@ -116,7 +116,7 @@ void psmain(VSOut input, out PSOut output)
116116
//////////////////////////////////////////////////////////////////////////////////////////
117117
// Step 1: Calculate light-view space position of current pixel
118118
float4 f4LightViewPos = mul(f44ScreenToLightViewMat, float4(input.f2ScreenPos, fDepth, 1.f));
119-
f4LightViewPos /= f4LightViewPos.w;
119+
f4LightViewPos *= rcp(f4LightViewPos.w);
120120

121121
// Step 2: Find the best valid cascade
122122
unsigned int nValidCascade = 0;
@@ -150,7 +150,7 @@ void psmain(VSOut input, out PSOut output)
150150
f3CascadeTexCoord.xy =
151151
(f3CascadeTexCoord.xy * float2(0.5f, -0.5f) + float2(0.5f, 0.5f)) *
152152
fCascadeNormSize +
153-
float2(fCascadeNormSize * fmod(nValidCascade, nCascadesPerRow), fCascadeNormSize * floor(nValidCascade / nCascadesPerRow));
153+
float2(fCascadeNormSize * fmod(nValidCascade, nCascadesPerRow), fCascadeNormSize * floor(nValidCascade * rcp(nCascadesPerRow)));
154154

155155
// Conditional PCF shadow sampling
156156
#if USE_CONDITIONAL_PCF
@@ -238,7 +238,7 @@ void psmain(VSOut input, out PSOut output)
238238
f4BlendAmount.z,
239239
f4BlendAmount.w
240240
)
241-
) / fScaledBlendSize;
241+
) * rcp(fScaledBlendSize);
242242

243243
// If our point is inside the blend band, we can continue with blending
244244
if (fBlendAmount > 0.f)
@@ -248,7 +248,7 @@ void psmain(VSOut input, out PSOut output)
248248
f3LQCascadeTexCoord.xy =
249249
(f3LQCascadeTexCoord.xy * float2(0.5f, -0.5f) + float2(0.5f, 0.5f)) *
250250
fCascadeNormSize +
251-
float2(fCascadeNormSize * fmod(nValidCascade + 1, nCascadesPerRow), fCascadeNormSize * floor((nValidCascade + 1) / nCascadesPerRow));
251+
float2(fCascadeNormSize * fmod(nValidCascade + 1, nCascadesPerRow), fCascadeNormSize * floor((nValidCascade + 1) * rcp(nCascadesPerRow)));
252252

253253
// Sample from the lower quality cascade and blend between samples appropriately
254254
#if USE_CONDITIONAL_PCF
@@ -369,7 +369,7 @@ float3 CookTorranceGGX(const float3 f3MaterialColor, const float fMaterialType,
369369

370370
// Distribution term
371371
const float fP = fNdotH * fNdotH * (fRoughness2 - 1.f) + 1.f;
372-
const float fDistrib = fRoughness2 / (PI * fP * fP);
372+
const float fDistrib = fRoughness2 * rcp(PI * fP * fP);
373373

374374
// Calculate the matching visibility term
375375
const float fV1i = GGXVisibilityTerm(fRoughness2, fNdotL);
@@ -380,7 +380,7 @@ float3 CookTorranceGGX(const float3 f3MaterialColor, const float fMaterialType,
380380
const float3 f3Fresnel = FresnelTerm(f3SpecularAlbedo, f3H, -f3LightDirView);
381381

382382
// Color components
383-
const float3 f3DiffuseColor = f3DiffuseAlbedo / PI * (1.f - f3Fresnel);
383+
const float3 f3DiffuseColor = f3DiffuseAlbedo * rcp(PI) * (1.f - f3Fresnel); // only PI is a divisor; (1 - Fresnel) must multiply, as in the pre-change expression
384384
const float3 f3SpecularColor = fDistrib * f3Fresnel * fVis;
385385

386386
const float3 f3EnvAlbedo = texCUBEbias(texEnvMap, float4(mul((float3x3)f44InvViewMat, reflect(f3ViewVec, f3Normal)), fRoughness2 * ENVIRONMENT_MAP_MIP_COUNT)).rgb;
@@ -397,12 +397,12 @@ float3 CookTorranceGGX(const float3 f3MaterialColor, const float fMaterialType,
397397
float BeckmannGeometricTerm(const float fRoughness, const float fNdotX)
398398
{
399399
float fNdotX2 = fNdotX * fNdotX;
400-
float fTanTheta = sqrt((1.f - fNdotX2) / fNdotX2);
400+
float fTanTheta = sqrt((1.f - fNdotX2) * rcp(fNdotX2));
401401
float fAlpha = rcp((fRoughness * fTanTheta));
402402
float fAlpha2 = fAlpha * fAlpha;
403403
float fGeom = 1.f;
404404
if (fAlpha < 1.6f)
405-
fGeom *= (3.535f * fAlpha + 2.181f * fAlpha2) / (1.f + 2.276f * fAlpha + 2.577f * fAlpha2);
405+
fGeom *= (3.535f * fAlpha + 2.181f * fAlpha2) * rcp(1.f + 2.276f * fAlpha + 2.577f * fAlpha2);
406406
return fGeom;
407407
}
408408

@@ -423,9 +423,9 @@ float3 CookTorranceBeckmann(const float3 f3MaterialColor, const float fMaterialT
423423
const float fRoughness2 = fRoughness * fRoughness;
424424

425425
// Distribution term
426-
const float fTanTheta2 = (1.f - fNdotH2) / fNdotH2;
427-
const float fExpTerm = exp(-fTanTheta2 / fRoughness2);
428-
const float fDistrib = fExpTerm / (PI * fRoughness2 * fNdotH4);
426+
const float fTanTheta2 = (1.f - fNdotH2) * rcp(fNdotH2);
427+
const float fExpTerm = exp(-fTanTheta2 * rcp(fRoughness2));
428+
const float fDistrib = fExpTerm * rcp(PI * fRoughness2 * fNdotH4);
429429

430430
// Geometric term
431431
const float fG1i = BeckmannGeometricTerm(fRoughness, fNdotL);
@@ -436,7 +436,7 @@ float3 CookTorranceBeckmann(const float3 f3MaterialColor, const float fMaterialT
436436
const float3 f3Fresnel = FresnelTerm(f3SpecularAlbedo, f3H, -f3LightDirView);
437437

438438
// Color components
439-
const float3 f3DiffuseColor = f3DiffuseAlbedo / PI * (1.f - f3Fresnel);
439+
const float3 f3DiffuseColor = f3DiffuseAlbedo * rcp(PI) * (1.f - f3Fresnel); // only PI is a divisor; (1 - Fresnel) must multiply, as in the pre-change expression
440440
const float3 f3SpecularColor = fDistrib * fGeom * f3Fresnel * rcp(4.f * fNdotL * fNdotV);
441441

442442
const float3 f3EnvAlbedo = texCUBEbias(texEnvMap, float4(mul((float3x3)f44InvViewMat, reflect(f3ViewVec, f3Normal)), fRoughness2 * ENVIRONMENT_MAP_MIP_COUNT)).rgb;

GITechDemo/Data/shaders/Downsample.hlsl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
4141
{
4242
const float fBrightness = dot(f4Color.rgb, LUMINANCE_VECTOR);
4343
f4Color.rgb *= step(fBrightnessThreshold, fBrightness);
44-
//f4Color.rgb /= fBrightness;
44+
//f4Color.rgb *= rcp(fBrightness);
4545
f4Color.rgb *= rcp(1.f + fBrightness);
4646
}
4747
}

GITechDemo/Data/shaders/HDRToneMapping.hlsl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ float3 ReinhardTonemap(const float3 f3Color, const float fAvgLuma)
3939
float3 DuikerOptimizedTonemap(const float3 f3Color)
4040
{
4141
float3 x = max(0, f3Color - 0.004f);
42-
return (x * (6.2f * x + 0.5f)) / (x * (6.2f * x + 1.7f) + 0.06f);
42+
return (x * (6.2f * x + 0.5f)) * rcp(x * (6.2f * x + 1.7f) + 0.06f);
4343
}
4444

4545
//////////////////////////////////////////////////////////////////////////////
@@ -58,7 +58,7 @@ float3 FilmicTonemap(const float3 f3Color)
5858
f3Color * (fShoulderStrength * f3Color + fLinearStrength)
5959
+ fToeStrength * fToeDenominator
6060
)
61-
) - fToeNumerator / fToeDenominator;
61+
) - fToeNumerator * rcp(fToeDenominator);
6262
}
6363

6464
void psmain(VSOut input, out float4 f4Color : SV_TARGET)
@@ -73,7 +73,7 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
7373
float3 f3Color = tex2D(texSource, input.f2TexCoord).rgb;
7474

7575
float fAvgLuma = tex2D(texAvgLuma, float2(0.5f, 0.5f)).r;
76-
f3Color /= fAvgLuma;
76+
f3Color *= rcp(fAvgLuma);
7777

7878
//////////////////////////////////////////
7979
// Apply tone mapping operator //
@@ -88,7 +88,7 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
8888

8989
// Uncharted 2
9090
float3 f3FinalColor = FilmicTonemap(f3Color * fExposureBias);
91-
const float3 f3WhiteScale = 1.0f / FilmicTonemap(fLinearWhite);
91+
const float3 f3WhiteScale = rcp(FilmicTonemap(fLinearWhite));
9292
f3FinalColor *= f3WhiteScale;
9393

9494
// Convert back to gamma space (not required for Duiker tonemap)

GITechDemo/Data/shaders/LumaCalc.hlsl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
5151
// containing the log() of averages
5252
if (bInitialLumaPass)
5353
{
54-
const float2 f2Kernel = f2TexelSize / 3.f;
54+
const float2 f2Kernel = f2TexelSize * 0.33333333f;
5555
float fLogLumSum = 0.f;
5656
UNROLL for (float i = -1.f; i <= 1.f; i++)
5757
UNROLL for (float j = -1.f; j <= 1.f; j++)
@@ -63,7 +63,7 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
6363
fLogLumSum += log(dot(f3Sample, LUMINANCE_VECTOR) + 0.0001f);
6464
}
6565

66-
fLogLumSum /= 9.f;
66+
fLogLumSum *= 0.11111111f;
6767
f4Color = float4(fLogLumSum, fLogLumSum, fLogLumSum, 1.f);
6868
}
6969
// The rest of the passes further downscale the average luma texture
@@ -78,11 +78,11 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
7878
// the final 1x1 average luma texture
7979
if (bFinalLumaPass)
8080
{
81-
fAvgLuma = exp(fAvgLuma / 16.f);
81+
fAvgLuma = exp(fAvgLuma * 0.0625f);
8282
fAvgLuma = clamp(fAvgLuma, f2AvgLumaClamp.x, f2AvgLumaClamp.y);
8383
}
8484
else
85-
fAvgLuma /= 16.f;
85+
fAvgLuma *= 0.0625f;
8686

8787
f4Color = float4(fAvgLuma, fAvgLuma, fAvgLuma, 1.f);
8888
}

GITechDemo/Data/shaders/PostProcessingUtils.hlsl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ const float fZFar;
2525
const float2 f2LinearDepthEquation;
2626
float ReconstructDepth(float fHyperbolicDepth)
2727
{
28-
return f2LinearDepthEquation.x / (fHyperbolicDepth - f2LinearDepthEquation.y);
28+
return f2LinearDepthEquation.x * rcp(fHyperbolicDepth - f2LinearDepthEquation.y);
2929
}
3030

3131

GITechDemo/Data/shaders/RSMCommon.hlsl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ void ApplyRSM(const float2 f2TexCoord, const float fDepth, out float4 colorOut)
7272
// Transform pixel coordinates from NDC space to RSM view space
7373
const float4 f4RSMViewSpacePos = mul(f44ScreenToLightViewMat, f4ScreenProjSpacePos);
7474
// Perspective w-divide
75-
const float3 f3RSMViewSpacePos = f4RSMViewSpacePos.xyz / f4RSMViewSpacePos.w;
75+
const float3 f3RSMViewSpacePos = f4RSMViewSpacePos.xyz * rcp(f4RSMViewSpacePos.w);
7676
// Sample normal for currently shaded pixel and transform to RSM view space
7777
const float3 f3RSMViewSpaceNormal = mul((float3x3)f44ViewToRSMViewMat, DecodeNormal(tex2D(texNormalBuffer, f2TexCoord)));
7878
// Transform point to RSM NDC space

GITechDemo/Data/shaders/SSAO.hlsl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ float AOCalc(const float2 f2TexCoord, const float2 f2Offset, const float3 f3Posi
5454
const float fSampleDepth = tex2D(texDepthBuffer, f2SampleTexCoord).r;
5555
const float4 f4SampleProjPosition = float4(f2SampleScreenPos, fSampleDepth, 1.f);
5656
const float4 f4SamplePositionPreW = mul(f44InvProjMat, f4SampleProjPosition);
57-
const float3 f3SamplePosition = f4SamplePositionPreW.xyz / f4SamplePositionPreW.w;
57+
const float3 f3SamplePosition = f4SamplePositionPreW.xyz * rcp(f4SamplePositionPreW.w);
5858

5959
const float3 f3PosDiff = f3SamplePosition - f3Position;
6060
const float3 f3Dir = normalize(f3PosDiff);
@@ -68,12 +68,12 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
6868
const float fDepth = tex2D(texDepthBuffer, input.f2TexCoord).r;
6969
const float4 f4ProjPosition = float4(input.f2ScreenPos, fDepth, 1.f);
7070
const float4 f4PositionPreW = mul(f44InvProjMat, f4ProjPosition);
71-
const float3 f3Position = f4PositionPreW.xyz / f4PositionPreW.w;
71+
const float3 f3Position = f4PositionPreW.xyz * rcp(f4PositionPreW.w);
7272
const float3 f3Normal = DecodeNormal(tex2D(texNormalBuffer, input.f2TexCoord));
7373
//const float2 f2Rand = float2(GenerateRandomNumber(input.f2TexCoord.xy), GenerateRandomNumber(input.f2TexCoord.yx));
7474

7575
float fAO = 0.0f;
76-
const float fRad = fSSAOSampleRadius / f3Position.z;
76+
const float fRad = fSSAOSampleRadius * rcp(f3Position.z);
7777

7878
UNROLL for (int i = 0; i < 4; i++)
7979
{

GITechDemo/Data/shaders/Utils.hlsl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ float4 EncodeNormal(float3 n)
99
{
1010
n = normalize(n);
1111
const float scale = 1.7777f;
12-
float2 enc = n.xy / (n.z + 1.f);
13-
enc /= scale;
12+
float2 enc = n.xy * rcp(n.z + 1.f);
13+
enc *= rcp(scale);
1414
enc = enc * 0.5f + 0.5f;
1515
return float4(enc, 0.f, 0.f);
1616
}
@@ -19,7 +19,7 @@ float3 DecodeNormal(float4 enc)
1919
{
2020
const float scale = 1.7777f;
2121
const float3 nn = enc.xyz * float3(2.f * scale, 2.f * scale, 0.f) + float3(-scale, -scale, 1.f);
22-
const float g = 2.f / dot(nn.xyz, nn.xyz);
22+
const float g = 2.f * rcp(dot(nn.xyz, nn.xyz));
2323
const float3 n = float3(g * nn.xy, g - 1.f);
2424
return normalize(n);
2525
}

0 commit comments

Comments
 (0)