GITechDemo:

iftodebogdan · iftodebogdan · commit 8a975d7b2c08 · 2015-10-30T02:52:16.000+02:00
* shadow tuning
* small shader optimizations
diff --git a/GITechDemo/Code/AppMain/GITechDemo/RenderScheme/ShadowMapDirectionalLightPass.cpp b/GITechDemo/Code/AppMain/GITechDemo/RenderScheme/ShadowMapDirectionalLightPass.cpp
@@ -31,8 +31,8 @@ namespace GITechDemoApp
 
 	extern const Vec<unsigned int, 2> SHADOW_MAP_SIZE = Vec<unsigned int, 2>(4096, 4096);
 
-	float DEPTH_BIAS[NUM_CASCADES]				= {	0.004f,		0.002f,		0.003f,		0.002f	};
-	float SLOPE_SCALED_DEPTH_BIAS[NUM_CASCADES]	= {	5.f,		5.5f,		6.f,		5.5f	};
+	float DEPTH_BIAS[NUM_CASCADES]				= {	0.002f,		0.002f,		0.0015f,	0.001f	};
+	float SLOPE_SCALED_DEPTH_BIAS[NUM_CASCADES]	= {	2.f,		2.5f,		2.5f,		1.5f	};
 
 	AABoxf SceneAABB;
 	AABoxf SceneLightSpaceAABB;
diff --git a/GITechDemo/Data/shaders/BokehDoF.hlsl b/GITechDemo/Data/shaders/BokehDoF.hlsl
@@ -157,9 +157,9 @@ const float	fVignFade;		//	F-stops until vignette fades
 #define DEPTH_BLUR_SAMPLE_COUNT (9)
 static const float fKernelWeight[DEPTH_BLUR_SAMPLE_COUNT] =
 {
-	1.f / 16.f, 2.f / 16.f, 1.f / 16.f,
-	2.f / 16.f, 4.f / 16.f, 2.f / 16.f,
-	1.f / 16.f, 2.f / 16.f, 1.f / 16.f
+	0.0625f, 0.125f, 0.0625f,	// 1/16, 2/16, 1/16 as float literals: integer 1/16 truncates to 0
+	0.125f,  0.25f,  0.125f,	// 2/16, 4/16, 2/16
+	0.0625f, 0.125f, 0.0625f	// 1/16, 2/16, 1/16 (weights sum to 1)
 };
 static const float2 f2KernelOffset[DEPTH_BLUR_SAMPLE_COUNT] =
 {
@@ -214,8 +214,8 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
 	if (bManualDof)
 	{
 		const float fFocalPlane	= fLinearDepth - fFocalPoint;
-		const float fFarPlane	= (fFocalPlane - fFarDofStart) / fFarDofFalloff;
-		const float fNearPlane	= (-fFocalPlane - fNearDofStart) / fNearDofFalloff;
+		const float fFarPlane	= (fFocalPlane - fFarDofStart) * rcp(fFarDofFalloff);
+		const float fNearPlane	= (-fFocalPlane - fNearDofStart) * rcp(fNearDofFalloff);
 		//fDofBlurFactor		= (fFocalPlane > 0.f) ? fFarPlane : fNearPlane;
 		fDofBlurFactor			= lerp(fNearPlane, fFarPlane, fFocalPlane > 0.f);	// Avoid dynamic branching
 	}
@@ -248,8 +248,7 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
 	// Since fDofBlurFactor >= 0.05f is coherent across the screen			//
 	// (i.e. the focal plane has a coherent coverage), dynamic branching	//
 	// isn't such a big deal when compared to a bunch of sin() and cos().	//
-	// NB: See note about dynamic branching in DeferredLightDir.hlsl		//
-	// NB2: Yes, I have tested it both ways: dynamic branching version		//
+	// NB: I have tested it both ways and the dynamic branching version		//
 	// is faster than extra ALU by about 13% on AMD Mobility Radeon HD 5650	//
 	//----------------------------------------------------------------------//
 	if (fDofBlurFactor >= 0.05f)
@@ -258,18 +257,18 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
 
 		DOF_UNROLL_LV1 for (int i = 1; i <= nRingCount; i++)
 		{
-			const int fRingSampleCount = i * nSampleCount;
+			const int nRingSampleCount = i * nSampleCount;
 
-			DOF_UNROLL_LV2 for (int j = 0; j < fRingSampleCount; j++)
+			DOF_UNROLL_LV2 for (int j = 0; j < nRingSampleCount; j++)
 			{
 				// Distribute the samples across the ring's edge evenly
-				const float		fStep			=	PI * 2.f / float(fRingSampleCount);
-				const float		fAngle			=	float(j) * fStep;
-				const float2	f2RingPattern	=	float2(cos(fAngle), sin(fAngle)) * float(i);
+				const float		fStep			=	PI * 2.f * rcp(nRingSampleCount);
+				const float		fAngle			=	j * fStep;
+				const float2	f2RingPattern	=	float2(cos(fAngle), sin(fAngle)) * i;
 
 				// Shift sampling weights toward bokeh edge according to the value of 'fBokehBias'
-				float3	f3ColorAdd		=	CalculateColor(input.f2TexCoord + f2RingPattern * f2BlurStepFactor, fDofBlurFactor) * lerp(1.f, (float(i)) / (float(nRingCount)), fBokehBias);
-				float	fSampleDivAdd	=	lerp(1.f, (float(i)) / (float(nRingCount)), fBokehBias);
+				float3	f3ColorAdd		=	CalculateColor(input.f2TexCoord + f2RingPattern * f2BlurStepFactor, fDofBlurFactor) * lerp(1.f, i * rcp(nRingCount), fBokehBias);
+				float	fSampleDivAdd	=	lerp(1.f, i * rcp(nRingCount), fBokehBias);
 
 				// Optionally, use pentagon shape for bokeh
 				if (bPentagonBokeh)
@@ -284,7 +283,7 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
 			}
 		}
 
-		f3Color /= fSampleDiv;
+		f3Color *= rcp(fSampleDiv);
 	}
 
 	// Apply vignetting
@@ -302,7 +301,7 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
 // Generate pentagon pattern
 float GeneratePentagon(float2 f2Coords)
 {
-	const float fScale = float(nRingCount) - 1.3f;
+	const float fScale = nRingCount - 1.3f;
 
 	const float4 f4HS0 = float4( 1.f,			 0.f,			0.f,	1.f);
 	const float4 f4HS1 = float4( 0.309016994f,	 0.951056516f,	0.f,	1.f);
@@ -343,7 +342,7 @@ float BlurDepth(float2 f2Coords)
 	// overlapping in and out of focus objects would be bigger as the resolution got lower.
 	// Just maintain a proper aspect ratio so that the kernel is always square.
 	//float2 f2KernelSize = f2TexelSize * fDepthBlurSize;
-	const float2 f2KernelSize = float2(fDepthBlurSize, f2TexelSize.y * fDepthBlurSize / f2TexelSize.x);
+	const float2 f2KernelSize = float2(fDepthBlurSize, f2TexelSize.y * fDepthBlurSize * rcp(f2TexelSize.x));
 
 	float fDepth = 0.f;
 	UNROLL for (int i = 0; i < DEPTH_BLUR_SAMPLE_COUNT; i++)
@@ -416,6 +415,6 @@ float3 DebugFocus(float3 f3Color, float fDofBlurFactor, float fLinearDepth)
 // Calculate vignetting effect factor of a pixel
 float CalculateVignetting(float2 f2Coords)
 {
-	const float fFadeFactor = fFStop / fVignFade;
+	const float fFadeFactor = fFStop * rcp(fVignFade);
 	return smoothstep(fVignOut + fFadeFactor, fVignIn + fFadeFactor, distance(f2Coords, float2(0.5f, 0.5f)));
 }
diff --git a/GITechDemo/Data/shaders/DeferredLightDir.hlsl b/GITechDemo/Data/shaders/DeferredLightDir.hlsl
@@ -65,7 +65,7 @@ static const float fCascadeNormSize = rcp(nCascadesPerRow);				// Normalized siz
 // Conditional PCF shadow sampling for different sampling methods for each cascade
 // NB: PCF_SAMPLE0 corresponds to most detailed cascade (highest resolution),
 // whereas PCF_SAMPLE3 corresponds to the least detailed cascade (lowest resolution)
-#define USE_CONDITIONAL_PCF (1)
+#define USE_CONDITIONAL_PCF (0)
 #if USE_CONDITIONAL_PCF
 	#define PCF_SAMPLE0	PCF4x4Poisson
 	#define PCF_SAMPLE1	PCF12TapPoisson
@@ -116,7 +116,7 @@ void psmain(VSOut input, out PSOut output)
 	//////////////////////////////////////////////////////////////////////////////////////////
 	// Step 1: Calculate light-view space position of current pixel
 	float4 f4LightViewPos = mul(f44ScreenToLightViewMat, float4(input.f2ScreenPos, fDepth, 1.f));
-	f4LightViewPos /= f4LightViewPos.w;
+	f4LightViewPos *= rcp(f4LightViewPos.w);
 
 	// Step 2: Find the best valid cascade
 	unsigned int nValidCascade = 0;
@@ -150,7 +150,7 @@ void psmain(VSOut input, out PSOut output)
 	f3CascadeTexCoord.xy =
 		(f3CascadeTexCoord.xy * float2(0.5f, -0.5f) + float2(0.5f, 0.5f)) *
 		fCascadeNormSize +
-		float2(fCascadeNormSize * fmod(nValidCascade, nCascadesPerRow), fCascadeNormSize * floor(nValidCascade / nCascadesPerRow));
+		float2(fCascadeNormSize * fmod(nValidCascade, nCascadesPerRow), fCascadeNormSize * floor(nValidCascade * rcp(nCascadesPerRow)));
 
 	// Conditional PCF shadow sampling
 #if USE_CONDITIONAL_PCF
@@ -238,7 +238,7 @@ void psmain(VSOut input, out PSOut output)
 						f4BlendAmount.z,
 						f4BlendAmount.w
 						)
-					) / fScaledBlendSize;
+					) * rcp(fScaledBlendSize);
 
 			// If our point is inside the blend band, we can continue with blending
 			if (fBlendAmount > 0.f)
@@ -248,7 +248,7 @@ void psmain(VSOut input, out PSOut output)
 				f3LQCascadeTexCoord.xy =
 					(f3LQCascadeTexCoord.xy * float2(0.5f, -0.5f) + float2(0.5f, 0.5f)) *
 					fCascadeNormSize +
-					float2(fCascadeNormSize * fmod(nValidCascade + 1, nCascadesPerRow), fCascadeNormSize * floor((nValidCascade + 1) / nCascadesPerRow));
+					float2(fCascadeNormSize * fmod(nValidCascade + 1, nCascadesPerRow), fCascadeNormSize * floor((nValidCascade + 1) * rcp(nCascadesPerRow)));
 
 				// Sample from the lower quality cascade and blend between samples appropriately
 			#if USE_CONDITIONAL_PCF
@@ -369,7 +369,7 @@ float3 CookTorranceGGX(const float3 f3MaterialColor, const float fMaterialType,
 
 	// Distribution term
 	const float fP = fNdotH * fNdotH * (fRoughness2 - 1.f) + 1.f;
-	const float fDistrib = fRoughness2 / (PI * fP * fP);
+	const float fDistrib = fRoughness2 * rcp(PI * fP * fP);
 
 	// Calculate the matching visibility term
 	const float fV1i = GGXVisibilityTerm(fRoughness2, fNdotL);
@@ -380,7 +380,7 @@ float3 CookTorranceGGX(const float3 f3MaterialColor, const float fMaterialType,
 	const float3 f3Fresnel = FresnelTerm(f3SpecularAlbedo, f3H, -f3LightDirView);
 
 	// Color components
-	const float3 f3DiffuseColor = f3DiffuseAlbedo / PI * (1.f - f3Fresnel);
+	const float3 f3DiffuseColor = f3DiffuseAlbedo * rcp(PI) * (1.f - f3Fresnel);	// (albedo / PI) * (1 - F): only PI belongs in the denominator
 	const float3 f3SpecularColor = fDistrib * f3Fresnel * fVis;
 
 	const float3 f3EnvAlbedo = texCUBEbias(texEnvMap, float4(mul((float3x3)f44InvViewMat, reflect(f3ViewVec, f3Normal)), fRoughness2 * ENVIRONMENT_MAP_MIP_COUNT)).rgb;
@@ -397,12 +397,12 @@ float3 CookTorranceGGX(const float3 f3MaterialColor, const float fMaterialType,
 float BeckmannGeometricTerm(const float fRoughness, const float fNdotX)
 {
 	float fNdotX2 = fNdotX * fNdotX;
-	float fTanTheta = sqrt((1.f - fNdotX2) / fNdotX2);
+	float fTanTheta = sqrt((1.f - fNdotX2) * rcp(fNdotX2));
 	float fAlpha = rcp((fRoughness * fTanTheta));
 	float fAlpha2 = fAlpha * fAlpha;
 	float fGeom = 1.f;
 	if (fAlpha < 1.6f)
-		fGeom *= (3.535f * fAlpha + 2.181f * fAlpha2) / (1.f + 2.276f * fAlpha + 2.577f * fAlpha2);
+		fGeom *= (3.535f * fAlpha + 2.181f * fAlpha2) * rcp(1.f + 2.276f * fAlpha + 2.577f * fAlpha2);
 	return fGeom;
 }
 
@@ -423,9 +423,9 @@ float3 CookTorranceBeckmann(const float3 f3MaterialColor, const float fMaterialT
 	const float fRoughness2 = fRoughness * fRoughness;
 
 	// Distribution term
-	const float fTanTheta2 = (1.f - fNdotH2) / fNdotH2;
-	const float fExpTerm = exp(-fTanTheta2 / fRoughness2);
-	const float fDistrib = fExpTerm / (PI * fRoughness2 * fNdotH4);
+	const float fTanTheta2 = (1.f - fNdotH2) * rcp(fNdotH2);
+	const float fExpTerm = exp(-fTanTheta2 * rcp(fRoughness2));
+	const float fDistrib = fExpTerm * rcp(PI * fRoughness2 * fNdotH4);
 
 	// Geometric term
 	const float fG1i = BeckmannGeometricTerm(fRoughness, fNdotL);
@@ -436,7 +436,7 @@ float3 CookTorranceBeckmann(const float3 f3MaterialColor, const float fMaterialT
 	const float3 f3Fresnel = FresnelTerm(f3SpecularAlbedo, f3H, -f3LightDirView);
 	
 	// Color components
-	const float3 f3DiffuseColor = f3DiffuseAlbedo / PI * (1.f - f3Fresnel);
+	const float3 f3DiffuseColor = f3DiffuseAlbedo * rcp(PI) * (1.f - f3Fresnel);	// (albedo / PI) * (1 - F): only PI belongs in the denominator
 	const float3 f3SpecularColor = fDistrib * fGeom * f3Fresnel * rcp(4.f * fNdotL * fNdotV);
 
 	const float3 f3EnvAlbedo = texCUBEbias(texEnvMap, float4(mul((float3x3)f44InvViewMat, reflect(f3ViewVec, f3Normal)), fRoughness2 * ENVIRONMENT_MAP_MIP_COUNT)).rgb;
diff --git a/GITechDemo/Data/shaders/Downsample.hlsl b/GITechDemo/Data/shaders/Downsample.hlsl
@@ -41,7 +41,7 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
 	{
 		const float fBrightness = dot(f4Color.rgb, LUMINANCE_VECTOR);
 		f4Color.rgb *= step(fBrightnessThreshold, fBrightness);
-		//f4Color.rgb /= fBrightness;
+		//f4Color.rgb *= rcp(fBrightness);
 		f4Color.rgb *= rcp(1.f + fBrightness);
 	}
 }
diff --git a/GITechDemo/Data/shaders/HDRToneMapping.hlsl b/GITechDemo/Data/shaders/HDRToneMapping.hlsl
@@ -39,7 +39,7 @@ float3 ReinhardTonemap(const float3 f3Color, const float fAvgLuma)
 float3 DuikerOptimizedTonemap(const float3 f3Color)
 {
 	float3 x = max(0, f3Color - 0.004f);
-	return (x * (6.2f * x + 0.5f)) / (x * (6.2f * x + 1.7f) + 0.06f);
+	return (x * (6.2f * x + 0.5f)) * rcp(x * (6.2f * x + 1.7f) + 0.06f);
 }
 
 //////////////////////////////////////////////////////////////////////////////
@@ -58,7 +58,7 @@ float3 FilmicTonemap(const float3 f3Color)
 				f3Color * (fShoulderStrength * f3Color + fLinearStrength)
 				+ fToeStrength * fToeDenominator
 			)
-		) - fToeNumerator / fToeDenominator;
+		) - fToeNumerator * rcp(fToeDenominator);
 }
 
 void psmain(VSOut input, out float4 f4Color : SV_TARGET)
@@ -73,7 +73,7 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
 	float3 f3Color = tex2D(texSource, input.f2TexCoord).rgb;
 
 	float fAvgLuma = tex2D(texAvgLuma, float2(0.5f, 0.5f)).r;
-	f3Color /= fAvgLuma;
+	f3Color *= rcp(fAvgLuma);
 
 	//////////////////////////////////////////
 	// Apply tone mapping operator			//
@@ -88,7 +88,7 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
 
 	// Uncharted 2
 	float3 f3FinalColor = FilmicTonemap(f3Color * fExposureBias);
-	const float3 f3WhiteScale = 1.0f / FilmicTonemap(fLinearWhite);
+	const float3 f3WhiteScale = rcp(FilmicTonemap(fLinearWhite));
 	f3FinalColor *= f3WhiteScale;
 
 	// Convert back to gamma space (not required for Duiker tonemap)
diff --git a/GITechDemo/Data/shaders/LumaCalc.hlsl b/GITechDemo/Data/shaders/LumaCalc.hlsl
@@ -51,7 +51,7 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
 	// containing the log() of averages
 	if (bInitialLumaPass)
 	{
-		const float2 f2Kernel = f2TexelSize / 3.f;
+		const float2 f2Kernel = f2TexelSize * 0.33333333f;
 		float fLogLumSum = 0.f;
 		UNROLL for (float i = -1.f; i <= 1.f; i++)
 			UNROLL for (float j = -1.f; j <= 1.f; j++)
@@ -63,7 +63,7 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
 				fLogLumSum += log(dot(f3Sample, LUMINANCE_VECTOR) + 0.0001f);
 			}
 
-		fLogLumSum /= 9.f;
+		fLogLumSum *= 0.11111111f;
 		f4Color = float4(fLogLumSum, fLogLumSum, fLogLumSum, 1.f);
 	}
 	// The rest of the passes further downscale the average luma texture
@@ -78,11 +78,11 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
 		// the final 1x1 average luma texture
 		if (bFinalLumaPass)
 		{
-			fAvgLuma = exp(fAvgLuma / 16.f);
+			fAvgLuma = exp(fAvgLuma * 0.0625f);
 			fAvgLuma = clamp(fAvgLuma, f2AvgLumaClamp.x, f2AvgLumaClamp.y);
 		}
 		else
-			fAvgLuma /= 16.f;
+			fAvgLuma *= 0.0625f;
 
 		f4Color = float4(fAvgLuma, fAvgLuma, fAvgLuma, 1.f);
 	}
diff --git a/GITechDemo/Data/shaders/PostProcessingUtils.hlsl b/GITechDemo/Data/shaders/PostProcessingUtils.hlsl
@@ -25,7 +25,7 @@ const float fZFar;
 const float2 f2LinearDepthEquation;
 float ReconstructDepth(float fHyperbolicDepth)
 {
-	return f2LinearDepthEquation.x / (fHyperbolicDepth - f2LinearDepthEquation.y);
+	return f2LinearDepthEquation.x * rcp(fHyperbolicDepth - f2LinearDepthEquation.y);
 }
 
 
diff --git a/GITechDemo/Data/shaders/RSMCommon.hlsl b/GITechDemo/Data/shaders/RSMCommon.hlsl
@@ -72,7 +72,7 @@ void ApplyRSM(const float2 f2TexCoord, const float fDepth, out float4 colorOut)
 	// Transform pixel coordinates from NDC space to RSM view space
 	const float4 f4RSMViewSpacePos = mul(f44ScreenToLightViewMat, f4ScreenProjSpacePos);
 	// Perspective w-divide
-	const float3 f3RSMViewSpacePos = f4RSMViewSpacePos.xyz / f4RSMViewSpacePos.w;
+	const float3 f3RSMViewSpacePos = f4RSMViewSpacePos.xyz * rcp(f4RSMViewSpacePos.w);
 	// Sample normal for currently shaded pixel and transform to RSM view space
 	const float3 f3RSMViewSpaceNormal = mul((float3x3)f44ViewToRSMViewMat, DecodeNormal(tex2D(texNormalBuffer, f2TexCoord)));
 	// Transform point to RSM NDC space
diff --git a/GITechDemo/Data/shaders/SSAO.hlsl b/GITechDemo/Data/shaders/SSAO.hlsl
@@ -54,7 +54,7 @@ float AOCalc(const float2 f2TexCoord, const float2 f2Offset, const float3 f3Posi
 	const float fSampleDepth			= tex2D(texDepthBuffer, f2SampleTexCoord).r;
 	const float4 f4SampleProjPosition	= float4(f2SampleScreenPos, fSampleDepth, 1.f);
 	const float4 f4SamplePositionPreW	= mul(f44InvProjMat, f4SampleProjPosition);
-	const float3 f3SamplePosition		= f4SamplePositionPreW.xyz / f4SamplePositionPreW.w;
+	const float3 f3SamplePosition		= f4SamplePositionPreW.xyz * rcp(f4SamplePositionPreW.w);
 
 	const float3 f3PosDiff = f3SamplePosition - f3Position;
 	const float3 f3Dir = normalize(f3PosDiff);
@@ -68,12 +68,12 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)
 	const float fDepth = tex2D(texDepthBuffer, input.f2TexCoord).r;
 	const float4 f4ProjPosition = float4(input.f2ScreenPos, fDepth, 1.f);
 	const float4 f4PositionPreW = mul(f44InvProjMat, f4ProjPosition);
-	const float3 f3Position = f4PositionPreW.xyz / f4PositionPreW.w;
+	const float3 f3Position = f4PositionPreW.xyz * rcp(f4PositionPreW.w);
 	const float3 f3Normal = DecodeNormal(tex2D(texNormalBuffer, input.f2TexCoord));
 	//const float2 f2Rand = float2(GenerateRandomNumber(input.f2TexCoord.xy), GenerateRandomNumber(input.f2TexCoord.yx));
 
 	float fAO = 0.0f;
-	const float fRad = fSSAOSampleRadius / f3Position.z;
+	const float fRad = fSSAOSampleRadius * rcp(f3Position.z);
 
 	UNROLL for (int i = 0; i < 4; i++)
 	{
diff --git a/GITechDemo/Data/shaders/Utils.hlsl b/GITechDemo/Data/shaders/Utils.hlsl
@@ -9,8 +9,8 @@ float4 EncodeNormal(float3 n)
 {
 	n = normalize(n);
 	const float scale = 1.7777f;
-	float2 enc = n.xy / (n.z + 1.f);
-	enc /= scale;
+	float2 enc = n.xy * rcp(n.z + 1.f);
+	enc *= rcp(scale);
 	enc = enc * 0.5f + 0.5f;
 	return float4(enc, 0.f, 0.f);
 }
@@ -19,7 +19,7 @@ float3 DecodeNormal(float4 enc)
 {
 	const float scale = 1.7777f;
 	const float3 nn = enc.xyz * float3(2.f * scale, 2.f * scale, 0.f) + float3(-scale, -scale, 1.f);
-	const float g = 2.f / dot(nn.xyz, nn.xyz);
+	const float g = 2.f * rcp(dot(nn.xyz, nn.xyz));
 	const float3 n = float3(g * nn.xy, g - 1.f);
 	return normalize(n);
 }

Original file line number	Diff line number	Diff line change
`@@ -41,7 +41,7 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)`
`41`	`41`	`{`
`42`	`42`	`const float fBrightness = dot(f4Color.rgb, LUMINANCE_VECTOR);`
`43`	`43`	`f4Color.rgb *= step(fBrightnessThreshold, fBrightness);`
`44`		`- //f4Color.rgb /= fBrightness;`
	`44`	`+ //f4Color.rgb *= rcp(fBrightness);`
`45`	`45`	`f4Color.rgb *= rcp(1.f + fBrightness);`
`46`	`46`	`}`
`47`	`47`	`}`
Original file line number	Diff line number	Diff line change
`@@ -51,7 +51,7 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)`
`51`	`51`	`// containing the log() of averages`
`52`	`52`	`if (bInitialLumaPass)`
`53`	`53`	`{`
`54`		`- const float2 f2Kernel = f2TexelSize / 3.f;`
	`54`	`+ const float2 f2Kernel = f2TexelSize * 0.33333333f;`
`55`	`55`	`float fLogLumSum = 0.f;`
`56`	`56`	`UNROLL for (float i = -1.f; i <= 1.f; i++)`
`57`	`57`	`UNROLL for (float j = -1.f; j <= 1.f; j++)`
`@@ -63,7 +63,7 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)`
`63`	`63`	`fLogLumSum += log(dot(f3Sample, LUMINANCE_VECTOR) + 0.0001f);`
`64`	`64`	`}`
`65`	`65`
`66`		`- fLogLumSum /= 9.f;`
	`66`	`+ fLogLumSum *= 0.11111111f;`
`67`	`67`	`f4Color = float4(fLogLumSum, fLogLumSum, fLogLumSum, 1.f);`
`68`	`68`	`}`
`69`	`69`	`// The rest of the passes further downscale the average luma texture`
`@@ -78,11 +78,11 @@ void psmain(VSOut input, out float4 f4Color : SV_TARGET)`
`78`	`78`	`// the final 1x1 average luma texture`
`79`	`79`	`if (bFinalLumaPass)`
`80`	`80`	`{`
`81`		`- fAvgLuma = exp(fAvgLuma / 16.f);`
	`81`	`+ fAvgLuma = exp(fAvgLuma * 0.0625f);`
`82`	`82`	`fAvgLuma = clamp(fAvgLuma, f2AvgLumaClamp.x, f2AvgLumaClamp.y);`
`83`	`83`	`}`
`84`	`84`	`else`
`85`		`- fAvgLuma /= 16.f;`
	`85`	`+ fAvgLuma *= 0.0625f;`
`86`	`86`
`87`	`87`	`f4Color = float4(fAvgLuma, fAvgLuma, fAvgLuma, 1.f);`
`88`	`88`	`}`
Original file line number	Diff line number	Diff line change
`@@ -25,7 +25,7 @@ const float fZFar;`
`25`	`25`	`const float2 f2LinearDepthEquation;`
`26`	`26`	`float ReconstructDepth(float fHyperbolicDepth)`
`27`	`27`	`{`
`28`		`- return f2LinearDepthEquation.x / (fHyperbolicDepth - f2LinearDepthEquation.y);`
	`28`	`+ return f2LinearDepthEquation.x * rcp(fHyperbolicDepth - f2LinearDepthEquation.y);`
`29`	`29`	`}`
`30`	`30`
`31`	`31`
Original file line number	Diff line number	Diff line change
`@@ -9,8 +9,8 @@ float4 EncodeNormal(float3 n)`
`9`	`9`	`{`
`10`	`10`	`n = normalize(n);`
`11`	`11`	`const float scale = 1.7777f;`
`12`		`- float2 enc = n.xy / (n.z + 1.f);`
`13`		`- enc /= scale;`
	`12`	`+ float2 enc = n.xy * rcp(n.z + 1.f);`
	`13`	`+ enc *= rcp(scale);`
`14`	`14`	`enc = enc * 0.5f + 0.5f;`
`15`	`15`	`return float4(enc, 0.f, 0.f);`
`16`	`16`	`}`
`@@ -19,7 +19,7 @@ float3 DecodeNormal(float4 enc)`
`19`	`19`	`{`
`20`	`20`	`const float scale = 1.7777f;`
`21`	`21`	`const float3 nn = enc.xyz * float3(2.f * scale, 2.f * scale, 0.f) + float3(-scale, -scale, 1.f);`
`22`		`- const float g = 2.f / dot(nn.xyz, nn.xyz);`
	`22`	`+ const float g = 2.f * rcp(dot(nn.xyz, nn.xyz));`
`23`	`23`	`const float3 n = float3(g * nn.xy, g - 1.f);`
`24`	`24`	`return normalize(n);`
`25`	`25`	`}`