Merge branch 'master' into for-0.56.0/sync

illwieckz · illwieckz · commit bb81c8f2cda2 · 2024-12-17T19:29:04.000Z
diff --git a/cmake/DaemonGame.cmake b/cmake/DaemonGame.cmake
@@ -78,6 +78,7 @@ function(GAMEMODULE)
             endforeach(inherited_option)
 
             if (USE_NACL_SAIGO)
+                add_custom_target(nacl-vms ALL)
                 unset(NACL_VMS_PROJECTS)
 
                 foreach(NACL_TARGET ${NACL_TARGETS})
@@ -93,6 +94,7 @@ function(GAMEMODULE)
 
                     set(NACL_VMS_PROJECT nacl-vms-${NACL_TARGET})
                     list(APPEND NACL_VMS_PROJECTS ${NACL_VMS_PROJECT})
+                    add_dependencies(nacl-vms ${NACL_VMS_PROJECT})
 
                     ExternalProject_Add(${NACL_VMS_PROJECT}
                         SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}
@@ -126,7 +128,7 @@ function(GAMEMODULE)
                     )
                 endforeach()
             else()
-                set(NACL_VMS_PROJECT nacl-vms-pexe)
+                set(NACL_VMS_PROJECT nacl-vms)
                 set(NACL_VMS_PROJECTS ${NACL_VMS_PROJECT})
 
                 # Workaround a bug where CMake ExternalProject lists-as-args are cut on first “;”
@@ -161,6 +163,7 @@ function(GAMEMODULE)
                     ALWAYS 1
                 )
             endif()
+            set(NACL_VMS_PROJECTS ${NACL_VMS_PROJECTS} PARENT_SCOPE)
         endif()
     else()
         if (FORK EQUAL 2)
diff --git a/src/common/Defs.h b/src/common/Defs.h
@@ -37,7 +37,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 /** No case, No spaces */
 #define PRODUCT_NAME_LOWER  "unvanquished"
 
-#define PRODUCT_VERSION     "0.55.1"
+#define PRODUCT_VERSION     "0.55.2"
 
 /** Default base package */
 #define DEFAULT_BASE_PAK    PRODUCT_NAME_LOWER
diff --git a/src/engine/qcommon/q_math_test.cpp b/src/engine/qcommon/q_math_test.cpp
@@ -205,4 +205,32 @@ TEST(QMathTransformTest, TransInverse)
         {-0.4833702, 0.42157, 0.7551386, -0.1356377}, {1.244436,1.155842,-0.5278334}, 0.4);
 }
 
+TEST(QSharedMathTest, InverseSquareRoot)
+{
+    constexpr float relativeTolerance = 5.0e-6;
+    auto RsqrtEq = [=](float expected) { return FloatNear(expected, expected * relativeTolerance); };
+
+    EXPECT_THAT(Q_rsqrt(1e-6), RsqrtEq(1e3));
+    EXPECT_THAT(Q_rsqrt(0.036), RsqrtEq(5.270463));
+    EXPECT_THAT(Q_rsqrt(0.2), RsqrtEq(2.236068));
+    EXPECT_THAT(Q_rsqrt(1), RsqrtEq(1));
+    EXPECT_THAT(Q_rsqrt(3), RsqrtEq(0.5773503));
+    EXPECT_THAT(Q_rsqrt(29.1), RsqrtEq(0.1853760));
+    EXPECT_THAT(Q_rsqrt(1e6), RsqrtEq(1e-3));
+}
+
+TEST(QSharedMathTest, FastInverseSquareRoot)
+{
+    constexpr float relativeTolerance = 6.50196699e-4;
+    auto RsqrtEq = [=](float expected) { return FloatNear(expected, expected * relativeTolerance); };
+
+    EXPECT_THAT(Q_rsqrt_fast(1e-6), RsqrtEq(1e3));
+    EXPECT_THAT(Q_rsqrt_fast(0.036), RsqrtEq(5.270463));
+    EXPECT_THAT(Q_rsqrt_fast(0.2), RsqrtEq(2.236068));
+    EXPECT_THAT(Q_rsqrt_fast(1), RsqrtEq(1));
+    EXPECT_THAT(Q_rsqrt_fast(3), RsqrtEq(0.5773503));
+    EXPECT_THAT(Q_rsqrt_fast(29.1), RsqrtEq(0.1853760));
+    EXPECT_THAT(Q_rsqrt_fast(1e6), RsqrtEq(1e-3));
+}
+
 } // namespace
diff --git a/src/engine/qcommon/q_shared.h b/src/engine/qcommon/q_shared.h
@@ -338,25 +338,101 @@ extern const quat_t   quatIdentity;
 
 #define Q_ftol(x) ((long)(x))
 
-	// Overall relative error bound (ignoring unknown powerpc case): 5 * 10^-6
-	// https://en.wikipedia.org/wiki/Fast_inverse_square_root#/media/File:2nd-iter.png
-	inline float Q_rsqrt( float number )
-	{
-		float x = 0.5f * number;
-		float y;
+/* The original Q_rsqrt algorithm is:
+
+float Q_rsqrt( float n )
+{
+	uint32_t magic = 0x5f3759dful;
+	float a = 0.5f;
+	float b = 3.0f;
+	union { float f; uint32_t u; } o = {n};
+	o.u = magic - ( o.u >> 1 );
+	return a * o.f * ( b - n * o.f * o.f );
+}
+
+It could be written like this, this is what Quake 3 did:
+
+float Q_rsqrt( float n )
+{
+	uint32_t magic = 0x5f3759dful;
+	float a = 0.5f;
+	float b = 3.0f;
+	float c = a * b; // 1.5f
+	union { float f; uint32_t u; } o = {n};
+	o.u = magic - ( o.u >> 1);
+	float x = n * a;
+	return o.f * ( c - ( x * o.f * o.f ) );
+	o.f *= c - ( x * o.f * o.f );
+//	o.f *= c - ( x * o.f * o.f );
+	return o.f;
+}
+
+It was written with a second iteration commented out.
+
+The relative error bound after the initial iteration was: 1.8×10⁻³
+The relative error bound after a second iteration was: 5×10⁻⁶
+
+The 0x5f3759df magic constant comes from the Quake 3 source code:
+https://github.com/id-Software/Quake-III-Arena/blob/dbe4ddb/code/game/q_math.c#L56
+
+That magic constant was good enough but better ones can be used.
 
-		// compute approximate inverse square root
+Chris lomont computed a better magic constant of 0x5f375a86 while
+keeping the other values of 0.5 and 3.0 for all iterations:
+https://www.lomont.org/papers/2003/InvSqrt.pdf
+
+Jan Kadlec computed an ever better magic constant but it requires
+different values for the first iteration: http://rrrola.wz.cz/inv_sqrt.html
+
+float Q_rsqrt( float n )
+{
+	uint32_t magic = 0x5f1ffff9ul:
+	float a = 0.703952253f;
+	float b = 2.38924456f;
+	union { float f; uint32_t u; } o = {n};
+	o.u = magic - ( o.u >> 1 );
+	return a * o.f * ( b - n * y.f * y.f );
+}
+
+The relative error bound is: 6.50196699×10⁻⁴ */
+
+// Compute approximate inverse square root.
+inline float Q_rsqrt_fast( const float n )
+{
 #if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
-		// SSE rsqrt relative error bound: 3.7 * 10^-4
-		_mm_store_ss( &y, _mm_rsqrt_ss( _mm_load_ss( &number ) ) );
+	float o;
+	// The SSE rsqrt relative error bound is 3.7×10⁻⁴.
+	_mm_store_ss( &o, _mm_rsqrt_ss( _mm_load_ss( &n ) ) );
 #else
-		y = Util::bit_cast<float>( 0x5f3759df - ( Util::bit_cast<uint32_t>( number ) >> 1 ) );
-		y *= ( 1.5f - ( x * y * y ) ); // initial iteration
-		// relative error bound after the initial iteration: 1.8 * 10^-3
+	/* Magic constants by Jan Kadlec, with a relative error bound
+	of 6.50196699×10⁻⁴.
+	See: http://rrrola.wz.cz/inv_sqrt.html */
+	constexpr float a = 0.703952253f;
+	constexpr float b = 2.38924456f;
+	constexpr uint32_t magic = 0x5f1ffff9ul;
+	float o = Util::bit_cast<float>( magic - ( Util::bit_cast<uint32_t>( n ) >> 1 ) );
+	o *= a * ( b - n * o * o );
 #endif
-		y *= ( 1.5f - ( x * y * y ) ); // second iteration for higher precision
-		return y;
-	}
+	return o;
+}
+
+inline float Q_rsqrt( const float n )
+{
+	/* When using the magic constants, the relative error bound after the
+	iteration is expected to be at most 5×10⁻⁶. It was achieved with the
+	less-good Quake 3 constants with a first iteration having originally
+	a relative error bound of 1.8×10⁻³.
+	Since the new magic constants provide a better relative error bound of
+	6.50196699×10⁻⁴, the relative error bound is now expected to be smaller.
+	When using the SSE rsqrt, the initial error bound is 3.7×10⁻⁴ so after
+	the iteration it is also expected to be smaller. */
+	constexpr float a = 0.5f;
+	constexpr float b = 3.0f;
+	float o = Q_rsqrt_fast( n );
+	// Do an iteration of Newton's method for finding the zero of: f(x) = 1÷x² - n
+	o *= a * ( b - n * o * o );
+	return o;
+}
 
 inline float Q_fabs( float x )
 {
@@ -617,7 +693,7 @@ inline vec_t VectorNormalize( vec3_t v )
 // that length != 0, nor does it return length
 inline void VectorNormalizeFast( vec3_t v )
 {
-	vec_t ilength = Q_rsqrt( DotProduct( v, v ) );
+	vec_t ilength = Q_rsqrt_fast( DotProduct( v, v ) );
 
 	VectorScale( v, ilength, v );
 }
diff --git a/src/engine/renderer/tr_main.cpp b/src/engine/renderer/tr_main.cpp
@@ -246,7 +246,7 @@ void R_TBNtoQtangents( const vec3_t tangent, const vec3_t binormal,
 	if ( ( trace = tangent2[ 0 ] + binormal2[ 1 ] + normal2[ 2 ] ) > 0.0f )
 	{
 		trace += 1.0f;
-		scale = 0.5f * Q_rsqrt( trace );
+		scale = 0.5f * Q_rsqrt_fast( trace );
 
 		q[ 3 ] = trace * scale;
 		q[ 2 ] = ( tangent2 [ 1 ] - binormal2[ 0 ] ) * scale;
@@ -257,7 +257,7 @@ void R_TBNtoQtangents( const vec3_t tangent, const vec3_t binormal,
 	else if ( tangent2[ 0 ] > binormal2[ 1 ] && tangent2[ 0 ] > normal2[ 2 ] )
 	{
 		trace = tangent2[ 0 ] - binormal2[ 1 ] - normal2[ 2 ] + 1.0f;
-		scale = 0.5f * Q_rsqrt( trace );
+		scale = 0.5f * Q_rsqrt_fast( trace );
 
 		q[ 0 ] = trace * scale;
 		q[ 1 ] = ( tangent2 [ 1 ] + binormal2[ 0 ] ) * scale;
@@ -268,7 +268,7 @@ void R_TBNtoQtangents( const vec3_t tangent, const vec3_t binormal,
 	else if ( binormal2[ 1 ] > normal2[ 2 ] )
 	{
 		trace = -tangent2[ 0 ] + binormal2[ 1 ] - normal2[ 2 ] + 1.0f;
-		scale = 0.5f * Q_rsqrt( trace );
+		scale = 0.5f * Q_rsqrt_fast( trace );
 
 		q[ 1 ] = trace * scale;
 		q[ 0 ] = ( tangent2 [ 1 ] + binormal2[ 0 ] ) * scale;
@@ -279,7 +279,7 @@ void R_TBNtoQtangents( const vec3_t tangent, const vec3_t binormal,
 	else
 	{
 		trace = -tangent2[ 0 ] - binormal2[ 1 ] + normal2[ 2 ] + 1.0f;
-		scale = 0.5f * Q_rsqrt( trace );
+		scale = 0.5f * Q_rsqrt_fast( trace );
 
 		q[ 2 ] = trace * scale;
 		q[ 3 ] = ( tangent2 [ 1 ] - binormal2[ 0 ] ) * scale;