Skip to content

Commit bb81c8f

Browse files
committed
Merge branch 'master' into for-0.56.0/sync
2 parents 36b8468 + 80821bf commit bb81c8f

File tree

5 files changed

+129
-22
lines changed

5 files changed

+129
-22
lines changed

cmake/DaemonGame.cmake

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ function(GAMEMODULE)
7878
endforeach(inherited_option)
7979

8080
if (USE_NACL_SAIGO)
81+
add_custom_target(nacl-vms ALL)
8182
unset(NACL_VMS_PROJECTS)
8283

8384
foreach(NACL_TARGET ${NACL_TARGETS})
@@ -93,6 +94,7 @@ function(GAMEMODULE)
9394

9495
set(NACL_VMS_PROJECT nacl-vms-${NACL_TARGET})
9596
list(APPEND NACL_VMS_PROJECTS ${NACL_VMS_PROJECT})
97+
add_dependencies(nacl-vms ${NACL_VMS_PROJECT})
9698

9799
ExternalProject_Add(${NACL_VMS_PROJECT}
98100
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}
@@ -126,7 +128,7 @@ function(GAMEMODULE)
126128
)
127129
endforeach()
128130
else()
129-
set(NACL_VMS_PROJECT nacl-vms-pexe)
131+
set(NACL_VMS_PROJECT nacl-vms)
130132
set(NACL_VMS_PROJECTS ${NACL_VMS_PROJECT})
131133

132134
# Workaround a bug where CMake ExternalProject lists-as-args are cut on first “;”
@@ -161,6 +163,7 @@ function(GAMEMODULE)
161163
ALWAYS 1
162164
)
163165
endif()
166+
set(NACL_VMS_PROJECTS ${NACL_VMS_PROJECTS} PARENT_SCOPE)
164167
endif()
165168
else()
166169
if (FORK EQUAL 2)

src/common/Defs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3737
/** No case, No spaces */
3838
#define PRODUCT_NAME_LOWER "unvanquished"
3939

40-
#define PRODUCT_VERSION "0.55.1"
40+
#define PRODUCT_VERSION "0.55.2"
4141

4242
/** Default base package */
4343
#define DEFAULT_BASE_PAK PRODUCT_NAME_LOWER

src/engine/qcommon/q_math_test.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,4 +205,32 @@ TEST(QMathTransformTest, TransInverse)
205205
{-0.4833702, 0.42157, 0.7551386, -0.1356377}, {1.244436,1.155842,-0.5278334}, 0.4);
206206
}
207207

208+
TEST(QSharedMathTest, InverseSquareRoot)
209+
{
210+
constexpr float relativeTolerance = 5.0e-6;
211+
auto RsqrtEq = [=](float expected) { return FloatNear(expected, expected * relativeTolerance); };
212+
213+
EXPECT_THAT(Q_rsqrt(1e-6), RsqrtEq(1e3));
214+
EXPECT_THAT(Q_rsqrt(0.036), RsqrtEq(5.270463));
215+
EXPECT_THAT(Q_rsqrt(0.2), RsqrtEq(2.236068));
216+
EXPECT_THAT(Q_rsqrt(1), RsqrtEq(1));
217+
EXPECT_THAT(Q_rsqrt(3), RsqrtEq(0.5773503));
218+
EXPECT_THAT(Q_rsqrt(29.1), RsqrtEq(0.1853760));
219+
EXPECT_THAT(Q_rsqrt(1e6), RsqrtEq(1e-3));
220+
}
221+
222+
TEST(QSharedMathTest, FastInverseSquareRoot)
223+
{
224+
constexpr float relativeTolerance = 6.50196699e-4;
225+
auto RsqrtEq = [=](float expected) { return FloatNear(expected, expected * relativeTolerance); };
226+
227+
EXPECT_THAT(Q_rsqrt_fast(1e-6), RsqrtEq(1e3));
228+
EXPECT_THAT(Q_rsqrt_fast(0.036), RsqrtEq(5.270463));
229+
EXPECT_THAT(Q_rsqrt_fast(0.2), RsqrtEq(2.236068));
230+
EXPECT_THAT(Q_rsqrt_fast(1), RsqrtEq(1));
231+
EXPECT_THAT(Q_rsqrt_fast(3), RsqrtEq(0.5773503));
232+
EXPECT_THAT(Q_rsqrt_fast(29.1), RsqrtEq(0.1853760));
233+
EXPECT_THAT(Q_rsqrt_fast(1e6), RsqrtEq(1e-3));
234+
}
235+
208236
} // namespace

src/engine/qcommon/q_shared.h

Lines changed: 92 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -338,25 +338,101 @@ extern const quat_t quatIdentity;
338338

339339
#define Q_ftol(x) ((long)(x))
340340

341-
// Overall relative error bound (ignoring unknown powerpc case): 5 * 10^-6
342-
// https://en.wikipedia.org/wiki/Fast_inverse_square_root#/media/File:2nd-iter.png
343-
inline float Q_rsqrt( float number )
344-
{
345-
float x = 0.5f * number;
346-
float y;
341+
/* The original Q_rsqrt algorithm is:
342+
343+
float Q_rsqrt( float n )
344+
{
345+
uint32_t magic = 0x5f3759dful;
346+
float a = 0.5f;
347+
float b = 3.0f;
348+
union { float f; uint32_t u; } o = {n};
349+
o.u = magic - ( o.u >> 1 );
350+
return a * o.f * ( b - n * o.f * o.f );
351+
}
352+
353+
It can equivalently be written like this, which is what Quake 3 did:
354+
355+
float Q_rsqrt( float n )
356+
{
357+
uint32_t magic = 0x5f3759dful;
358+
float a = 0.5f;
359+
float b = 3.0f;
360+
float c = a * b; // 1.5f
361+
union { float f; uint32_t u; } o = {n};
362+
o.u = magic - ( o.u >> 1);
363+
float x = n * a;
364+
// Single-expression equivalent: return o.f * ( c - ( x * o.f * o.f ) );
365+
o.f *= c - ( x * o.f * o.f );
366+
// o.f *= c - ( x * o.f * o.f );
367+
return o.f;
368+
}
369+
370+
It was written with a second iteration commented out.
371+
372+
The relative error bound after the initial iteration was: 1.8×10⁻³
373+
The relative error bound after a second iteration was: 5×10⁻⁶
374+
375+
The 0x5f3759df magic constant comes from the Quake 3 source code:
376+
https://github.com/id-Software/Quake-III-Arena/blob/dbe4ddb/code/game/q_math.c#L56
377+
378+
That magic constant was good enough but better ones can be used.
347379
348-
// compute approximate inverse square root
380+
Chris Lomont computed a better magic constant of 0x5f375a86 while
381+
keeping the other values of 0.5 and 3.0 for all iterations:
382+
https://www.lomont.org/papers/2003/InvSqrt.pdf
383+
384+
Jan Kadlec computed an even better magic constant but it requires
385+
different values for the first iteration: http://rrrola.wz.cz/inv_sqrt.html
386+
387+
float Q_rsqrt( float n )
388+
{
389+
uint32_t magic = 0x5f1ffff9ul;
390+
float a = 0.703952253f;
391+
float b = 2.38924456f;
392+
union { float f; uint32_t u; } o = {n};
393+
o.u = magic - ( o.u >> 1 );
394+
return a * o.f * ( b - n * o.f * o.f );
395+
}
396+
397+
The relative error bound is: 6.50196699×10⁻⁴ */
398+
399+
// Compute approximate inverse square root.
400+
inline float Q_rsqrt_fast( const float n )
401+
{
349402
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
350-
// SSE rsqrt relative error bound: 3.7 * 10^-4
351-
_mm_store_ss( &y, _mm_rsqrt_ss( _mm_load_ss( &number ) ) );
403+
float o;
404+
// The SSE rsqrt relative error bound is 3.7×10⁻⁴.
405+
_mm_store_ss( &o, _mm_rsqrt_ss( _mm_load_ss( &n ) ) );
352406
#else
353-
y = Util::bit_cast<float>( 0x5f3759df - ( Util::bit_cast<uint32_t>( number ) >> 1 ) );
354-
y *= ( 1.5f - ( x * y * y ) ); // initial iteration
355-
// relative error bound after the initial iteration: 1.8 * 10^-3
407+
/* Magic constants by Jan Kadlec, with a relative error bound
408+
of 6.50196699×10⁻⁴.
409+
See: http://rrrola.wz.cz/inv_sqrt.html */
410+
constexpr float a = 0.703952253f;
411+
constexpr float b = 2.38924456f;
412+
constexpr uint32_t magic = 0x5f1ffff9ul;
413+
float o = Util::bit_cast<float>( magic - ( Util::bit_cast<uint32_t>( n ) >> 1 ) );
414+
o *= a * ( b - n * o * o );
356415
#endif
357-
y *= ( 1.5f - ( x * y * y ) ); // second iteration for higher precision
358-
return y;
359-
}
416+
return o;
417+
}
418+
419+
inline float Q_rsqrt( const float n )
420+
{
421+
/* When using the magic constants, the relative error bound after this
422+
iteration is expected to be at most 5×10⁻⁶. That bound was achieved with
423+
the less accurate Quake 3 constants, whose first iteration had
424+
a relative error bound of 1.8×10⁻³.
425+
Since the new magic constants give a better first-iteration bound of
426+
6.50196699×10⁻⁴, the bound after this iteration is expected to be smaller.
427+
When using the SSE rsqrt, the initial error bound is 3.7×10⁻⁴, so after
428+
the iteration it is also expected to be smaller. */
429+
constexpr float a = 0.5f;
430+
constexpr float b = 3.0f;
431+
float o = Q_rsqrt_fast( n );
432+
// Do an iteration of Newton's method for finding the zero of: f(x) = 1÷x² - n
433+
o *= a * ( b - n * o * o );
434+
return o;
435+
}
360436

361437
inline float Q_fabs( float x )
362438
{
@@ -617,7 +693,7 @@ inline vec_t VectorNormalize( vec3_t v )
617693
// that length != 0, nor does it return length
618694
inline void VectorNormalizeFast( vec3_t v )
619695
{
620-
vec_t ilength = Q_rsqrt( DotProduct( v, v ) );
696+
vec_t ilength = Q_rsqrt_fast( DotProduct( v, v ) );
621697

622698
VectorScale( v, ilength, v );
623699
}

src/engine/renderer/tr_main.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ void R_TBNtoQtangents( const vec3_t tangent, const vec3_t binormal,
246246
if ( ( trace = tangent2[ 0 ] + binormal2[ 1 ] + normal2[ 2 ] ) > 0.0f )
247247
{
248248
trace += 1.0f;
249-
scale = 0.5f * Q_rsqrt( trace );
249+
scale = 0.5f * Q_rsqrt_fast( trace );
250250

251251
q[ 3 ] = trace * scale;
252252
q[ 2 ] = ( tangent2 [ 1 ] - binormal2[ 0 ] ) * scale;
@@ -257,7 +257,7 @@ void R_TBNtoQtangents( const vec3_t tangent, const vec3_t binormal,
257257
else if ( tangent2[ 0 ] > binormal2[ 1 ] && tangent2[ 0 ] > normal2[ 2 ] )
258258
{
259259
trace = tangent2[ 0 ] - binormal2[ 1 ] - normal2[ 2 ] + 1.0f;
260-
scale = 0.5f * Q_rsqrt( trace );
260+
scale = 0.5f * Q_rsqrt_fast( trace );
261261

262262
q[ 0 ] = trace * scale;
263263
q[ 1 ] = ( tangent2 [ 1 ] + binormal2[ 0 ] ) * scale;
@@ -268,7 +268,7 @@ void R_TBNtoQtangents( const vec3_t tangent, const vec3_t binormal,
268268
else if ( binormal2[ 1 ] > normal2[ 2 ] )
269269
{
270270
trace = -tangent2[ 0 ] + binormal2[ 1 ] - normal2[ 2 ] + 1.0f;
271-
scale = 0.5f * Q_rsqrt( trace );
271+
scale = 0.5f * Q_rsqrt_fast( trace );
272272

273273
q[ 1 ] = trace * scale;
274274
q[ 0 ] = ( tangent2 [ 1 ] + binormal2[ 0 ] ) * scale;
@@ -279,7 +279,7 @@ void R_TBNtoQtangents( const vec3_t tangent, const vec3_t binormal,
279279
else
280280
{
281281
trace = -tangent2[ 0 ] - binormal2[ 1 ] + normal2[ 2 ] + 1.0f;
282-
scale = 0.5f * Q_rsqrt( trace );
282+
scale = 0.5f * Q_rsqrt_fast( trace );
283283

284284
q[ 2 ] = trace * scale;
285285
q[ 3 ] = ( tangent2 [ 1 ] - binormal2[ 0 ] ) * scale;

0 commit comments

Comments
 (0)