Skip to content

Commit 05ddf65

Browse files
committed
xrCore: implement control FP calculations for Linux
1 parent 865d540 commit 05ddf65

File tree

1 file changed

+103
-1
lines changed

1 file changed

+103
-1
lines changed

src/xrCore/_math.cpp

Lines changed: 103 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
11
#include "stdafx.h"
22
#pragma hdrstop
33

4+
#if defined(WINDOWS)
45
#include <intrin.h> // __rdtsc
56
#include <process.h>
67
#include <powerbase.h>
8+
#elif defined(LINUX)
9+
#include <fpu_control.h>
10+
#include <pthread.h>
11+
#endif
712

813
typedef struct _PROCESSOR_POWER_INFORMATION
914
{
@@ -20,6 +25,38 @@ XRCORE_API Fmatrix Fidentity;
2025
XRCORE_API Dmatrix Didentity;
2126
XRCORE_API CRandom Random;
2227

28+
#if defined(LINUX)
29+
#define nsec_per_sec 1000*1000*1000
30+
/**
31+
* From https://stackoverflow.com/questions/12468331/queryperformancecounter-linux-equivalent
32+
* @return
33+
*/
34+
void QueryPerformanceCounter(PLARGE_INTEGER result)
35+
{
36+
u64 nsec_count, nsec_per_tick;
37+
/*
38+
* clock_gettime() returns the number of secs. We translate that to number of nanosecs.
39+
* clock_getres() returns number of seconds per tick. We translate that to number of nanosecs per tick.
40+
* Number of nanosecs divided by number of nanosecs per tick - will give the number of ticks.
41+
*/
42+
struct timespec ts1, ts2;
43+
44+
if (clock_gettime(CLOCK_MONOTONIC, &ts1) != 0) {
45+
return;
46+
}
47+
48+
nsec_count = ts1.tv_nsec + ts1.tv_sec * nsec_per_sec;
49+
50+
if (clock_getres(CLOCK_MONOTONIC, &ts2) != 0) {
51+
return;
52+
}
53+
54+
nsec_per_tick = ts2.tv_nsec + ts2.tv_sec * nsec_per_sec;
55+
56+
*result = (nsec_count / nsec_per_tick);
57+
}
58+
#endif
59+
2360
/*
2461
Функции управления точностью вычислений с плавающей точкой.
2562
Более подробную информацию можно получить здесь:
@@ -32,51 +69,103 @@ namespace FPU
3269
{
3370
// Switches the FPU to 24-bit (single) precision with truncating (chop) rounding.
XRCORE_API void m24()
{
#if defined(WINDOWS)
    _controlfp(_PC_24, MCW_PC);
    _controlfp(_RC_CHOP, MCW_RC);
#elif defined(LINUX)
    fpu_control_t fpu_cw;
    _FPU_GETCW(fpu_cw);
    // In glibc's x86 <fpu_control.h>, _FPU_EXTENDED has both precision-control bits
    // set and _FPU_RC_ZERO has both rounding-control bits set, so together they act
    // as the mask for the two fields. Clear both fields, then select the new values.
    fpu_cw &= ~(_FPU_EXTENDED | _FPU_RC_ZERO);
    // _FPU_RC_ZERO mirrors _RC_CHOP from the Windows branch (round toward zero).
    fpu_cw |= _FPU_SINGLE | _FPU_RC_ZERO;
    _FPU_SETCW(fpu_cw);
#endif
}
3882

3983
// Switches the FPU to 24-bit (single) precision with round-to-nearest.
XRCORE_API void m24r()
{
#if defined(WINDOWS)
    _controlfp(_PC_24, MCW_PC);
    _controlfp(_RC_NEAR, MCW_RC);
#elif defined(LINUX)
    fpu_control_t fpu_cw;
    _FPU_GETCW(fpu_cw);
    // In glibc's x86 <fpu_control.h>, _FPU_EXTENDED has both precision-control bits
    // set and _FPU_RC_ZERO has both rounding-control bits set, so together they act
    // as the mask for the two fields. The rounding field has to be cleared
    // explicitly: _FPU_RC_NEAREST is all-zero, so OR-ing it in alone would keep
    // whatever rounding mode was active before.
    fpu_cw &= ~(_FPU_EXTENDED | _FPU_RC_ZERO);
    fpu_cw |= _FPU_SINGLE | _FPU_RC_NEAREST;
    _FPU_SETCW(fpu_cw);
#endif
}
4495

4596
// Switches the FPU to 53-bit (double) precision with truncating (chop) rounding.
XRCORE_API void m53()
{
#if defined(WINDOWS)
    _controlfp(_PC_53, MCW_PC);
    _controlfp(_RC_CHOP, MCW_RC);
#elif defined(LINUX)
    fpu_control_t fpu_cw;
    _FPU_GETCW(fpu_cw);
    // In glibc's x86 <fpu_control.h>, _FPU_EXTENDED has both precision-control bits
    // set and _FPU_RC_ZERO has both rounding-control bits set, so together they act
    // as the mask for the two fields. Clear both fields, then select the new values.
    fpu_cw &= ~(_FPU_EXTENDED | _FPU_RC_ZERO);
    // _FPU_RC_ZERO mirrors _RC_CHOP from the Windows branch (round toward zero).
    fpu_cw |= _FPU_DOUBLE | _FPU_RC_ZERO;
    _FPU_SETCW(fpu_cw);
#endif
}
50108

51109
// Switches the FPU to 53-bit (double) precision with round-to-nearest.
XRCORE_API void m53r()
{
#if defined(WINDOWS)
    _controlfp(_PC_53, MCW_PC);
    _controlfp(_RC_NEAR, MCW_RC);
#elif defined(LINUX)
    fpu_control_t fpu_cw;
    _FPU_GETCW(fpu_cw);
    // In glibc's x86 <fpu_control.h>, _FPU_EXTENDED has both precision-control bits
    // set and _FPU_RC_ZERO has both rounding-control bits set, so together they act
    // as the mask for the two fields. The rounding field has to be cleared
    // explicitly: _FPU_RC_NEAREST is all-zero, so OR-ing it in alone would keep
    // whatever rounding mode was active before.
    fpu_cw &= ~(_FPU_EXTENDED | _FPU_RC_ZERO);
    fpu_cw |= _FPU_DOUBLE | _FPU_RC_NEAREST;
    _FPU_SETCW(fpu_cw);
#endif
}
56121

57122
// Switches the FPU to 64-bit (extended) precision with truncating (chop) rounding.
XRCORE_API void m64()
{
#if defined(WINDOWS)
    _controlfp(_PC_64, MCW_PC);
    _controlfp(_RC_CHOP, MCW_RC);
#elif defined(LINUX)
    fpu_control_t fpu_cw;
    _FPU_GETCW(fpu_cw);
    // In glibc's x86 <fpu_control.h>, _FPU_EXTENDED has both precision-control bits
    // set and _FPU_RC_ZERO has both rounding-control bits set, so together they act
    // as the mask for the two fields. Clear both fields, then select the new values.
    fpu_cw &= ~(_FPU_EXTENDED | _FPU_RC_ZERO);
    // _FPU_RC_ZERO mirrors _RC_CHOP from the Windows branch (round toward zero).
    fpu_cw |= _FPU_EXTENDED | _FPU_RC_ZERO;
    _FPU_SETCW(fpu_cw);
#endif
}
62134

63135
// Switches the FPU to 64-bit (extended) precision with round-to-nearest.
XRCORE_API void m64r()
{
#if defined(WINDOWS)
    _controlfp(_PC_64, MCW_PC);
    _controlfp(_RC_NEAR, MCW_RC);
#elif defined(LINUX)
    fpu_control_t fpu_cw;
    _FPU_GETCW(fpu_cw);
    // In glibc's x86 <fpu_control.h>, _FPU_EXTENDED has both precision-control bits
    // set and _FPU_RC_ZERO has both rounding-control bits set, so together they act
    // as the mask for the two fields. The rounding field has to be cleared
    // explicitly: _FPU_RC_NEAREST is all-zero, so OR-ing it in alone would keep
    // whatever rounding mode was active before.
    fpu_cw &= ~(_FPU_EXTENDED | _FPU_RC_ZERO);
    fpu_cw |= _FPU_EXTENDED | _FPU_RC_NEAREST;
    _FPU_SETCW(fpu_cw);
#endif
}
68147

69148
void initialize()
70149
{
150+
#if defined(WINDOWS)
71151
_clearfp();
152+
#elif defined(LINUX)
153+
fpu_control_t fpu_cw;
154+
fpu_cw = _FPU_DEFAULT;
155+
_FPU_SETCW(fpu_cw);
156+
#endif
72157

73158
// По-умолчанию для плагинов экспорта из 3D-редакторов включена высокая точность вычислений с плавающей точкой
74159
if (Core.PluginMode)
75160
m64r();
76161
else
77162
m24r();
78163

164+
#if defined(WINDOWS)
79165
::Random.seed(u32(CPU::GetCLK() % (1i64 << 32i64)));
166+
#elif defined(LINUX)
167+
::Random.seed(u32(CPU::GetCLK() % ((u64)0x1 << 32)));
168+
#endif
80169
}
81170
};
82171

@@ -135,6 +224,7 @@ void _initialize_cpu()
135224
Msg("* CPU features: %s", features);
136225
Msg("* CPU cores/threads: %d/%d", CPU::ID.n_cores, CPU::ID.n_threads);
137226

227+
#if defined(WINDOWS)
138228
SYSTEM_INFO sysInfo;
139229
GetSystemInfo(&sysInfo);
140230
const size_t cpusCount = sysInfo.dwNumberOfProcessors;
@@ -151,7 +241,7 @@ void _initialize_cpu()
151241
}
152242

153243
Log("");
154-
244+
#endif
155245
Fidentity.identity(); // Identity matrix
156246
Didentity.identity(); // Identity matrix
157247
pvInitializeStatics(); // Lookup table for compressed normals
@@ -186,6 +276,7 @@ void _initialize_cpu_thread()
186276
{
187277
//_mm_setcsr ( _mm_getcsr() | (_MM_FLUSH_ZERO_ON+_MM_DENORMALS_ZERO_ON) );
188278
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
279+
#if defined(WINDOWS)
189280
if (_denormals_are_zero_supported)
190281
{
191282
__try
@@ -197,6 +288,7 @@ void _initialize_cpu_thread()
197288
_denormals_are_zero_supported = FALSE;
198289
}
199290
}
291+
#endif
200292
}
201293
}
202294

@@ -217,13 +309,15 @@ void thread_name(const char* name)
217309
tn.szName = name;
218310
tn.dwThreadID = DWORD(-1);
219311
tn.dwFlags = 0;
312+
#if defined(WINDOWS)
220313
__try
221314
{
222315
RaiseException(0x406D1388, 0, sizeof(tn) / sizeof(DWORD), (ULONG_PTR*)&tn);
223316
}
224317
__except (EXCEPTION_CONTINUE_EXECUTION)
225318
{
226319
}
320+
#endif
227321
}
228322
#pragma pack(pop)
229323

@@ -255,7 +349,15 @@ void thread_spawn(thread_t* entry, const char* name, unsigned stack, void* argli
255349
startup->entry = entry;
256350
startup->name = (char*)name;
257351
startup->args = arglist;
352+
#if defined(WINDOWS)
258353
_beginthread(thread_entry, stack, startup);
354+
#elif defined(LINUX)
355+
pthread_t handle;
356+
pthread_attr_t attr;
357+
pthread_attr_init(&attr);
358+
pthread_create(&handle, &attr, NULL, arglist); //TODO convert entry
359+
pthread_attr_destroy(&attr);
360+
#endif
259361
}
260362

261363
void spline1(float t, Fvector* p, Fvector* ret)

0 commit comments

Comments
 (0)