Skip to content

Commit 1f97d5a

Browse files
committed
GS: Handle huge/infinite/nan ST coords in GS vertex input.
1 parent d0411d7 commit 1f97d5a

File tree

6 files changed

+320
-53
lines changed

6 files changed

+320
-53
lines changed

pcsx2/GS/GSState.cpp

Lines changed: 274 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1629,13 +1629,6 @@ inline bool GSState::TestDrawChanged()
16291629
return false;
16301630
}
16311631

1632-
u32 GSState::CalcMask(int exp, int max_exp)
1633-
{
1634-
const int amount = 9 + (max_exp - exp);
1635-
1636-
return (1 << std::min(amount, 23)) - 1;
1637-
}
1638-
16391632
void GSState::FlushPrim()
16401633
{
16411634
if (m_index.tail > 0)
@@ -1710,50 +1703,19 @@ void GSState::FlushPrim()
17101703
#endif
17111704
// Update scissor, it may have been modified by a previous draw
17121705
m_env.CTXT[PRIM->CTXT].UpdateScissor();
1706+
17131707
m_vt.Update(m_vertex.buff, m_index.buff, m_vertex.tail, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
1708+
1709+
// Fix huge or nan ST coordinates
1710+
if (PRIM->TME && !PRIM->FST)
1711+
{
1712+
FixHugeSTCoords();
1713+
}
17141714

1715-
// Texel coordinate rounding
1716-
// Helps Manhunt (lights shining through objects).
1717-
// Can help with some alignment issues when upscaling too, and is for both Software and Hardware renderers.
1718-
// Sometimes hardware doesn't get affected, likely due to the difference in how GPU's handle textures (Persona minimap).
1719-
if (PRIM->TME && (GSUtil::GetPrimClass(PRIM->PRIM) == GS_PRIM_CLASS::GS_SPRITE_CLASS || m_vt.m_eq.z))
1715+
// Round fractional parts of ST coords
1716+
if (PRIM->TME && !PRIM->FST && (GSUtil::GetPrimClass(PRIM->PRIM) == GS_PRIM_CLASS::GS_SPRITE_CLASS || m_vt.m_eq.z))
17201717
{
1721-
if (!PRIM->FST) // STQ's
1722-
{
1723-
const bool is_sprite = GSUtil::GetPrimClass(PRIM->PRIM) == GS_PRIM_CLASS::GS_SPRITE_CLASS;
1724-
// ST's have the lowest 9 bits (or greater depending on exponent difference) rounding down (from hardware tests).
1725-
for (int i = m_index.tail - 1; i >= 0; i--)
1726-
{
1727-
GSVertex* v = &m_vertex.buff[m_index.buff[i]];
1728-
1729-
// Only Q on the second vertex is valid
1730-
if (!(i & 1) && is_sprite)
1731-
v->RGBAQ.Q = m_vertex.buff[m_index.buff[i + 1]].RGBAQ.Q;
1732-
1733-
int T = std::bit_cast<int>(v->ST.T);
1734-
int Q = std::bit_cast<int>(v->RGBAQ.Q);
1735-
int S = std::bit_cast<int>(v->ST.S);
1736-
const int expS = (S >> 23) & 0xff;
1737-
const int expT = (T >> 23) & 0xff;
1738-
const int expQ = (Q >> 23) & 0xff;
1739-
int max_exp = std::max(expS, expQ);
1740-
1741-
u32 mask = CalcMask(expS, max_exp);
1742-
S &= ~mask;
1743-
v->ST.S = std::bit_cast<float>(S);
1744-
max_exp = std::max(expT, expQ);
1745-
mask = CalcMask(expT, max_exp);
1746-
T &= ~mask;
1747-
v->ST.T = std::bit_cast<float>(T);
1748-
Q &= ~0xff;
1749-
1750-
if (!is_sprite || (i & 1))
1751-
v->RGBAQ.Q = std::bit_cast<float>(Q);
1752-
1753-
m_vt.m_min.t.x = std::min(m_vt.m_min.t.x, (v->ST.S / v->RGBAQ.Q) * (1 << m_context->TEX0.TW));
1754-
m_vt.m_min.t.y = std::min(m_vt.m_min.t.y, (v->ST.T / v->RGBAQ.Q) * (1 << m_context->TEX0.TH));
1755-
}
1756-
}
1718+
RoundSTCoords();
17571719
}
17581720

17591721
// Skip draw if Z test is enabled, but set to fail all pixels.
@@ -3978,8 +3940,8 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL
39783940

39793941
u8 uses_border = 0;
39803942

3981-
if (m_vt.m_max.t.x >= FLT_MAX || m_vt.m_min.t.x <= -FLT_MAX ||
3982-
m_vt.m_max.t.y >= FLT_MAX || m_vt.m_min.t.y <= -FLT_MAX)
3943+
if (m_vt.m_max.t.x >= 2047.0f || m_vt.m_min.t.x <= -2047.0f ||
3944+
m_vt.m_max.t.y >= 2047.0f || m_vt.m_min.t.y <= -2047.0f)
39833945
{
39843946
// If any of the min/max values reach +-2047 (the limit they are clamped to) we can't rely on them
39853947
// so just assume full texture.
@@ -4188,6 +4150,268 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL
41884150
return { vr, uses_border };
41894151
}
41904152

4153+
// ST coordinate rounding
4154+
// Helps Manhunt (lights shining through objects).
4155+
// Can help with some alignment issues when upscaling too, and is for both Software and Hardware renderers.
4156+
// Sometimes hardware doesn't get affected, likely due to the difference in how GPU's handle textures (Persona minimap).
4157+
void GSState::RoundSTCoords()
4158+
{
4159+
const bool is_sprite = GSUtil::GetPrimClass(PRIM->PRIM) == GS_PRIM_CLASS::GS_SPRITE_CLASS;
4160+
4161+
// ST's have the lowest 9 bits (or greater depending on exponent difference) rounded down (from hardware tests).
4162+
// This gives the bitmask for the lower 9 (or more) bits.
4163+
auto LowerBitsMask = [](int exp, int max_exp)
4164+
{
4165+
const int amount = 9 + (max_exp - exp);
4166+
return (1 << std::min(amount, 23)) - 1;
4167+
};
4168+
4169+
for (int i = m_index.tail - 1; i >= 0; i--)
4170+
{
4171+
GSVertex* v = &m_vertex.buff[m_index.buff[i]];
4172+
4173+
// Only Q on the second vertex is valid
4174+
if (!(i & 1) && is_sprite)
4175+
v->RGBAQ.Q = m_vertex.buff[m_index.buff[i + 1]].RGBAQ.Q;
4176+
4177+
int S = std::bit_cast<int>(v->ST.S);
4178+
int T = std::bit_cast<int>(v->ST.T);
4179+
int Q = std::bit_cast<int>(v->RGBAQ.Q);
4180+
4181+
const int expS = (S >> 23) & 0xff;
4182+
const int expT = (T >> 23) & 0xff;
4183+
const int expQ = (Q >> 23) & 0xff;
4184+
4185+
S &= ~LowerBitsMask(expS, std::max(expS, expQ));
4186+
T &= ~LowerBitsMask(expT, std::max(expT, expQ));
4187+
Q &= ~0xff; // Q gets truncated less than ST by hardware tests
4188+
4189+
v->ST.S = std::bit_cast<float>(S);
4190+
v->ST.T = std::bit_cast<float>(T);
4191+
4192+
if (!is_sprite || (i & 1))
4193+
v->RGBAQ.Q = std::bit_cast<float>(Q);
4194+
4195+
const float U = (v->ST.S / v->RGBAQ.Q) * (1 << m_context->TEX0.TW);
4196+
const float V = (v->ST.T / v->RGBAQ.Q) * (1 << m_context->TEX0.TH);
4197+
const float Qf = std::bit_cast<float>(Q);
4198+
4199+
const GSVector4 uvq(U, V, Qf, Qf);
4200+
4201+
// Do min/max with only those values that are not NaN
4202+
m_vt.m_min.t = m_vt.m_min.t.blend32(m_vt.m_min.t.min(uvq), uvq.notnan());
4203+
m_vt.m_max.t = m_vt.m_max.t.blend32(m_vt.m_max.t.max(uvq), uvq.notnan());
4204+
}
4205+
4206+
// Clamp the min/max UV values to the min/max valid UV values.
4207+
m_vt.m_min.t = m_vt.m_min.t.min(GSVector4(2047.0f)).max(GSVector4(-2047.0f)).xyzw(m_vt.m_min.t);
4208+
m_vt.m_max.t = m_vt.m_max.t.min(GSVector4(2047.0f)).max(GSVector4(-2047.0f)).xyzw(m_vt.m_max.t);
4209+
}
4210+
4211+
// Handle huge ST coords by culling primitives with NaN coords and
4212+
// replacing the primitives with huge coords with a new one that has the huge coordinate replaced with +/- 2047.
4213+
// This is based on hardware tests that seem to show that ST coordinates get clamped to +/- 2047
4214+
// (perhaps before applying repeat or region repeat).
4215+
// Note that the huge texture coords may be a symptom of floating point issues upstream in the EE and
4216+
// it would be better to have them fixed there; this is a bandaid.
4217+
void GSState::FixHugeSTCoords()
4218+
{
4219+
bool sprite = GSUtil::GetPrimClass(PRIM->PRIM) == GS_SPRITE_CLASS;
4220+
switch (GSUtil::GetClassVertexCount(GSUtil::GetPrimClass(PRIM->PRIM)))
4221+
{
4222+
case 1:
4223+
if (sprite)
4224+
FixHugeSTCoordsImpl<1, true>();
4225+
else
4226+
FixHugeSTCoordsImpl<1, false>();
4227+
break;
4228+
case 2:
4229+
if (sprite)
4230+
FixHugeSTCoordsImpl<2, true>();
4231+
else
4232+
FixHugeSTCoordsImpl<2, false>();
4233+
break;
4234+
case 3:
4235+
if (sprite)
4236+
FixHugeSTCoordsImpl<3, true>();
4237+
else
4238+
FixHugeSTCoordsImpl<3, false>();
4239+
break;
4240+
default:
4241+
pxFail("Impossible");
4242+
}
4243+
}
4244+
4245+
template <u32 n, bool sprite> void GSState::FixHugeSTCoordsImpl()
4246+
{
4247+
GSVertex* const vertex = m_vertex.buff;
4248+
u16* const index = m_index.buff;
4249+
4250+
u32 new_index_tail = 0;
4251+
4252+
constexpr float huge = 1e10f; // arbitrary large value
4253+
4254+
const float tex_width = 1 << m_context->TEX0.TW;
4255+
const float tex_height = 1 << m_context->TEX0.TH;
4256+
4257+
bool new_prims = false; // Did we generate new primitives?
4258+
4259+
for (u32 i = 0; i < m_index.tail; i += n)
4260+
{
4261+
bool nan_s = false;
4262+
bool nan_t = false;
4263+
bool huge_s_pos = false;
4264+
bool huge_s_neg = false;
4265+
bool huge_t_pos = false;
4266+
bool huge_t_neg = false;
4267+
4268+
if (sprite)
4269+
{
4270+
// Sprites behave as if both Qs are same as the second one
4271+
const float s0 = vertex[index[i + 0]].ST.S / vertex[index[i + 1]].RGBAQ.Q;
4272+
const float t0 = vertex[index[i + 0]].ST.T / vertex[index[i + 1]].RGBAQ.Q;
4273+
const float s1 = vertex[index[i + 1]].ST.S / vertex[index[i + 1]].RGBAQ.Q;
4274+
const float t1 = vertex[index[i + 1]].ST.T / vertex[index[i + 1]].RGBAQ.Q;
4275+
nan_s = std::isnan(s0) || std::isnan(s1);
4276+
nan_t = std::isnan(t0) || std::isnan(t1);
4277+
huge_s_pos = s0 > huge || s1 > huge;
4278+
huge_s_neg = s0 < -huge || s1 < -huge;
4279+
huge_t_pos = t0 > huge || t1 > huge;
4280+
huge_t_neg = t0 < -huge || t1 < -huge;
4281+
}
4282+
else
4283+
{
4284+
for (u32 j = 0; j < n; j++)
4285+
{
4286+
const float s = vertex[index[i + j]].ST.S / vertex[index[i + j]].RGBAQ.Q;
4287+
const float t = vertex[index[i + j]].ST.T / vertex[index[i + j]].RGBAQ.Q;
4288+
nan_s |= std::isnan(s);
4289+
nan_t |= std::isnan(t);
4290+
huge_s_pos |= s > huge;
4291+
huge_t_pos |= t > huge;
4292+
huge_s_neg |= s < -huge;
4293+
huge_t_neg |= t < -huge;
4294+
}
4295+
}
4296+
4297+
// ambiguous = true would probably result in NaN in the SW rasterizer or something undefined in HW.
4298+
// PS2 does not have NaN so there is no really accurate way to emulate this.
4299+
// huge = true and ambiguous = false seems to have well-defined behavior on the PS2:
4300+
// it clamps huge values to +/-2047 in UV coordinates space. We try to approximate this by
4301+
// giving ST the values that would result in exactly +/-2047 across the primitive.
4302+
const bool ambiguous = nan_s || nan_t || (huge_s_pos && huge_s_neg) || (huge_s_pos && huge_s_neg);
4303+
const bool huge = huge_s_pos || huge_t_pos || huge_s_neg || huge_t_neg;
4304+
4305+
if (ambiguous)
4306+
{
4307+
// Cull the primitive by not saving the indices
4308+
continue;
4309+
}
4310+
4311+
if (huge)
4312+
{
4313+
// Add new vertices to replace the primitive with another primitive with clamped values.
4314+
new_prims = true;
4315+
4316+
if (sprite)
4317+
{
4318+
// Handle sprite separately since it uses the second Q for both vertices
4319+
GSVertex v_new0 = vertex[index[i + 0]];
4320+
GSVertex v_new1 = vertex[index[i + 1]];
4321+
4322+
// Try to set values so that we get constant UV +/-2047 across the entire triangle after interpolation
4323+
// Sprites behave as if both Qs are same as the second one
4324+
if (huge_s_pos)
4325+
{
4326+
v_new1.ST.S = v_new0.ST.S = 2047.0f * v_new1.RGBAQ.Q / tex_width;
4327+
}
4328+
else if (huge_s_neg)
4329+
{
4330+
v_new1.ST.S = v_new0.ST.S = -2047.0f * v_new1.RGBAQ.Q / tex_width;
4331+
}
4332+
4333+
if (huge_t_pos)
4334+
{
4335+
v_new1.ST.T = v_new0.ST.T = 2047.0f * v_new1.RGBAQ.Q / tex_height;
4336+
}
4337+
else if (huge_t_neg)
4338+
{
4339+
v_new1.ST.T = v_new0.ST.T = -2047.0f * v_new1.RGBAQ.Q / tex_height;
4340+
}
4341+
4342+
// Copy old values to tail of vertex buffer.
4343+
// The vertex buffer is allocated so that there is always at least room for 3 new vertices at the end.
4344+
vertex[m_vertex.tail + 0] = v_new0;
4345+
vertex[m_vertex.tail + 1] = v_new1;
4346+
4347+
// Make new indices point to new vertices
4348+
index[new_index_tail + 0] = m_vertex.tail + 0;
4349+
index[new_index_tail + 1] = m_vertex.tail + 1;
4350+
}
4351+
else
4352+
{
4353+
// Copy old values to tail of vertex buffer.
4354+
// The vertex buffer is allocated so that there is always at least room for 3 new vertices at the end.
4355+
for (u32 j = 0; j < n; j++)
4356+
vertex[m_vertex.tail + j] = vertex[index[i + j]];
4357+
4358+
// Try to set values so that we get constant UV +/-2047 across the entire primitive after interpolation
4359+
if (huge_s_pos)
4360+
{
4361+
for (u32 j = 0; j < n; j++)
4362+
vertex[m_vertex.tail + j].ST.S = 2047.0f * vertex[m_vertex.tail + j].RGBAQ.Q / tex_width;
4363+
}
4364+
else if (huge_s_neg)
4365+
{
4366+
for (u32 j = 0; j < n; j++)
4367+
vertex[m_vertex.tail + j].ST.S = -2047.0f * vertex[m_vertex.tail + j].RGBAQ.Q / tex_width;
4368+
}
4369+
4370+
if (huge_t_pos)
4371+
{
4372+
for (int j = 0; j < n; j++)
4373+
vertex[m_vertex.tail + j].ST.T = 2047.0f * vertex[m_vertex.tail + j].RGBAQ.Q / tex_height;
4374+
}
4375+
else if (huge_t_neg)
4376+
{
4377+
for (u32 j = 0; j < n; j++)
4378+
vertex[m_vertex.tail + j].ST.T = -2047.0f * vertex[m_vertex.tail + j].RGBAQ.Q / tex_height;
4379+
}
4380+
4381+
// Make new indices point to new vertices
4382+
for (u32 j = 0; j < n; j++)
4383+
{
4384+
index[new_index_tail + j] = m_vertex.tail + j;
4385+
}
4386+
}
4387+
4388+
// Advance tail since we pushed new vertices
4389+
m_vertex.tail += n;
4390+
4391+
if (m_vertex.tail >= m_vertex.maxcount)
4392+
{
4393+
GrowVertexBuffer();
4394+
}
4395+
}
4396+
else if (new_index_tail < i) // If new_index_tail == i, don't update indices since no primitives have been culled
4397+
{
4398+
// Keep the same primitive so shift indices down
4399+
for (u32 j = 0; j < n; j++)
4400+
index[new_index_tail + j] = index[i + j];
4401+
}
4402+
4403+
new_index_tail += n;
4404+
}
4405+
4406+
m_index.tail = new_index_tail;
4407+
4408+
if (new_prims)
4409+
{
4410+
// We indexed new primitives at the end of the buffer so update head and next also
4411+
m_vertex.head = m_vertex.next = m_vertex.tail;
4412+
}
4413+
}
4414+
41914415
void GSState::CalcAlphaMinMax(const int tex_alpha_min, const int tex_alpha_max)
41924416
{
41934417
if (m_vt.m_alpha.valid && tex_alpha_min == 0 && tex_alpha_max == 255)

pcsx2/GS/GSState.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,9 @@ class GSState : public GSAlignedClass<32>
205205
bool IsCoverageAlpha();
206206
void CalcAlphaMinMax(const int tex_min, const int tex_max);
207207
void CorrectATEAlphaMinMax(const u32 atst, const int aref);
208+
void RoundSTCoords();
209+
void FixHugeSTCoords();
210+
template <u32 n, bool sprite> void FixHugeSTCoordsImpl();
208211

209212
public:
210213
struct GSUploadQueue

pcsx2/GS/GSVector4.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,16 @@ class alignas(16) GSVector4
267267
return round<Round_PosInf>();
268268
}
269269

270+
// Per-lane mask from an ordered self-compare: a lane is set where the value is not NaN.
__forceinline GSVector4 notnan() const
{
	const __m128 ordered = _mm_cmpord_ps(m, m);
	return GSVector4(ordered);
}
274+
275+
// Per-lane mask from an unordered self-compare: a lane is set where the value is NaN.
__forceinline GSVector4 isnan() const
{
	const __m128 unordered = _mm_cmpunord_ps(m, m);
	return GSVector4(unordered);
}
279+
270280
// http://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html
271281

272282
#define LOG_POLY0(x, c0) GSVector4(c0)
@@ -656,6 +666,11 @@ class alignas(16) GSVector4
656666
return neg();
657667
}
658668

669+
// Bitwise complement: reinterpret as GSVector4i, invert there, and reinterpret back.
__forceinline GSVector4 operator~() const
{
	const GSVector4i as_int = GSVector4i::cast(*this);
	return cast(~as_int);
}
673+
659674
__forceinline void operator+=(const GSVector4& v)
660675
{
661676
m = _mm_add_ps(m, v);

0 commit comments

Comments
 (0)