@@ -1629,13 +1629,6 @@ inline bool GSState::TestDrawChanged()
1629
1629
return false ;
1630
1630
}
1631
1631
1632
- u32 GSState::CalcMask (int exp, int max_exp)
1633
- {
1634
- const int amount = 9 + (max_exp - exp);
1635
-
1636
- return (1 << std::min (amount, 23 )) - 1 ;
1637
- }
1638
-
1639
1632
void GSState::FlushPrim ()
1640
1633
{
1641
1634
if (m_index.tail > 0 )
@@ -1710,50 +1703,19 @@ void GSState::FlushPrim()
1710
1703
#endif
1711
1704
// Update scissor, it may have been modified by a previous draw
1712
1705
m_env.CTXT [PRIM->CTXT ].UpdateScissor ();
1706
+
1713
1707
m_vt.Update (m_vertex.buff , m_index.buff , m_vertex.tail , m_index.tail , GSUtil::GetPrimClass (PRIM->PRIM ));
1708
+
1709
+ // Fix huge or nan ST coordinates
1710
+ if (PRIM->TME && !PRIM->FST )
1711
+ {
1712
+ FixHugeSTCoords ();
1713
+ }
1714
1714
1715
- // Texel coordinate rounding
1716
- // Helps Manhunt (lights shining through objects).
1717
- // Can help with some alignment issues when upscaling too, and is for both Software and Hardware renderers.
1718
- // Sometimes hardware doesn't get affected, likely due to the difference in how GPU's handle textures (Persona minimap).
1719
- if (PRIM->TME && (GSUtil::GetPrimClass (PRIM->PRIM ) == GS_PRIM_CLASS::GS_SPRITE_CLASS || m_vt.m_eq .z ))
1715
+ // Round fractional parts of ST coords
1716
+ if (PRIM->TME && !PRIM->FST && (GSUtil::GetPrimClass (PRIM->PRIM ) == GS_PRIM_CLASS::GS_SPRITE_CLASS || m_vt.m_eq .z ))
1720
1717
{
1721
- if (!PRIM->FST ) // STQ's
1722
- {
1723
- const bool is_sprite = GSUtil::GetPrimClass (PRIM->PRIM ) == GS_PRIM_CLASS::GS_SPRITE_CLASS;
1724
- // ST's have the lowest 9 bits (or greater depending on exponent difference) rounding down (from hardware tests).
1725
- for (int i = m_index.tail - 1 ; i >= 0 ; i--)
1726
- {
1727
- GSVertex* v = &m_vertex.buff [m_index.buff [i]];
1728
-
1729
- // Only Q on the second vertex is valid
1730
- if (!(i & 1 ) && is_sprite)
1731
- v->RGBAQ .Q = m_vertex.buff [m_index.buff [i + 1 ]].RGBAQ .Q ;
1732
-
1733
- int T = std::bit_cast<int >(v->ST .T );
1734
- int Q = std::bit_cast<int >(v->RGBAQ .Q );
1735
- int S = std::bit_cast<int >(v->ST .S );
1736
- const int expS = (S >> 23 ) & 0xff ;
1737
- const int expT = (T >> 23 ) & 0xff ;
1738
- const int expQ = (Q >> 23 ) & 0xff ;
1739
- int max_exp = std::max (expS, expQ);
1740
-
1741
- u32 mask = CalcMask (expS, max_exp);
1742
- S &= ~mask;
1743
- v->ST .S = std::bit_cast<float >(S);
1744
- max_exp = std::max (expT, expQ);
1745
- mask = CalcMask (expT, max_exp);
1746
- T &= ~mask;
1747
- v->ST .T = std::bit_cast<float >(T);
1748
- Q &= ~0xff ;
1749
-
1750
- if (!is_sprite || (i & 1 ))
1751
- v->RGBAQ .Q = std::bit_cast<float >(Q);
1752
-
1753
- m_vt.m_min .t .x = std::min (m_vt.m_min .t .x , (v->ST .S / v->RGBAQ .Q ) * (1 << m_context->TEX0 .TW ));
1754
- m_vt.m_min .t .y = std::min (m_vt.m_min .t .y , (v->ST .T / v->RGBAQ .Q ) * (1 << m_context->TEX0 .TH ));
1755
- }
1756
- }
1718
+ RoundSTCoords ();
1757
1719
}
1758
1720
1759
1721
// Skip draw if Z test is enabled, but set to fail all pixels.
@@ -3978,8 +3940,8 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL
3978
3940
3979
3941
u8 uses_border = 0 ;
3980
3942
3981
- if (m_vt.m_max .t .x >= FLT_MAX || m_vt.m_min .t .x <= -FLT_MAX ||
3982
- m_vt.m_max .t .y >= FLT_MAX || m_vt.m_min .t .y <= -FLT_MAX )
3943
+ if (m_vt.m_max .t .x >= 2047 . 0f || m_vt.m_min .t .x <= -2047 . 0f ||
3944
+ m_vt.m_max .t .y >= 2047 . 0f || m_vt.m_min .t .y <= -2047 . 0f )
3983
3945
{
3984
3946
// If any of the min/max values reach the +-2047 coordinate limit we can't rely on them
3985
3947
// so just assume full texture.
@@ -4188,6 +4150,268 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL
4188
4150
return { vr, uses_border };
4189
4151
}
4190
4152
4153
+ // ST coordinate rounding
4154
+ // Helps Manhunt (lights shining through objects).
4155
+ // Can help with some alignment issues when upscaling too, and is for both Software and Hardware renderers.
4156
+ // Sometimes hardware doesn't get affected, likely due to the difference in how GPU's handle textures (Persona minimap).
4157
+ void GSState::RoundSTCoords ()
4158
+ {
4159
+ const bool is_sprite = GSUtil::GetPrimClass (PRIM->PRIM ) == GS_PRIM_CLASS::GS_SPRITE_CLASS;
4160
+
4161
+ // ST's have the lowest 9 bits (or greater depending on exponent difference) rounded down (from hardware tests).
4162
+ // This gives the bitmask for the lower 9 (or more) bits.
4163
+ auto LowerBitsMask = [](int exp, int max_exp)
4164
+ {
4165
+ const int amount = 9 + (max_exp - exp);
4166
+ return (1 << std::min (amount, 23 )) - 1 ;
4167
+ };
4168
+
4169
+ for (int i = m_index.tail - 1 ; i >= 0 ; i--)
4170
+ {
4171
+ GSVertex* v = &m_vertex.buff [m_index.buff [i]];
4172
+
4173
+ // Only Q on the second vertex is valid
4174
+ if (!(i & 1 ) && is_sprite)
4175
+ v->RGBAQ .Q = m_vertex.buff [m_index.buff [i + 1 ]].RGBAQ .Q ;
4176
+
4177
+ int S = std::bit_cast<int >(v->ST .S );
4178
+ int T = std::bit_cast<int >(v->ST .T );
4179
+ int Q = std::bit_cast<int >(v->RGBAQ .Q );
4180
+
4181
+ const int expS = (S >> 23 ) & 0xff ;
4182
+ const int expT = (T >> 23 ) & 0xff ;
4183
+ const int expQ = (Q >> 23 ) & 0xff ;
4184
+
4185
+ S &= ~LowerBitsMask (expS, std::max(expS, expQ));
4186
+ T &= ~LowerBitsMask (expT, std::max(expT, expQ));
4187
+ Q &= ~0xff ; // Q gets truncated less than ST by hardware tests
4188
+
4189
+ v->ST .S = std::bit_cast<float >(S);
4190
+ v->ST .T = std::bit_cast<float >(T);
4191
+
4192
+ if (!is_sprite || (i & 1 ))
4193
+ v->RGBAQ .Q = std::bit_cast<float >(Q);
4194
+
4195
+ const float U = (v->ST .S / v->RGBAQ .Q ) * (1 << m_context->TEX0 .TW );
4196
+ const float V = (v->ST .T / v->RGBAQ .Q ) * (1 << m_context->TEX0 .TH );
4197
+ const float Qf = std::bit_cast<float >(Q);
4198
+
4199
+ const GSVector4 uvq (U, V, Qf, Qf);
4200
+
4201
+ // Do min/max with only those values that are not NaN
4202
+ m_vt.m_min .t = m_vt.m_min .t .blend32 (m_vt.m_min .t .min (uvq), uvq.notnan ());
4203
+ m_vt.m_max .t = m_vt.m_max .t .blend32 (m_vt.m_max .t .max (uvq), uvq.notnan ());
4204
+ }
4205
+
4206
+ // Clamp the min/max UV values to the min/max valid UV values.
4207
+ m_vt.m_min .t = m_vt.m_min .t .min (GSVector4 (2047 .0f )).max (GSVector4 (-2047 .0f )).xyzw (m_vt.m_min .t );
4208
+ m_vt.m_max .t = m_vt.m_max .t .min (GSVector4 (2047 .0f )).max (GSVector4 (-2047 .0f )).xyzw (m_vt.m_max .t );
4209
+ }
4210
+
4211
+ // Handle the huge ST coords in by culling primitives with NaN coords and
4212
+ // replacing the primitives with huge coords with a new one that has the huge coordinate replaced with +/- 2047.
4213
+ // This is based on hardware test that show that seem to show that ST coordinate get clamped to +/- 2047
4214
+ // (perhaps before applying repeat or region repeat).
4215
+ // Note that the huge texture coords may be a symptom of floating point issues upstream in the EE and
4216
+ // it would be better to have them fixed there; this is a bandaid.
4217
+ void GSState::FixHugeSTCoords ()
4218
+ {
4219
+ bool sprite = GSUtil::GetPrimClass (PRIM->PRIM ) == GS_SPRITE_CLASS;
4220
+ switch (GSUtil::GetClassVertexCount (GSUtil::GetPrimClass (PRIM->PRIM )))
4221
+ {
4222
+ case 1 :
4223
+ if (sprite)
4224
+ FixHugeSTCoordsImpl<1 , true >();
4225
+ else
4226
+ FixHugeSTCoordsImpl<1 , false >();
4227
+ break ;
4228
+ case 2 :
4229
+ if (sprite)
4230
+ FixHugeSTCoordsImpl<2 , true >();
4231
+ else
4232
+ FixHugeSTCoordsImpl<2 , false >();
4233
+ break ;
4234
+ case 3 :
4235
+ if (sprite)
4236
+ FixHugeSTCoordsImpl<3 , true >();
4237
+ else
4238
+ FixHugeSTCoordsImpl<3 , false >();
4239
+ break ;
4240
+ default :
4241
+ pxFail (" Impossible" );
4242
+ }
4243
+ }
4244
+
4245
+ template <u32 n, bool sprite> void GSState::FixHugeSTCoordsImpl ()
4246
+ {
4247
+ GSVertex* const vertex = m_vertex.buff ;
4248
+ u16 * const index = m_index.buff ;
4249
+
4250
+ u32 new_index_tail = 0 ;
4251
+
4252
+ constexpr float huge = 1e10f; // arbitrary large value
4253
+
4254
+ const float tex_width = 1 << m_context->TEX0 .TW ;
4255
+ const float tex_height = 1 << m_context->TEX0 .TH ;
4256
+
4257
+ bool new_prims = false ; // Did we generate new primitives?
4258
+
4259
+ for (u32 i = 0 ; i < m_index.tail ; i += n)
4260
+ {
4261
+ bool nan_s = false ;
4262
+ bool nan_t = false ;
4263
+ bool huge_s_pos = false ;
4264
+ bool huge_s_neg = false ;
4265
+ bool huge_t_pos = false ;
4266
+ bool huge_t_neg = false ;
4267
+
4268
+ if (sprite)
4269
+ {
4270
+ // Sprites behave as if both Qs are same as the second one
4271
+ const float s0 = vertex[index[i + 0 ]].ST .S / vertex[index[i + 1 ]].RGBAQ .Q ;
4272
+ const float t0 = vertex[index[i + 0 ]].ST .T / vertex[index[i + 1 ]].RGBAQ .Q ;
4273
+ const float s1 = vertex[index[i + 1 ]].ST .S / vertex[index[i + 1 ]].RGBAQ .Q ;
4274
+ const float t1 = vertex[index[i + 1 ]].ST .T / vertex[index[i + 1 ]].RGBAQ .Q ;
4275
+ nan_s = std::isnan (s0) || std::isnan (s1);
4276
+ nan_t = std::isnan (t0) || std::isnan (t1);
4277
+ huge_s_pos = s0 > huge || s1 > huge;
4278
+ huge_s_neg = s0 < -huge || s1 < -huge;
4279
+ huge_t_pos = t0 > huge || t1 > huge;
4280
+ huge_t_neg = t0 < -huge || t1 < -huge;
4281
+ }
4282
+ else
4283
+ {
4284
+ for (u32 j = 0 ; j < n; j++)
4285
+ {
4286
+ const float s = vertex[index[i + j]].ST .S / vertex[index[i + j]].RGBAQ .Q ;
4287
+ const float t = vertex[index[i + j]].ST .T / vertex[index[i + j]].RGBAQ .Q ;
4288
+ nan_s |= std::isnan (s);
4289
+ nan_t |= std::isnan (t);
4290
+ huge_s_pos |= s > huge;
4291
+ huge_t_pos |= t > huge;
4292
+ huge_s_neg |= s < -huge;
4293
+ huge_t_neg |= t < -huge;
4294
+ }
4295
+ }
4296
+
4297
+ // ambiguous = true would probably result in NaN in the SW rasterizer or something undefined in HW.
4298
+ // PS2 does not have NaN so there is no really accurate way to emulate this.
4299
+ // huge = true and ambiguous = false seems to have well-defined behavior on the PS2:
4300
+ // it clamps huge values to +/-2047 in UV coordinates space. We try to approximate this by
4301
+ // giving ST the values that would result in exactly +/-2047 across the primitive.
4302
+ const bool ambiguous = nan_s || nan_t || (huge_s_pos && huge_s_neg) || (huge_s_pos && huge_s_neg);
4303
+ const bool huge = huge_s_pos || huge_t_pos || huge_s_neg || huge_t_neg;
4304
+
4305
+ if (ambiguous)
4306
+ {
4307
+ // Cull the primitive by not saving the indices
4308
+ continue ;
4309
+ }
4310
+
4311
+ if (huge)
4312
+ {
4313
+ // Add new vertices to replace the primitive with another primitive with clamped values.
4314
+ new_prims = true ;
4315
+
4316
+ if (sprite)
4317
+ {
4318
+ // Handle sprite separately since it uses the second Q for both vertices
4319
+ GSVertex v_new0 = vertex[index[i + 0 ]];
4320
+ GSVertex v_new1 = vertex[index[i + 1 ]];
4321
+
4322
+ // Try to set values so that we get constant UV +/-2047 across the entire triangle after interpolation
4323
+ // Sprites behave as if both Qs are same as the second one
4324
+ if (huge_s_pos)
4325
+ {
4326
+ v_new1.ST .S = v_new0.ST .S = 2047 .0f * v_new1.RGBAQ .Q / tex_width;
4327
+ }
4328
+ else if (huge_s_neg)
4329
+ {
4330
+ v_new1.ST .S = v_new0.ST .S = -2047 .0f * v_new1.RGBAQ .Q / tex_width;
4331
+ }
4332
+
4333
+ if (huge_t_pos)
4334
+ {
4335
+ v_new1.ST .T = v_new0.ST .T = 2047 .0f * v_new1.RGBAQ .Q / tex_height;
4336
+ }
4337
+ else if (huge_t_neg)
4338
+ {
4339
+ v_new1.ST .T = v_new0.ST .T = -2047 .0f * v_new1.RGBAQ .Q / tex_height;
4340
+ }
4341
+
4342
+ // Copy old values to tail of vertex buffer.
4343
+ // The vertex buffer is allocated so that there is always at least room for 3 new vertices at the end.
4344
+ vertex[m_vertex.tail + 0 ] = v_new0;
4345
+ vertex[m_vertex.tail + 1 ] = v_new1;
4346
+
4347
+ // Make new indices point to new vertices
4348
+ index[new_index_tail + 0 ] = m_vertex.tail + 0 ;
4349
+ index[new_index_tail + 1 ] = m_vertex.tail + 1 ;
4350
+ }
4351
+ else
4352
+ {
4353
+ // Copy old values to tail of vertex buffer.
4354
+ // The vertex buffer is allocated so that there is always at least room for 3 new vertices at the end.
4355
+ for (u32 j = 0 ; j < n; j++)
4356
+ vertex[m_vertex.tail + j] = vertex[index[i + j]];
4357
+
4358
+ // Try to set values so that we get constant UV +/-2047 across the entire primitive after interpolation
4359
+ if (huge_s_pos)
4360
+ {
4361
+ for (u32 j = 0 ; j < n; j++)
4362
+ vertex[m_vertex.tail + j].ST .S = 2047 .0f * vertex[m_vertex.tail + j].RGBAQ .Q / tex_width;
4363
+ }
4364
+ else if (huge_s_neg)
4365
+ {
4366
+ for (u32 j = 0 ; j < n; j++)
4367
+ vertex[m_vertex.tail + j].ST .S = -2047 .0f * vertex[m_vertex.tail + j].RGBAQ .Q / tex_width;
4368
+ }
4369
+
4370
+ if (huge_t_pos)
4371
+ {
4372
+ for (int j = 0 ; j < n; j++)
4373
+ vertex[m_vertex.tail + j].ST .T = 2047 .0f * vertex[m_vertex.tail + j].RGBAQ .Q / tex_height;
4374
+ }
4375
+ else if (huge_t_neg)
4376
+ {
4377
+ for (u32 j = 0 ; j < n; j++)
4378
+ vertex[m_vertex.tail + j].ST .T = -2047 .0f * vertex[m_vertex.tail + j].RGBAQ .Q / tex_height;
4379
+ }
4380
+
4381
+ // Make new indices point to new vertices
4382
+ for (u32 j = 0 ; j < n; j++)
4383
+ {
4384
+ index[new_index_tail + j] = m_vertex.tail + j;
4385
+ }
4386
+ }
4387
+
4388
+ // Advance tail since we pushed new vertices
4389
+ m_vertex.tail += n;
4390
+
4391
+ if (m_vertex.tail >= m_vertex.maxcount )
4392
+ {
4393
+ GrowVertexBuffer ();
4394
+ }
4395
+ }
4396
+ else if (new_index_tail < i) // If new_index_tail == i, don't update indices since no primitives have been culled
4397
+ {
4398
+ // Keep the same primitive so shift indices down
4399
+ for (u32 j = 0 ; j < n; j++)
4400
+ index[new_index_tail + j] = index[i + j];
4401
+ }
4402
+
4403
+ new_index_tail += n;
4404
+ }
4405
+
4406
+ m_index.tail = new_index_tail;
4407
+
4408
+ if (new_prims)
4409
+ {
4410
+ // We indexed new primitives at the end of the buffer so update head and next also
4411
+ m_vertex.head = m_vertex.next = m_vertex.tail ;
4412
+ }
4413
+ }
4414
+
4191
4415
void GSState::CalcAlphaMinMax (const int tex_alpha_min, const int tex_alpha_max)
4192
4416
{
4193
4417
if (m_vt.m_alpha .valid && tex_alpha_min == 0 && tex_alpha_max == 255 )
0 commit comments