|
107 | 107 | #define SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1))
|
108 | 108 | #define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED)
|
109 | 109 |
|
| 110 | +// If this is true, shuffle is simply swapping g/a or r/b within a single pass, by doing two 16<->32 bit reinterprets on read and write. |
| 111 | +// Note that we might also want to accept "PS_TFX == 0" if the whole pass vertex color is 128, but that's not necessary until proven otherwise. |
| 112 | +// Other types of blends might also be active without influencing the output (e.g. different blend flags, or fixed alpha at 128 (neutral)), but we can't easily account for them all. The shared prerequisites are parenthesized separately from the SW_BLEND selection because "?:" binds more loosely than "&&". |
| 113 | +#define SHUFFLE_TEX_PASSTHROUGH ((PS_FOG == 0 && PS_TFX == 1 && PS_TCC == 0 && PS_FIXED_ONE_A == 0) && (SW_BLEND ? (PS_BLEND_A == PS_BLEND_B && PS_BLEND_D == 0) : (PS_BLEND_MIX == 0 && PS_BLEND_HW == 0))) |
| 114 | +#define SHUFFLE_RT_PASSTHROUGH (SW_BLEND && PS_BLEND_A == PS_BLEND_B && PS_BLEND_D == 1) |
| 115 | + |
110 | 116 | #define FLT_MIN asfloat(0x00800000) //1.175494351e-38f
|
111 | 117 | #define FLT_MAX asfloat(0x7F7FFFFF) //3.402823466e+38f
|
112 | 118 |
|
@@ -1017,6 +1023,10 @@ float4 fog(float4 c, float f)
|
1017 | 1023 | return c;
|
1018 | 1024 | }
|
1019 | 1025 |
|
| 1026 | +// Texture color (stored by ps_color) and render target color (stored by ps_blend), captured before the shuffle code runs so ps_main can read the unshuffled channels. In 0-255 range |
| 1027 | +static float4 pre_shuffle_c = 0.f; |
| 1028 | +static float4 pre_shuffle_rt = 0.f; |
| 1029 | + |
1020 | 1030 | float4 ps_color(PS_INPUT input)
|
1021 | 1031 | {
|
1022 | 1032 | #if PS_FST == 0
|
@@ -1045,6 +1055,8 @@ float4 ps_color(PS_INPUT input)
|
1045 | 1055 | float4 T = sample_color(st, input.t.w);
|
1046 | 1056 | #endif
|
1047 | 1057 |
|
| 1058 | + pre_shuffle_c = T; |
| 1059 | + |
1048 | 1060 | if (PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE))
|
1049 | 1061 | {
|
1050 | 1062 | T = shuffle(T, true);
|
@@ -1205,6 +1217,7 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy)
|
1205 | 1217 | }
|
1206 | 1218 |
|
1207 | 1219 | float4 RT = SW_BLEND_NEEDS_RT ? DecodeTex(RtTexture.Load(int3(pos_xy, 0))) : (float4)0.0f;
|
| 1220 | + pre_shuffle_rt = RT; |
1208 | 1221 |
|
1209 | 1222 | if (PS_SHUFFLE && SW_BLEND_NEEDS_RT)
|
1210 | 1223 | {
|
@@ -1495,10 +1508,39 @@ PS_OUTPUT ps_main(PS_INPUT input)
|
1495 | 1508 | C.ga = C.gg;
|
1496 | 1509 | }
|
1497 | 1510 |
|
1498 |
| - if (PS_HDR > 1) // Avoid alpha ever going beyond two in HDR (if its value came from another HDR channel) |
| 1511 | + bool clamp_alpha = true; |
| 1512 | + |
| 1513 | + // If we have shuffles in read and write, we are essentially doing two 16<->32 bit reinterprets and swapping channels. |
| 1514 | + // In the SDR code, to emulate real HW, this clipped all values beyond 255. For HDR, we want to preserve the original value without clipping it |
| 1515 | + // (e.g. GoW stores the scene green color in the alpha channel during shadow calculations, to later restore it) |
| 1516 | +#if PS_HDR > 1 //TODO... |
| 1517 | +#if SHUFFLE_TEX_PASSTHROUGH |
| 1518 | +#if PS_PROCESS_BA |
| 1519 | + C.a = pre_shuffle_c.g; |
| 1520 | +#else |
| 1521 | + C.g = pre_shuffle_c.a; |
| 1522 | +#endif |
| 1523 | + //TODO: find which channels to restore based on "PS_PROCESS_BA" etc; until then both channels are restored unconditionally below, which overrides the PS_PROCESS_BA selection above |
| 1524 | + C.a = pre_shuffle_c.g; |
| 1525 | + C.g = pre_shuffle_c.a; |
| 1526 | + //C.a = 0; |
| 1527 | + clamp_alpha = false; |
| 1528 | +#elif SHUFFLE_RT_PASSTHROUGH |
| 1529 | +#if PS_PROCESS_BA |
| 1530 | + C.a = pre_shuffle_rt.g; |
| 1531 | +#else |
| 1532 | + C.g = pre_shuffle_rt.a; |
| 1533 | +#endif |
| 1534 | + clamp_alpha = false; |
| 1535 | +#endif |
| 1536 | +#endif |
| 1537 | + |
| 1538 | +#if PS_HDR > 1 // Avoid alpha ever going beyond two in HDR (if its value came from another HDR channel) |
| 1539 | + if (clamp_alpha) |
1499 | 1540 | {
|
1500 | 1541 | C.a = min(C.a, 255.0f);
|
1501 | 1542 | }
|
| 1543 | +#endif |
1502 | 1544 | }
|
1503 | 1545 | }
|
1504 | 1546 |
|
|
0 commit comments